summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/r300/r300_state.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/r300/r300_state.c')
-rw-r--r--src/mesa/drivers/dri/r300/r300_state.c849
1 files changed, 635 insertions, 214 deletions
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index e11b5af..550f710 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -60,7 +60,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r300_state.h"
#include "r300_reg.h"
#include "r300_emit.h"
-#include "r300_fragprog.h"
#include "r300_tex.h"
#include "drirenderbuffer.h"
@@ -189,7 +188,7 @@ static void r300SetBlendCntl(r300ContextPtr r300, int func, int eqn,
*/
#if 0
if (new_ablend == new_cblend) {
- new_cblend |= R300_BLEND_NO_SEPARATE;
+ new_cblend |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0;
}
#endif
new_cblend |= cbits;
@@ -295,7 +294,9 @@ static void r300SetBlendState(GLcontext * ctx)
r300SetBlendCntl(r300,
func, eqn,
- R300_BLEND_UNKNOWN | R300_BLEND_ENABLE, funcA, eqnA);
+ (R300_SEPARATE_ALPHA_ENABLE |
+ R300_READ_ENABLE |
+ R300_ALPHA_BLEND_ENABLE), funcA, eqnA);
}
static void r300BlendEquationSeparate(GLcontext * ctx,
@@ -401,42 +402,40 @@ static void r300SetPolygonOffsetState(GLcontext * ctx, GLboolean state)
}
}
-static void r300SetEarlyZState(GLcontext * ctx)
+static GLboolean current_fragment_program_writes_depth(GLcontext* ctx)
{
- /* updates register R300_RB3D_EARLY_Z (0x4F14)
- if depth test is not enabled it should be R300_EARLY_Z_DISABLE
- if depth is enabled and alpha not it should be R300_EARLY_Z_ENABLE
- if depth and alpha is enabled it should be R300_EARLY_Z_DISABLE
- */
r300ContextPtr r300 = R300_CONTEXT(ctx);
- R300_STATECHANGE(r300, zstencil_format);
- switch (ctx->Visual.depthBits) {
- case 16:
- r300->hw.zstencil_format.cmd[1] = ZB_FORMAR_DEPTHFORMAT_16BIT_INT_Z;
- break;
- case 24:
- r300->hw.zstencil_format.cmd[1] = ZB_FORMAR_DEPTHFORMAT_24BIT_INT_Z;
- break;
- default:
- fprintf(stderr, "Error: Unsupported depth %d... exiting\n", ctx->Visual.depthBits);
- _mesa_exit(-1);
+ if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) {
+ struct r300_fragment_program *fp = (struct r300_fragment_program *)
+ (char *)ctx->FragmentProgram._Current;
+ return (fp && fp->WritesDepth);
+ } else {
+ struct r500_fragment_program* fp =
+ (struct r500_fragment_program*)(char*)
+ ctx->FragmentProgram._Current;
+ return (fp && fp->writes_depth);
}
+}
+
+static void r300SetEarlyZState(GLcontext * ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ GLuint topZ = R300_ZTOP_ENABLE;
if (ctx->Color.AlphaEnabled && ctx->Color.AlphaFunc != GL_ALWAYS)
- /* disable early Z */
- r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE;
- else {
- if (ctx->Depth.Test && ctx->Depth.Func != GL_NEVER)
- /* enable early Z */
- r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_ENABLE;
- else
- /* disable early Z */
- r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE;
+ topZ = R300_ZTOP_DISABLE;
+ if (current_fragment_program_writes_depth(ctx))
+ topZ = R300_ZTOP_DISABLE;
+
+ if (topZ != r300->hw.zstencil_format.cmd[2]) {
+ /* Note: This completely reemits the stencil format.
+ * I have not tested whether this is strictly necessary,
+ * or if emitting a write to ZB_ZTOP is enough.
+ */
+ R300_STATECHANGE(r300, zstencil_format);
+ r300->hw.zstencil_format.cmd[2] = topZ;
}
-
- r300->hw.zstencil_format.cmd[3] = 0x00000003;
- r300->hw.zstencil_format.cmd[4] = 0x00000000;
}
static void r300SetAlphaState(GLcontext * ctx)
@@ -450,25 +449,25 @@ static void r300SetAlphaState(GLcontext * ctx)
switch (ctx->Color.AlphaFunc) {
case GL_NEVER:
- pp_misc |= FG_ALPHA_FUNC_NEVER;
+ pp_misc |= R300_FG_ALPHA_FUNC_NEVER;
break;
case GL_LESS:
- pp_misc |= FG_ALPHA_FUNC_LESS;
+ pp_misc |= R300_FG_ALPHA_FUNC_LESS;
break;
case GL_EQUAL:
- pp_misc |= FG_ALPHA_FUNC_EQUAL;
+ pp_misc |= R300_FG_ALPHA_FUNC_EQUAL;
break;
case GL_LEQUAL:
- pp_misc |= FG_ALPHA_FUNC_LE;
+ pp_misc |= R300_FG_ALPHA_FUNC_LE;
break;
case GL_GREATER:
- pp_misc |= FG_ALPHA_FUNC_GREATER;
+ pp_misc |= R300_FG_ALPHA_FUNC_GREATER;
break;
case GL_NOTEQUAL:
- pp_misc |= FG_ALPHA_FUNC_NOTEQUAL;
+ pp_misc |= R300_FG_ALPHA_FUNC_NOTEQUAL;
break;
case GL_GEQUAL:
- pp_misc |= FG_ALPHA_FUNC_GE;
+ pp_misc |= R300_FG_ALPHA_FUNC_GE;
break;
case GL_ALWAYS:
/*pp_misc |= FG_ALPHA_FUNC_ALWAYS; */
@@ -477,8 +476,9 @@ static void r300SetAlphaState(GLcontext * ctx)
}
if (really_enabled) {
- pp_misc |= FG_ALPHA_FUNC_ENABLE;
- pp_misc |= (refByte & R300_REF_ALPHA_MASK);
+ pp_misc |= R300_FG_ALPHA_FUNC_ENABLE;
+ pp_misc |= R500_FG_ALPHA_FUNC_8BIT;
+ pp_misc |= (refByte & R300_FG_ALPHA_FUNC_VAL_MASK);
} else {
pp_misc = 0x0;
}
@@ -525,24 +525,24 @@ static void r300SetDepthState(GLcontext * ctx)
r300ContextPtr r300 = R300_CONTEXT(ctx);
R300_STATECHANGE(r300, zs);
- r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_RB3D_STENCIL_ENABLE;
+ r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_STENCIL_ENABLE; // XXX
r300->hw.zs.cmd[R300_ZS_CNTL_1] &=
- ~(R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT);
+ ~(R300_ZS_MASK << R300_Z_FUNC_SHIFT);
if (ctx->Depth.Test && ctx->Depth.Func != GL_NEVER) {
if (ctx->Depth.Mask)
r300->hw.zs.cmd[R300_ZS_CNTL_0] |=
- R300_RB3D_Z_TEST_AND_WRITE;
+ R300_Z_ENABLE | R300_Z_WRITE_ENABLE | R300_STENCIL_FRONT_BACK; // XXX
else
- r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_RB3D_Z_TEST;
+ r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_ENABLE | R300_STENCIL_FRONT_BACK; // XXX
r300->hw.zs.cmd[R300_ZS_CNTL_1] |=
translate_func(ctx->Depth.
- Func) << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
+ Func) << R300_Z_FUNC_SHIFT;
} else {
- r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_RB3D_Z_DISABLED_1;
+ r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_STENCIL_FRONT_BACK; // XXX
r300->hw.zs.cmd[R300_ZS_CNTL_1] |=
- translate_func(GL_NEVER) << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT;
+ translate_func(GL_NEVER) << R300_Z_FUNC_SHIFT;
}
r300SetEarlyZState(ctx);
@@ -556,10 +556,10 @@ static void r300SetStencilState(GLcontext * ctx, GLboolean state)
R300_STATECHANGE(r300, zs);
if (state) {
r300->hw.zs.cmd[R300_ZS_CNTL_0] |=
- R300_RB3D_STENCIL_ENABLE;
+ R300_STENCIL_ENABLE;
} else {
r300->hw.zs.cmd[R300_ZS_CNTL_0] &=
- ~R300_RB3D_STENCIL_ENABLE;
+ ~R300_STENCIL_ENABLE;
}
} else {
#if R200_MERGED
@@ -571,7 +571,7 @@ static void r300SetStencilState(GLcontext * ctx, GLboolean state)
static void r300UpdatePolygonMode(GLcontext * ctx)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
- uint32_t hw_mode = GA_POLY_MODE_DISABLE;
+ uint32_t hw_mode = R300_GA_POLY_MODE_DISABLE;
/* Only do something if a polygon mode is wanted, default is GL_FILL */
if (ctx->Polygon.FrontMode != GL_FILL ||
@@ -590,29 +590,29 @@ static void r300UpdatePolygonMode(GLcontext * ctx)
}
/* Enable polygon mode */
- hw_mode |= GA_POLY_MODE_DUAL;
+ hw_mode |= R300_GA_POLY_MODE_DUAL;
switch (f) {
case GL_LINE:
- hw_mode |= GA_POLY_MODE_FRONT_PTYPE_LINE;
+ hw_mode |= R300_GA_POLY_MODE_FRONT_PTYPE_LINE;
break;
case GL_POINT:
- hw_mode |= GA_POLY_MODE_FRONT_PTYPE_POINT;
+ hw_mode |= R300_GA_POLY_MODE_FRONT_PTYPE_POINT;
break;
case GL_FILL:
- hw_mode |= GA_POLY_MODE_FRONT_PTYPE_TRI;
+ hw_mode |= R300_GA_POLY_MODE_FRONT_PTYPE_TRI;
break;
}
switch (b) {
case GL_LINE:
- hw_mode |= GA_POLY_MODE_BACK_PTYPE_LINE;
+ hw_mode |= R300_GA_POLY_MODE_BACK_PTYPE_LINE;
break;
case GL_POINT:
- hw_mode |= GA_POLY_MODE_BACK_PTYPE_POINT;
+ hw_mode |= R300_GA_POLY_MODE_BACK_PTYPE_POINT;
break;
case GL_FILL:
- hw_mode |= GA_POLY_MODE_BACK_PTYPE_TRI;
+ hw_mode |= R300_GA_POLY_MODE_BACK_PTYPE_TRI;
break;
}
}
@@ -716,8 +716,8 @@ static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param)
R300_STATECHANGE(r300, fogs);
r300->hw.fogs.cmd[R300_FOGS_STATE] =
(r300->hw.fogs.
- cmd[R300_FOGS_STATE] & ~FG_FOG_BLEND_FN_MASK) |
- FG_FOG_BLEND_FN_LINEAR;
+ cmd[R300_FOGS_STATE] & ~R300_FG_FOG_BLEND_FN_MASK) |
+ R300_FG_FOG_BLEND_FN_LINEAR;
if (ctx->Fog.Start == ctx->Fog.End) {
fogScale.f = -1.0;
@@ -734,8 +734,8 @@ static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param)
R300_STATECHANGE(r300, fogs);
r300->hw.fogs.cmd[R300_FOGS_STATE] =
(r300->hw.fogs.
- cmd[R300_FOGS_STATE] & ~FG_FOG_BLEND_FN_MASK) |
- FG_FOG_BLEND_FN_EXP;
+ cmd[R300_FOGS_STATE] & ~R300_FG_FOG_BLEND_FN_MASK) |
+ R300_FG_FOG_BLEND_FN_EXP;
fogScale.f = 0.0933 * ctx->Fog.Density;
fogStart.f = 0.0;
break;
@@ -743,8 +743,8 @@ static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param)
R300_STATECHANGE(r300, fogs);
r300->hw.fogs.cmd[R300_FOGS_STATE] =
(r300->hw.fogs.
- cmd[R300_FOGS_STATE] & ~FG_FOG_BLEND_FN_MASK) |
- FG_FOG_BLEND_FN_EXP2;
+ cmd[R300_FOGS_STATE] & ~R300_FG_FOG_BLEND_FN_MASK) |
+ R300_FG_FOG_BLEND_FN_EXP2;
fogScale.f = 0.3 * ctx->Fog.Density;
fogStart.f = 0.0;
default:
@@ -808,7 +808,7 @@ static void r300SetFogState(GLcontext * ctx, GLboolean state)
R300_STATECHANGE(r300, fogs);
if (state) {
- r300->hw.fogs.cmd[R300_FOGS_STATE] |= FG_FOG_BLEND_ENABLE;
+ r300->hw.fogs.cmd[R300_FOGS_STATE] |= R300_FG_FOG_BLEND_ENABLE;
r300Fogfv(ctx, GL_FOG_MODE, NULL);
r300Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density);
@@ -816,7 +816,7 @@ static void r300SetFogState(GLcontext * ctx, GLboolean state)
r300Fogfv(ctx, GL_FOG_END, &ctx->Fog.End);
r300Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color);
} else {
- r300->hw.fogs.cmd[R300_FOGS_STATE] &= ~FG_FOG_BLEND_ENABLE;
+ r300->hw.fogs.cmd[R300_FOGS_STATE] &= ~R300_FG_FOG_BLEND_ENABLE;
}
}
@@ -914,36 +914,36 @@ static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face,
r300ContextPtr rmesa = R300_CONTEXT(ctx);
GLuint refmask =
(((ctx->Stencil.
- Ref[0] & 0xff) << ZB_STENCILREFMASK_STENCILREF_SHIFT) | ((ctx->
- Stencil.
- ValueMask
- [0] &
- 0xff)
- <<
- ZB_STENCILREFMASK_STENCILMASK_SHIFT));
+ Ref[0] & 0xff) << R300_STENCILREF_SHIFT) | ((ctx->
+ Stencil.
+ ValueMask
+ [0] &
+ 0xff)
+ <<
+ R300_STENCILMASK_SHIFT));
GLuint flag;
R300_STATECHANGE(rmesa, zs);
rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= ~((R300_ZS_MASK <<
- R300_RB3D_ZS1_FRONT_FUNC_SHIFT)
+ R300_S_FRONT_FUNC_SHIFT)
| (R300_ZS_MASK <<
- R300_RB3D_ZS1_BACK_FUNC_SHIFT));
+ R300_S_BACK_FUNC_SHIFT));
rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &=
- ~((ZB_STENCILREFMASK_STENCIL_MASK << ZB_STENCILREFMASK_STENCILREF_SHIFT) |
- (ZB_STENCILREFMASK_STENCIL_MASK << ZB_STENCILREFMASK_STENCILMASK_SHIFT));
+ ~((R300_STENCILREF_MASK << R300_STENCILREF_SHIFT) |
+ (R300_STENCILREF_MASK << R300_STENCILMASK_SHIFT));
flag = translate_func(ctx->Stencil.Function[0]);
rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
- (flag << R300_RB3D_ZS1_FRONT_FUNC_SHIFT);
+ (flag << R300_S_FRONT_FUNC_SHIFT);
if (ctx->Stencil._TestTwoSide)
flag = translate_func(ctx->Stencil.Function[1]);
rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
- (flag << R300_RB3D_ZS1_BACK_FUNC_SHIFT);
+ (flag << R300_S_BACK_FUNC_SHIFT);
rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= refmask;
}
@@ -953,12 +953,12 @@ static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask)
R300_STATECHANGE(rmesa, zs);
rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &=
- ~(ZB_STENCILREFMASK_STENCIL_MASK <<
- ZB_STENCILREFMASK_STENCILWRITEMASK_SHIFT);
+ ~(R300_STENCILREF_MASK <<
+ R300_STENCILWRITEMASK_SHIFT);
rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |=
(ctx->Stencil.
- WriteMask[0] & ZB_STENCILREFMASK_STENCIL_MASK) <<
- ZB_STENCILREFMASK_STENCILWRITEMASK_SHIFT;
+ WriteMask[0] & R300_STENCILREF_MASK) <<
+ R300_STENCILWRITEMASK_SHIFT;
}
static void r300StencilOpSeparate(GLcontext * ctx, GLenum face,
@@ -969,34 +969,34 @@ static void r300StencilOpSeparate(GLcontext * ctx, GLenum face,
R300_STATECHANGE(rmesa, zs);
/* It is easier to mask what's left.. */
rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &=
- (R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT) |
- (R300_ZS_MASK << R300_RB3D_ZS1_FRONT_FUNC_SHIFT) |
- (R300_ZS_MASK << R300_RB3D_ZS1_BACK_FUNC_SHIFT);
+ (R300_ZS_MASK << R300_Z_FUNC_SHIFT) |
+ (R300_ZS_MASK << R300_S_FRONT_FUNC_SHIFT) |
+ (R300_ZS_MASK << R300_S_BACK_FUNC_SHIFT);
rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
(translate_stencil_op(ctx->Stencil.FailFunc[0]) <<
- R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT)
+ R300_S_FRONT_SFAIL_OP_SHIFT)
| (translate_stencil_op(ctx->Stencil.ZFailFunc[0]) <<
- R300_RB3D_ZS1_FRONT_ZFAIL_OP_SHIFT)
+ R300_S_FRONT_ZFAIL_OP_SHIFT)
| (translate_stencil_op(ctx->Stencil.ZPassFunc[0]) <<
- R300_RB3D_ZS1_FRONT_ZPASS_OP_SHIFT);
+ R300_S_FRONT_ZPASS_OP_SHIFT);
if (ctx->Stencil._TestTwoSide) {
rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
(translate_stencil_op(ctx->Stencil.FailFunc[1]) <<
- R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT)
+ R300_S_BACK_SFAIL_OP_SHIFT)
| (translate_stencil_op(ctx->Stencil.ZFailFunc[1]) <<
- R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT)
+ R300_S_BACK_ZFAIL_OP_SHIFT)
| (translate_stencil_op(ctx->Stencil.ZPassFunc[1]) <<
- R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT);
+ R300_S_BACK_ZPASS_OP_SHIFT);
} else {
rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
(translate_stencil_op(ctx->Stencil.FailFunc[0]) <<
- R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT)
+ R300_S_BACK_SFAIL_OP_SHIFT)
| (translate_stencil_op(ctx->Stencil.ZFailFunc[0]) <<
- R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT)
+ R300_S_BACK_ZFAIL_OP_SHIFT)
| (translate_stencil_op(ctx->Stencil.ZPassFunc[0]) <<
- R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT);
+ R300_S_BACK_ZPASS_OP_SHIFT);
}
}
@@ -1005,10 +1005,10 @@ static void r300ClearStencil(GLcontext * ctx, GLint s)
r300ContextPtr rmesa = R300_CONTEXT(ctx);
rmesa->state.stencil.clear =
- ((GLuint) (ctx->Stencil.Clear & ZB_STENCILREFMASK_STENCIL_MASK) |
- (ZB_STENCILREFMASK_STENCIL_MASK << ZB_STENCILREFMASK_STENCILMASK_SHIFT) |
- ((ctx->Stencil.WriteMask[0] & ZB_STENCILREFMASK_STENCIL_MASK) <<
- ZB_STENCILREFMASK_STENCILMASK_SHIFT));
+ ((GLuint) (ctx->Stencil.Clear & R300_STENCILREF_MASK) |
+ (R300_STENCILREF_MASK << R300_STENCILMASK_SHIFT) |
+ ((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) <<
+ R300_STENCILMASK_SHIFT));
}
/* =============================================================
@@ -1322,6 +1322,82 @@ static unsigned long gen_fixed_filter(unsigned long f)
return f;
}
+static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int i;
+ struct r300_fragment_program *fp = (struct r300_fragment_program *)
+ (char *)ctx->FragmentProgram._Current;
+
+ R300_STATECHANGE(r300, fpt);
+
+ for (i = 0; i < fp->tex.length; i++) {
+ int unit;
+ int opcode;
+ unsigned long val;
+
+ unit = fp->tex.inst[i] >> R300_TEX_ID_SHIFT;
+ unit &= 15;
+
+ val = fp->tex.inst[i];
+ val &= ~R300_TEX_ID_MASK;
+
+ opcode =
+ (val & R300_TEX_INST_MASK) >> R300_TEX_INST_SHIFT;
+ if (opcode == R300_TEX_OP_KIL) {
+ r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val;
+ } else {
+ if (tmu_mappings[unit] >= 0) {
+ val |=
+ tmu_mappings[unit] <<
+ R300_TEX_ID_SHIFT;
+ r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val;
+ } else {
+ // We get here when the corresponding texture image is incomplete
+ // (e.g. incomplete mipmaps etc.)
+ r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val;
+ }
+ }
+ }
+
+ r300->hw.fpt.cmd[R300_FPT_CMD_0] =
+ cmdpacket0(R300_US_TEX_INST_0, fp->tex.length);
+}
+
+static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
+{
+ int i;
+ struct r500_fragment_program *fp = (struct r500_fragment_program *)
+ (char *)ctx->FragmentProgram._Current;
+
+ /* find all the texture instructions and relocate the texture units */
+ for (i = 0; i < fp->inst_end + 1; i++) {
+ if ((fp->inst[i].inst0 & 0x3) == R500_INST_TYPE_TEX) {
+ uint32_t val;
+ int unit, opcode, new_unit;
+
+ val = fp->inst[i].inst1;
+
+ unit = (val >> 16) & 0xf;
+
+ val &= ~(0xf << 16);
+
+ opcode = val & (0x7 << 22);
+ if (opcode == R500_TEX_INST_TEXKILL) {
+ new_unit = 0;
+ } else {
+ if (tmu_mappings[unit] >= 0) {
+ new_unit = tmu_mappings[unit];
+ } else {
+ new_unit = 0;
+ }
+ }
+ val |= R500_TEX_ID(new_unit);
+ fp->inst[i].inst1 = val;
+ }
+ }
+}
+
static void r300SetupTextures(GLcontext * ctx)
{
int i, mtu;
@@ -1436,39 +1512,18 @@ static void r300SetupTextures(GLcontext * ctx)
if (!fp) /* should only happenen once, just after context is created */
return;
- R300_STATECHANGE(r300, fpt);
-
- for (i = 0; i < fp->tex.length; i++) {
- int unit;
- int opcode;
- unsigned long val;
-
- unit = fp->tex.inst[i] >> R300_FPITX_IMAGE_SHIFT;
- unit &= 15;
-
- val = fp->tex.inst[i];
- val &= ~R300_FPITX_IMAGE_MASK;
-
- opcode =
- (val & R300_FPITX_OPCODE_MASK) >> R300_FPITX_OPCODE_SHIFT;
- if (opcode == R300_FPITX_OP_KIL) {
- r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val;
- } else {
- if (tmu_mappings[unit] >= 0) {
- val |=
- tmu_mappings[unit] <<
- R300_FPITX_IMAGE_SHIFT;
- r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val;
- } else {
- // We get here when the corresponding texture image is incomplete
- // (e.g. incomplete mipmaps etc.)
- r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val;
- }
+ if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) {
+ if (fp->mesa_program.UsesKill && last_hw_tmu < 0) {
+ // The KILL operation requires the first texture unit
+ // to be enabled.
+ r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1;
+ r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0;
+ r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(R300_TX_FILTER0_0, 1);
}
- }
-
- r300->hw.fpt.cmd[R300_FPT_CMD_0] =
- cmdpacket0(R300_PFS_TEXI_0, fp->tex.length);
+ r300SetupFragmentShaderTextures(ctx, tmu_mappings);
+ } else
+ r500SetupFragmentShaderTextures(ctx, tmu_mappings);
if (RADEON_DEBUG & DEBUG_STATE)
fprintf(stderr, "TX_ENABLE: %08x last_hw_tmu=%d\n",
@@ -1488,21 +1543,17 @@ static void r300SetupRSUnit(GLcontext * ctx)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
/* I'm still unsure if these are needed */
- GLuint interp_magic[8] = {
- 0x00,
- R300_RS_COL_PTR(1),
- R300_RS_COL_PTR(2),
- R300_RS_COL_PTR(3),
- 0x00,
- 0x00,
- 0x00,
- 0x00
- };
+ GLuint interp_col[8];
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *VB = &tnl->vb;
union r300_outputs_written OutputsWritten;
GLuint InputsRead;
int fp_reg, high_rr;
- int in_texcoords, col_interp_nr;
- int i;
+ int col_interp_nr;
+ int rs_tex_count = 0, rs_col_count = 0;
+ int i, count;
+
+ memset(interp_col, 0, sizeof(interp_col));
if (hw_tcl_on)
OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten;
@@ -1520,7 +1571,7 @@ static void r300SetupRSUnit(GLcontext * ctx)
R300_STATECHANGE(r300, rc);
R300_STATECHANGE(r300, rr);
- fp_reg = in_texcoords = col_interp_nr = high_rr = 0;
+ fp_reg = col_interp_nr = high_rr = 0;
r300->hw.rr.cmd[R300_RR_INST_1] = 0;
@@ -1538,12 +1589,50 @@ static void r300SetupRSUnit(GLcontext * ctx)
InputsRead &= ~FRAG_BIT_WPOS;
}
+ if (InputsRead & FRAG_BIT_COL0) {
+ count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size;
+ interp_col[0] |= R300_RS_COL_PTR(rs_col_count);
+ if (count == 3)
+ interp_col[0] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGB1);
+ rs_col_count += count;
+ }
+ else
+ interp_col[0] = R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
+
+ if (InputsRead & FRAG_BIT_COL1) {
+ count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size;
+ if (count == 3)
+ interp_col[1] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGB0);
+ interp_col[1] |= R300_RS_COL_PTR(1);
+ rs_col_count += count;
+ }
+
+
for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
- r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0 | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | (in_texcoords << R300_RS_INTERP_SRC_SHIFT)
- | interp_magic[i];
+ int swiz;
+
+ /* with TCL we always seem to route 4 components */
+ if (hw_tcl_on)
+ count = 4;
+ else
+ count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size;
+
+ r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = interp_col[i] | rs_tex_count;
+ switch(count) {
+ case 4: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3); break;
+ case 3: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(R300_RS_SEL_K1); break;
+ default:
+ case 1:
+ case 2: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1); break;
+ };
+
+ r300->hw.ri.cmd[R300_RI_INTERP_0 + i] |= swiz;
r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0;
if (InputsRead & (FRAG_BIT_TEX0 << i)) {
+
+ rs_tex_count += count;
+
//assert(r300->state.texture.tc_count != 0);
r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] |= R300_RS_INST_TEX_CN_WRITE | i /* source INTERP */
| (fp_reg << R300_RS_INST_TEX_ADDR_SHIFT);
@@ -1557,10 +1646,6 @@ static void r300SetupRSUnit(GLcontext * ctx)
WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i);
}
}
- /* Need to count all coords enabled at vof */
- if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) {
- in_texcoords++;
- }
}
if (InputsRead & FRAG_BIT_COL0) {
@@ -1586,23 +1671,186 @@ static void r300SetupRSUnit(GLcontext * ctx)
}
/* Need at least one. This might still lock as the values are undefined... */
- if (in_texcoords == 0 && col_interp_nr == 0) {
+ if (rs_tex_count == 0 && col_interp_nr == 0) {
r300->hw.rr.cmd[R300_RR_INST_0] |= R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | (fp_reg++ << R300_RS_INST_COL_ADDR_SHIFT);
col_interp_nr++;
}
- r300->hw.rc.cmd[1] = 0 | ((in_texcoords << 2) << R300_IT_COUNT_SHIFT)
+ r300->hw.rc.cmd[1] = 0 | (rs_tex_count << R300_IT_COUNT_SHIFT)
| (col_interp_nr << R300_IC_COUNT_SHIFT)
| R300_HIRES_EN;
assert(high_rr >= 0);
r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, high_rr + 1);
+ r300->hw.rc.cmd[2] = high_rr;
+
+ if (InputsRead)
+ WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead);
+}
+
+static void r500SetupRSUnit(GLcontext * ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ /* I'm still unsure if these are needed */
+ GLuint interp_col[8];
+ union r300_outputs_written OutputsWritten;
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *VB = &tnl->vb;
+ GLuint InputsRead;
+ int fp_reg, high_rr;
+ int rs_col_count = 0;
+ int in_texcoords, col_interp_nr;
+ int i, count;
+
+ memset(interp_col, 0, sizeof(interp_col));
+ if (hw_tcl_on)
+ OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten;
+ else
+ RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->state.render_inputs_bitset);
+
+ if (ctx->FragmentProgram._Current)
+ InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
+ else {
+ fprintf(stderr, "No ctx->FragmentProgram._Current!!\n");
+ return; /* This should only ever happen once.. */
+ }
+
+ R300_STATECHANGE(r300, ri);
+ R300_STATECHANGE(r300, rc);
+ R300_STATECHANGE(r300, rr);
+
+ fp_reg = col_interp_nr = high_rr = in_texcoords = 0;
+
+ r300->hw.rr.cmd[R300_RR_INST_1] = 0;
+
+ if (InputsRead & FRAG_BIT_WPOS) {
+ for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
+ if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
+ break;
+
+ if (i == ctx->Const.MaxTextureUnits) {
+ fprintf(stderr, "\tno free texcoord found...\n");
+ _mesa_exit(-1);
+ }
+
+ InputsRead |= (FRAG_BIT_TEX0 << i);
+ InputsRead &= ~FRAG_BIT_WPOS;
+ }
+
+ if (InputsRead & FRAG_BIT_COL0) {
+ count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size;
+ interp_col[0] |= R500_RS_COL_PTR(rs_col_count);
+ if (count == 3)
+ interp_col[0] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGB1);
+ rs_col_count += count;
+ }
+ else
+ interp_col[0] = R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
+
+ if (InputsRead & FRAG_BIT_COL1) {
+ count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size;
+ interp_col[1] |= R500_RS_COL_PTR(1);
+ if (count == 3)
+ interp_col[1] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGB0);
+ rs_col_count += count;
+ }
+
+ for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+ GLuint swiz = 0;
+
+ /* with TCL we always seem to route 4 components */
+ if (InputsRead & (FRAG_BIT_TEX0 << i)) {
+
+ if (hw_tcl_on)
+ count = 4;
+ else
+ count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size;
+
+ /* always have on texcoord */
+ swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_S_SHIFT;
+ if (count >= 2)
+ swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_T_SHIFT;
+ else
+ swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT;
+
+ if (count >= 3)
+ swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_R_SHIFT;
+ else
+ swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT;
+
+ if (count == 4)
+ swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_Q_SHIFT;
+ else
+ swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT;
+
+ } else
+ swiz = (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
+ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
+ (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
+ (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT);
+
+ r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = interp_col[i] | swiz;
+
+ r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0;
+ if (InputsRead & (FRAG_BIT_TEX0 << i)) {
+ //assert(r300->state.texture.tc_count != 0);
+ r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] |= R500_RS_INST_TEX_CN_WRITE | i /* source INTERP */
+ | (fp_reg << R500_RS_INST_TEX_ADDR_SHIFT);
+ high_rr = fp_reg;
+
+ /* Passing invalid data here can lock the GPU. */
+ if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) {
+ InputsRead &= ~(FRAG_BIT_TEX0 << i);
+ fp_reg++;
+ } else {
+ WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i);
+ }
+ }
+ }
+
+ if (InputsRead & FRAG_BIT_COL0) {
+ if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) {
+ r300->hw.rr.cmd[R300_RR_INST_0] |= R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT);
+ InputsRead &= ~FRAG_BIT_COL0;
+ col_interp_nr++;
+ } else {
+ WARN_ONCE("fragprog wants col0, vp doesn't provide it\n");
+ }
+ }
+
+ if (InputsRead & FRAG_BIT_COL1) {
+ if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) {
+ r300->hw.rr.cmd[R300_RR_INST_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT);
+ InputsRead &= ~FRAG_BIT_COL1;
+ if (high_rr < 1)
+ high_rr = 1;
+ col_interp_nr++;
+ } else {
+ WARN_ONCE("fragprog wants col1, vp doesn't provide it\n");
+ }
+ }
+
+ /* Need at least one. This might still lock as the values are undefined... */
+ if (in_texcoords == 0 && col_interp_nr == 0) {
+ r300->hw.rr.cmd[R300_RR_INST_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT);
+ col_interp_nr++;
+ }
+
+ r300->hw.rc.cmd[1] = 0 | (in_texcoords << R300_IT_COUNT_SHIFT)
+ | (col_interp_nr << R300_IC_COUNT_SHIFT)
+ | R300_HIRES_EN;
+
+ assert(high_rr >= 0);
+ r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, high_rr + 1);
r300->hw.rc.cmd[2] = 0xC0 | high_rr;
if (InputsRead)
WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead);
}
+
+
+
#define bump_vpu_count(ptr, new_count) do{\
drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\
int _nc=(new_count)/4; \
@@ -1648,10 +1896,67 @@ static inline void r300SetupVertexProgramFragment(r300ContextPtr r300, int dest,
}
}
+#define MIN3(a, b, c) ((a) < (b) ? MIN2(a, c) : MIN2(b, c))
+
+
+static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count,
+ GLuint output_count, GLuint temp_count)
+{
+ int vtx_mem_size;
+ int pvs_num_slots;
+ int pvs_num_cntrls;
+
+ /* Flush PVS engine before changing PVS_NUM_SLOTS, PVS_NUM_CNTRLS.
+ * See r500 docs 6.5.2 - done in emit */
+
+ /* avoid division by zero */
+ if (input_count == 0) input_count = 1;
+ if (output_count == 0) output_count = 1;
+ if (temp_count == 0) temp_count = 1;
+
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+ vtx_mem_size = 128;
+ else
+ vtx_mem_size = 72;
+
+ pvs_num_slots = MIN3(10, vtx_mem_size/input_count, vtx_mem_size/output_count);
+ pvs_num_cntrls = MIN2(6, vtx_mem_size/temp_count);
+
+ R300_STATECHANGE(rmesa, vap_cntl);
+ if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] =
+ (pvs_num_slots << R300_PVS_NUM_SLOTS_SHIFT) |
+ (pvs_num_cntrls << R300_PVS_NUM_CNTLRS_SHIFT) |
+ (12 << R300_VF_MAX_VTX_NUM_SHIFT);
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= R500_TCL_STATE_OPTIMIZATION;
+ } else
+ /* not sure about non-tcl */
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
+ (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+ (5 << R300_VF_MAX_VTX_NUM_SHIFT));
+
+ if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515)
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (2 << R300_PVS_NUM_FPUS_SHIFT);
+ else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) ||
+ (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560))
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (5 << R300_PVS_NUM_FPUS_SHIFT);
+ else if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420)
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (6 << R300_PVS_NUM_FPUS_SHIFT);
+ else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) ||
+ (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580) ||
+ (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570))
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (8 << R300_PVS_NUM_FPUS_SHIFT);
+ else
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (4 << R300_PVS_NUM_FPUS_SHIFT);
+
+}
+
static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa)
{
struct r300_vertex_shader_state *prog = &(rmesa->state.vertex_shader);
GLuint o_reg = 0;
+ GLuint i_reg = 0;
int i;
int inst_count = 0;
int param_count = 0;
@@ -1664,26 +1969,37 @@ static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa)
prog->program.body.i[program_end + 2] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
prog->program.body.i[program_end + 3] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
program_end += 4;
+ i_reg++;
}
}
prog->program.length = program_end;
- r300SetupVertexProgramFragment(rmesa, R300_PVS_UPLOAD_PROGRAM,
+ r300SetupVertexProgramFragment(rmesa, R300_PVS_CODE_START,
&(prog->program));
inst_count = (prog->program.length / 4) - 1;
+ r300VapCntl(rmesa, i_reg, o_reg, 0);
+
R300_STATECHANGE(rmesa, pvs);
rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] =
- (0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) |
- (inst_count << R300_PVS_CNTL_1_POS_END_SHIFT) |
- (inst_count << R300_PVS_CNTL_1_PROGRAM_END_SHIFT);
+ (0 << R300_PVS_FIRST_INST_SHIFT) |
+ (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
+ (inst_count << R300_PVS_LAST_INST_SHIFT);
rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] =
- (0 << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT) |
- (param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT);
+ (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
+ (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT);
rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] =
- (inst_count << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT) |
- (inst_count << R300_PVS_CNTL_3_PROGRAM_UNKNOWN2_SHIFT);
+ (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
+}
+
+static int bit_count (int x)
+{
+ x = ((x & 0xaaaaaaaaU) >> 1) + (x & 0x55555555U);
+ x = ((x & 0xccccccccU) >> 2) + (x & 0x33333333U);
+ x = (x >> 16) + (x & 0xffff);
+ x = ((x & 0xf0f0) >> 4) + (x & 0x0f0f);
+ return (x >> 8) + (x & 0x00ff);
}
static void r300SetupRealVertexProgram(r300ContextPtr rmesa)
@@ -1704,20 +2020,22 @@ static void r300SetupRealVertexProgram(r300ContextPtr rmesa)
bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
param_count /= 4;
- r300SetupVertexProgramFragment(rmesa, R300_PVS_UPLOAD_PROGRAM, &(prog->program));
+ r300SetupVertexProgramFragment(rmesa, R300_PVS_CODE_START, &(prog->program));
inst_count = (prog->program.length / 4) - 1;
+ r300VapCntl(rmesa, bit_count(prog->key.InputsRead),
+ bit_count(prog->key.OutputsWritten), prog->num_temporaries);
+
R300_STATECHANGE(rmesa, pvs);
rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] =
- (0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) |
- (inst_count << R300_PVS_CNTL_1_POS_END_SHIFT) |
- (inst_count << R300_PVS_CNTL_1_PROGRAM_END_SHIFT);
+ (0 << R300_PVS_FIRST_INST_SHIFT) |
+ (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
+ (inst_count << R300_PVS_LAST_INST_SHIFT);
rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] =
- (0 << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT) |
- (param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT);
+ (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
+ (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT);
rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] =
- (inst_count << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT) |
- (inst_count << R300_PVS_CNTL_3_PROGRAM_UNKNOWN2_SHIFT);
+ (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
}
static void r300SetupVertexProgram(r300ContextPtr rmesa)
@@ -1740,13 +2058,6 @@ static void r300SetupVertexProgram(r300ContextPtr rmesa)
r300SetupDefaultVertexProgram(rmesa);
}
-
- /* FIXME: This is done for vertex shader fragments, but also needs to be
- * done for vap_pvs, so I leave it as a reminder. */
-#if 0
- reg_start(R300_VAP_PVS_WAITIDLE, 0);
- e32(0x00000000);
-#endif
}
/**
@@ -1848,11 +2159,6 @@ static void r300ResetHwState(r300ContextPtr r300)
r300AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef);
r300Enable(ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled);
- if (!has_tcl)
- r300->hw.vap_cntl.cmd[1] = 0x0014045a;
- else
- r300->hw.vap_cntl.cmd[1] = 0x0030045A; //0x0030065a /* Dangerous */
-
r300->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA
| R300_VPORT_X_OFFSET_ENA
| R300_VPORT_Y_SCALE_ENA
@@ -1878,7 +2184,7 @@ static void r300ResetHwState(r300ContextPtr r300)
/* XXX: Other families? */
if (has_tcl) {
- r300->hw.vap_clip_cntl.cmd[1] = R300_221C_NORMAL;
+ r300->hw.vap_clip_cntl.cmd[1] = R300_PS_UCP_MODE_DIST_COP;
r300->hw.vap_clip.cmd[1] = r300PackFloat32(1.0); /* X */
r300->hw.vap_clip.cmd[2] = r300PackFloat32(1.0); /* X */
@@ -1902,23 +2208,25 @@ static void r300ResetHwState(r300ContextPtr r300)
r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_0] = 0x66666666;
r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_1] = 0x06666666;
- /* XXX: Other families? */
r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] =
- R300_GB_TILE_ENABLE | R300_GB_TILE_SIZE_16;
- switch (r300->radeon.radeonScreen->chip_family) {
- case CHIP_FAMILY_R300:
- case CHIP_FAMILY_R350:
- case CHIP_FAMILY_RV410:
+ R300_GB_TILE_ENABLE | R300_GB_TILE_SIZE_16 /*| R300_GB_SUBPIXEL_1_16*/;
+ switch (r300->radeon.radeonScreen->num_gb_pipes) {
+ case 1:
+ default:
+ r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |=
+ R300_GB_TILE_PIPE_COUNT_RV300;
+ break;
+ case 2:
r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |=
R300_GB_TILE_PIPE_COUNT_R300;
break;
- case CHIP_FAMILY_R420:
+ case 3:
r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |=
- R300_GB_TILE_PIPE_COUNT_R420;
+ R300_GB_TILE_PIPE_COUNT_R420_3P;
break;
- default:
+ case 4:
r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |=
- R300_GB_TILE_DISABLE; /* TODO: This disables tiling totally. I guess it happened accidentially. */
+ R300_GB_TILE_PIPE_COUNT_R420;
break;
}
@@ -1967,11 +2275,15 @@ static void r300ResetHwState(r300ContextPtr r300)
r300->hw.sc_screendoor.cmd[1] = 0x00FFFFFF;
- r300->hw.us_out_fmt.cmd[1] = 0x00001B01;
- r300->hw.us_out_fmt.cmd[2] = 0x00001B0F;
- r300->hw.us_out_fmt.cmd[3] = 0x00001B0F;
- r300->hw.us_out_fmt.cmd[4] = 0x00001B0F;
- r300->hw.us_out_fmt.cmd[5] = 0x00000001;
+ r300->hw.us_out_fmt.cmd[1] = R500_OUT_FMT_C4_8 |
+ R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A;
+ r300->hw.us_out_fmt.cmd[2] = R500_OUT_FMT_UNUSED |
+ R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A;
+ r300->hw.us_out_fmt.cmd[3] = R500_OUT_FMT_UNUSED |
+ R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A;
+ r300->hw.us_out_fmt.cmd[4] = R500_OUT_FMT_UNUSED |
+ R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A;
+ r300->hw.us_out_fmt.cmd[5] = R300_W_FMT_W24;
r300Enable(ctx, GL_FOG, ctx->Fog.Enabled);
r300Fogfv(ctx, GL_FOG_MODE, NULL);
@@ -2023,15 +2335,32 @@ static void r300ResetHwState(r300ContextPtr r300)
if (r300->radeon.sarea->tiling_enabled) {
/* XXX: Turn off when clearing buffers ? */
- r300->hw.zb.cmd[R300_ZB_PITCH] |= ZB_DEPTHPITCH_DEPTHMACROTILE_ENABLE;
+ r300->hw.zb.cmd[R300_ZB_PITCH] |= R300_DEPTHMACROTILE_ENABLE;
if (ctx->Visual.depthBits == 24)
r300->hw.zb.cmd[R300_ZB_PITCH] |=
- ZB_DEPTHPITCH_DEPTHMICROTILE_TILED;
+ R300_DEPTHMICROTILE_TILED;
}
r300->hw.zb_depthclearvalue.cmd[1] = 0;
+ switch (ctx->Visual.depthBits) {
+ case 16:
+ r300->hw.zstencil_format.cmd[1] = R300_DEPTHFORMAT_16BIT_INT_Z;
+ break;
+ case 24:
+ r300->hw.zstencil_format.cmd[1] = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
+ break;
+ default:
+ fprintf(stderr, "Error: Unsupported depth %d... exiting\n", ctx->Visual.depthBits);
+ _mesa_exit(-1);
+ }
+
+ r300->hw.zstencil_format.cmd[2] = R300_ZTOP_DISABLE;
+ r300->hw.zstencil_format.cmd[3] = 0x00000003;
+ r300->hw.zstencil_format.cmd[4] = 0x00000000;
+ r300SetEarlyZState(ctx);
+
r300->hw.unk4F30.cmd[1] = 0;
r300->hw.unk4F30.cmd[2] = 0;
@@ -2039,6 +2368,7 @@ static void r300ResetHwState(r300ContextPtr r300)
r300->hw.zb_hiz_pitch.cmd[1] = 0;
+ r300VapCntl(r300, 0, 0, 0);
if (has_tcl) {
r300->hw.vps.cmd[R300_VPS_ZERO_0] = 0;
r300->hw.vps.cmd[R300_VPS_ZERO_1] = 0;
@@ -2084,10 +2414,11 @@ void r300UpdateShaders(r300ContextPtr rmesa)
hw_tcl_on = future_hw_tcl_on = 0;
r300ResetHwState(rmesa);
+ r300UpdateStateParameters(ctx, _NEW_PROGRAM);
return;
}
- r300UpdateStateParameters(ctx, _NEW_PROGRAM);
}
+ r300UpdateStateParameters(ctx, _NEW_PROGRAM);
}
static void r300SetupPixelShader(r300ContextPtr rmesa)
@@ -2107,26 +2438,28 @@ static void r300SetupPixelShader(r300ContextPtr rmesa)
return;
}
+ r300SetupTextures(ctx);
+
R300_STATECHANGE(rmesa, fpi[0]);
- rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR0_0, fp->alu_end + 1);
+ rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, fp->alu_end + 1);
for (i = 0; i <= fp->alu_end; i++) {
rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst0;
}
R300_STATECHANGE(rmesa, fpi[1]);
- rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR1_0, fp->alu_end + 1);
+ rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, fp->alu_end + 1);
for (i = 0; i <= fp->alu_end; i++) {
rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst1;
}
R300_STATECHANGE(rmesa, fpi[2]);
- rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR2_0, fp->alu_end + 1);
+ rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, fp->alu_end + 1);
for (i = 0; i <= fp->alu_end; i++) {
rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst2;
}
R300_STATECHANGE(rmesa, fpi[3]);
- rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR3_0, fp->alu_end + 1);
+ rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, fp->alu_end + 1);
for (i = 0; i <= fp->alu_end; i++) {
rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst3;
}
@@ -2143,10 +2476,10 @@ static void r300SetupPixelShader(r300ContextPtr rmesa)
for (i = 0, k = (4 - (fp->cur_node + 1)); i < 4; i++, k++) {
if (i < (fp->cur_node + 1)) {
rmesa->hw.fp.cmd[R300_FP_NODE0 + k] =
- (fp->node[i].alu_offset << R300_PFS_NODE_ALU_OFFSET_SHIFT) |
- (fp->node[i].alu_end << R300_PFS_NODE_ALU_END_SHIFT) |
- (fp->node[i].tex_offset << R300_PFS_NODE_TEX_OFFSET_SHIFT) |
- (fp->node[i].tex_end << R300_PFS_NODE_TEX_END_SHIFT) |
+ (fp->node[i].alu_offset << R300_ALU_START_SHIFT) |
+ (fp->node[i].alu_end << R300_ALU_SIZE_SHIFT) |
+ (fp->node[i].tex_offset << R300_TEX_START_SHIFT) |
+ (fp->node[i].tex_end << R300_TEX_SIZE_SHIFT) |
fp->node[i].flags;
} else {
rmesa->hw.fp.cmd[R300_FP_NODE0 + (3 - i)] = 0;
@@ -2163,19 +2496,107 @@ static void r300SetupPixelShader(r300ContextPtr rmesa)
}
}
+#define bump_r500fp_count(ptr, new_count) do{\
+ drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\
+ int _nc=(new_count)/6; \
+ assert(_nc < 256); \
+ if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\
+} while(0)
+
+#define bump_r500fp_const_count(ptr, new_count) do{\
+ drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\
+ int _nc=(new_count)/4; \
+ assert(_nc < 256); \
+ if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\
+} while(0)
+
+static void r500SetupPixelShader(r300ContextPtr rmesa)
+{
+ GLcontext *ctx = rmesa->radeon.glCtx;
+ struct r500_fragment_program *fp = (struct r500_fragment_program *)
+ (char *)ctx->FragmentProgram._Current;
+ int i;
+
+ if (!fp) /* should only happenen once, just after context is created */
+ return;
+
+ ((drm_r300_cmd_header_t *) rmesa->hw.r500fp.cmd)->r500fp.count = 0;
+ ((drm_r300_cmd_header_t *) rmesa->hw.r500fp_const.cmd)->r500fp.count = 0;
+
+ r500TranslateFragmentShader(rmesa, fp);
+ if (!fp->translated) {
+ fprintf(stderr, "%s: No valid fragment shader, exiting\n",
+ __FUNCTION__);
+ return;
+ }
+
+ r300SetupTextures(ctx);
+
+ R300_STATECHANGE(rmesa, fp);
+ rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = fp->max_temp_idx;
+
+ rmesa->hw.fp.cmd[R500_FP_CODE_ADDR] =
+ R500_US_CODE_START_ADDR(fp->inst_offset) |
+ R500_US_CODE_END_ADDR(fp->inst_end);
+ rmesa->hw.fp.cmd[R500_FP_CODE_RANGE] =
+ R500_US_CODE_RANGE_ADDR(fp->inst_offset) |
+ R500_US_CODE_RANGE_SIZE(fp->inst_end);
+ rmesa->hw.fp.cmd[R500_FP_CODE_OFFSET] =
+ R500_US_CODE_OFFSET_ADDR(0); /* FIXME when we add flow control */
+
+ R300_STATECHANGE(rmesa, r500fp);
+ /* Emit our shader... */
+ for (i = 0; i < fp->inst_end+1; i++) {
+ rmesa->hw.r500fp.cmd[i*6+1] = fp->inst[i].inst0;
+ rmesa->hw.r500fp.cmd[i*6+2] = fp->inst[i].inst1;
+ rmesa->hw.r500fp.cmd[i*6+3] = fp->inst[i].inst2;
+ rmesa->hw.r500fp.cmd[i*6+4] = fp->inst[i].inst3;
+ rmesa->hw.r500fp.cmd[i*6+5] = fp->inst[i].inst4;
+ rmesa->hw.r500fp.cmd[i*6+6] = fp->inst[i].inst5;
+ }
+
+ bump_r500fp_count(rmesa->hw.r500fp.cmd, (fp->inst_end + 1) * 6);
+
+ R300_STATECHANGE(rmesa, r500fp_const);
+ for (i = 0; i < fp->const_nr; i++) {
+ rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(fp->constant[i][0]);
+ rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(fp->constant[i][1]);
+ rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(fp->constant[i][2]);
+ rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(fp->constant[i][3]);
+ }
+ bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, fp->const_nr * 4);
+
+}
+
void r300UpdateShaderStates(r300ContextPtr rmesa)
{
GLcontext *ctx;
ctx = rmesa->radeon.glCtx;
r300UpdateTextureState(ctx);
+ r300SetEarlyZState(ctx);
- r300SetupPixelShader(rmesa);
- r300SetupTextures(ctx);
+ GLuint fgdepthsrc = R300_FG_DEPTH_SRC_SCAN;
+ if (current_fragment_program_writes_depth(ctx))
+ fgdepthsrc = R300_FG_DEPTH_SRC_SHADER;
+ if (fgdepthsrc != rmesa->hw.fg_depth_src.cmd[1]) {
+ R300_STATECHANGE(rmesa, fg_depth_src);
+ rmesa->hw.fg_depth_src.cmd[1] = fgdepthsrc;
+ }
+
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+ r500SetupPixelShader(rmesa);
+ else
+ r300SetupPixelShader(rmesa);
+
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+ r500SetupRSUnit(ctx);
+ else
+ r300SetupRSUnit(ctx);
if ((rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
r300SetupVertexProgram(rmesa);
- r300SetupRSUnit(ctx);
+
}
/**
@@ -2215,12 +2636,12 @@ void r300InitState(r300ContextPtr r300)
switch (ctx->Visual.depthBits) {
case 16:
r300->state.depth.scale = 1.0 / (GLfloat) 0xffff;
- depth_fmt = ZB_FORMAR_DEPTHFORMAT_16BIT_INT_Z;
+ depth_fmt = R300_DEPTHFORMAT_16BIT_INT_Z;
r300->state.stencil.clear = 0x00000000;
break;
case 24:
r300->state.depth.scale = 1.0 / (GLfloat) 0xffffff;
- depth_fmt = ZB_FORMAR_DEPTHFORMAT_24BIT_INT_Z;
+ depth_fmt = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
r300->state.stencil.clear = 0x00ff0000;
break;
default:
@@ -2249,11 +2670,11 @@ void r300UpdateClipPlanes( GLcontext *ctx )
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
GLuint p;
-
+
for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
if (ctx->Transform.ClipPlanesEnabled & (1 << p)) {
GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p];
-
+
R300_STATECHANGE( rmesa, vpucp[p] );
rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0];
rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1];