summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Eric Anholt <eric@anholt.net> 2010-09-30 19:18:25 -0700
committer: Eric Anholt <eric@anholt.net> 2010-09-30 20:23:40 -0700
commit: 1d073cb2d920d1c0b8c6d598055b14048fedc96e (patch)
tree: e2500f4f7351f4d5adebc6b2a4bbddf39291e2f4 /src
parent: 5f237a1ccb28399fbbceecea694f5d18ebba9938 (diff)
download: external_mesa3d-1d073cb2d920d1c0b8c6d598055b14048fedc96e.zip
external_mesa3d-1d073cb2d920d1c0b8c6d598055b14048fedc96e.tar.gz
external_mesa3d-1d073cb2d920d1c0b8c6d598055b14048fedc96e.tar.bz2
i965: Split the gen4 and gen5 sampler handling apart.
Trying to track the insanity of the different argument layouts for normal/shadow crossed with normal/lod/bias one generation at a time is enough. Fixes: glsl1-texture2D() with bias. (first test passing in this code that doesn't pass without it!)
Diffstat (limited to 'src')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp 126
1 files changed, 103 insertions, 23 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index c49b27b..78cdfed 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -469,6 +469,8 @@ public:
void emit_fragcoord_interpolation(ir_variable *ir);
void emit_general_interpolation(ir_variable *ir);
void emit_interpolation_setup();
+ fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, int base_mrf);
+ fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, int base_mrf);
void emit_fb_writes();
void emit_assignment_writes(fs_reg &l, fs_reg &r,
const glsl_type *type, bool predicated);
@@ -1220,48 +1222,93 @@ fs_visitor::visit(ir_assignment *ir)
}
}
-void
-fs_visitor::visit(ir_texture *ir)
+fs_inst *
+fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, int base_mrf)
{
- int base_mrf = 2;
- fs_inst *inst = NULL;
- unsigned int mlen = 0;
+ /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
+ int mlen = 3;
- ir->coordinate->accept(this);
- fs_reg coordinate = this->result;
+ if (ir->shadow_comparitor) {
+ if (ir->op == ir_tex) {
+ /* There's no plain shadow compare message, so we use shadow
+ * compare with a bias of 0.0.
+ */
+ emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
+ fs_reg(0.0f)));
+ mlen++;
+ } else if (ir->op == ir_txb) {
+ ir->lod_info.bias->accept(this);
+ emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
+ this->result));
+ mlen++;
+ } else {
+ assert(ir->op == ir_txl);
+ ir->lod_info.lod->accept(this);
+ emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
+ this->result));
+ mlen++;
+ }
- /* Should be lowered by do_lower_texture_projection */
- assert(!ir->projector);
+ ir->shadow_comparitor->accept(this);
+ emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
+ mlen++;
+ } else {
+ /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare sampler
+ * instructions. We'll need to do SIMD16 here.
+ */
+ abort();
+ }
- for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
- emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate));
- coordinate.reg_offset++;
+ fs_inst *inst = NULL;
+ switch (ir->op) {
+ case ir_tex:
+ inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
+ break;
+ case ir_txb:
+ inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
+ break;
+ case ir_txl:
+ inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
+ break;
+ case ir_txd:
+ case ir_txf:
+ assert(!"GLSL 1.30 features unsupported");
+ break;
}
+ inst->mlen = mlen;
- /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
- if (intel->gen < 5)
- mlen = 3;
+ return inst;
+}
+
+fs_inst *
+fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, int base_mrf)
+{
+ /* gen5's SIMD8 sampler has slots for u, v, r, array index, then
+ * optional parameters like shadow comparitor or LOD bias. If
+ * optional parameters aren't present, those base slots are
+ * optional and don't need to be included in the message.
+ *
+ * We don't fill in the unnecessary slots regardless, which may
+ * look surprising in the disassembly.
+ */
+ int mlen = ir->coordinate->type->vector_elements;
if (ir->shadow_comparitor) {
- /* For shadow comparisons, we have to supply u,v,r. */
- mlen = 3;
+ mlen = MAX2(mlen, 4);
ir->shadow_comparitor->accept(this);
emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
mlen++;
}
- /* Do we ever want to handle writemasking on texture samples? Is it
- * performance relevant?
- */
- fs_reg dst = fs_reg(this, glsl_type::vec4_type);
-
+ fs_inst *inst = NULL;
switch (ir->op) {
case ir_tex:
inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
break;
case ir_txb:
ir->lod_info.bias->accept(this);
+ mlen = MAX2(mlen, 4);
emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
mlen++;
@@ -1269,6 +1316,7 @@ fs_visitor::visit(ir_texture *ir)
break;
case ir_txl:
ir->lod_info.lod->accept(this);
+ mlen = MAX2(mlen, 4);
emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
mlen++;
@@ -1279,6 +1327,39 @@ fs_visitor::visit(ir_texture *ir)
assert(!"GLSL 1.30 features unsupported");
break;
}
+ inst->mlen = mlen;
+
+ return inst;
+}
+
+void
+fs_visitor::visit(ir_texture *ir)
+{
+ int base_mrf = 2;
+ fs_inst *inst = NULL;
+ unsigned int mlen = 0;
+
+ ir->coordinate->accept(this);
+ fs_reg coordinate = this->result;
+
+ /* Should be lowered by do_lower_texture_projection */
+ assert(!ir->projector);
+
+ for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
+ emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate));
+ coordinate.reg_offset++;
+ }
+
+ /* Writemasking doesn't eliminate channels on SIMD8 texture
+ * samples, so don't worry about them.
+ */
+ fs_reg dst = fs_reg(this, glsl_type::vec4_type);
+
+ if (intel->gen < 5) {
+ inst = emit_texture_gen4(ir, dst, base_mrf);
+ } else {
+ inst = emit_texture_gen5(ir, dst, base_mrf);
+ }
inst->sampler =
_mesa_get_sampler_uniform_value(ir->sampler,
@@ -1290,7 +1371,6 @@ fs_visitor::visit(ir_texture *ir)
if (ir->shadow_comparitor)
inst->shadow_compare = true;
- inst->mlen = mlen;
}
void