diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs_fp.cpp')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_fp.cpp | 784 |
1 files changed, 784 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp new file mode 100644 index 0000000..be00f6e --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp @@ -0,0 +1,784 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file brw_fs_fp.cpp + * + * Implementation of the compiler for GL_ARB_fragment_program shaders on top + * of the GLSL compiler backend. + */ + +#include "brw_context.h" +#include "brw_fs.h" + +static fs_reg +regoffset(fs_reg reg, int i) +{ + reg.reg_offset += i; + return reg; +} + +void +fs_visitor::emit_fp_alu1(enum opcode opcode, + const struct prog_instruction *fpi, + fs_reg dst, fs_reg src) +{ + for (int i = 0; i < 4; i++) { + if (fpi->DstReg.WriteMask & (1 << i)) + emit(opcode, regoffset(dst, i), regoffset(src, i)); + } +} + +void +fs_visitor::emit_fp_alu2(enum opcode opcode, + const struct prog_instruction *fpi, + fs_reg dst, fs_reg src0, fs_reg src1) +{ + for (int i = 0; i < 4; i++) { + if (fpi->DstReg.WriteMask & (1 << i)) + emit(opcode, regoffset(dst, i), + regoffset(src0, i), regoffset(src1, i)); + } +} + +void +fs_visitor::emit_fp_minmax(const prog_instruction *fpi, + fs_reg dst, fs_reg src0, fs_reg src1) +{ + uint32_t conditionalmod; + if (fpi->Opcode == OPCODE_MIN) + conditionalmod = BRW_CONDITIONAL_L; + else + conditionalmod = BRW_CONDITIONAL_GE; + + for (int i = 0; i < 4; i++) { + if (fpi->DstReg.WriteMask & (1 << i)) { + emit_minmax(conditionalmod, regoffset(dst, i), + regoffset(src0, i), regoffset(src1, i)); + } + } +} + +void +fs_visitor::emit_fp_sop(uint32_t conditional_mod, + const struct prog_instruction *fpi, + fs_reg dst, fs_reg src0, fs_reg src1, + fs_reg one) +{ + for (int i = 0; i < 4; i++) { + if (fpi->DstReg.WriteMask & (1 << i)) { + fs_inst *inst; + + inst = emit(BRW_OPCODE_CMP, fs_reg(brw_null_reg()), + regoffset(src0, i), regoffset(src1, i)); + inst->conditional_mod = conditional_mod; + + inst = emit(BRW_OPCODE_SEL, regoffset(dst, i), one, fs_reg(0.0f)); + inst->predicated = true; + } + } +} + +void +fs_visitor::emit_fp_scalar_write(const struct prog_instruction *fpi, + fs_reg dst, fs_reg src) +{ + for (int i = 0; i < 4; i++) { + if (fpi->DstReg.WriteMask & (1 << i)) + emit(BRW_OPCODE_MOV, regoffset(dst, i), src); + } +} + +void +fs_visitor::emit_fp_scalar_math(enum opcode opcode, + const struct prog_instruction *fpi, + fs_reg dst, fs_reg src) +{ + fs_reg temp = fs_reg(this, glsl_type::float_type); + emit_math(opcode, temp, src); + emit_fp_scalar_write(fpi, dst, temp); +} + +void +fs_visitor::emit_fragment_program_code() +{ + setup_fp_regs(); + + fs_reg null = fs_reg(brw_null_reg()); + + /* Keep a reg with 1.0 around, for reuse by emit_fp_sop so that it can just + * be: + * + * sel.f0 dst 1.0 0.0 + * + * instead of + * + * mov dst 0.0 + * mov.f0 dst 1.0 + */ + fs_reg one = fs_reg(this, glsl_type::float_type); + emit(BRW_OPCODE_MOV, one, fs_reg(1.0f)); + + for (unsigned int insn = 0; insn < fp->Base.NumInstructions; insn++) { + const struct prog_instruction *fpi = &fp->Base.Instructions[insn]; + base_ir = fpi; + + //_mesa_print_instruction(fpi); + + fs_reg dst; + fs_reg src[3]; + + /* We always emit into a temporary destination register to avoid + * aliasing issues. + */ + dst = fs_reg(this, glsl_type::vec4_type); + + for (int i = 0; i < 3; i++) + src[i] = get_fp_src_reg(&fpi->SrcReg[i]); + + switch (fpi->Opcode) { + case OPCODE_ABS: + src[0].abs = true; + src[0].negate = false; + emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]); + break; + + case OPCODE_ADD: + emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], src[1]); + break; + + case OPCODE_CMP: + for (int i = 0; i < 4; i++) { + if (fpi->DstReg.WriteMask & (1 << i)) { + fs_inst *inst; + + inst = emit(BRW_OPCODE_CMP, null, + regoffset(src[0], i), fs_reg(0.0f)); + inst->conditional_mod = BRW_CONDITIONAL_L; + + inst = emit(BRW_OPCODE_SEL, regoffset(dst, i), + regoffset(src[1], i), regoffset(src[2], i)); + inst->predicated = true; + } + } + break; + + case OPCODE_COS: + emit_fp_scalar_math(SHADER_OPCODE_COS, fpi, dst, src[0]); + break; + + case OPCODE_DP2: + case OPCODE_DP3: + case OPCODE_DP4: + case OPCODE_DPH: { + fs_reg mul = fs_reg(this, glsl_type::float_type); + fs_reg acc = fs_reg(this, glsl_type::float_type); + int count; + + switch (fpi->Opcode) { + case OPCODE_DP2: count = 2; break; + case OPCODE_DP3: count = 3; break; + case OPCODE_DP4: count = 4; break; + case OPCODE_DPH: count = 3; break; + default: assert(!"not reached"); count = 0; break; + } + + emit(BRW_OPCODE_MUL, acc, + regoffset(src[0], 0), regoffset(src[1], 0)); + for (int i = 1; i < count; i++) { + emit(BRW_OPCODE_MUL, mul, + regoffset(src[0], i), regoffset(src[1], i)); + emit(BRW_OPCODE_ADD, acc, acc, mul); + } + + if (fpi->Opcode == OPCODE_DPH) + emit(BRW_OPCODE_ADD, acc, acc, regoffset(src[1], 3)); + + emit_fp_scalar_write(fpi, dst, acc); + break; + } + + case OPCODE_DST: + if (fpi->DstReg.WriteMask & WRITEMASK_X) + emit(BRW_OPCODE_MOV, dst, fs_reg(1.0f)); + if (fpi->DstReg.WriteMask & WRITEMASK_Y) { + emit(BRW_OPCODE_MUL, regoffset(dst, 1), + regoffset(src[0], 1), regoffset(src[1], 1)); + } + if (fpi->DstReg.WriteMask & WRITEMASK_Z) + emit(BRW_OPCODE_MOV, regoffset(dst, 2), regoffset(src[0], 2)); + if (fpi->DstReg.WriteMask & WRITEMASK_W) + emit(BRW_OPCODE_MOV, regoffset(dst, 3), regoffset(src[1], 3)); + break; + + case OPCODE_EX2: + emit_fp_scalar_math(SHADER_OPCODE_EXP2, fpi, dst, src[0]); + break; + + case OPCODE_FLR: + emit_fp_alu1(BRW_OPCODE_RNDD, fpi, dst, src[0]); + break; + + case OPCODE_FRC: + emit_fp_alu1(BRW_OPCODE_FRC, fpi, dst, src[0]); + break; + + case OPCODE_KIL: { + for (int i = 0; i < 4; i++) { + /* In most cases the argument to a KIL will be something like + * TEMP[0].wwww, so there's no point in checking whether .w is < 0 + * 4 times in a row. + */ + if (i > 0 && + GET_SWZ(fpi->SrcReg[0].Swizzle, i) == + GET_SWZ(fpi->SrcReg[0].Swizzle, i - 1) && + ((fpi->SrcReg[0].Negate >> i) & 1) == + ((fpi->SrcReg[0].Negate >> (i - 1)) & 1)) { + continue; + } + + fs_inst *inst = emit(BRW_OPCODE_CMP, null, + regoffset(src[0], i), 0.0f); + inst->conditional_mod = BRW_CONDITIONAL_L; + + inst = emit(BRW_OPCODE_IF); + inst->predicated = true; + emit(FS_OPCODE_DISCARD); + emit(BRW_OPCODE_ENDIF); + } + break; + } + + case OPCODE_LG2: + emit_fp_scalar_math(SHADER_OPCODE_LOG2, fpi, dst, src[0]); + break; + + case OPCODE_LIT: + /* From the ARB_fragment_program spec: + * + * tmp = VectorLoad(op0); + * if (tmp.x < 0) tmp.x = 0; + * if (tmp.y < 0) tmp.y = 0; + * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); + * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; + * result.x = 1.0; + * result.y = tmp.x; + * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; + * result.w = 1.0; + * + * Note that we don't do the clamping to +/- 128. We didn't in + * brw_wm_emit.c either. + */ + if (fpi->DstReg.WriteMask & WRITEMASK_X) + emit(BRW_OPCODE_MOV, regoffset(dst, 0), fs_reg(1.0f)); + + if (fpi->DstReg.WriteMask & WRITEMASK_YZ) { + fs_inst *inst; + inst = emit(BRW_OPCODE_CMP, null, + regoffset(src[0], 0), fs_reg(0.0f)); + inst->conditional_mod = BRW_CONDITIONAL_LE; + + if (fpi->DstReg.WriteMask & WRITEMASK_Y) { + emit(BRW_OPCODE_MOV, regoffset(dst, 1), regoffset(src[0], 0)); + inst = emit(BRW_OPCODE_MOV, regoffset(dst, 1), fs_reg(0.0f)); + inst->predicated = true; + } + + if (fpi->DstReg.WriteMask & WRITEMASK_Z) { + emit_math(SHADER_OPCODE_POW, regoffset(dst, 2), + regoffset(src[0], 1), regoffset(src[0], 3)); + + inst = emit(BRW_OPCODE_MOV, regoffset(dst, 2), fs_reg(0.0f)); + inst->predicated = true; + } + } + + if (fpi->DstReg.WriteMask & WRITEMASK_W) + emit(BRW_OPCODE_MOV, regoffset(dst, 3), fs_reg(1.0f)); + + break; + + case OPCODE_LRP: + for (int i = 0; i < 4; i++) { + if (fpi->DstReg.WriteMask & (1 << i)) { + fs_reg neg_src0 = regoffset(src[0], i); + neg_src0.negate = !neg_src0.negate; + fs_reg temp = fs_reg(this, glsl_type::float_type); + fs_reg temp2 = fs_reg(this, glsl_type::float_type); + emit(BRW_OPCODE_ADD, temp, neg_src0, fs_reg(1.0f)); + emit(BRW_OPCODE_MUL, temp, temp, regoffset(src[2], i)); + emit(BRW_OPCODE_MUL, temp2, + regoffset(src[0], i), regoffset(src[1], i)); + emit(BRW_OPCODE_ADD, regoffset(dst, i), temp, temp2); + } + } + break; + + case OPCODE_MAD: + for (int i = 0; i < 4; i++) { + if (fpi->DstReg.WriteMask & (1 << i)) { + fs_reg temp = fs_reg(this, glsl_type::float_type); + emit(BRW_OPCODE_MUL, temp, + regoffset(src[0], i), regoffset(src[1], i)); + emit(BRW_OPCODE_ADD, regoffset(dst, i), + temp, regoffset(src[2], i)); + } + } + break; + + case OPCODE_MAX: + emit_fp_minmax(fpi, dst, src[0], src[1]); + break; + + case OPCODE_MOV: + emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]); + break; + + case OPCODE_MIN: + emit_fp_minmax(fpi, dst, src[0], src[1]); + break; + + case OPCODE_MUL: + emit_fp_alu2(BRW_OPCODE_MUL, fpi, dst, src[0], src[1]); + break; + + case OPCODE_POW: { + fs_reg temp = fs_reg(this, glsl_type::float_type); + emit_math(SHADER_OPCODE_POW, temp, src[0], src[1]); + emit_fp_scalar_write(fpi, dst, temp); + break; + } + + case OPCODE_RCP: + emit_fp_scalar_math(SHADER_OPCODE_RCP, fpi, dst, src[0]); + break; + + case OPCODE_RSQ: + emit_fp_scalar_math(SHADER_OPCODE_RSQ, fpi, dst, src[0]); + break; + + case OPCODE_SCS: + if (fpi->DstReg.WriteMask & WRITEMASK_X) { + emit_math(SHADER_OPCODE_COS, regoffset(dst, 0), + regoffset(src[0], 0)); + } + + if (fpi->DstReg.WriteMask & WRITEMASK_Y) { + emit_math(SHADER_OPCODE_SIN, regoffset(dst, 1), + regoffset(src[0], 1)); + } + break; + + case OPCODE_SGE: + emit_fp_sop(BRW_CONDITIONAL_GE, fpi, dst, src[0], src[1], one); + break; + + case OPCODE_SIN: + emit_fp_scalar_math(SHADER_OPCODE_SIN, fpi, dst, src[0]); + break; + + case OPCODE_SLT: + emit_fp_sop(BRW_CONDITIONAL_L, fpi, dst, src[0], src[1], one); + break; + + case OPCODE_SUB: { + fs_reg neg_src1 = src[1]; + neg_src1.negate = !src[1].negate; + + emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], neg_src1); + break; + } + + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXP: { + /* We piggy-back on the GLSL IR support for texture setup. To do so, + * we have to cook up an ir_texture that has the coordinate field + * with appropriate type, and shadow_comparitor set or not. All the + * other properties of ir_texture are passed in as arguments to the + * emit_texture_gen* function. + */ + ir_texture *ir = NULL; + + fs_reg lod; + fs_reg dpdy; + fs_reg coordinate = src[0]; + fs_reg shadow_c; + + switch (fpi->Opcode) { + case OPCODE_TEX: + ir = new(mem_ctx) ir_texture(ir_tex); + break; + case OPCODE_TXP: { + ir = new(mem_ctx) ir_texture(ir_tex); + + coordinate = fs_reg(this, glsl_type::vec3_type); + fs_reg invproj = fs_reg(this, glsl_type::float_type); + emit_math(SHADER_OPCODE_RCP, invproj, regoffset(src[0], 3)); + for (int i = 0; i < 3; i++) { + emit(BRW_OPCODE_MUL, regoffset(coordinate, i), + regoffset(src[0], i), invproj); + } + break; + } + case OPCODE_TXB: + ir = new(mem_ctx) ir_texture(ir_txb); + lod = regoffset(src[0], 3); + break; + default: + assert(!"not reached"); + break; + } + + const glsl_type *coordinate_type; + switch (fpi->TexSrcTarget) { + case TEXTURE_1D_INDEX: + coordinate_type = glsl_type::float_type; + break; + + case TEXTURE_2D_INDEX: + case TEXTURE_1D_ARRAY_INDEX: + case TEXTURE_RECT_INDEX: + case TEXTURE_EXTERNAL_INDEX: + coordinate_type = glsl_type::vec2_type; + break; + + case TEXTURE_3D_INDEX: + case TEXTURE_2D_ARRAY_INDEX: + coordinate_type = glsl_type::vec3_type; + break; + + case TEXTURE_CUBE_INDEX: { + coordinate_type = glsl_type::vec3_type; + + fs_reg temp = fs_reg(this, glsl_type::float_type); + fs_reg cubecoord = fs_reg(this, glsl_type::vec3_type); + fs_reg abscoord = coordinate; + abscoord.negate = false; + abscoord.abs = true; + emit_minmax(BRW_CONDITIONAL_GE, temp, + regoffset(abscoord, 0), regoffset(abscoord, 1)); + emit_minmax(BRW_CONDITIONAL_GE, temp, + temp, regoffset(abscoord, 2)); + emit_math(SHADER_OPCODE_RCP, temp, temp); + for (int i = 0; i < 3; i++) { + emit(BRW_OPCODE_MUL, regoffset(cubecoord, i), + regoffset(coordinate, i), temp); + } + + coordinate = cubecoord; + break; + } + + default: + assert(!"not reached"); + coordinate_type = glsl_type::vec2_type; + break; + } + + ir_constant_data junk_data; + ir->coordinate = new(mem_ctx) ir_constant(coordinate_type, &junk_data); + + coordinate = rescale_texcoord(ir, coordinate, + fpi->TexSrcTarget == TEXTURE_RECT_INDEX, + fpi->TexSrcUnit, fpi->TexSrcUnit); + + if (fpi->TexShadow) { + shadow_c = regoffset(coordinate, 2); + ir->shadow_comparitor = new(mem_ctx) ir_constant(0.0f); + } + + fs_inst *inst; + if (intel->gen >= 7) { + inst = emit_texture_gen7(ir, dst, coordinate, shadow_c, lod, dpdy); + } else if (intel->gen >= 5) { + inst = emit_texture_gen5(ir, dst, coordinate, shadow_c, lod, dpdy); + } else { + inst = emit_texture_gen4(ir, dst, coordinate, shadow_c, lod, dpdy); + } + + inst->sampler = fpi->TexSrcUnit; + inst->shadow_compare = fpi->TexShadow; + + /* Reuse the GLSL swizzle_result() handler. */ + swizzle_result(ir, dst, fpi->TexSrcUnit); + dst = this->result; + + break; + } + + case OPCODE_SWZ: + /* Note that SWZ's extended swizzles are handled in the general + * get_src_reg() code. + */ + emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]); + break; + + case OPCODE_XPD: + for (int i = 0; i < 3; i++) { + if (fpi->DstReg.WriteMask & (1 << i)) { + int i1 = (i + 1) % 3; + int i2 = (i + 2) % 3; + + fs_reg temp = fs_reg(this, glsl_type::float_type); + fs_reg neg_src1_1 = regoffset(src[1], i1); + neg_src1_1.negate = !neg_src1_1.negate; + emit(BRW_OPCODE_MUL, temp, + regoffset(src[0], i2), neg_src1_1); + emit(BRW_OPCODE_MUL, regoffset(dst, i), + regoffset(src[0], i1), regoffset(src[1], i2)); + emit(BRW_OPCODE_ADD, regoffset(dst, i), + regoffset(dst, i), temp); + } + } + break; + + case OPCODE_END: + break; + + default: + _mesa_problem(ctx, "Unsupported opcode %s in fragment program\n", + _mesa_opcode_string(fpi->Opcode)); + } + + /* To handle saturates, we emit a MOV with a saturate bit, which + * optimization should fold into the preceding instructions when safe. + */ + if (fpi->Opcode != OPCODE_END) { + fs_reg real_dst = get_fp_dst_reg(&fpi->DstReg); + + for (int i = 0; i < 4; i++) { + if (fpi->DstReg.WriteMask & (1 << i)) { + fs_inst *inst = emit(BRW_OPCODE_MOV, + regoffset(real_dst, i), + regoffset(dst, i)); + inst->saturate = fpi->SaturateMode; + } + } + } + } + + /* Epilogue: + * + * Fragment depth has this strange convention of being the .z component of + * a vec4. emit_fb_write() wants to see a float value, instead. + */ + this->current_annotation = "result.depth write"; + if (frag_depth.file != BAD_FILE) { + fs_reg temp = fs_reg(this, glsl_type::float_type); + emit(BRW_OPCODE_MOV, temp, regoffset(frag_depth, 2)); + frag_depth = temp; + } +} + +void +fs_visitor::setup_fp_regs() +{ + /* PROGRAM_TEMPORARY */ + int num_temp = fp->Base.NumTemporaries; + fp_temp_regs = rzalloc_array(mem_ctx, fs_reg, num_temp); + for (int i = 0; i < num_temp; i++) + fp_temp_regs[i] = fs_reg(this, glsl_type::vec4_type); + + /* PROGRAM_STATE_VAR, PROGRAM_NAMED_PARAM, etc. */ + if (c->dispatch_width == 8) { + for (unsigned p = 0; + p < c->fp->program.Base.Parameters->NumParameters; p++) { + for (unsigned int i = 0; i < 4; i++) { + this->param_index[c->prog_data.nr_params] = p; + this->param_offset[c->prog_data.nr_params] = i; + c->prog_data.nr_params++; + } + } + } + + fp_input_regs = rzalloc_array(mem_ctx, fs_reg, FRAG_ATTRIB_MAX); + for (int i = 0; i < FRAG_ATTRIB_MAX; i++) { + if (fp->Base.InputsRead & BITFIELD64_BIT(i)) { + /* Make up a dummy instruction to reuse code for emitting + * interpolation. + */ + ir_variable *ir = new(mem_ctx) ir_variable(glsl_type::vec4_type, + "fp_input", + ir_var_in); + ir->location = i; + + this->current_annotation = ralloc_asprintf(ctx, "interpolate input %d", + i); + + switch (i) { + case FRAG_ATTRIB_WPOS: + ir->pixel_center_integer = fp->PixelCenterInteger; + ir->origin_upper_left = fp->OriginUpperLeft; + fp_input_regs[i] = *emit_fragcoord_interpolation(ir); + break; + case FRAG_ATTRIB_FACE: + fp_input_regs[i] = *emit_frontfacing_interpolation(ir); + break; + default: + fp_input_regs[i] = *emit_general_interpolation(ir); + + if (i == FRAG_ATTRIB_FOGC) { + emit(BRW_OPCODE_MOV, + regoffset(fp_input_regs[i], 1), fs_reg(0.0f)); + emit(BRW_OPCODE_MOV, + regoffset(fp_input_regs[i], 2), fs_reg(0.0f)); + emit(BRW_OPCODE_MOV, + regoffset(fp_input_regs[i], 3), fs_reg(1.0f)); + } + + break; + } + + this->current_annotation = NULL; + } + } +} + +fs_reg +fs_visitor::get_fp_dst_reg(const prog_dst_register *dst) +{ + switch (dst->File) { + case PROGRAM_TEMPORARY: + return fp_temp_regs[dst->Index]; + + case PROGRAM_OUTPUT: + if (dst->Index == FRAG_RESULT_DEPTH) { + if (frag_depth.file == BAD_FILE) + frag_depth = fs_reg(this, glsl_type::vec4_type); + return frag_depth; + } else if (dst->Index == FRAG_RESULT_COLOR) { + if (outputs[0].file == BAD_FILE) { + outputs[0] = fs_reg(this, glsl_type::vec4_type); + output_components[0] = 4; + + /* Tell emit_fb_writes() to smear fragment.color across all the + * color attachments. + */ + for (int i = 1; i < c->key.nr_color_regions; i++) { + outputs[i] = outputs[0]; + output_components[i] = output_components[0]; + } + } + return outputs[0]; + } else { + int output_index = dst->Index - FRAG_RESULT_DATA0; + if (outputs[output_index].file == BAD_FILE) { + outputs[output_index] = fs_reg(this, glsl_type::vec4_type); + } + output_components[output_index] = 4; + return outputs[output_index]; + } + + case PROGRAM_UNDEFINED: + return fs_reg(); + + default: + _mesa_problem(ctx, "bad dst register file: %s\n", + _mesa_register_file_name((gl_register_file)dst->File)); + return fs_reg(this, glsl_type::vec4_type); + } +} + +fs_reg +fs_visitor::get_fp_src_reg(const prog_src_register *src) +{ + struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters; + + fs_reg result; + + assert(!src->Abs); + + switch (src->File) { + case PROGRAM_UNDEFINED: + return fs_reg(); + case PROGRAM_TEMPORARY: + result = fp_temp_regs[src->Index]; + break; + + case PROGRAM_INPUT: + result = fp_input_regs[src->Index]; + break; + + case PROGRAM_STATE_VAR: + case PROGRAM_UNIFORM: + case PROGRAM_CONSTANT: + case PROGRAM_NAMED_PARAM: + /* We actually want to look at the type in the Parameters list for this, + * because this lets us upload constant builtin uniforms, as actual + * constants. + */ + switch (plist->Parameters[src->Index].Type) { + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: { + result = fs_reg(this, glsl_type::vec4_type); + + for (int i = 0; i < 4; i++) { + emit(BRW_OPCODE_MOV, regoffset(result, i), + fs_reg(plist->ParameterValues[src->Index][i].f)); + } + break; + } + + case PROGRAM_STATE_VAR: + case PROGRAM_UNIFORM: + result = fs_reg(UNIFORM, src->Index * 4); + break; + + default: + _mesa_problem(ctx, "bad uniform src register file: %s\n", + _mesa_register_file_name((gl_register_file)src->File)); + return fs_reg(this, glsl_type::vec4_type); + } + break; + + default: + _mesa_problem(ctx, "bad src register file: %s\n", + _mesa_register_file_name((gl_register_file)src->File)); + return fs_reg(this, glsl_type::vec4_type); + } + + if (src->Swizzle != SWIZZLE_NOOP || src->Negate) { + fs_reg unswizzled = result; + result = fs_reg(this, glsl_type::vec4_type); + for (int i = 0; i < 4; i++) { + bool negate = src->Negate & (1 << i); + /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ, + * but it costs us nothing to support it. + */ + int src_swiz = GET_SWZ(src->Swizzle, i); + if (src_swiz == SWIZZLE_ZERO) { + emit(BRW_OPCODE_MOV, regoffset(result, i), fs_reg(0.0f)); + } else if (src_swiz == SWIZZLE_ONE) { + emit(BRW_OPCODE_MOV, regoffset(result, i), + negate ? fs_reg(-1.0f) : fs_reg(1.0f)); + } else { + fs_reg src = regoffset(unswizzled, src_swiz); + if (negate) + src.negate = !src.negate; + emit(BRW_OPCODE_MOV, regoffset(result, i), src); + } + } + } + + return result; +} |