1 files changed, 784 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
new file mode 100644
index 0000000..be00f6e
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
@@ -0,0 +1,784 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file brw_fs_fp.cpp
+ *
+ * Implementation of the compiler for GL_ARB_fragment_program shaders on top
+ * of the GLSL compiler backend.
+ */
+
+#include "brw_context.h"
+#include "brw_fs.h"
+
+/* Return a copy of reg with its reg_offset advanced by i (reg is by value). */
+static fs_reg regoffset(fs_reg reg, int i)
+{
+   reg.reg_offset = reg.reg_offset + i;
+   return reg;
+}
+
+/* Emit "opcode dst.c, src.c" for each channel c enabled in fpi's write mask. */
+void
+fs_visitor::emit_fp_alu1(enum opcode opcode,
+                         const struct prog_instruction *fpi,
+                         fs_reg dst, fs_reg src)
+{
+   for (int chan = 0; chan < 4; chan++)
+      if (fpi->DstReg.WriteMask & (1 << chan))
+         emit(opcode, regoffset(dst, chan), regoffset(src, chan));
+}
+
+/* Emit "opcode dst.c, src0.c, src1.c" for each write-enabled channel c. */
+void
+fs_visitor::emit_fp_alu2(enum opcode opcode,
+                         const struct prog_instruction *fpi,
+                         fs_reg dst, fs_reg src0, fs_reg src1)
+{
+   for (int chan = 0; chan < 4; chan++)
+      if (fpi->DstReg.WriteMask & (1 << chan))
+         emit(opcode, regoffset(dst, chan),
+              regoffset(src0, chan), regoffset(src1, chan));
+}
+
+/* Per-channel MIN/MAX: OPCODE_MIN uses the L conditional modifier, any other
+ * opcode uses GE.  Channels outside fpi's write mask are skipped.
+ */
+void
+fs_visitor::emit_fp_minmax(const prog_instruction *fpi,
+                           fs_reg dst, fs_reg src0, fs_reg src1)
+{
+   const uint32_t cmod = (fpi->Opcode == OPCODE_MIN) ? BRW_CONDITIONAL_L
+                                                     : BRW_CONDITIONAL_GE;
+
+   for (int chan = 0; chan < 4; chan++) {
+      if (!(fpi->DstReg.WriteMask & (1 << chan)))
+         continue;
+      emit_minmax(cmod, regoffset(dst, chan),
+                  regoffset(src0, chan), regoffset(src1, chan));
+   }
+}
+
+/* Set-on-compare: per write-enabled channel, CMP src0 against src1 with
+ * conditional_mod, then a predicated SEL writes either `one` or 0.0. */
+void
+fs_visitor::emit_fp_sop(uint32_t conditional_mod,
+                        const struct prog_instruction *fpi,
+                        fs_reg dst, fs_reg src0, fs_reg src1,
+                        fs_reg one)
+{
+   for (int chan = 0; chan < 4; chan++) {
+      if (!(fpi->DstReg.WriteMask & (1 << chan)))
+         continue;
+      fs_inst *cmp = emit(BRW_OPCODE_CMP, fs_reg(brw_null_reg()),
+                          regoffset(src0, chan), regoffset(src1, chan));
+      cmp->conditional_mod = conditional_mod;
+      fs_inst *sel = emit(BRW_OPCODE_SEL, regoffset(dst, chan), one,
+                          fs_reg(0.0f));
+      sel->predicated = true;
+   }
+}
+
+/* Copy the single register src into every write-enabled channel of dst. */
+void
+fs_visitor::emit_fp_scalar_write(const struct prog_instruction *fpi,
+                                 fs_reg dst, fs_reg src)
+{
+   for (int chan = 0; chan < 4; chan++)
+      if ((fpi->DstReg.WriteMask >> chan) & 1)
+         emit(BRW_OPCODE_MOV, regoffset(dst, chan), src);
+}
+
+/* Evaluate a one-operand math opcode into a float temp, then broadcast it. */
+void
+fs_visitor::emit_fp_scalar_math(enum opcode opcode,
+      const struct prog_instruction *fpi, fs_reg dst, fs_reg src)
+{
+   fs_reg scalar_result(this, glsl_type::float_type);
+   emit_math(opcode, scalar_result, src);
+   emit_fp_scalar_write(fpi, dst, scalar_result);
+}
+
+void /* Emit backend code for every instruction of the fragment program. */
+fs_visitor::emit_fragment_program_code()
+{
+   setup_fp_regs();
+
+   fs_reg null = fs_reg(brw_null_reg()); /* dst for the CMPs emitted below */
+
+   /* Keep a reg with 1.0 around, for reuse by emit_fp_sop so that it can just
+    * be:
+    *
+    * sel.f0 dst 1.0 0.0
+    *
+    * instead of
+    *
+    * mov    dst 0.0
+    * mov.f0 dst 1.0
+    */
+   fs_reg one = fs_reg(this, glsl_type::float_type);
+   emit(BRW_OPCODE_MOV, one, fs_reg(1.0f));
+
+   for (unsigned int insn = 0; insn < fp->Base.NumInstructions; insn++) {
+      const struct prog_instruction *fpi = &fp->Base.Instructions[insn];
+      base_ir = fpi;
+
+      //_mesa_print_instruction(fpi);
+
+      fs_reg dst;
+      fs_reg src[3];
+
+      /* We always emit into a temporary destination register to avoid
+       * aliasing issues; it is copied to the real destination after the switch.
+       */
+      dst = fs_reg(this, glsl_type::vec4_type);
+
+      for (int i = 0; i < 3; i++) /* PROGRAM_UNDEFINED slots give an empty reg */
+         src[i] = get_fp_src_reg(&fpi->SrcReg[i]);
+
+      switch (fpi->Opcode) {
+      case OPCODE_ABS: /* dst = |src0|: force abs and drop any negate */
+         src[0].abs = true;
+         src[0].negate = false;
+         emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
+         break;
+
+      case OPCODE_ADD: /* dst = src0 + src1 */
+         emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], src[1]);
+         break;
+
+      case OPCODE_CMP: /* dst = (src0 < 0) ? src1 : src2, per channel */
+         for (int i = 0; i < 4; i++) {
+            if (fpi->DstReg.WriteMask & (1 << i)) {
+               fs_inst *inst;
+
+               inst = emit(BRW_OPCODE_CMP, null,
+                           regoffset(src[0], i), fs_reg(0.0f));
+               inst->conditional_mod = BRW_CONDITIONAL_L;
+
+               inst = emit(BRW_OPCODE_SEL, regoffset(dst, i),
+                           regoffset(src[1], i), regoffset(src[2], i));
+               inst->predicated = true;
+            }
+         }
+         break;
+
+      case OPCODE_COS:
+         emit_fp_scalar_math(SHADER_OPCODE_COS, fpi, dst, src[0]);
+         break;
+
+      case OPCODE_DP2:
+      case OPCODE_DP3:
+      case OPCODE_DP4:
+      case OPCODE_DPH: { /* dot products: acc accumulates, mul is scratch */
+         fs_reg mul = fs_reg(this, glsl_type::float_type);
+         fs_reg acc = fs_reg(this, glsl_type::float_type);
+         int count; /* number of leading components multiplied together */
+
+         switch (fpi->Opcode) {
+         case OPCODE_DP2: count = 2; break;
+         case OPCODE_DP3: count = 3; break;
+         case OPCODE_DP4: count = 4; break;
+         case OPCODE_DPH: count = 3; break; /* src1.w is added separately */
+         default: assert(!"not reached"); count = 0; break;
+         }
+
+         emit(BRW_OPCODE_MUL, acc,
+              regoffset(src[0], 0), regoffset(src[1], 0));
+         for (int i = 1; i < count; i++) {
+            emit(BRW_OPCODE_MUL, mul,
+                 regoffset(src[0], i), regoffset(src[1], i));
+            emit(BRW_OPCODE_ADD, acc, acc, mul);
+         }
+
+         if (fpi->Opcode == OPCODE_DPH) /* homogeneous term: + src1.w */
+            emit(BRW_OPCODE_ADD, acc, acc, regoffset(src[1], 3));
+
+         emit_fp_scalar_write(fpi, dst, acc);
+         break;
+      }
+
+      case OPCODE_DST: /* dst = (1, src0.y * src1.y, src0.z, src1.w) */
+         if (fpi->DstReg.WriteMask & WRITEMASK_X)
+            emit(BRW_OPCODE_MOV, dst, fs_reg(1.0f));
+         if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
+            emit(BRW_OPCODE_MUL, regoffset(dst, 1),
+                 regoffset(src[0], 1), regoffset(src[1], 1));
+         }
+         if (fpi->DstReg.WriteMask & WRITEMASK_Z)
+            emit(BRW_OPCODE_MOV, regoffset(dst, 2), regoffset(src[0], 2));
+         if (fpi->DstReg.WriteMask & WRITEMASK_W)
+            emit(BRW_OPCODE_MOV, regoffset(dst, 3), regoffset(src[1], 3));
+         break;
+
+      case OPCODE_EX2:
+         emit_fp_scalar_math(SHADER_OPCODE_EXP2, fpi, dst, src[0]);
+         break;
+
+      case OPCODE_FLR:
+         emit_fp_alu1(BRW_OPCODE_RNDD, fpi, dst, src[0]);
+         break;
+
+      case OPCODE_FRC:
+         emit_fp_alu1(BRW_OPCODE_FRC, fpi, dst, src[0]);
+         break;
+
+      case OPCODE_KIL: { /* discard when any tested component is < 0 */
+         for (int i = 0; i < 4; i++) {
+            /* In most cases the argument to a KIL will be something like
+             * TEMP[0].wwww, so there's no point in checking whether .w is < 0
+             * 4 times in a row.
+             */
+            if (i > 0 &&
+                GET_SWZ(fpi->SrcReg[0].Swizzle, i) ==
+                GET_SWZ(fpi->SrcReg[0].Swizzle, i - 1) &&
+                ((fpi->SrcReg[0].Negate >> i) & 1) ==
+                ((fpi->SrcReg[0].Negate >> (i - 1)) & 1)) {
+               continue;
+            }
+
+            fs_inst *inst = emit(BRW_OPCODE_CMP, null,
+                                 regoffset(src[0], i), 0.0f);
+            inst->conditional_mod = BRW_CONDITIONAL_L;
+
+            inst = emit(BRW_OPCODE_IF); /* predicated on the CMP just emitted */
+            inst->predicated = true;
+            emit(FS_OPCODE_DISCARD);
+            emit(BRW_OPCODE_ENDIF);
+         }
+         break;
+      }
+
+      case OPCODE_LG2:
+         emit_fp_scalar_math(SHADER_OPCODE_LOG2, fpi, dst, src[0]);
+         break;
+
+      case OPCODE_LIT:
+         /* From the ARB_fragment_program spec:
+          *
+          *      tmp = VectorLoad(op0);
+          *      if (tmp.x < 0) tmp.x = 0;
+          *      if (tmp.y < 0) tmp.y = 0;
+          *      if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
+          *      else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
+          *      result.x = 1.0;
+          *      result.y = tmp.x;
+          *      result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
+          *      result.w = 1.0;
+          *
+          * Note that we don't do the clamping to +/- 128.  We didn't in
+          * brw_wm_emit.c either.
+          */
+         if (fpi->DstReg.WriteMask & WRITEMASK_X)
+            emit(BRW_OPCODE_MOV, regoffset(dst, 0), fs_reg(1.0f));
+
+         if (fpi->DstReg.WriteMask & WRITEMASK_YZ) {
+            fs_inst *inst;
+            inst = emit(BRW_OPCODE_CMP, null,
+                        regoffset(src[0], 0), fs_reg(0.0f));
+            inst->conditional_mod = BRW_CONDITIONAL_LE; /* src0.x <= 0 */
+
+            if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
+               emit(BRW_OPCODE_MOV, regoffset(dst, 1), regoffset(src[0], 0));
+               inst = emit(BRW_OPCODE_MOV, regoffset(dst, 1), fs_reg(0.0f));
+               inst->predicated = true; /* overwrite with 0 when src0.x <= 0 */
+            }
+
+            if (fpi->DstReg.WriteMask & WRITEMASK_Z) {
+               emit_math(SHADER_OPCODE_POW, regoffset(dst, 2),
+                         regoffset(src[0], 1), regoffset(src[0], 3));
+
+               inst = emit(BRW_OPCODE_MOV, regoffset(dst, 2), fs_reg(0.0f));
+               inst->predicated = true; /* overwrite with 0 when src0.x <= 0 */
+            }
+         }
+
+         if (fpi->DstReg.WriteMask & WRITEMASK_W)
+            emit(BRW_OPCODE_MOV, regoffset(dst, 3), fs_reg(1.0f));
+
+         break;
+
+      case OPCODE_LRP: /* dst = (1 - src0) * src2 + src0 * src1 */
+         for (int i = 0; i < 4; i++) {
+            if (fpi->DstReg.WriteMask & (1 << i)) {
+               fs_reg neg_src0 = regoffset(src[0], i);
+               neg_src0.negate = !neg_src0.negate;
+               fs_reg temp = fs_reg(this, glsl_type::float_type);
+               fs_reg temp2 = fs_reg(this, glsl_type::float_type);
+               emit(BRW_OPCODE_ADD, temp, neg_src0, fs_reg(1.0f));
+               emit(BRW_OPCODE_MUL, temp, temp, regoffset(src[2], i));
+               emit(BRW_OPCODE_MUL, temp2,
+                    regoffset(src[0], i), regoffset(src[1], i));
+               emit(BRW_OPCODE_ADD, regoffset(dst, i), temp, temp2);
+            }
+         }
+         break;
+
+      case OPCODE_MAD: /* dst = src0 * src1 + src2 */
+         for (int i = 0; i < 4; i++) {
+            if (fpi->DstReg.WriteMask & (1 << i)) {
+               fs_reg temp = fs_reg(this, glsl_type::float_type);
+               emit(BRW_OPCODE_MUL, temp,
+                    regoffset(src[0], i), regoffset(src[1], i));
+               emit(BRW_OPCODE_ADD, regoffset(dst, i),
+                    temp, regoffset(src[2], i));
+            }
+         }
+         break;
+
+      case OPCODE_MAX:
+         emit_fp_minmax(fpi, dst, src[0], src[1]);
+         break;
+
+      case OPCODE_MOV:
+         emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
+         break;
+
+      case OPCODE_MIN:
+         emit_fp_minmax(fpi, dst, src[0], src[1]);
+         break;
+
+      case OPCODE_MUL:
+         emit_fp_alu2(BRW_OPCODE_MUL, fpi, dst, src[0], src[1]);
+         break;
+
+      case OPCODE_POW: {
+         fs_reg temp = fs_reg(this, glsl_type::float_type);
+         emit_math(SHADER_OPCODE_POW, temp, src[0], src[1]);
+         emit_fp_scalar_write(fpi, dst, temp);
+         break;
+      }
+
+      case OPCODE_RCP:
+         emit_fp_scalar_math(SHADER_OPCODE_RCP, fpi, dst, src[0]);
+         break;
+
+      case OPCODE_RSQ:
+         emit_fp_scalar_math(SHADER_OPCODE_RSQ, fpi, dst, src[0]);
+         break;
+
+      case OPCODE_SCS: /* dst.x = cos(src0.x), dst.y = sin(src0.y) */
+         if (fpi->DstReg.WriteMask & WRITEMASK_X) {
+            emit_math(SHADER_OPCODE_COS, regoffset(dst, 0),
+                      regoffset(src[0], 0));
+         }
+
+         if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
+            emit_math(SHADER_OPCODE_SIN, regoffset(dst, 1),
+                      regoffset(src[0], 1));
+         }
+         break;
+
+      case OPCODE_SGE:
+         emit_fp_sop(BRW_CONDITIONAL_GE, fpi, dst, src[0], src[1], one);
+         break;
+
+      case OPCODE_SIN:
+         emit_fp_scalar_math(SHADER_OPCODE_SIN, fpi, dst, src[0]);
+         break;
+
+      case OPCODE_SLT:
+         emit_fp_sop(BRW_CONDITIONAL_L, fpi, dst, src[0], src[1], one);
+         break;
+
+      case OPCODE_SUB: { /* dst = src0 + (-src1) */
+         fs_reg neg_src1 = src[1];
+         neg_src1.negate = !src[1].negate;
+
+         emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], neg_src1);
+         break;
+      }
+
+      case OPCODE_TEX:
+      case OPCODE_TXB:
+      case OPCODE_TXP: {
+         /* We piggy-back on the GLSL IR support for texture setup.  To do so,
+          * we have to cook up an ir_texture that has the coordinate field
+          * with appropriate type, and shadow_comparitor set or not.  All the
+          * other properties of ir_texture are passed in as arguments to the
+          * emit_texture_gen* function.
+          */
+         ir_texture *ir = NULL;
+
+         fs_reg lod;
+         fs_reg dpdy; /* never assigned here; passed through as-is */
+         fs_reg coordinate = src[0];
+         fs_reg shadow_c;
+
+         switch (fpi->Opcode) {
+         case OPCODE_TEX:
+            ir = new(mem_ctx) ir_texture(ir_tex);
+            break;
+         case OPCODE_TXP: { /* projective: coordinate = src0.xyz / src0.w */
+            ir = new(mem_ctx) ir_texture(ir_tex);
+
+            coordinate = fs_reg(this, glsl_type::vec3_type);
+            fs_reg invproj = fs_reg(this, glsl_type::float_type);
+            emit_math(SHADER_OPCODE_RCP, invproj, regoffset(src[0], 3));
+            for (int i = 0; i < 3; i++) {
+               emit(BRW_OPCODE_MUL, regoffset(coordinate, i),
+                    regoffset(src[0], i), invproj);
+            }
+            break;
+         }
+         case OPCODE_TXB:
+            ir = new(mem_ctx) ir_texture(ir_txb);
+            lod = regoffset(src[0], 3); /* taken from src0.w */
+            break;
+         default:
+            assert(!"not reached");
+            break;
+         }
+
+         const glsl_type *coordinate_type;
+         switch (fpi->TexSrcTarget) {
+         case TEXTURE_1D_INDEX:
+            coordinate_type = glsl_type::float_type;
+            break;
+
+         case TEXTURE_2D_INDEX:
+         case TEXTURE_1D_ARRAY_INDEX:
+         case TEXTURE_RECT_INDEX:
+         case TEXTURE_EXTERNAL_INDEX:
+            coordinate_type = glsl_type::vec2_type;
+            break;
+
+         case TEXTURE_3D_INDEX:
+         case TEXTURE_2D_ARRAY_INDEX:
+            coordinate_type = glsl_type::vec3_type;
+            break;
+
+         case TEXTURE_CUBE_INDEX: { /* scale xyz by 1 / max(|x|, |y|, |z|) */
+            coordinate_type = glsl_type::vec3_type;
+
+            fs_reg temp = fs_reg(this, glsl_type::float_type);
+            fs_reg cubecoord = fs_reg(this, glsl_type::vec3_type);
+            fs_reg abscoord = coordinate;
+            abscoord.negate = false;
+            abscoord.abs = true;
+            emit_minmax(BRW_CONDITIONAL_GE, temp,
+                        regoffset(abscoord, 0), regoffset(abscoord, 1));
+            emit_minmax(BRW_CONDITIONAL_GE, temp,
+                        temp, regoffset(abscoord, 2));
+            emit_math(SHADER_OPCODE_RCP, temp, temp);
+            for (int i = 0; i < 3; i++) {
+               emit(BRW_OPCODE_MUL, regoffset(cubecoord, i),
+                    regoffset(coordinate, i), temp);
+            }
+
+            coordinate = cubecoord;
+            break;
+         }
+
+         default:
+            assert(!"not reached");
+            coordinate_type = glsl_type::vec2_type;
+            break;
+         }
+
+         ir_constant_data junk_data; /* NOTE(review): left uninitialized */
+         ir->coordinate = new(mem_ctx) ir_constant(coordinate_type, &junk_data);
+
+         coordinate = rescale_texcoord(ir, coordinate,
+                                       fpi->TexSrcTarget == TEXTURE_RECT_INDEX,
+                                       fpi->TexSrcUnit, fpi->TexSrcUnit);
+
+         if (fpi->TexShadow) { /* comparison value is coordinate.z */
+            shadow_c = regoffset(coordinate, 2);
+            ir->shadow_comparitor = new(mem_ctx) ir_constant(0.0f);
+         }
+
+         fs_inst *inst;
+         if (intel->gen >= 7) {
+            inst = emit_texture_gen7(ir, dst, coordinate, shadow_c, lod, dpdy);
+         } else if (intel->gen >= 5) {
+            inst = emit_texture_gen5(ir, dst, coordinate, shadow_c, lod, dpdy);
+         } else {
+            inst = emit_texture_gen4(ir, dst, coordinate, shadow_c, lod, dpdy);
+         }
+
+         inst->sampler = fpi->TexSrcUnit;
+         inst->shadow_compare = fpi->TexShadow;
+
+         /* Reuse the GLSL swizzle_result() handler. */
+         swizzle_result(ir, dst, fpi->TexSrcUnit);
+         dst = this->result; /* the swizzled value is read back from result */
+
+         break;
+      }
+
+      case OPCODE_SWZ:
+         /* Note that SWZ's extended swizzles are handled in the general
+          * get_fp_src_reg() code.
+          */
+         emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
+         break;
+
+      case OPCODE_XPD: /* dst.xyz = src0 x src1; the loop never writes .w */
+         for (int i = 0; i < 3; i++) {
+            if (fpi->DstReg.WriteMask & (1 << i)) {
+               int i1 = (i + 1) % 3;
+               int i2 = (i + 2) % 3;
+
+               fs_reg temp = fs_reg(this, glsl_type::float_type);
+               fs_reg neg_src1_1 = regoffset(src[1], i1);
+               neg_src1_1.negate = !neg_src1_1.negate;
+               emit(BRW_OPCODE_MUL, temp,
+                    regoffset(src[0], i2), neg_src1_1);
+               emit(BRW_OPCODE_MUL, regoffset(dst, i),
+                    regoffset(src[0], i1), regoffset(src[1], i2));
+               emit(BRW_OPCODE_ADD, regoffset(dst, i),
+                    regoffset(dst, i), temp);
+            }
+         }
+         break;
+
+      case OPCODE_END: /* nothing to emit */
+         break;
+
+      default:
+         _mesa_problem(ctx, "Unsupported opcode %s in fragment program\n",
+                       _mesa_opcode_string(fpi->Opcode));
+      }
+
+      /* To handle saturates, we emit a MOV with a saturate bit, which
+       * optimization should fold into the preceding instructions when safe.
+       */
+      if (fpi->Opcode != OPCODE_END) {
+         fs_reg real_dst = get_fp_dst_reg(&fpi->DstReg);
+
+         for (int i = 0; i < 4; i++) { /* copy the temp to the real dst */
+            if (fpi->DstReg.WriteMask & (1 << i)) {
+               fs_inst *inst = emit(BRW_OPCODE_MOV,
+                                    regoffset(real_dst, i),
+                                    regoffset(dst, i));
+               inst->saturate = fpi->SaturateMode;
+            }
+         }
+      }
+   }
+
+   /* Epilogue:
+    *
+    * Fragment depth has this strange convention of being the .z component of
+    * a vec4.  emit_fb_write() wants to see a float value, instead.
+    */
+   this->current_annotation = "result.depth write";
+   if (frag_depth.file != BAD_FILE) { /* a depth register was allocated */
+      fs_reg temp = fs_reg(this, glsl_type::float_type);
+      emit(BRW_OPCODE_MOV, temp, regoffset(frag_depth, 2));
+      frag_depth = temp;
+   }
+}
+
+/* Allocate the fs_regs backing program temporaries and inputs, and (for
+ * 8-wide dispatch only) add one param slot per parameter channel.
+ */
+void
+fs_visitor::setup_fp_regs()
+{
+   /* PROGRAM_TEMPORARY: one vec4 register per temporary. */
+   int num_temp = fp->Base.NumTemporaries;
+   fp_temp_regs = rzalloc_array(mem_ctx, fs_reg, num_temp);
+   for (int i = 0; i < num_temp; i++)
+      fp_temp_regs[i] = fs_reg(this, glsl_type::vec4_type);
+
+   /* PROGRAM_STATE_VAR, PROGRAM_NAMED_PARAM, etc. */
+   if (c->dispatch_width == 8) {
+      for (unsigned p = 0;
+           p < c->fp->program.Base.Parameters->NumParameters; p++) {
+         for (unsigned int i = 0; i < 4; i++) {
+            this->param_index[c->prog_data.nr_params] = p;
+            this->param_offset[c->prog_data.nr_params] = i;
+            c->prog_data.nr_params++;
+         }
+      }
+   }
+
+   fp_input_regs = rzalloc_array(mem_ctx, fs_reg, FRAG_ATTRIB_MAX);
+   for (int i = 0; i < FRAG_ATTRIB_MAX; i++) {
+      if (fp->Base.InputsRead & BITFIELD64_BIT(i)) {
+         /* Make up a dummy variable to reuse the interpolation emit code. */
+         ir_variable *ir = new(mem_ctx) ir_variable(glsl_type::vec4_type,
+                                                    "fp_input", ir_var_in);
+         ir->location = i;
+         /* Allocate the annotation from mem_ctx, like every other allocation
+          * in this function, rather than parenting it to the GL context. */
+         this->current_annotation =
+            ralloc_asprintf(mem_ctx, "interpolate input %d", i);
+
+         switch (i) {
+         case FRAG_ATTRIB_WPOS:
+            ir->pixel_center_integer = fp->PixelCenterInteger;
+            ir->origin_upper_left = fp->OriginUpperLeft;
+            fp_input_regs[i] = *emit_fragcoord_interpolation(ir);
+            break;
+         case FRAG_ATTRIB_FACE:
+            fp_input_regs[i] = *emit_frontfacing_interpolation(ir);
+            break;
+         default:
+            fp_input_regs[i] = *emit_general_interpolation(ir);
+
+            if (i == FRAG_ATTRIB_FOGC) { /* override fog's yzw with (0, 0, 1) */
+               emit(BRW_OPCODE_MOV,
+                    regoffset(fp_input_regs[i], 1), fs_reg(0.0f));
+               emit(BRW_OPCODE_MOV,
+                    regoffset(fp_input_regs[i], 2), fs_reg(0.0f));
+               emit(BRW_OPCODE_MOV,
+                    regoffset(fp_input_regs[i], 3), fs_reg(1.0f));
+            }
+
+            break;
+         }
+
+         this->current_annotation = NULL;
+      }
+   }
+}
+
+/* Resolve a Mesa IR destination to its fs_reg, creating the depth and color
+ * output registers on first use. */
+fs_reg
+fs_visitor::get_fp_dst_reg(const prog_dst_register *dst)
+{
+   if (dst->File == PROGRAM_TEMPORARY)
+      return fp_temp_regs[dst->Index];
+   if (dst->File == PROGRAM_UNDEFINED)
+      return fs_reg();
+
+   if (dst->File != PROGRAM_OUTPUT) {
+      _mesa_problem(ctx, "bad dst register file: %s\n",
+                    _mesa_register_file_name((gl_register_file)dst->File));
+      return fs_reg(this, glsl_type::vec4_type);
+   }
+
+   if (dst->Index == FRAG_RESULT_DEPTH) {
+      if (frag_depth.file == BAD_FILE)
+         frag_depth = fs_reg(this, glsl_type::vec4_type);
+      return frag_depth;
+   }
+
+   if (dst->Index != FRAG_RESULT_COLOR) {
+      const int output_index = dst->Index - FRAG_RESULT_DATA0;
+      if (outputs[output_index].file == BAD_FILE)
+         outputs[output_index] = fs_reg(this, glsl_type::vec4_type);
+      output_components[output_index] = 4;
+      return outputs[output_index];
+   }
+
+   if (outputs[0].file == BAD_FILE) {
+      outputs[0] = fs_reg(this, glsl_type::vec4_type);
+      output_components[0] = 4;
+
+      /* Point every other color region at the same register so that
+       * emit_fb_writes() smears fragment.color across all the attachments.
+       */
+      for (int rt = 1; rt < c->key.nr_color_regions; rt++) {
+         outputs[rt] = outputs[0];
+         output_components[rt] = output_components[0];
+      }
+   }
+   return outputs[0];
+}
+
+/* Translate a Mesa IR source operand into an fs_reg.  Constants are loaded
+ * with MOVs, state/uniform parameters become UNIFORM registers, and any
+ * swizzle or negate is applied by copying into a fresh vec4 temporary.
+ */
+fs_reg
+fs_visitor::get_fp_src_reg(const prog_src_register *src)
+{
+   struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters;
+   fs_reg result;
+
+   assert(!src->Abs);
+
+   switch (src->File) {
+   case PROGRAM_UNDEFINED:
+      return fs_reg();
+   case PROGRAM_TEMPORARY:
+      result = fp_temp_regs[src->Index];
+      break;
+
+   case PROGRAM_INPUT:
+      result = fp_input_regs[src->Index];
+      break;
+
+   case PROGRAM_STATE_VAR:
+   case PROGRAM_UNIFORM:
+   case PROGRAM_CONSTANT:
+   case PROGRAM_NAMED_PARAM:
+      /* Dispatch on the type recorded in the Parameters list rather than on
+       * src->File, so that constant builtin uniforms are uploaded as constants.
+       */
+      switch (plist->Parameters[src->Index].Type) {
+      case PROGRAM_NAMED_PARAM:
+      case PROGRAM_CONSTANT: {
+         result = fs_reg(this, glsl_type::vec4_type);
+
+         for (int i = 0; i < 4; i++) {
+            emit(BRW_OPCODE_MOV, regoffset(result, i),
+                 fs_reg(plist->ParameterValues[src->Index][i].f));
+         }
+         break;
+      }
+
+      case PROGRAM_STATE_VAR:
+      case PROGRAM_UNIFORM:
+         result = fs_reg(UNIFORM, src->Index * 4); /* 4 slots per parameter */
+         break;
+
+      default: /* report the parameter's Type: src->File already matched */
+         _mesa_problem(ctx, "bad uniform src register file: %s\n",
+                       _mesa_register_file_name((gl_register_file)
+                          plist->Parameters[src->Index].Type));
+         return fs_reg(this, glsl_type::vec4_type);
+      }
+      break;
+
+   default:
+      _mesa_problem(ctx, "bad src register file: %s\n",
+                    _mesa_register_file_name((gl_register_file)src->File));
+      return fs_reg(this, glsl_type::vec4_type);
+   }
+
+   if (src->Swizzle != SWIZZLE_NOOP || src->Negate) {
+      fs_reg unswizzled = result;
+      result = fs_reg(this, glsl_type::vec4_type);
+      for (int i = 0; i < 4; i++) {
+         bool negate = src->Negate & (1 << i);
+         /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
+          * but it costs us nothing to support it.
+          */
+         int src_swiz = GET_SWZ(src->Swizzle, i);
+         if (src_swiz == SWIZZLE_ZERO) {
+            emit(BRW_OPCODE_MOV, regoffset(result, i), fs_reg(0.0f));
+         } else if (src_swiz == SWIZZLE_ONE) {
+            emit(BRW_OPCODE_MOV, regoffset(result, i),
+                 negate ? fs_reg(-1.0f) : fs_reg(1.0f));
+         } else {
+            fs_reg chan_src = regoffset(unswizzled, src_swiz);
+            if (negate)
+               chan_src.negate = !chan_src.negate;
+            emit(BRW_OPCODE_MOV, regoffset(result, i), chan_src);
+         }
+      }
+   }
+
+   return result;
+}