i965/fs: Make register spill/unspill only do the regs for that instruction.

Previously, if we were spilling the result of a texture call, we would store all 4 regs, and then, for each use of one of those regs as the source of an instruction, we would unspill all 4 regs even though only one was needed.

In both lightsmark and l4d2 with my current graphics config, the shaders that produce spilling do so on split GRFs, so this doesn't help them out. However, in a capture of the l4d2 shaders with a different snapshot and playing the game instead of using a demo, it reduced one shader from 2817 instructions to 2179, due to choosing a now-cheaper texture result to spill instead of piles of texcoords.

v2: Fix comment noted by Ken, and fix the if condition associated with it for the current state of what constitutes a partial write of the destination.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> (v1)
author: Eric Anholt <eric@anholt.net> 2012-07-06 17:18:35 -0700
committer: Eric Anholt <eric@anholt.net> 2012-07-18 12:30:06 -0700
commit: a40c1f95229915214be061fbbf9a02e5225fbf01 (patch)
tree: 2797b0d558c8f00054c47cf3e3c74ed24d0b3aa7
parent: a454f8ec6df9334df42249be910cc2d57d913bff (diff)
download: external_mesa3d-a40c1f95229915214be061fbbf9a02e5225fbf01.zip
external_mesa3d-a40c1f95229915214be061fbbf9a02e5225fbf01.tar.gz
external_mesa3d-a40c1f95229915214be061fbbf9a02e5225fbf01.tar.bz2
1 files changed, 33 insertions, 33 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 3f10ca6..7618047 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -281,24 +281,17 @@ fs_visitor::assign_regs()
 void
 fs_visitor::emit_unspill(fs_inst *inst, fs_reg dst, uint32_t spill_offset)
 {
-   int size = virtual_grf_sizes[dst.reg];
-   dst.reg_offset = 0;
-
-   for (int chan = 0; chan < size; chan++) {
-      fs_inst *unspill_inst = new(mem_ctx) fs_inst(FS_OPCODE_UNSPILL,
-						   dst);
-      dst.reg_offset++;
-      unspill_inst->offset = spill_offset + chan * REG_SIZE;
-      unspill_inst->ir = inst->ir;
-      unspill_inst->annotation = inst->annotation;
-
-      /* Choose a MRF that won't conflict with an MRF that's live across the
-       * spill.  Nothing else will make it up to MRF 14/15.
-       */
-      unspill_inst->base_mrf = 14;
-      unspill_inst->mlen = 1; /* header contains offset */
-      inst->insert_before(unspill_inst);
-   }
+   fs_inst *unspill_inst = new(mem_ctx) fs_inst(FS_OPCODE_UNSPILL, dst);
+   unspill_inst->offset = spill_offset;
+   unspill_inst->ir = inst->ir;
+   unspill_inst->annotation = inst->annotation;
+
+   /* Choose a MRF that won't conflict with an MRF that's live across the
+    * spill.  Nothing else will make it up to MRF 14/15.
+    */
+   unspill_inst->base_mrf = 14;
+   unspill_inst->mlen = 1; /* header contains offset */
+   inst->insert_before(unspill_inst);
 }
 
 int
@@ -322,14 +315,12 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
 
       for (unsigned int i = 0; i < 3; i++) {
 	 if (inst->src[i].file == GRF) {
-	    int size = virtual_grf_sizes[inst->src[i].reg];
-	    spill_costs[inst->src[i].reg] += size * loop_scale;
+	    spill_costs[inst->src[i].reg] += loop_scale;
 	 }
       }
 
       if (inst->dst.file == GRF) {
-	 int size = virtual_grf_sizes[inst->dst.reg];
-	 spill_costs[inst->dst.reg] += size * loop_scale;
+	 spill_costs[inst->dst.reg] += inst->regs_written() * loop_scale;
       }
 
       switch (inst->opcode) {
@@ -384,21 +375,30 @@ fs_visitor::spill_reg(int spill_reg)
       for (unsigned int i = 0; i < 3; i++) {
 	 if (inst->src[i].file == GRF &&
 	     inst->src[i].reg == spill_reg) {
-	    inst->src[i].reg = virtual_grf_alloc(size);
-	    emit_unspill(inst, inst->src[i], spill_offset);
+	    inst->src[i].reg = virtual_grf_alloc(1);
+	    emit_unspill(inst, inst->src[i],
+                         spill_offset + REG_SIZE * inst->src[i].reg_offset);
 	 }
       }
 
       if (inst->dst.file == GRF &&
 	  inst->dst.reg == spill_reg) {
-	 inst->dst.reg = virtual_grf_alloc(size);
-
-	 /* Since we spill/unspill the whole thing even if we access
-	  * just a component, we may need to unspill before the
-	  * instruction we're spilling for.
+         int subset_spill_offset = (spill_offset +
+                                    REG_SIZE * inst->dst.reg_offset);
+         inst->dst.reg = virtual_grf_alloc(inst->regs_written());
+         inst->dst.reg_offset = 0;
+
+	 /* If our write is going to affect just part of the
+          * inst->regs_written(), then we need to unspill the destination
+          * since we write back out all of the regs_written().
 	  */
-	 if (size != 1 || inst->predicated) {
-	    emit_unspill(inst, inst->dst, spill_offset);
+	 if (inst->predicated || inst->force_uncompressed || inst->force_sechalf) {
+            fs_reg unspill_reg = inst->dst;
+            for (int chan = 0; chan < inst->regs_written(); chan++) {
+               emit_unspill(inst, unspill_reg,
+                            subset_spill_offset + REG_SIZE * chan);
+               unspill_reg.reg_offset++;
+            }
 	 }
 
 	 fs_reg spill_src = inst->dst;
@@ -407,11 +407,11 @@ fs_visitor::spill_reg(int spill_reg)
 	 spill_src.negate = false;
 	 spill_src.smear = -1;
 
-	 for (int chan = 0; chan < size; chan++) {
+	 for (int chan = 0; chan < inst->regs_written(); chan++) {
 	    fs_inst *spill_inst = new(mem_ctx) fs_inst(FS_OPCODE_SPILL,
 						       reg_null_f, spill_src);
 	    spill_src.reg_offset++;
-	    spill_inst->offset = spill_offset + chan * REG_SIZE;
+	    spill_inst->offset = subset_spill_offset + chan * REG_SIZE;
 	    spill_inst->ir = inst->ir;
 	    spill_inst->annotation = inst->annotation;
 	    spill_inst->base_mrf = 14;
author	Eric Anholt <eric@anholt.net>	2012-07-06 17:18:35 -0700
committer	Eric Anholt <eric@anholt.net>	2012-07-18 12:30:06 -0700
commit	a40c1f95229915214be061fbbf9a02e5225fbf01 (patch)
tree	2797b0d558c8f00054c47cf3e3c74ed24d0b3aa7
parent	a454f8ec6df9334df42249be910cc2d57d913bff (diff)
download	external_mesa3d-a40c1f95229915214be061fbbf9a02e5225fbf01.zip external_mesa3d-a40c1f95229915214be061fbbf9a02e5225fbf01.tar.gz external_mesa3d-a40c1f95229915214be061fbbf9a02e5225fbf01.tar.bz2