From 8d1a01d37039a76ecbb8fdb4c10ff8d11aa02dfb Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 22 Jan 2010 15:36:28 -0700
Subject: mesa: use new fetch_vector1ui() function for 'unpack' GPU
 instructions

The UP2H, UP2US, UP4B and UP4UB instructions interpret the float
registers as integers.  With gcc -O3 some bits were getting mixed up
somewhere.  This is part of the fix for the piglit fp-unpack-01 test
failure (bug 25973).
---
 src/mesa/shader/prog_execute.c | 66 +++++++++++++++++++++++++++---------------
 1 file changed, 42 insertions(+), 24 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/shader/prog_execute.c b/src/mesa/shader/prog_execute.c
index 1ef801a..fb29768 100644
--- a/src/mesa/shader/prog_execute.c
+++ b/src/mesa/shader/prog_execute.c
@@ -341,6 +341,28 @@ fetch_vector1(const struct prog_src_register *source,
 }
 
 
+static GLuint
+fetch_vector1ui(const struct prog_src_register *source,
+                const struct gl_program_machine *machine)
+{
+   const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
+   GLuint result;
+
+   ASSERT(src);
+
+   result = src[GET_SWZ(source->Swizzle, 0)];
+
+   if (source->Abs) {
+      result = FABSF(result);
+   }
+   if (source->Negate) {
+      result = -result;
+   }
+
+   return result;
+}
+
+
 /**
  * Fetch texel from texture.  Use partial derivatives when possible.
  */
@@ -1633,12 +1655,11 @@ _mesa_execute_program(GLcontext * ctx,
          break;
       case OPCODE_UP2H:        /* unpack two 16-bit floats */
          {
-            GLfloat a[4], result[4];
-            const GLuint *rawBits = (const GLuint *) a;
+            const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
+            GLfloat result[4];
             GLhalfNV hx, hy;
-            fetch_vector1(&inst->SrcReg[0], machine, a);
-            hx = rawBits[0] & 0xffff;
-            hy = rawBits[0] >> 16;
+            hx = raw & 0xffff;
+            hy = raw >> 16;
             result[0] = result[2] = _mesa_half_to_float(hx);
             result[1] = result[3] = _mesa_half_to_float(hy);
             store_vector4(inst, machine, result);
@@ -1646,12 +1667,11 @@ _mesa_execute_program(GLcontext * ctx,
          break;
       case OPCODE_UP2US:       /* unpack two GLushorts */
          {
-            GLfloat a[4], result[4];
-            const GLuint *rawBits = (const GLuint *) a;
+            const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
+            GLfloat result[4];
             GLushort usx, usy;
-            fetch_vector1(&inst->SrcReg[0], machine, a);
-            usx = rawBits[0] & 0xffff;
-            usy = rawBits[0] >> 16;
+            usx = raw & 0xffff;
+            usy = raw >> 16;
             result[0] = result[2] = usx * (1.0f / 65535.0f);
             result[1] = result[3] = usy * (1.0f / 65535.0f);
             store_vector4(inst, machine, result);
@@ -1659,25 +1679,23 @@ _mesa_execute_program(GLcontext * ctx,
          break;
       case OPCODE_UP4B:        /* unpack four GLbytes */
          {
-            GLfloat a[4], result[4];
-            const GLuint *rawBits = (const GLuint *) a;
-            fetch_vector1(&inst->SrcReg[0], machine, a);
-            result[0] = (((rawBits[0] >> 0) & 0xff) - 128) / 127.0F;
-            result[1] = (((rawBits[0] >> 8) & 0xff) - 128) / 127.0F;
-            result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
-            result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
+            const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
+            GLfloat result[4];
+            result[0] = (((raw >> 0) & 0xff) - 128) / 127.0F;
+            result[1] = (((raw >> 8) & 0xff) - 128) / 127.0F;
+            result[2] = (((raw >> 16) & 0xff) - 128) / 127.0F;
+            result[3] = (((raw >> 24) & 0xff) - 128) / 127.0F;
             store_vector4(inst, machine, result);
          }
          break;
       case OPCODE_UP4UB:       /* unpack four GLubytes */
          {
-            GLfloat a[4], result[4];
-            const GLuint *rawBits = (const GLuint *) a;
-            fetch_vector1(&inst->SrcReg[0], machine, a);
-            result[0] = ((rawBits[0] >> 0) & 0xff) / 255.0F;
-            result[1] = ((rawBits[0] >> 8) & 0xff) / 255.0F;
-            result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
-            result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
+            const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
+            GLfloat result[4];
+            result[0] = ((raw >> 0) & 0xff) / 255.0F;
+            result[1] = ((raw >> 8) & 0xff) / 255.0F;
+            result[2] = ((raw >> 16) & 0xff) / 255.0F;
+            result[3] = ((raw >> 24) & 0xff) / 255.0F;
             store_vector4(inst, machine, result);
          }
          break;
-- 
cgit v1.1


From 099e8ec54d4f7b9cd9ae3a481d6a6ae60f4590be Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 22 Jan 2010 15:41:08 -0700
Subject: mesa: re-implement _mesa_ProgramEnvParameter4fvARB() with memcpy()

This is faster and ensures that NaN floats get stored properly.
Before, NaN values (which might be used with UP2H, UP2US, UP4B and
UP4UB) weren't getting stored properly with gcc -O3.

This is the second part of the fix for the piglit fp-unpack-01 failure
(bug 25973).
---
 src/mesa/shader/arbprogram.c | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

(limited to 'src/mesa')

diff --git a/src/mesa/shader/arbprogram.c b/src/mesa/shader/arbprogram.c
index eb537cd..3cb2610 100644
--- a/src/mesa/shader/arbprogram.c
+++ b/src/mesa/shader/arbprogram.c
@@ -561,6 +561,8 @@ _mesa_ProgramEnvParameter4fARB(GLenum target, GLuint index,
    }
 }
 
+
+
 /**
  * Set a program env parameter register.
  * \note Called from the GL API dispatcher.
@@ -569,10 +571,35 @@ _mesa_ProgramEnvParameter4fARB(GLenum target, GLuint index,
  */
 void GLAPIENTRY
 _mesa_ProgramEnvParameter4fvARB(GLenum target, GLuint index,
-                                   const GLfloat *params)
+                                const GLfloat *params)
 {
-   _mesa_ProgramEnvParameter4fARB(target, index, params[0], params[1],
-                                  params[2], params[3]);
+   GET_CURRENT_CONTEXT(ctx);
+   ASSERT_OUTSIDE_BEGIN_END(ctx);
+
+   FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
+
+   if (target == GL_FRAGMENT_PROGRAM_ARB
+       && ctx->Extensions.ARB_fragment_program) {
+      if (index >= ctx->Const.FragmentProgram.MaxEnvParams) {
+         _mesa_error(ctx, GL_INVALID_VALUE, "glProgramEnvParameter4fv(index)");
+         return;
+      }
+      memcpy(ctx->FragmentProgram.Parameters[index], params,
+             4 * sizeof(GLfloat));
+   }
+   else if (target == GL_VERTEX_PROGRAM_ARB /* == GL_VERTEX_PROGRAM_NV */
+       && (ctx->Extensions.ARB_vertex_program || ctx->Extensions.NV_vertex_program)) {
+      if (index >= ctx->Const.VertexProgram.MaxEnvParams) {
+         _mesa_error(ctx, GL_INVALID_VALUE, "glProgramEnvParameter4fv(index)");
+         return;
+      }
+      memcpy(ctx->VertexProgram.Parameters[index], params,
+             4 * sizeof(GLfloat));
+   }
+   else {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glProgramEnvParameter4fv(target)");
+      return;
+   }
 }
 
 
-- 
cgit v1.1