33 files changed, 305 insertions, 181 deletions
diff --git a/src/mesa/drivers/common/meta_blit.c b/src/mesa/drivers/common/meta_blit.c
index 0066f7f..6761238 100644
--- a/src/mesa/drivers/common/meta_blit.c
+++ b/src/mesa/drivers/common/meta_blit.c
@@ -597,6 +597,7 @@ blitframebuffer_texture(struct gl_context *ctx,
                         GLenum filter, GLint flipX, GLint flipY,
                         GLboolean glsl_version, GLboolean do_depth)
 {
+   struct save_state *save = &ctx->Meta->Save[ctx->Meta->SaveStackDepth - 1];
    int att_index = do_depth ? BUFFER_DEPTH : readFb->_ColorReadBufferIndex;
    const struct gl_renderbuffer_attachment *readAtt =
       &readFb->Attachment[att_index];
@@ -709,7 +710,7 @@ blitframebuffer_texture(struct gl_context *ctx,
    fb_tex_blit.samp_obj = _mesa_meta_setup_sampler(ctx, texObj, target, filter,
                                                    srcLevel);
 
-   /* Always do our blits with no net sRGB decode or encode.
+   /* For desktop GL, we do our blits with no net sRGB decode or encode.
     *
     * However, if both the src and dst can be srgb decode/encoded, enable them
     * so that we do any blending (from scaling or from MSAA resolves) in the
@@ -723,18 +724,42 @@ blitframebuffer_texture(struct gl_context *ctx,
     *      scissor test."
     *
     * The GL 4.4 specification disagrees and says that the sRGB part of the
-    * fragment pipeline applies, but this was found to break applications.
+    * fragment pipeline applies, but this was found to break applications
+    * (such as Left 4 Dead 2).
+    *
+    * However, for ES 3.0, we follow the specification and perform sRGB
+    * decoding and encoding.  The specification has always been clear in
+    * the ES world, and hasn't changed over time.
     */
    if (ctx->Extensions.EXT_texture_sRGB_decode) {
-      if (_mesa_get_format_color_encoding(rb->Format) == GL_SRGB &&
-          drawFb->Visual.sRGBCapable) {
+      bool src_srgb = _mesa_get_format_color_encoding(rb->Format) == GL_SRGB;
+      if (save->API == API_OPENGLES2 && ctx->Version >= 30) {
+         /* From the ES 3.0.4 specification, page 198:
+          * "When values are taken from the read buffer, if the value of
+          *  FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING for the framebuffer
+          *  attachment corresponding to the read buffer is SRGB (see section
+          *  6.1.13), the red, green, and blue components are converted from
+          *  the non-linear sRGB color space according to equation 3.24.
+          *
+          *  When values are written to the draw buffers, blit operations
+          *  bypass the fragment pipeline. The only fragment operations which
+          *  affect a blit are the pixel ownership test, the scissor test,
+          *  and sRGB conversion (see section 4.1.8)."
+          */
          _mesa_set_sampler_srgb_decode(ctx, fb_tex_blit.samp_obj,
-                                       GL_DECODE_EXT);
-         _mesa_set_framebuffer_srgb(ctx, GL_TRUE);
+                                       src_srgb ? GL_DECODE_EXT
+                                                : GL_SKIP_DECODE_EXT);
+         _mesa_set_framebuffer_srgb(ctx, drawFb->Visual.sRGBCapable);
       } else {
-         _mesa_set_sampler_srgb_decode(ctx, fb_tex_blit.samp_obj,
-                                       GL_SKIP_DECODE_EXT);
-         /* set_framebuffer_srgb was set by _mesa_meta_begin(). */
+         if (src_srgb && drawFb->Visual.sRGBCapable) {
+            _mesa_set_sampler_srgb_decode(ctx, fb_tex_blit.samp_obj,
+                                          GL_DECODE_EXT);
+            _mesa_set_framebuffer_srgb(ctx, GL_TRUE);
+         } else {
+            _mesa_set_sampler_srgb_decode(ctx, fb_tex_blit.samp_obj,
+                                          GL_SKIP_DECODE_EXT);
+            /* set_framebuffer_srgb was set by _mesa_meta_begin(). */
+         }
       }
    }
 
diff --git a/src/mesa/drivers/common/meta_copy_image.c b/src/mesa/drivers/common/meta_copy_image.c
index 18b9681..9402a46 100644
--- a/src/mesa/drivers/common/meta_copy_image.c
+++ b/src/mesa/drivers/common/meta_copy_image.c
@@ -269,6 +269,9 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx,
    if (status != GL_FRAMEBUFFER_COMPLETE)
       goto meta_end;
 
+   /* Explicitly disable sRGB encoding */
+   ctx->DrawBuffer->Visual.sRGBCapable = false;
+
    /* Since we've bound a new draw framebuffer, we need to update its
     * derived state -- _Xmin, etc -- for BlitFramebuffer's clipping to
     * be correct.
diff --git a/src/mesa/drivers/common/meta_tex_subimage.c b/src/mesa/drivers/common/meta_tex_subimage.c
index dfd3327..62c3fce 100644
--- a/src/mesa/drivers/common/meta_tex_subimage.c
+++ b/src/mesa/drivers/common/meta_tex_subimage.c
@@ -263,6 +263,9 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims,
    if (status != GL_FRAMEBUFFER_COMPLETE)
       goto fail;
 
+   /* Explicitly disable sRGB encoding */
+   ctx->DrawBuffer->Visual.sRGBCapable = false;
+
    _mesa_update_state(ctx);
 
    if (_mesa_meta_BlitFramebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer,
@@ -420,6 +423,9 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
    if (status != GL_FRAMEBUFFER_COMPLETE)
       goto fail;
 
+   /* Explicitly disable sRGB encoding */
+   ctx->DrawBuffer->Visual.sRGBCapable = false;
+
    _mesa_update_state(ctx);
 
    if (_mesa_meta_BlitFramebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer,
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp
index 4497eab..38a3236 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp
@@ -115,12 +115,11 @@ brw_blorp_surface_info::set(struct brw_context *brw,
       this->brw_surfaceformat = BRW_SURFACEFORMAT_R16_UNORM;
       break;
    default: {
-      mesa_format linear_format = _mesa_get_srgb_format_linear(format);
       if (is_render_target) {
-         assert(brw->format_supported_as_render_target[linear_format]);
-         this->brw_surfaceformat = brw->render_target_format[linear_format];
+         assert(brw->format_supported_as_render_target[format]);
+         this->brw_surfaceformat = brw->render_target_format[format];
       } else {
-         this->brw_surfaceformat = brw_format_for_mesa_format(linear_format);
+         this->brw_surfaceformat = brw_format_for_mesa_format(format);
       }
       break;
    }
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
index a04a1df..f04e196 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h
@@ -46,7 +46,8 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
                         float src_x1, float src_y1,
                         float dst_x0, float dst_y0,
                         float dst_x1, float dst_y1,
-                        GLenum filter, bool mirror_x, bool mirror_y);
+                        GLenum filter, bool mirror_x, bool mirror_y,
+                        bool decode_srgb, bool encode_srgb);
 
 #ifdef __cplusplus
 } /* end extern "C" */
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 05fff91..5fd25f1 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -21,6 +21,7 @@
  * IN THE SOFTWARE.
  */
 
+#include "main/context.h"
 #include "main/teximage.h"
 #include "main/fbobject.h"
 
@@ -63,7 +64,8 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
                         float src_x1, float src_y1,
                         float dst_x0, float dst_y0,
                         float dst_x1, float dst_y1,
-                        GLenum filter, bool mirror_x, bool mirror_y)
+                        GLenum filter, bool mirror_x, bool mirror_y,
+                        bool decode_srgb, bool encode_srgb)
 {
    /* Get ready to blit.  This includes depth resolving the src and dst
     * buffers if necessary.  Note: it's not necessary to do a color resolve on
@@ -89,6 +91,12 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
        dst_level, dst_layer, dst_x0, dst_y0, dst_x1, dst_y1,
        mirror_x, mirror_y);
 
+   if (!decode_srgb && _mesa_get_format_color_encoding(src_format) == GL_SRGB)
+      src_format = _mesa_get_srgb_format_linear(src_format);
+
+   if (!encode_srgb && _mesa_get_format_color_encoding(dst_format) == GL_SRGB)
+      dst_format = _mesa_get_srgb_format_linear(dst_format);
+
    brw_blorp_blit_params params(brw,
                                 src_mt, src_level, src_layer, src_format,
                                 dst_mt, dst_level, dst_layer, dst_format,
@@ -114,6 +122,8 @@ do_blorp_blit(struct brw_context *brw, GLbitfield buffer_bit,
    struct intel_mipmap_tree *src_mt = find_miptree(buffer_bit, src_irb);
    struct intel_mipmap_tree *dst_mt = find_miptree(buffer_bit, dst_irb);
 
+   const bool es3 = _mesa_is_gles3(&brw->ctx);
+
    /* Do the blit */
    brw_blorp_blit_miptrees(brw,
                            src_mt, src_irb->mt_level, src_irb->mt_layer,
@@ -122,7 +132,8 @@ do_blorp_blit(struct brw_context *brw, GLbitfield buffer_bit,
                            dst_format,
                            srcX0, srcY0, srcX1, srcY1,
                            dstX0, dstY0, dstX1, dstY1,
-                           filter, mirror_x, mirror_y);
+                           filter, mirror_x, mirror_y,
+                           es3, es3);
 
    dst_irb->need_downsample = true;
 }
@@ -289,7 +300,8 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
                            dst_image->TexFormat,
                            srcX0, srcY0, srcX1, srcY1,
                            dstX0, dstY0, dstX1, dstY1,
-                           GL_NEAREST, false, mirror_y);
+                           GL_NEAREST, false, mirror_y,
+                           false, false);
 
    /* If we're copying to a packed depth stencil texture and the source
     * framebuffer has separate stencil, we need to also copy the stencil data
@@ -314,7 +326,8 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
                                  dst_mt->format,
                                  srcX0, srcY0, srcX1, srcY1,
                                  dstX0, dstY0, dstX1, dstY1,
-                                 GL_NEAREST, false, mirror_y);
+                                 GL_NEAREST, false, mirror_y,
+                                 false, false);
       }
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c b/src/mesa/drivers/dri/i965/brw_compiler.c
index a95f51b..b32252f 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.c
+++ b/src/mesa/drivers/dri/i965/brw_compiler.c
@@ -108,6 +108,26 @@ static const struct nir_shader_compiler_options vector_nir_options = {
     */
    .fdot_replicates = true,
 
+   /* Prior to Gen6, there are no three source operations for SIMD4x2. */
+   .lower_flrp = true,
+
+   .lower_pack_snorm_2x16 = true,
+   .lower_pack_unorm_2x16 = true,
+   .lower_unpack_snorm_2x16 = true,
+   .lower_unpack_unorm_2x16 = true,
+   .lower_extract_byte = true,
+   .lower_extract_word = true,
+};
+
+static const struct nir_shader_compiler_options vector_nir_options_gen6 = {
+   COMMON_OPTIONS,
+
+   /* In the vec4 backend, our dpN instruction replicates its result to all the
+    * components of a vec4.  We would like NIR to give us replicated fdot
+    * instructions because it can optimize better for us.
+    */
+   .fdot_replicates = true,
+
    .lower_pack_snorm_2x16 = true,
    .lower_pack_unorm_2x16 = true,
    .lower_unpack_snorm_2x16 = true,
@@ -160,8 +180,12 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
       if (devinfo->gen < 7)
          compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true;
 
-      compiler->glsl_compiler_options[i].NirOptions =
-         is_scalar ? &scalar_nir_options : &vector_nir_options;
+      if (is_scalar) {
+         compiler->glsl_compiler_options[i].NirOptions = &scalar_nir_options;
+      } else {
+         compiler->glsl_compiler_options[i].NirOptions =
+            devinfo->gen < 6 ? &vector_nir_options : &vector_nir_options_gen6;
+      }
 
       compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true;
    }
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 0b99356..5b14252 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -1294,7 +1294,7 @@ pop_if_stack(struct brw_codegen *p)
 static void
 push_loop_stack(struct brw_codegen *p, brw_inst *inst)
 {
-   if (p->loop_stack_array_size < p->loop_stack_depth) {
+   if (p->loop_stack_array_size <= (p->loop_stack_depth + 1)) {
       p->loop_stack_array_size *= 2;
       p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
 			       p->loop_stack_array_size);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 874053c..33c4adc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2307,17 +2307,6 @@ fs_visitor::opt_algebraic()
             progress = true;
          }
          break;
-      case SHADER_OPCODE_RCP: {
-         fs_inst *prev = (fs_inst *)inst->prev;
-         if (prev->opcode == SHADER_OPCODE_SQRT) {
-            if (inst->src[0].equals(prev->dst)) {
-               inst->opcode = SHADER_OPCODE_RSQ;
-               inst->src[0] = prev->src[0];
-               progress = true;
-            }
-         }
-         break;
-      }
       case SHADER_OPCODE_BROADCAST:
          if (is_uniform(inst->src[0])) {
             inst->opcode = BRW_OPCODE_MOV;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 2616e65..ffab0a8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -654,21 +654,6 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
          }
          break;
 
-      case SHADER_OPCODE_RCP:
-         /* The hardware doesn't do math on immediate values
-          * (because why are you doing that, seriously?), but
-          * the correct answer is to just constant fold it
-          * anyway.
-          */
-         assert(i == 0);
-         if (inst->src[0].f != 0.0f) {
-            inst->opcode = BRW_OPCODE_MOV;
-            inst->src[0] = val;
-            inst->src[0].f = 1.0f / inst->src[0].f;
-            progress = true;
-         }
-         break;
-
       case SHADER_OPCODE_UNTYPED_ATOMIC:
       case SHADER_OPCODE_UNTYPED_SURFACE_READ:
       case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 29ef609..aa4c745 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -518,10 +518,10 @@ fs_visitor::optimize_extract_to_float(nir_alu_instr *instr,
 
    enum opcode extract_op;
    if (src0->op == nir_op_extract_u16 || src0->op == nir_op_extract_i16) {
-      assert(element->u[0] <= 1);
+      assert(element->u32[0] <= 1);
       extract_op = SHADER_OPCODE_EXTRACT_WORD;
    } else {
-      assert(element->u[0] <= 3);
+      assert(element->u32[0] <= 3);
       extract_op = SHADER_OPCODE_EXTRACT_BYTE;
    }
 
@@ -530,7 +530,7 @@ fs_visitor::optimize_extract_to_float(nir_alu_instr *instr,
    op0 = offset(op0, bld, src0->src[0].swizzle[0]);
 
    set_saturate(instr->dest.saturate,
-                bld.emit(extract_op, result, op0, brw_imm_ud(element->u[0])));
+                bld.emit(extract_op, result, op0, brw_imm_ud(element->u32[0])));
    return true;
 }
 
@@ -549,11 +549,11 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
       return false;
 
    nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src);
-   if (!value1 || fabsf(value1->f[0]) != 1.0f)
+   if (!value1 || fabsf(value1->f32[0]) != 1.0f)
       return false;
 
    nir_const_value *value2 = nir_src_as_const_value(instr->src[2].src);
-   if (!value2 || fabsf(value2->f[0]) != 1.0f)
+   if (!value2 || fabsf(value2->f32[0]) != 1.0f)
       return false;
 
    fs_reg tmp = vgrf(glsl_type::int_type);
@@ -573,7 +573,7 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
        * surely be TRIANGLES
        */
 
-      if (value1->f[0] == -1.0f) {
+      if (value1->f32[0] == -1.0f) {
          g0.negate = true;
       }
 
@@ -601,7 +601,7 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
        * surely be TRIANGLES
        */
 
-      if (value1->f[0] == -1.0f) {
+      if (value1->f32[0] == -1.0f) {
          g1_6.negate = true;
       }
 
@@ -1180,7 +1180,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
    case nir_op_extract_i8: {
       nir_const_value *byte = nir_src_as_const_value(instr->src[1].src);
       bld.emit(SHADER_OPCODE_EXTRACT_BYTE,
-               result, op[0], brw_imm_ud(byte->u[0]));
+               result, op[0], brw_imm_ud(byte->u32[0]));
       break;
    }
 
@@ -1188,7 +1188,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
    case nir_op_extract_i16: {
       nir_const_value *word = nir_src_as_const_value(instr->src[1].src);
       bld.emit(SHADER_OPCODE_EXTRACT_WORD,
-               result, op[0], brw_imm_ud(word->u[0]));
+               result, op[0], brw_imm_ud(word->u32[0]));
       break;
    }
 
@@ -1215,7 +1215,7 @@ fs_visitor::nir_emit_load_const(const fs_builder &bld,
    fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_D, instr->def.num_components);
 
    for (unsigned i = 0; i < instr->def.num_components; i++)
-      bld.MOV(offset(reg, bld, i), brw_imm_d(instr->value.i[i]));
+      bld.MOV(offset(reg, bld, i), brw_imm_d(instr->value.i32[i]));
 
    nir_ssa_values[instr->def.index] = reg;
 }
@@ -1769,9 +1769,9 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
    const bool is_point_size = (base_offset == 0);
 
    if (offset_const != NULL && vertex_const != NULL &&
-       4 * (base_offset + offset_const->u[0]) < push_reg_count) {
-      int imm_offset = (base_offset + offset_const->u[0]) * 4 +
-                       vertex_const->u[0] * push_reg_count;
+       4 * (base_offset + offset_const->u32[0]) < push_reg_count) {
+      int imm_offset = (base_offset + offset_const->u32[0]) * 4 +
+                       vertex_const->u32[0] * push_reg_count;
       /* This input was pushed into registers. */
       if (is_point_size) {
          /* gl_PointSize comes in .w */
@@ -1793,7 +1793,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
       if (vertex_const) {
          /* The vertex index is constant; just select the proper URB handle. */
          icp_handle =
-            retype(brw_vec8_grf(first_icp_handle + vertex_const->i[0], 0),
+            retype(brw_vec8_grf(first_icp_handle + vertex_const->i32[0], 0),
                    BRW_REGISTER_TYPE_UD);
       } else {
          /* The vertex index is non-constant.  We need to use indirect
@@ -1837,7 +1837,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
       if (offset_const) {
          /* Constant indexing - use global offset. */
          inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, icp_handle);
-         inst->offset = base_offset + offset_const->u[0];
+         inst->offset = base_offset + offset_const->u32[0];
          inst->base_mrf = -1;
          inst->mlen = 1;
          inst->regs_written = num_components;
@@ -1875,7 +1875,7 @@ fs_visitor::get_indirect_offset(nir_intrinsic_instr *instr)
        * add_const_offset_to_base() will fold other constant offsets
        * into instr->const_index[0].
        */
-      assert(const_value->u[0] == 0);
+      assert(const_value->u32[0] == 0);
       return fs_reg();
    }
 
@@ -2193,7 +2193,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
          nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
 
          if (const_sample) {
-            unsigned msg_data = const_sample->i[0] << 4;
+            unsigned msg_data = const_sample->i32[0] << 4;
 
             emit_pixel_interpolater_send(bld,
                                          FS_OPCODE_INTERPOLATE_AT_SAMPLE,
@@ -2260,8 +2260,8 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
          nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
 
          if (const_offset) {
-            unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf;
-            unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf;
+            unsigned off_x = MIN2((int)(const_offset->f32[0] * 16), 7) & 0xf;
+            unsigned off_y = MIN2((int)(const_offset->f32[1] * 16), 7) & 0xf;
 
             emit_pixel_interpolater_send(bld,
                                          FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
@@ -2420,7 +2420,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
       fs_reg offset_reg;
       nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
       if (const_offset) {
-         offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0]);
+         offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]);
       } else {
          offset_reg = vgrf(glsl_type::uint_type);
          bld.ADD(offset_reg,
@@ -2464,7 +2464,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
 
          nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
          if (const_offset) {
-            offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0] +
+            offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0] +
                                     4 * first_component);
          } else {
             offset_reg = vgrf(glsl_type::uint_type);
@@ -2695,8 +2695,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
       if (const_offset) {
          /* Offsets are in bytes but they should always be multiples of 4 */
-         assert(const_offset->u[0] % 4 == 0);
-         src.reg_offset = const_offset->u[0] / 4;
+         assert(const_offset->u32[0] % 4 == 0);
+         src.reg_offset = const_offset->u32[0] / 4;
 
          for (unsigned j = 0; j < instr->num_components; j++) {
             bld.MOV(offset(dest, bld, j), offset(src, bld, j));
@@ -2729,7 +2729,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
 
       if (const_index) {
          const unsigned index = stage_prog_data->binding_table.ubo_start +
-                                const_index->u[0];
+                                const_index->u32[0];
          surf_index = brw_imm_ud(index);
          brw_mark_surface_used(prog_data, index);
       } else {
@@ -2762,12 +2762,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
          fs_reg packed_consts = vgrf(glsl_type::float_type);
          packed_consts.type = dest.type;
 
-         struct brw_reg const_offset_reg = brw_imm_ud(const_offset->u[0] & ~15);
+         struct brw_reg const_offset_reg = brw_imm_ud(const_offset->u32[0] & ~15);
          bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts,
                   surf_index, const_offset_reg);
 
          for (unsigned i = 0; i < instr->num_components; i++) {
-            packed_consts.set_smear(const_offset->u[0] % 16 / 4 + i);
+            packed_consts.set_smear(const_offset->u32[0] % 16 / 4 + i);
 
             /* The std140 packing rules don't allow vectors to cross 16-byte
              * boundaries, and a reg is 32 bytes.
@@ -2790,7 +2790,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       fs_reg surf_index;
       if (const_uniform_block) {
          unsigned index = stage_prog_data->binding_table.ssbo_start +
-                          const_uniform_block->u[0];
+                          const_uniform_block->u32[0];
          surf_index = brw_imm_ud(index);
          brw_mark_surface_used(prog_data, index);
       } else {
@@ -2809,7 +2809,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       fs_reg offset_reg;
       nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
       if (const_offset) {
-         offset_reg = brw_imm_ud(const_offset->u[0]);
+         offset_reg = brw_imm_ud(const_offset->u32[0]);
       } else {
          offset_reg = get_nir_src(instr->src[1]);
       }
@@ -2837,7 +2837,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
 
       nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
       assert(const_offset && "Indirect input loads not allowed");
-      src = offset(src, bld, const_offset->u[0]);
+      src = offset(src, bld, const_offset->u32[0]);
 
       for (unsigned j = 0; j < instr->num_components; j++) {
          bld.MOV(offset(dest, bld, j), offset(src, bld, j));
@@ -2854,7 +2854,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
          nir_src_as_const_value(instr->src[1]);
       if (const_uniform_block) {
          unsigned index = stage_prog_data->binding_table.ssbo_start +
-                          const_uniform_block->u[0];
+                          const_uniform_block->u32[0];
          surf_index = brw_imm_ud(index);
          brw_mark_surface_used(prog_data, index);
       } else {
@@ -2885,7 +2885,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
          fs_reg offset_reg;
          nir_const_value *const_offset = nir_src_as_const_value(instr->src[2]);
          if (const_offset) {
-            offset_reg = brw_imm_ud(const_offset->u[0] + 4 * first_component);
+            offset_reg = brw_imm_ud(const_offset->u32[0] + 4 * first_component);
          } else {
             offset_reg = vgrf(glsl_type::uint_type);
             bld.ADD(offset_reg,
@@ -2913,7 +2913,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
 
       nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
       assert(const_offset && "Indirect output stores not allowed");
-      new_dest = offset(new_dest, bld, const_offset->u[0]);
+      new_dest = offset(new_dest, bld, const_offset->u32[0]);
 
       for (unsigned j = 0; j < instr->num_components; j++) {
          bld.MOV(offset(new_dest, bld, j), offset(src, bld, j));
@@ -2954,7 +2954,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
 
    case nir_intrinsic_get_buffer_size: {
       nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
-      unsigned ssbo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
+      unsigned ssbo_index = const_uniform_block ? const_uniform_block->u32[0] : 0;
       int reg_width = dispatch_width / 8;
 
       /* Set LOD = 0 */
@@ -3005,7 +3005,7 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
    nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
    if (const_surface) {
       unsigned surf_index = stage_prog_data->binding_table.ssbo_start +
-                            const_surface->u[0];
+                            const_surface->u32[0];
       surface = brw_imm_ud(surf_index);
       brw_mark_surface_used(prog_data, surf_index);
    } else {
@@ -3134,7 +3134,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
          nir_const_value *const_offset =
             nir_src_as_const_value(instr->src[i].src);
          if (const_offset) {
-            tex_offset = brw_imm_ud(brw_texture_offset(const_offset->i, 3));
+            tex_offset = brw_imm_ud(brw_texture_offset(const_offset->i32, 3));
          } else {
             tex_offset = retype(src, BRW_REGISTER_TYPE_D);
          }
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 941920a..ab6000b 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -77,7 +77,7 @@ add_const_offset_to_base_block(nir_block *block, void *closure)
          nir_const_value *const_offset = nir_src_as_const_value(*offset);
 
          if (const_offset) {
-            intrin->const_index[0] += const_offset->u[0];
+            intrin->const_index[0] += const_offset->u32[0];
             b->cursor = nir_before_instr(&intrin->instr);
             nir_instr_rewrite_src(&intrin->instr, offset,
                                   nir_src_for_ssa(nir_imm_int(b, 0)));
@@ -175,7 +175,7 @@ remap_patch_urb_offsets(nir_block *block, void *closure)
          if (vertex) {
             nir_const_value *const_vertex = nir_src_as_const_value(*vertex);
             if (const_vertex) {
-               intrin->const_index[0] += const_vertex->u[0] *
+               intrin->const_index[0] += const_vertex->u32[0] *
                                          state->vue_map->num_per_vertex_slots;
             } else {
                state->b.cursor = nir_before_instr(&intrin->instr);
@@ -623,12 +623,24 @@ brw_type_for_nir_type(nir_alu_type type)
 {
    switch (type) {
    case nir_type_uint:
+   case nir_type_uint32:
       return BRW_REGISTER_TYPE_UD;
    case nir_type_bool:
    case nir_type_int:
+   case nir_type_bool32:
+   case nir_type_int32:
       return BRW_REGISTER_TYPE_D;
    case nir_type_float:
+   case nir_type_float32:
       return BRW_REGISTER_TYPE_F;
+   case nir_type_float64:
+      return BRW_REGISTER_TYPE_DF;
+   case nir_type_int64:
+   case nir_type_uint64:
+      /* TODO we should only see these in moves, so for now it's ok, but when
+       * we add actual 64-bit integer support we should fix this.
+       */
+      return BRW_REGISTER_TYPE_DF;
    default:
       unreachable("unknown type");
    }
@@ -644,12 +656,18 @@ brw_glsl_base_type_for_nir_type(nir_alu_type type)
 {
    switch (type) {
    case nir_type_float:
+   case nir_type_float32:
       return GLSL_TYPE_FLOAT;
 
+   case nir_type_float64:
+      return GLSL_TYPE_DOUBLE;
+
    case nir_type_int:
+   case nir_type_int32:
       return GLSL_TYPE_INT;
 
    case nir_type_uint:
+   case nir_type_uint32:
       return GLSL_TYPE_UINT;
 
    default:
diff --git a/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c b/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c
index 56e15ef..22eeb1a 100644
--- a/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c
+++ b/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c
@@ -165,7 +165,7 @@ analyze_boolean_resolves_block(nir_block *block, void *void_state)
          }
 
          default:
-            if (nir_op_infos[alu->op].output_type == nir_type_bool) {
+            if (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) == nir_type_bool) {
                /* This instructions will turn into a CMP when we actually emit
                 * them so the result will have to be resolved before it can be
                 * used.
@@ -225,7 +225,7 @@ analyze_boolean_resolves_block(nir_block *block, void *void_state)
           * have to worry about resolving them.
           */
          instr->pass_flags &= ~BRW_NIR_BOOLEAN_MASK;
-         if (load->value.u[0] == NIR_TRUE || load->value.u[0] == NIR_FALSE) {
+         if (load->value.u32[0] == NIR_TRUE || load->value.u32[0] == NIR_FALSE) {
             instr->pass_flags |= BRW_NIR_BOOLEAN_NO_RESOLVE;
          } else {
             instr->pass_flags |= BRW_NIR_NON_BOOLEAN;
diff --git a/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c b/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
index 5ff2cba..6e8b1f9 100644
--- a/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
+++ b/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
@@ -168,7 +168,9 @@ brw_nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
       if (add->op != nir_op_fadd)
          continue;
 
-      /* TODO: Maybe bail if this expression is considered "precise"? */
+      assert(add->dest.dest.is_ssa);
+      if (add->exact)
+         continue;
 
       assert(add->src[0].src.is_ssa && add->src[1].src.is_ssa);
 
@@ -201,6 +203,8 @@ brw_nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
       if (mul == NULL)
          continue;
 
+      unsigned bit_size = add->dest.dest.ssa.bit_size;
+
       nir_ssa_def *mul_src[2];
       mul_src[0] = mul->src[0].src.ssa;
       mul_src[1] = mul->src[1].src.ssa;
@@ -220,7 +224,7 @@ brw_nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
                                                       nir_op_fabs);
             abs->src[0].src = nir_src_for_ssa(mul_src[i]);
             nir_ssa_dest_init(&abs->instr, &abs->dest.dest,
-                              mul_src[i]->num_components, NULL);
+                              mul_src[i]->num_components, bit_size, NULL);
             abs->dest.write_mask = (1 << mul_src[i]->num_components) - 1;
             nir_instr_insert_before(&add->instr, &abs->instr);
             mul_src[i] = &abs->dest.dest.ssa;
@@ -232,7 +236,7 @@ brw_nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
                                                    nir_op_fneg);
          neg->src[0].src = nir_src_for_ssa(mul_src[0]);
          nir_ssa_dest_init(&neg->instr, &neg->dest.dest,
-                           mul_src[0]->num_components, NULL);
+                           mul_src[0]->num_components, bit_size, NULL);
          neg->dest.write_mask = (1 << mul_src[0]->num_components) - 1;
          nir_instr_insert_before(&add->instr, &neg->instr);
          mul_src[0] = &neg->dest.dest.ssa;
@@ -253,6 +257,7 @@ brw_nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
 
       nir_ssa_dest_init(&ffma->instr, &ffma->dest.dest,
                         add->dest.dest.ssa.num_components,
+                        bit_size,
                         add->dest.dest.ssa.name);
       nir_ssa_def_rewrite_uses(&add->dest.dest.ssa,
                                nir_src_for_ssa(&ffma->dest.dest.ssa));
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 6b85eac..783af78 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -34,6 +34,7 @@
 #define BRW_STATE_H
 
 #include "brw_context.h"
+#include "brw_defines.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -406,6 +407,59 @@ void gen7_reset_hw_bt_pool_offsets(struct brw_context *brw);
 void
 gen7_restore_default_l3_config(struct brw_context *brw);
 
+static inline bool
+is_drawing_points(const struct brw_context *brw)
+{
+   /* Determine if the primitives *reaching the SF* are points */
+   /* _NEW_POLYGON */
+   if (brw->ctx.Polygon.FrontMode == GL_POINT ||
+       brw->ctx.Polygon.BackMode == GL_POINT) {
+      return true;
+   }
+
+   if (brw->geometry_program) {
+      /* BRW_NEW_GEOMETRY_PROGRAM */
+      return brw->geometry_program->OutputType == GL_POINTS;
+   } else if (brw->tes.prog_data) {
+      /* BRW_NEW_TES_PROG_DATA */
+      return brw->tes.prog_data->output_topology ==
+             BRW_TESS_OUTPUT_TOPOLOGY_POINT;
+   } else {
+      /* BRW_NEW_PRIMITIVE */
+      return brw->primitive == _3DPRIM_POINTLIST;
+   }
+}
+
+static inline bool
+is_drawing_lines(const struct brw_context *brw)
+{
+   /* Determine if the primitives *reaching the SF* are points */
+   /* _NEW_POLYGON */
+   if (brw->ctx.Polygon.FrontMode == GL_LINE ||
+       brw->ctx.Polygon.BackMode == GL_LINE) {
+      return true;
+   }
+
+   if (brw->geometry_program) {
+      /* BRW_NEW_GEOMETRY_PROGRAM */
+      return brw->geometry_program->OutputType == GL_LINE_STRIP;
+   } else if (brw->tes.prog_data) {
+      /* BRW_NEW_TES_PROG_DATA */
+      return brw->tes.prog_data->output_topology ==
+             BRW_TESS_OUTPUT_TOPOLOGY_LINE;
+   } else {
+      /* BRW_NEW_PRIMITIVE */
+      switch (brw->primitive) {
+      case _3DPRIM_LINELIST:
+      case _3DPRIM_LINESTRIP:
+      case _3DPRIM_LINELOOP:
+         return true;
+      }
+   }
+   return false;
+}
+
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c
index 4666788..b7b0a86 100644
--- a/src/mesa/drivers/dri/i965/brw_state_dump.c
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@@ -423,11 +423,12 @@ static void gen7_dump_sampler_state(struct brw_context *brw,
                 GET_BITS(samp[1], 15, 8)
                );
       batch_out(brw, name, offset, i+2, "Border Color\n"); /* FINISHME: gen8+ */
-      batch_out(brw, name, offset, i+3, "Max aniso: RATIO %d:1, TC[XYZ] Address Control: %s|%s|%s\n",
+      batch_out(brw, name, offset, i+3, "Max aniso: RATIO %d:1, TC[XYZ] Address Control: %s|%s|%s, %snormalized coords\n",
                 (GET_FIELD(samp[3], BRW_SAMPLER_MAX_ANISOTROPY) + 1) * 2,
                 sampler_addr_mode[GET_FIELD(samp[3], BRW_SAMPLER_TCX_WRAP_MODE)],
                 sampler_addr_mode[GET_FIELD(samp[3], BRW_SAMPLER_TCY_WRAP_MODE)],
-                sampler_addr_mode[GET_FIELD(samp[3], BRW_SAMPLER_TCZ_WRAP_MODE)]
+                sampler_addr_mode[GET_FIELD(samp[3], BRW_SAMPLER_TCZ_WRAP_MODE)],
+                (samp[3] & GEN7_SAMPLER_NON_NORMALIZED_COORDINATES) ? "non-" : ""
                );
 
       samp += 4;
diff --git a/src/mesa/drivers/dri/i965/brw_util.h b/src/mesa/drivers/dri/i965/brw_util.h
index 1f27e98..3e9a6ee 100644
--- a/src/mesa/drivers/dri/i965/brw_util.h
+++ b/src/mesa/drivers/dri/i965/brw_util.h
@@ -34,6 +34,7 @@
 #define BRW_UTIL_H
 
 #include "brw_context.h"
+#include "main/framebuffer.h"
 
 extern GLuint brw_translate_blend_factor( GLenum factor );
 extern GLuint brw_translate_blend_equation( GLenum mode );
@@ -49,13 +50,13 @@ brw_get_line_width(struct brw_context *brw)
     * implementation-dependent maximum non-antialiased line width."
     */
    float line_width =
-      CLAMP(!brw->ctx.Multisample._Enabled && !brw->ctx.Line.SmoothFlag
+      CLAMP(!_mesa_is_multisample_enabled(&brw->ctx) && !brw->ctx.Line.SmoothFlag
             ? roundf(brw->ctx.Line.Width) : brw->ctx.Line.Width,
             0.0f, brw->ctx.Const.MaxLineWidth);
    uint32_t line_width_u3_7 = U_FIXED(line_width, 7);
 
    /* Line width of 0 is not allowed when MSAA enabled */
-   if (brw->ctx.Multisample._Enabled) {
+   if (_mesa_is_multisample_enabled(&brw->ctx)) {
       if (line_width_u3_7 == 0)
          line_width_u3_7 = 1;
    } else if (brw->ctx.Line.SmoothFlag && line_width < 1.5f) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 65e57ba..0025343 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -693,17 +693,6 @@ vec4_visitor::opt_algebraic()
             break;
          }
          break;
-      case SHADER_OPCODE_RCP: {
-         vec4_instruction *prev = (vec4_instruction *)inst->prev;
-         if (prev->opcode == SHADER_OPCODE_SQRT) {
-            if (inst->src[0].equals(src_reg(prev->dst))) {
-               inst->opcode = SHADER_OPCODE_RSQ;
-               inst->src[0] = prev->src[0];
-               progress = true;
-            }
-         }
-         break;
-      }
       case SHADER_OPCODE_BROADCAST:
          if (is_uniform(inst->src[0]) ||
              inst->src[1].is_zero()) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
index d9c048e..e915aee 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
@@ -70,8 +70,8 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       /* Make up a type...we have no way of knowing... */
       const glsl_type *const type = glsl_type::ivec(instr->num_components);
 
-      src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u[0] +
-                          instr->const_index[0] + offset->u[0],
+      src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u32[0] +
+                          instr->const_index[0] + offset->u32[0],
                     type);
       /* gl_PointSize is passed in the .w component of the VUE header */
       if (instr->const_index[0] == VARYING_SLOT_PSIZ)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 4686f20..7c06f92 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -343,7 +343,7 @@ vec4_visitor::get_indirect_offset(nir_intrinsic_instr *instr)
        * add_const_offset_to_base() will fold other constant offsets
        * into instr->const_index[0].
        */
-      assert(const_value->u[0] == 0);
+      assert(const_value->u32[0] == 0);
       return src_reg();
    }
 
@@ -369,13 +369,13 @@ vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr)
          continue;
 
       for (unsigned j = i; j < instr->def.num_components; j++) {
-         if (instr->value.u[i] == instr->value.u[j]) {
+         if (instr->value.u32[i] == instr->value.u32[j]) {
             writemask |= 1 << j;
          }
       }
 
       reg.writemask = writemask;
-      emit(MOV(reg, brw_imm_d(instr->value.i[i])));
+      emit(MOV(reg, brw_imm_d(instr->value.i32[i])));
 
       remaining &= ~writemask;
    }
@@ -400,7 +400,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       /* We set EmitNoIndirectInput for VS */
       assert(const_offset);
 
-      src = src_reg(ATTR, instr->const_index[0] + const_offset->u[0],
+      src = src_reg(ATTR, instr->const_index[0] + const_offset->u32[0],
                     glsl_type::uvec4_type);
 
       dest = get_nir_dest(instr->dest, src.type);
@@ -414,7 +414,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
       assert(const_offset);
 
-      int varying = instr->const_index[0] + const_offset->u[0];
+      int varying = instr->const_index[0] + const_offset->u32[0];
 
       src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F,
                         instr->num_components);
@@ -425,7 +425,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
 
    case nir_intrinsic_get_buffer_size: {
       nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
-      unsigned ssbo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
+      unsigned ssbo_index = const_uniform_block ? const_uniform_block->u32[0] : 0;
 
       const unsigned index =
          prog_data->base.binding_table.ssbo_start + ssbo_index;
@@ -458,7 +458,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
          nir_src_as_const_value(instr->src[1]);
       if (const_uniform_block) {
          unsigned index = prog_data->base.binding_table.ssbo_start +
-                          const_uniform_block->u[0];
+                          const_uniform_block->u32[0];
          surf_index = brw_imm_ud(index);
          brw_mark_surface_used(&prog_data->base, index);
       } else {
@@ -476,7 +476,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       src_reg offset_reg;
       nir_const_value *const_offset = nir_src_as_const_value(instr->src[2]);
       if (const_offset) {
-         offset_reg = brw_imm_ud(const_offset->u[0]);
+         offset_reg = brw_imm_ud(const_offset->u32[0]);
       } else {
          offset_reg = get_nir_src(instr->src[2], 1);
       }
@@ -596,7 +596,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       src_reg surf_index;
       if (const_uniform_block) {
          unsigned index = prog_data->base.binding_table.ssbo_start +
-                          const_uniform_block->u[0];
+                          const_uniform_block->u32[0];
          surf_index = brw_imm_ud(index);
 
          brw_mark_surface_used(&prog_data->base, index);
@@ -617,7 +617,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       src_reg offset_reg;
       nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
       if (const_offset) {
-         offset_reg = brw_imm_ud(const_offset->u[0]);
+         offset_reg = brw_imm_ud(const_offset->u32[0]);
       } else {
          offset_reg = get_nir_src(instr->src[1], 1);
       }
@@ -697,8 +697,8 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
       if (const_offset) {
          /* Offsets are in bytes but they should always be multiples of 16 */
-         assert(const_offset->u[0] % 16 == 0);
-         src.reg_offset = const_offset->u[0] / 16;
+         assert(const_offset->u32[0] % 16 == 0);
+         src.reg_offset = const_offset->u32[0] / 16;
 
          emit(MOV(dest, src));
       } else {
@@ -760,7 +760,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
           * as an immediate.
           */
          const unsigned index = prog_data->base.binding_table.ubo_start +
-                                const_block_index->u[0];
+                                const_block_index->u32[0];
          surf_index = brw_imm_ud(index);
          brw_mark_surface_used(&prog_data->base, index);
       } else {
@@ -785,7 +785,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       src_reg offset;
       nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
       if (const_offset) {
-         offset = brw_imm_ud(const_offset->u[0] & ~15);
+         offset = brw_imm_ud(const_offset->u32[0] & ~15);
       } else {
          offset = get_nir_src(instr->src[1], nir_type_int, 1);
       }
@@ -800,10 +800,10 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
 
       packed_consts.swizzle = brw_swizzle_for_size(instr->num_components);
       if (const_offset) {
-         packed_consts.swizzle += BRW_SWIZZLE4(const_offset->u[0] % 16 / 4,
-                                               const_offset->u[0] % 16 / 4,
-                                               const_offset->u[0] % 16 / 4,
-                                               const_offset->u[0] % 16 / 4);
+         packed_consts.swizzle += BRW_SWIZZLE4(const_offset->u32[0] % 16 / 4,
+                                               const_offset->u32[0] % 16 / 4,
+                                               const_offset->u32[0] % 16 / 4,
+                                               const_offset->u32[0] % 16 / 4);
       }
 
       emit(MOV(dest, packed_consts));
@@ -845,7 +845,7 @@ vec4_visitor::nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr)
    nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
    if (const_surface) {
       unsigned surf_index = prog_data->base.binding_table.ssbo_start +
-                            const_surface->u[0];
+                            const_surface->u32[0];
       surface = brw_imm_ud(surf_index);
       brw_mark_surface_used(&prog_data->base, surf_index);
    } else {
@@ -1042,12 +1042,12 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
           * operand. If we can determine that one of the args is in the low
           * 16 bits, though, we can just emit a single MUL.
           */
-         if (value0 && value0->u[0] < (1 << 16)) {
+         if (value0 && value0->u32[0] < (1 << 16)) {
             if (devinfo->gen < 7)
                emit(MUL(dst, op[0], op[1]));
             else
                emit(MUL(dst, op[1], op[0]));
-         } else if (value1 && value1->u[0] < (1 << 16)) {
+         } else if (value1 && value1->u32[0] < (1 << 16)) {
             if (devinfo->gen < 7)
                emit(MUL(dst, op[1], op[0]));
             else
@@ -1793,7 +1793,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
          nir_const_value *const_offset =
             nir_src_as_const_value(instr->src[i].src);
          if (const_offset) {
-            constant_offset = brw_texture_offset(const_offset->i, 3);
+            constant_offset = brw_texture_offset(const_offset->i32, 3);
          } else {
             offset_value =
                get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 2);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
index f344eaa..0ce48b8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
@@ -353,7 +353,7 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
 
       nir_const_value *vertex_const = nir_src_as_const_value(instr->src[0]);
       src_reg vertex_index =
-         vertex_const ? src_reg(brw_imm_ud(vertex_const->u[0]))
+         vertex_const ? src_reg(brw_imm_ud(vertex_const->u32[0]))
                       : get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1);
 
       dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
@@ -400,6 +400,7 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
          }
       } else if (imm_offset == 1 && indirect_offset.file == BAD_FILE) {
          dst.type = BRW_REGISTER_TYPE_F;
+         unsigned swiz = BRW_SWIZZLE_WZYX;
 
          /* This is a read of gl_TessLevelOuter[], which lives in the
           * high 4 DWords of the Patch URB header, in reverse order.
@@ -412,6 +413,8 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
             dst.writemask = WRITEMASK_XYZ;
             break;
          case GL_ISOLINES:
+            /* Isolines are not reversed; swizzle .zw -> .xy */
+            swiz = BRW_SWIZZLE_ZWZW;
             dst.writemask = WRITEMASK_XY;
             return;
          default:
@@ -420,7 +423,7 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
 
          dst_reg tmp(this, glsl_type::vec4_type);
          emit_output_urb_read(tmp, 1, src_reg());
-         emit(MOV(dst, swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX)));
+         emit(MOV(dst, swizzle(src_reg(tmp), swiz)));
       } else {
          emit_output_urb_read(dst, imm_offset, indirect_offset);
       }
@@ -473,8 +476,15 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
           * Patch URB Header at DWords 4-7.  However, it's reversed, so
           * instead of .xyzw we have .wzyx.
           */
-         swiz = BRW_SWIZZLE_WZYX;
-         mask = writemask_for_backwards_vector(mask);
+         if (key->tes_primitive_mode == GL_ISOLINES) {
+            /* Isolines .xy should be stored in .zw, in order. */
+            swiz = BRW_SWIZZLE4(0, 0, 0, 1);
+            mask <<= 2;
+         } else {
+            /* Other domains are reversed; store .wzyx instead of .xyzw. */
+            swiz = BRW_SWIZZLE_WZYX;
+            mask = writemask_for_backwards_vector(mask);
+         }
       }
 
       emit_urb_write(swizzle(value, swiz), mask,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp
index e3c23f1..7ba494f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp
@@ -149,9 +149,15 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
                src_reg(brw_vec8_grf(1, 0))));
       break;
    case nir_intrinsic_load_tess_level_outer:
-      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
-               swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
-                       BRW_SWIZZLE_WZYX)));
+      if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) {
+         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
+                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
+                          BRW_SWIZZLE_ZWZW)));
+      } else {
+         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
+                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
+                          BRW_SWIZZLE_WZYX)));
+      }
       break;
    case nir_intrinsic_load_tess_level_inner:
       if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c
index cee139b..f5a7d4d 100644
--- a/src/mesa/drivers/dri/i965/gen6_cc.c
+++ b/src/mesa/drivers/dri/i965/gen6_cc.c
@@ -198,14 +198,14 @@ gen6_upload_blend_state(struct brw_context *brw)
       if(!is_buffer_zero_integer_format) {
          /* _NEW_MULTISAMPLE */
          blend[b].blend1.alpha_to_coverage =
-            ctx->Multisample._Enabled && ctx->Multisample.SampleAlphaToCoverage;
+            _mesa_is_multisample_enabled(ctx) && ctx->Multisample.SampleAlphaToCoverage;
 
 	/* From SandyBridge PRM, volume 2 Part 1, section 8.2.3, BLEND_STATE:
 	 * DWord 1, Bit 30 (AlphaToOne Enable):
 	 * "If Dual Source Blending is enabled, this bit must be disabled"
 	 */
          WARN_ONCE(ctx->Color.Blend[b]._UsesDualSrc &&
-                   ctx->Multisample._Enabled &&
+                   _mesa_is_multisample_enabled(ctx) &&
                    ctx->Multisample.SampleAlphaToOne,
                    "HW workaround: disabling alpha to one with dual src "
                    "blending\n");
@@ -213,7 +213,7 @@ gen6_upload_blend_state(struct brw_context *brw)
             blend[b].blend1.alpha_to_one = false;
 	 else
 	    blend[b].blend1.alpha_to_one =
-	       ctx->Multisample._Enabled && ctx->Multisample.SampleAlphaToOne;
+	       _mesa_is_multisample_enabled(ctx) && ctx->Multisample.SampleAlphaToOne;
 
          blend[b].blend1.alpha_to_coverage_dither = (brw->gen >= 7);
       }
diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c
index 9a29366..004eceb 100644
--- a/src/mesa/drivers/dri/i965/gen6_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c
@@ -174,12 +174,14 @@ upload_clip_state(struct brw_context *brw)
    else
       enable = GEN6_CLIP_ENABLE;
 
+   if (!is_drawing_points(brw) && !is_drawing_lines(brw))
+      dw2 |= GEN6_CLIP_XY_TEST;
+
    BEGIN_BATCH(4);
    OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
    OUT_BATCH(dw1);
    OUT_BATCH(enable |
 	     GEN6_CLIP_MODE_NORMAL |
-	     GEN6_CLIP_XY_TEST |
 	     dw2);
    OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
              U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
@@ -195,7 +197,9 @@ const struct brw_tracked_state gen6_clip_state = {
                _NEW_TRANSFORM,
       .brw   = BRW_NEW_CONTEXT |
                BRW_NEW_FS_PROG_DATA |
+               BRW_NEW_GEOMETRY_PROGRAM |
                BRW_NEW_META_IN_PROGRESS |
+               BRW_NEW_PRIMITIVE |
                BRW_NEW_RASTERIZER_DISCARD,
    },
    .emit = upload_clip_state,
@@ -209,7 +213,9 @@ const struct brw_tracked_state gen7_clip_state = {
                _NEW_TRANSFORM,
       .brw   = BRW_NEW_CONTEXT |
                BRW_NEW_FS_PROG_DATA |
+               BRW_NEW_GEOMETRY_PROGRAM |
                BRW_NEW_META_IN_PROGRESS |
+               BRW_NEW_PRIMITIVE |
                BRW_NEW_RASTERIZER_DISCARD,
    },
    .emit = upload_clip_state,
diff --git a/src/mesa/drivers/dri/i965/gen6_multisample_state.c b/src/mesa/drivers/dri/i965/gen6_multisample_state.c
index 8eb620d..fcd313a 100644
--- a/src/mesa/drivers/dri/i965/gen6_multisample_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_multisample_state.c
@@ -171,7 +171,7 @@ gen6_determine_sample_mask(struct brw_context *brw)
    /* BRW_NEW_NUM_SAMPLES */
    unsigned num_samples = brw->num_samples;
 
-   if (ctx->Multisample._Enabled) {
+   if (_mesa_is_multisample_enabled(ctx)) {
       if (ctx->Multisample.SampleCoverage) {
          coverage = ctx->Multisample.SampleCoverageValue;
          coverage_invert = ctx->Multisample.SampleCoverageInvert;
diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c
index 17b4a7f..a206732 100644
--- a/src/mesa/drivers/dri/i965/gen6_scissor_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c
@@ -58,10 +58,10 @@ gen6_upload_scissor_state(struct brw_context *brw)
    for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) {
       int bbox[4];
 
-      bbox[0] = 0;
-      bbox[1] = fb_width;
-      bbox[2] = 0;
-      bbox[3] = fb_height;
+      bbox[0] = MAX2(ctx->ViewportArray[i].X, 0);
+      bbox[1] = MIN2(bbox[0] + ctx->ViewportArray[i].Width, fb_width);
+      bbox[2] = MAX2(ctx->ViewportArray[i].Y, 0);
+      bbox[3] = MIN2(bbox[2] + ctx->ViewportArray[i].Height, fb_height);
       _mesa_intersect_scissor_bounding_box(ctx, i, bbox);
 
       if (bbox[0] == bbox[1] || bbox[2] == bbox[3]) {
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index 2634e6b..42f9a5c 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -147,26 +147,6 @@ get_attr_override(const struct brw_vue_map *vue_map, int urb_entry_read_offset,
 }
 
 
-static bool
-is_drawing_points(const struct brw_context *brw)
-{
-   /* Determine if the primitives *reaching the SF* are points */
-   /* _NEW_POLYGON */
-   if (brw->ctx.Polygon.FrontMode == GL_POINT ||
-       brw->ctx.Polygon.BackMode == GL_POINT) {
-      return true;
-   }
-
-   if (brw->geometry_program) {
-      /* BRW_NEW_GEOMETRY_PROGRAM */
-      return brw->geometry_program->OutputType == GL_POINTS;
-   } else {
-      /* BRW_NEW_PRIMITIVE */
-      return brw->primitive == _3DPRIM_POINTLIST;
-   }
-}
-
-
 /**
  * Create the mapping from the FS inputs we produce to the previous pipeline
  * stage (GS or VS) outputs they source from.
@@ -216,8 +196,10 @@ calculate_attr_overrides(const struct brw_context *brw,
     * This is not required on Haswell, as the hardware ignores this state
     * when drawing non-points -- although we do still need to be careful to
     * correctly set the attr overrides.
+    *
+    * _NEW_POLYGON
+    * BRW_NEW_PRIMITIVE | BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_TES_PROG_DATA
     */
-   /* BRW_NEW_PRIMITIVE | BRW_NEW_GEOMETRY_PROGRAM */
    bool drawing_points = is_drawing_points(brw);
 
    /* Initialize all the attr_overrides to 0.  In the loop below we'll modify
@@ -369,8 +351,9 @@ upload_sf_state(struct brw_context *brw)
        unreachable("not reached");
    }
 
-   /* _NEW_SCISSOR */
-   if (ctx->Scissor.EnableFlags)
+   /* _NEW_SCISSOR _NEW_POLYGON BRW_NEW_GEOMETRY_PROGRAM BRW_NEW_PRIMITIVE */
+   if (ctx->Scissor.EnableFlags ||
+       is_drawing_points(brw) || is_drawing_lines(brw))
       dw3 |= GEN6_SF_SCISSOR_ENABLE;
 
    /* _NEW_POLYGON */
@@ -484,6 +467,7 @@ const struct brw_tracked_state gen6_sf_state = {
                BRW_NEW_FS_PROG_DATA |
                BRW_NEW_GEOMETRY_PROGRAM |
                BRW_NEW_PRIMITIVE |
+               BRW_NEW_TES_PROG_DATA |
                BRW_NEW_VUE_MAP_GEOM_OUT,
    },
    .emit = upload_sf_state,
diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c
index b1f13ac..7c98c73 100644
--- a/src/mesa/drivers/dri/i965/gen7_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c
@@ -188,8 +188,9 @@ upload_sf_state(struct brw_context *brw)
       dw2 |= GEN6_SF_CULL_NONE;
    }
 
-   /* _NEW_SCISSOR */
-   if (ctx->Scissor.EnableFlags)
+   /* _NEW_SCISSOR _NEW_POLYGON BRW_NEW_GEOMETRY_PROGRAM BRW_NEW_PRIMITIVE */
+   if (ctx->Scissor.EnableFlags ||
+       is_drawing_points(brw) || is_drawing_lines(brw))
       dw2 |= GEN6_SF_SCISSOR_ENABLE;
 
    /* _NEW_LINE */
@@ -254,7 +255,8 @@ const struct brw_tracked_state gen7_sf_state = {
                _NEW_POLYGON |
                _NEW_PROGRAM |
                _NEW_SCISSOR,
-      .brw   = BRW_NEW_CONTEXT,
+      .brw   = BRW_NEW_CONTEXT |
+               BRW_NEW_PRIMITIVE,
    },
    .emit = upload_sf_state,
 };
diff --git a/src/mesa/drivers/dri/i965/gen8_blend_state.c b/src/mesa/drivers/dri/i965/gen8_blend_state.c
index 786c79a..63186bd 100644
--- a/src/mesa/drivers/dri/i965/gen8_blend_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_blend_state.c
@@ -65,7 +65,7 @@ gen8_upload_blend_state(struct brw_context *brw)
 
    if (rb_zero_type != GL_INT && rb_zero_type != GL_UNSIGNED_INT) {
       /* _NEW_MULTISAMPLE */
-      if (ctx->Multisample._Enabled) {
+      if (_mesa_is_multisample_enabled(ctx)) {
          if (ctx->Multisample.SampleAlphaToCoverage) {
             blend[0] |= GEN8_BLEND_ALPHA_TO_COVERAGE_ENABLE;
             blend[0] |= GEN8_BLEND_ALPHA_TO_COVERAGE_DITHER_ENABLE;
@@ -183,7 +183,7 @@ gen8_upload_blend_state(struct brw_context *brw)
       * "If Dual Source Blending is enabled, this bit must be disabled."
       */
       WARN_ONCE(ctx->Color.Blend[i]._UsesDualSrc &&
-                ctx->Multisample._Enabled &&
+                _mesa_is_multisample_enabled(ctx) &&
                 ctx->Multisample.SampleAlphaToOne,
                 "HW workaround: disabling alpha to one with dual src "
                 "blending\n");
@@ -226,7 +226,7 @@ gen8_upload_ps_blend(struct brw_context *brw)
       dw1 |= GEN8_PS_BLEND_ALPHA_TEST_ENABLE;
 
    /* _NEW_MULTISAMPLE */
-   if (ctx->Multisample._Enabled && ctx->Multisample.SampleAlphaToCoverage)
+   if (_mesa_is_multisample_enabled(ctx) && ctx->Multisample.SampleAlphaToCoverage)
       dw1 |= GEN8_PS_BLEND_ALPHA_TO_COVERAGE_ENABLE;
 
    /* Used for implementing the following bit of GL_EXT_texture_integer:
diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c
index 93100a0..8aaa1a8 100644
--- a/src/mesa/drivers/dri/i965/gen8_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c
@@ -29,6 +29,7 @@
 #include "brw_state.h"
 #include "brw_defines.h"
 #include "brw_wm.h"
+#include "main/framebuffer.h"
 
 /**
  * Helper function to emit depth related command packets.
@@ -303,7 +304,7 @@ pma_fix_enable(const struct brw_context *brw)
    const bool kill_pixel =
       brw->wm.prog_data->uses_kill ||
       brw->wm.prog_data->uses_omask ||
-      (ctx->Multisample._Enabled && ctx->Multisample.SampleAlphaToCoverage) ||
+      (_mesa_is_multisample_enabled(ctx) && ctx->Multisample.SampleAlphaToCoverage) ||
       ctx->Color.AlphaEnabled;
 
    /* The big formula in CACHE_MODE_1::NP PMA FIX ENABLE. */
diff --git a/src/mesa/drivers/dri/i965/gen8_sf_state.c b/src/mesa/drivers/dri/i965/gen8_sf_state.c
index 8b6f31f..2ac21f7 100644
--- a/src/mesa/drivers/dri/i965/gen8_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_sf_state.c
@@ -178,7 +178,7 @@ upload_sf(struct brw_context *brw)
       dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH;
 
    /* _NEW_POINT | _NEW_MULTISAMPLE */
-   if ((ctx->Point.SmoothFlag || ctx->Multisample._Enabled) &&
+   if ((ctx->Point.SmoothFlag || _mesa_is_multisample_enabled(ctx)) &&
        !ctx->Point.PointSprite) {
       dw3 |= GEN8_SF_SMOOTH_POINT_ENABLE;
    }
@@ -249,7 +249,7 @@ upload_raster(struct brw_context *brw)
    if (ctx->Point.SmoothFlag)
       dw1 |= GEN8_RASTER_SMOOTH_POINT_ENABLE;
 
-   if (ctx->Multisample._Enabled)
+   if (_mesa_is_multisample_enabled(ctx))
       dw1 |= GEN8_RASTER_API_MULTISAMPLE_ENABLE;
 
    if (ctx->Polygon.OffsetFill)
diff --git a/src/mesa/drivers/dri/i965/intel_copy_image.c b/src/mesa/drivers/dri/i965/intel_copy_image.c
index 08b7623..ccb82b6 100644
--- a/src/mesa/drivers/dri/i965/intel_copy_image.c
+++ b/src/mesa/drivers/dri/i965/intel_copy_image.c
@@ -140,9 +140,9 @@ copy_image_with_memcpy(struct brw_context *brw,
    _mesa_get_format_block_size(src_mt->format, &src_bw, &src_bh);
 
    assert(src_width % src_bw == 0);
-   assert(src_height % src_bw == 0);
+   assert(src_height % src_bh == 0);
    assert(src_x % src_bw == 0);
-   assert(src_y % src_bw == 0);
+   assert(src_y % src_bh == 0);
 
    /* If we are on the same miptree, same level, and same slice, then
     * intel_miptree_map won't let us map it twice.  We have to do things a
@@ -153,7 +153,7 @@ copy_image_with_memcpy(struct brw_context *brw,
 
    if (same_slice) {
       assert(dst_x % src_bw == 0);
-      assert(dst_y % src_bw == 0);
+      assert(dst_y % src_bh == 0);
 
       map_x1 = MIN2(src_x, dst_x);
       map_y1 = MIN2(src_y, dst_y);
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 6c233d8..9e84abb 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -2172,7 +2172,8 @@ intel_miptree_updownsample(struct brw_context *brw,
                               src->logical_width0, src->logical_height0,
                               0, 0,
                               dst->logical_width0, dst->logical_height0,
-                              GL_NEAREST, false, false /*mirror x, y*/);
+                              GL_NEAREST, false, false /*mirror x, y*/,
+                              false, false);
    } else if (src->format == MESA_FORMAT_S_UINT8) {
       brw_meta_stencil_updownsample(brw, src, dst);
    } else {
@@ -2194,7 +2195,8 @@ intel_miptree_updownsample(struct brw_context *brw,
                               src->logical_width0, src->logical_height0,
                               0, 0,
                               dst->logical_width0, dst->logical_height0,
-                              GL_NEAREST, false, false /*mirror x, y*/);
+                              GL_NEAREST, false, false /*mirror x, y*/,
+                              false, false /* decode/encode srgb */);
    }
 }