Diffstat (limited to 'src/compiler/nir')
40 files changed, 1028 insertions, 385 deletions
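The change running through this whole patch: every SSA definition and register now carries an explicit channel bit size (8, 16, 32, or 64) instead of an implicit 32 bits, so nir_ssa_dest_init() and nir_ssa_def_init() grow a bit_size parameter and most existing call sites simply pass 32. A minimal sketch of the new call pattern, mirroring the glsl_to_nir.cpp hunks below ('shader', 'b', and 'type' stand in for the visitor state; this is illustrative, not part of the patch):

/* Sized destinations: derive the channel width from the GLSL base type
 * rather than assuming 32 bits everywhere. (Source operand setup elided.) */
nir_intrinsic_instr *load =
   nir_intrinsic_instr_create(shader, nir_intrinsic_load_ssbo);
load->num_components = type->vector_elements;
unsigned bit_size = glsl_get_bit_size(type->base_type);
nir_ssa_dest_init(&load->instr, &load->dest,
                  type->vector_elements, bit_size, NULL);
nir_builder_instr_insert(&b, &load->instr);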
diff --git a/src/compiler/nir/glsl_to_nir.cpp b/src/compiler/nir/glsl_to_nir.cpp index da5d730..7b8b466 100644 --- a/src/compiler/nir/glsl_to_nir.cpp +++ b/src/compiler/nir/glsl_to_nir.cpp @@ -731,7 +731,7 @@ nir_visitor::visit(ir_call *ir) ir_dereference *param = (ir_dereference *) ir->actual_parameters.get_head(); instr->variables[0] = evaluate_deref(&instr->instr, param); - nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL); nir_builder_instr_insert(&b, &instr->instr); break; } @@ -765,7 +765,7 @@ nir_visitor::visit(ir_call *ir) const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; nir_ssa_dest_init(&instr->instr, &instr->dest, - info->dest_components, NULL); + info->dest_components, 32, NULL); } if (op == nir_intrinsic_image_size || @@ -826,7 +826,7 @@ nir_visitor::visit(ir_call *ir) nir_builder_instr_insert(&b, &instr->instr); break; case nir_intrinsic_shader_clock: - nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL); nir_builder_instr_insert(&b, &instr->instr); break; case nir_intrinsic_store_ssbo: { @@ -867,7 +867,7 @@ nir_visitor::visit(ir_call *ir) /* Setup destination register */ nir_ssa_dest_init(&instr->instr, &instr->dest, - type->vector_elements, NULL); + type->vector_elements, 32, NULL); /* Insert the created nir instruction now since in the case of boolean * result we will need to emit another instruction after it @@ -890,7 +890,7 @@ nir_visitor::visit(ir_call *ir) load_ssbo_compare->src[1].swizzle[i] = 0; nir_ssa_dest_init(&load_ssbo_compare->instr, &load_ssbo_compare->dest.dest, - type->vector_elements, NULL); + type->vector_elements, 32, NULL); load_ssbo_compare->dest.write_mask = (1 << type->vector_elements) - 1; nir_builder_instr_insert(&b, &load_ssbo_compare->instr); dest = &load_ssbo_compare->dest.dest; @@ -936,7 +936,7 @@ nir_visitor::visit(ir_call *ir) /* Atomic result */ assert(ir->return_deref); nir_ssa_dest_init(&instr->instr, &instr->dest, - ir->return_deref->type->vector_elements, NULL); + ir->return_deref->type->vector_elements, 32, NULL); nir_builder_instr_insert(&b, &instr->instr); break; } @@ -951,8 +951,9 @@ nir_visitor::visit(ir_call *ir) instr->num_components = type->vector_elements; /* Setup destination register */ + unsigned bit_size = glsl_get_bit_size(type->base_type); nir_ssa_dest_init(&instr->instr, &instr->dest, - type->vector_elements, NULL); + type->vector_elements, bit_size, NULL); nir_builder_instr_insert(&b, &instr->instr); break; @@ -1013,8 +1014,10 @@ nir_visitor::visit(ir_call *ir) /* Atomic result */ assert(ir->return_deref); + unsigned bit_size = glsl_get_bit_size(ir->return_deref->type->base_type); nir_ssa_dest_init(&instr->instr, &instr->dest, - ir->return_deref->type->vector_elements, NULL); + ir->return_deref->type->vector_elements, + bit_size, NULL); nir_builder_instr_insert(&b, &instr->instr); break; } @@ -1061,6 +1064,9 @@ nir_visitor::visit(ir_assignment *ir) { unsigned num_components = ir->lhs->type->vector_elements; + b.exact = ir->lhs->variable_referenced()->data.invariant || + ir->lhs->variable_referenced()->data.precise; + if ((ir->rhs->as_dereference() || ir->rhs->as_constant()) && (ir->write_mask == (1 << num_components) - 1 || ir->write_mask == 0)) { /* We're doing a plain-as-can-be copy, so emit a copy_var */ @@ -1163,7 +1169,7 @@ nir_visitor::add_instr(nir_instr *instr, unsigned num_components) nir_dest *dest = get_instr_dest(instr); if (dest) - 
nir_ssa_dest_init(instr, dest, num_components, NULL); + nir_ssa_dest_init(instr, dest, num_components, 32, NULL); nir_builder_instr_insert(&b, instr); @@ -1203,6 +1209,7 @@ nir_visitor::visit(ir_expression *ir) nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_ubo); load->num_components = ir->type->vector_elements; + load->dest.ssa.bit_size = glsl_get_bit_size(ir->type->base_type); load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0])); load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1])); add_instr(&load->instr, ir->type->vector_elements); diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index 7e41ed3..b67916d 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -70,6 +70,7 @@ reg_create(void *mem_ctx, struct exec_list *list) list_inithead(®->if_uses); reg->num_components = 0; + reg->bit_size = 32; reg->num_array_elems = 0; reg->is_packed = false; reg->name = NULL; @@ -473,7 +474,7 @@ nir_load_const_instr_create(nir_shader *shader, unsigned num_components) nir_load_const_instr *instr = ralloc(shader, nir_load_const_instr); instr_init(&instr->instr, nir_instr_type_load_const); - nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL); + nir_ssa_def_init(&instr->instr, &instr->def, num_components, 32, NULL); return instr; } @@ -562,7 +563,7 @@ nir_ssa_undef_instr_create(nir_shader *shader, unsigned num_components) nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr); instr_init(&instr->instr, nir_instr_type_ssa_undef); - nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL); + nir_ssa_def_init(&instr->instr, &instr->def, num_components, 32, NULL); return instr; } @@ -699,10 +700,10 @@ nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref) case GLSL_TYPE_FLOAT: case GLSL_TYPE_INT: case GLSL_TYPE_UINT: - load->value.u[i] = constant->value.u[matrix_offset + i]; + load->value.u32[i] = constant->value.u[matrix_offset + i]; break; case GLSL_TYPE_BOOL: - load->value.u[i] = constant->value.b[matrix_offset + i] ? + load->value.u32[i] = constant->value.b[matrix_offset + i] ? NIR_TRUE : NIR_FALSE; break; default: @@ -731,18 +732,11 @@ reduce_cursor(nir_cursor cursor) { switch (cursor.option) { case nir_cursor_before_block: + assert(nir_cf_node_prev(&cursor.block->cf_node) == NULL || + nir_cf_node_prev(&cursor.block->cf_node)->type != nir_cf_node_block); if (exec_list_is_empty(&cursor.block->instr_list)) { /* Empty block. After is as good as before. */ cursor.option = nir_cursor_after_block; - } else { - /* Try to switch to after the previous block if there is one. - * (This isn't likely, but it can happen.) 
- */ - nir_cf_node *prev_node = nir_cf_node_prev(&cursor.block->cf_node); - if (prev_node && prev_node->type == nir_cf_node_block) { - cursor.block = nir_cf_node_as_block(prev_node); - cursor.option = nir_cursor_after_block; - } } return cursor; @@ -1379,15 +1373,18 @@ nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest) src_add_all_uses(dest->reg.indirect, instr, NULL); } +/* note: does *not* take ownership of 'name' */ void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, - unsigned num_components, const char *name) + unsigned num_components, + unsigned bit_size, const char *name) { - def->name = name; + def->name = ralloc_strdup(instr, name); def->parent_instr = instr; list_inithead(&def->uses); list_inithead(&def->if_uses); def->num_components = num_components; + def->bit_size = bit_size; if (instr->block) { nir_function_impl *impl = @@ -1399,12 +1396,14 @@ nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, } } +/* note: does *not* take ownership of 'name' */ void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest, - unsigned num_components, const char *name) + unsigned num_components, unsigned bit_size, + const char *name) { dest->is_ssa = true; - nir_ssa_def_init(instr, &dest->ssa, num_components, name); + nir_ssa_def_init(instr, &dest->ssa, num_components, bit_size, name); } void diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index ab1afdb..2fd75ec 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -101,6 +101,7 @@ union nir_constant_data { int i[16]; float f[16]; bool b[16]; + double d[16]; }; typedef struct nir_constant { @@ -381,6 +382,9 @@ typedef struct nir_register { unsigned num_components; /** < number of vector components */ unsigned num_array_elems; /** < size of array (0 for no array) */ + /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */ + uint8_t bit_size; + /** generic register index. */ unsigned index; @@ -488,6 +492,9 @@ typedef struct nir_ssa_def { struct list_head if_uses; uint8_t num_components; + + /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */ + uint8_t bit_size; } nir_ssa_def; struct nir_src; @@ -594,6 +601,18 @@ nir_dest_for_reg(nir_register *reg) return dest; } +static inline unsigned +nir_src_bit_size(nir_src src) +{ + return src.is_ssa ? src.ssa->bit_size : src.reg.reg->bit_size; +} + +static inline unsigned +nir_dest_bit_size(nir_dest dest) +{ + return dest.is_ssa ? 
dest.ssa.bit_size : dest.reg.reg->bit_size; +} + void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if); void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr); @@ -649,9 +668,36 @@ typedef enum { nir_type_float, nir_type_int, nir_type_uint, - nir_type_bool + nir_type_bool, + nir_type_bool32 = 32 | nir_type_bool, + nir_type_int8 = 8 | nir_type_int, + nir_type_int16 = 16 | nir_type_int, + nir_type_int32 = 32 | nir_type_int, + nir_type_int64 = 64 | nir_type_int, + nir_type_uint8 = 8 | nir_type_uint, + nir_type_uint16 = 16 | nir_type_uint, + nir_type_uint32 = 32 | nir_type_uint, + nir_type_uint64 = 64 | nir_type_uint, + nir_type_float16 = 16 | nir_type_float, + nir_type_float32 = 32 | nir_type_float, + nir_type_float64 = 64 | nir_type_float, } nir_alu_type; +#define NIR_ALU_TYPE_SIZE_MASK 0xfffffff8 +#define NIR_ALU_TYPE_BASE_TYPE_MASK 0x00000007 + +static inline unsigned +nir_alu_type_get_type_size(nir_alu_type type) +{ + return type & NIR_ALU_TYPE_SIZE_MASK; +} + +static inline unsigned +nir_alu_type_get_base_type(nir_alu_type type) +{ + return type & NIR_ALU_TYPE_BASE_TYPE_MASK; +} + typedef enum { NIR_OP_IS_COMMUTATIVE = (1 << 0), NIR_OP_IS_ASSOCIATIVE = (1 << 1), @@ -708,6 +754,17 @@ extern const nir_op_info nir_op_infos[nir_num_opcodes]; typedef struct nir_alu_instr { nir_instr instr; nir_op op; + + /** Indicates that this ALU instruction generates an exact value + * + * This is kind of a mixture of GLSL "precise" and "invariant" and not + * really equivalent to either. This indicates that the value generated by + * this operation is high-precision and any code transformations that touch + * it must ensure that the resulting value is bit-for-bit identical to the + * original. + */ + bool exact; + nir_alu_dest dest; nir_alu_src src[]; } nir_alu_instr; @@ -1218,9 +1275,12 @@ nir_tex_instr_src_index(nir_tex_instr *instr, nir_tex_src_type type) typedef struct { union { - float f[4]; - int32_t i[4]; - uint32_t u[4]; + float f32[4]; + double f64[4]; + int32_t i32[4]; + uint32_t u32[4]; + int64_t i64[4]; + uint64_t u64[4]; }; } nir_const_value; @@ -2061,9 +2121,11 @@ void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest); void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest, - unsigned num_components, const char *name); + unsigned num_components, unsigned bit_size, + const char *name); void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, - unsigned num_components, const char *name); + unsigned num_components, unsigned bit_size, + const char *name); void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src); void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src, nir_instr *after_me); @@ -2094,9 +2156,10 @@ void nir_index_blocks(nir_function_impl *impl); void nir_print_shader(nir_shader *shader, FILE *fp); void nir_print_instr(const nir_instr *instr, FILE *fp); -nir_shader * nir_shader_clone(void *mem_ctx, const nir_shader *s); +nir_shader *nir_shader_clone(void *mem_ctx, const nir_shader *s); nir_function_impl *nir_function_impl_clone(const nir_function_impl *fi); nir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var); +nir_variable *nir_variable_clone(const nir_variable *c, nir_shader *shader); #ifdef DEBUG void nir_validate_shader(nir_shader *shader); diff --git a/src/compiler/nir/nir_algebraic.py b/src/compiler/nir/nir_algebraic.py index 2357b57..d05564f 100644 --- a/src/compiler/nir/nir_algebraic.py +++ b/src/compiler/nir/nir_algebraic.py @@ -63,12 +63,13 @@ class Value(object): 
static const ${val.c_type} ${val.name} = { { ${val.type_enum} }, % if isinstance(val, Constant): - { ${hex(val)} /* ${val.value} */ }, + ${val.type()}, { ${hex(val)} /* ${val.value} */ }, % elif isinstance(val, Variable): ${val.index}, /* ${val.var_name} */ ${'true' if val.is_constant else 'false'}, - nir_type_${ val.required_type or 'invalid' }, + ${val.type() or 'nir_type_invalid' }, % elif isinstance(val, Expression): + ${'true' if val.inexact else 'false'}, nir_op_${val.opcode}, { ${', '.join(src.c_ptr for src in val.sources)} }, % endif @@ -107,10 +108,18 @@ class Constant(Value): if isinstance(self.value, (int, long)): return hex(self.value) elif isinstance(self.value, float): - return hex(struct.unpack('I', struct.pack('f', self.value))[0]) + return hex(struct.unpack('Q', struct.pack('d', self.value))[0]) else: assert False + def type(self): + if isinstance(self.value, (bool)): + return "nir_type_bool32" + elif isinstance(self.value, (int, long)): + return "nir_type_int" + elif isinstance(self.value, float): + return "nir_type_float" + _var_name_re = re.compile(r"(?P<const>#)?(?P<name>\w+)(?:@(?P<type>\w+))?") class Variable(Value): @@ -129,12 +138,26 @@ class Variable(Value): self.index = varset[self.var_name] + def type(self): + if self.required_type == 'bool': + return "nir_type_bool32" + elif self.required_type in ('int', 'unsigned'): + return "nir_type_int" + elif self.required_type == 'float': + return "nir_type_float" + +_opcode_re = re.compile(r"(?P<inexact>~)?(?P<opcode>\w+)") + class Expression(Value): def __init__(self, expr, name_base, varset): Value.__init__(self, name_base, "expression") assert isinstance(expr, tuple) - self.opcode = expr[0] + m = _opcode_re.match(expr[0]) + assert m and m.group('opcode') is not None + + self.opcode = m.group('opcode') + self.inexact = m.group('inexact') is not None self.sources = [ Value.create(src, "{0}_{1}".format(name_base, i), varset) for (i, src) in enumerate(expr[1:]) ] diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index b4dde54..94f183c 100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -31,6 +31,9 @@ struct exec_list; typedef struct nir_builder { nir_cursor cursor; + /* Whether new ALU instructions will be marked "exact" */ + bool exact; + nir_shader *shader; nir_function_impl *impl; } nir_builder; @@ -39,6 +42,7 @@ static inline void nir_builder_init(nir_builder *build, nir_function_impl *impl) { memset(build, 0, sizeof(*build)); + build->exact = false; build->impl = impl; build->shader = impl->function->shader; } @@ -50,6 +54,7 @@ nir_builder_init_simple_shader(nir_builder *build, void *mem_ctx, { build->shader = nir_shader_create(mem_ctx, stage, options); nir_function *func = nir_function_create(build->shader, "main"); + build->exact = false; build->impl = nir_function_impl_create(func); build->cursor = nir_after_cf_list(&build->impl->body); } @@ -104,7 +109,7 @@ nir_imm_float(nir_builder *build, float x) nir_const_value v; memset(&v, 0, sizeof(v)); - v.f[0] = x; + v.f32[0] = x; return nir_build_imm(build, 1, v); } @@ -115,10 +120,10 @@ nir_imm_vec4(nir_builder *build, float x, float y, float z, float w) nir_const_value v; memset(&v, 0, sizeof(v)); - v.f[0] = x; - v.f[1] = y; - v.f[2] = z; - v.f[3] = w; + v.f32[0] = x; + v.f32[1] = y; + v.f32[2] = z; + v.f32[3] = w; return nir_build_imm(build, 4, v); } @@ -129,7 +134,7 @@ nir_imm_int(nir_builder *build, int x) nir_const_value v; memset(&v, 0, sizeof(v)); - v.i[0] = x; + v.i32[0] = x; return nir_build_imm(build, 
1, v); } @@ -140,10 +145,10 @@ nir_imm_ivec4(nir_builder *build, int x, int y, int z, int w) nir_const_value v; memset(&v, 0, sizeof(v)); - v.i[0] = x; - v.i[1] = y; - v.i[2] = z; - v.i[3] = w; + v.i32[0] = x; + v.i32[1] = y; + v.i32[2] = z; + v.i32[3] = w; return nir_build_imm(build, 4, v); } @@ -157,6 +162,8 @@ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0, if (!instr) return NULL; + instr->exact = build->exact; + instr->src[0].src = nir_src_for_ssa(src0); if (src1) instr->src[1].src = nir_src_for_ssa(src1); @@ -178,6 +185,25 @@ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0, } assert(num_components != 0); + /* Figure out the bitwidth based on the source bitwidth if the instruction + * is variable-width. + */ + unsigned bit_size = nir_alu_type_get_type_size(op_info->output_type); + if (bit_size == 0) { + for (unsigned i = 0; i < op_info->num_inputs; i++) { + unsigned src_bit_size = instr->src[i].src.ssa->bit_size; + if (nir_alu_type_get_type_size(op_info->input_types[i]) == 0) { + if (bit_size) + assert(src_bit_size == bit_size); + else + bit_size = src_bit_size; + } else { + assert(src_bit_size == + nir_alu_type_get_type_size(op_info->input_types[i])); + } + } + } + /* Make sure we don't swizzle from outside of our source vector (like if a * scalar value was passed into a multiply with a vector). */ @@ -187,7 +213,8 @@ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0, } } - nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL); + nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, + bit_size, NULL); instr->dest.write_mask = (1 << num_components) - 1; nir_builder_instr_insert(build, &instr->instr); @@ -252,7 +279,9 @@ static inline nir_ssa_def * nir_fmov_alu(nir_builder *build, nir_alu_src src, unsigned num_components) { nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_fmov); - nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL); + nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, + nir_src_bit_size(src.src), NULL); + mov->exact = build->exact; mov->dest.write_mask = (1 << num_components) - 1; mov->src[0] = src; nir_builder_instr_insert(build, &mov->instr); @@ -264,7 +293,9 @@ static inline nir_ssa_def * nir_imov_alu(nir_builder *build, nir_alu_src src, unsigned num_components) { nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_imov); - nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL); + nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, + nir_src_bit_size(src.src), NULL); + mov->exact = build->exact; mov->dest.write_mask = (1 << num_components) - 1; mov->src[0] = src; nir_builder_instr_insert(build, &mov->instr); @@ -360,7 +391,8 @@ nir_load_var(nir_builder *build, nir_variable *var) nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_var); load->num_components = num_components; load->variables[0] = nir_deref_var_create(load, var); - nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL); + nir_ssa_dest_init(&load->instr, &load->dest, num_components, + glsl_get_bit_size(glsl_get_base_type(var->type)), NULL); nir_builder_instr_insert(build, &load->instr); return &load->dest.ssa; } @@ -426,7 +458,7 @@ nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index) load->num_components = nir_intrinsic_infos[op].dest_components; load->const_index[0] = index; nir_ssa_dest_init(&load->instr, &load->dest, - nir_intrinsic_infos[op].dest_components, NULL); + 
nir_intrinsic_infos[op].dest_components, 32, NULL); nir_builder_instr_insert(build, &load->instr); return &load->dest.ssa; } diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c index 3268deb..7d2e383 100644 --- a/src/compiler/nir/nir_clone.c +++ b/src/compiler/nir/nir_clone.c @@ -127,11 +127,10 @@ nir_constant_clone(const nir_constant *c, nir_variable *nvar) /* NOTE: for cloning nir_variable's, bypass nir_variable_create to avoid * having to deal with locals and globals separately: */ -static nir_variable * -clone_variable(clone_state *state, const nir_variable *var) +nir_variable * +nir_variable_clone(const nir_variable *var, nir_shader *shader) { - nir_variable *nvar = rzalloc(state->ns, nir_variable); - add_remap(state, nvar, var); + nir_variable *nvar = rzalloc(shader, nir_variable); nvar->type = var->type; nvar->name = ralloc_strdup(nvar, var->name); @@ -149,6 +148,15 @@ clone_variable(clone_state *state, const nir_variable *var) return nvar; } +static nir_variable * +clone_variable(clone_state *state, const nir_variable *var) +{ + nir_variable *nvar = nir_variable_clone(var, state->ns); + add_remap(state, nvar, var); + + return nvar; +} + /* clone list of nir_variable: */ static void clone_var_list(clone_state *state, struct exec_list *dst, @@ -220,7 +228,8 @@ __clone_dst(clone_state *state, nir_instr *ninstr, { ndst->is_ssa = dst->is_ssa; if (dst->is_ssa) { - nir_ssa_dest_init(ninstr, ndst, dst->ssa.num_components, dst->ssa.name); + nir_ssa_dest_init(ninstr, ndst, dst->ssa.num_components, + dst->ssa.bit_size, dst->ssa.name); add_remap(state, &ndst->ssa, &dst->ssa); } else { ndst->reg.reg = remap_reg(state, dst->reg.reg); @@ -303,6 +312,7 @@ static nir_alu_instr * clone_alu(clone_state *state, const nir_alu_instr *alu) { nir_alu_instr *nalu = nir_alu_instr_create(state->ns, alu->op); + nalu->exact = alu->exact; __clone_dst(state, &nalu->instr, &nalu->dest.dest, &alu->dest.dest); nalu->dest.saturate = alu->dest.saturate; diff --git a/src/compiler/nir/nir_constant_expressions.h b/src/compiler/nir/nir_constant_expressions.h index 97997f2..201f278 100644 --- a/src/compiler/nir/nir_constant_expressions.h +++ b/src/compiler/nir/nir_constant_expressions.h @@ -28,4 +28,4 @@ #include "nir.h" nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components, - nir_const_value *src); + unsigned bit_size, nir_const_value *src); diff --git a/src/compiler/nir/nir_constant_expressions.py b/src/compiler/nir/nir_constant_expressions.py index 32784f6..e36dc48 100644 --- a/src/compiler/nir/nir_constant_expressions.py +++ b/src/compiler/nir/nir_constant_expressions.py @@ -1,4 +1,43 @@ #! 
/usr/bin/python2 + +def type_has_size(type_): + return type_[-1:].isdigit() + +def type_sizes(type_): + if type_.endswith("8"): + return [8] + elif type_.endswith("16"): + return [16] + elif type_.endswith("32"): + return [32] + elif type_.endswith("64"): + return [64] + else: + return [32, 64] + +def type_add_size(type_, size): + if type_has_size(type_): + return type_ + return type_ + str(size) + +def get_const_field(type_): + if type_ == "int32": + return "i32" + if type_ == "uint32": + return "u32" + if type_ == "int64": + return "i64" + if type_ == "uint64": + return "u64" + if type_ == "bool32": + return "u32" + if type_ == "float32": + return "f32" + if type_ == "float64": + return "f64" + raise Exception(str(type_)) + assert(0) + template = """\ /* * Copyright (C) 2014 Intel Corporation @@ -205,110 +244,140 @@ unpack_half_1x16(uint16_t u) } /* Some typed vector structures to make things like src0.y work */ -% for type in ["float", "int", "uint", "bool"]: -struct ${type}_vec { - ${type} x; - ${type} y; - ${type} z; - ${type} w; +typedef float float32_t; +typedef double float64_t; +typedef bool bool32_t; +% for type in ["float", "int", "uint"]: +% for width in [32, 64]: +struct ${type}${width}_vec { + ${type}${width}_t x; + ${type}${width}_t y; + ${type}${width}_t z; + ${type}${width}_t w; }; % endfor +% endfor + +struct bool32_vec { + bool x; + bool y; + bool z; + bool w; +}; % for name, op in sorted(opcodes.iteritems()): static nir_const_value -evaluate_${name}(unsigned num_components, nir_const_value *_src) +evaluate_${name}(unsigned num_components, unsigned bit_size, + nir_const_value *_src) { nir_const_value _dst_val = { { {0, 0, 0, 0} } }; - ## For each non-per-component input, create a variable srcN that - ## contains x, y, z, and w elements which are filled in with the - ## appropriately-typed values. - % for j in range(op.num_inputs): - % if op.input_sizes[j] == 0: - <% continue %> - % elif "src" + str(j) not in op.const_expr: - ## Avoid unused variable warnings - <% continue %> - %endif - - struct ${op.input_types[j]}_vec src${j} = { - % for k in range(op.input_sizes[j]): - % if op.input_types[j] == "bool": - _src[${j}].u[${k}] != 0, - % else: - _src[${j}].${op.input_types[j][:1]}[${k}], - % endif - % endfor - }; - % endfor + switch (bit_size) { + % for bit_size in [32, 64]: + case ${bit_size}: { + <% + output_type = type_add_size(op.output_type, bit_size) + input_types = [type_add_size(type_, bit_size) for type_ in op.input_types] + %> + + ## For each non-per-component input, create a variable srcN that + ## contains x, y, z, and w elements which are filled in with the + ## appropriately-typed values. + % for j in range(op.num_inputs): + % if op.input_sizes[j] == 0: + <% continue %> + % elif "src" + str(j) not in op.const_expr: + ## Avoid unused variable warnings + <% continue %> + %endif - % if op.output_size == 0: - ## For per-component instructions, we need to iterate over the - ## components and apply the constant expression one component - ## at a time. - for (unsigned _i = 0; _i < num_components; _i++) { - ## For each per-component input, create a variable srcN that - ## contains the value of the current (_i'th) component. 
- % for j in range(op.num_inputs): - % if op.input_sizes[j] != 0: - <% continue %> - % elif "src" + str(j) not in op.const_expr: - ## Avoid unused variable warnings - <% continue %> - % elif op.input_types[j] == "bool": - bool src${j} = _src[${j}].u[_i] != 0; + struct ${input_types[j]}_vec src${j} = { + % for k in range(op.input_sizes[j]): + % if input_types[j] == "bool32": + _src[${j}].u32[${k}] != 0, % else: - ${op.input_types[j]} src${j} = _src[${j}].${op.input_types[j][:1]}[_i]; + _src[${j}].${get_const_field(input_types[j])}[${k}], % endif % endfor + }; + % endfor + + % if op.output_size == 0: + ## For per-component instructions, we need to iterate over the + ## components and apply the constant expression one component + ## at a time. + for (unsigned _i = 0; _i < num_components; _i++) { + ## For each per-component input, create a variable srcN that + ## contains the value of the current (_i'th) component. + % for j in range(op.num_inputs): + % if op.input_sizes[j] != 0: + <% continue %> + % elif "src" + str(j) not in op.const_expr: + ## Avoid unused variable warnings + <% continue %> + % elif input_types[j] == "bool32": + bool src${j} = _src[${j}].u32[_i] != 0; + % else: + ${input_types[j]}_t src${j} = + _src[${j}].${get_const_field(input_types[j])}[_i]; + % endif + % endfor + + ## Create an appropriately-typed variable dst and assign the + ## result of the const_expr to it. If const_expr already contains + ## writes to dst, just include const_expr directly. + % if "dst" in op.const_expr: + ${output_type}_t dst; + ${op.const_expr} + % else: + ${output_type}_t dst = ${op.const_expr}; + % endif + + ## Store the current component of the actual destination to the + ## value of dst. + % if output_type == "bool32": + ## Sanitize the C value to a proper NIR bool + _dst_val.u32[_i] = dst ? NIR_TRUE : NIR_FALSE; + % else: + _dst_val.${get_const_field(output_type)}[_i] = dst; + % endif + } + % else: + ## In the non-per-component case, create a struct dst with + ## appropriately-typed elements x, y, z, and w and assign the result + ## of the const_expr to all components of dst, or include the + ## const_expr directly if it writes to dst already. + struct ${output_type}_vec dst; - ## Create an appropriately-typed variable dst and assign the - ## result of the const_expr to it. If const_expr already contains - ## writes to dst, just include const_expr directly. % if "dst" in op.const_expr: - ${op.output_type} dst; ${op.const_expr} % else: - ${op.output_type} dst = ${op.const_expr}; + ## Splat the value to all components. This way expressions which + ## write the same value to all components don't need to explicitly + ## write to dest. One such example is fnoise which has a + ## const_expr of 0.0f. + dst.x = dst.y = dst.z = dst.w = ${op.const_expr}; % endif - ## Store the current component of the actual destination to the - ## value of dst. - % if op.output_type == "bool": - ## Sanitize the C value to a proper NIR bool - _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE; - % else: - _dst_val.${op.output_type[:1]}[_i] = dst; - % endif - } - % else: - ## In the non-per-component case, create a struct dst with - ## appropriately-typed elements x, y, z, and w and assign the result - ## of the const_expr to all components of dst, or include the - ## const_expr directly if it writes to dst already. - struct ${op.output_type}_vec dst; - - % if "dst" in op.const_expr: - ${op.const_expr} - % else: - ## Splat the value to all components. 
This way expressions which - ## write the same value to all components don't need to explicitly - ## write to dest. One such example is fnoise which has a - ## const_expr of 0.0f. - dst.x = dst.y = dst.z = dst.w = ${op.const_expr}; + ## For each component in the destination, copy the value of dst to + ## the actual destination. + % for k in range(op.output_size): + % if output_type == "bool32": + ## Sanitize the C value to a proper NIR bool + _dst_val.u32[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE; + % else: + _dst_val.${get_const_field(output_type)}[${k}] = dst.${"xyzw"[k]}; + % endif + % endfor % endif - ## For each component in the destination, copy the value of dst to - ## the actual destination. - % for k in range(op.output_size): - % if op.output_type == "bool": - ## Sanitize the C value to a proper NIR bool - _dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE; - % else: - _dst_val.${op.output_type[:1]}[${k}] = dst.${"xyzw"[k]}; - % endif - % endfor - % endif + break; + } + % endfor + + default: + unreachable("unknown bit width"); + } return _dst_val; } @@ -316,12 +385,12 @@ evaluate_${name}(unsigned num_components, nir_const_value *_src) nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components, - nir_const_value *src) + unsigned bit_width, nir_const_value *src) { switch (op) { % for name in sorted(opcodes.iterkeys()): case nir_op_${name}: { - return evaluate_${name}(num_components, src); + return evaluate_${name}(num_components, bit_width, src); break; } % endfor @@ -333,4 +402,7 @@ nir_eval_const_opcode(nir_op op, unsigned num_components, from nir_opcodes import opcodes from mako.template import Template -print Template(template).render(opcodes=opcodes) +print Template(template).render(opcodes=opcodes, type_sizes=type_sizes, + type_has_size=type_has_size, + type_add_size=type_add_size, + get_const_field=get_const_field) diff --git a/src/compiler/nir/nir_from_ssa.c b/src/compiler/nir/nir_from_ssa.c index 8bc9f24..82317c2 100644 --- a/src/compiler/nir/nir_from_ssa.c +++ b/src/compiler/nir/nir_from_ssa.c @@ -342,7 +342,8 @@ isolate_phi_nodes_block(nir_block *block, void *void_state) nir_parallel_copy_entry *entry = rzalloc(state->dead_ctx, nir_parallel_copy_entry); nir_ssa_dest_init(&pcopy->instr, &entry->dest, - phi->dest.ssa.num_components, src->src.ssa->name); + phi->dest.ssa.num_components, + phi->dest.ssa.bit_size, src->src.ssa->name); exec_list_push_tail(&pcopy->entries, &entry->node); assert(src->src.is_ssa); @@ -355,7 +356,8 @@ isolate_phi_nodes_block(nir_block *block, void *void_state) nir_parallel_copy_entry *entry = rzalloc(state->dead_ctx, nir_parallel_copy_entry); nir_ssa_dest_init(&block_pcopy->instr, &entry->dest, - phi->dest.ssa.num_components, phi->dest.ssa.name); + phi->dest.ssa.num_components, phi->dest.ssa.bit_size, + phi->dest.ssa.name); exec_list_push_tail(&block_pcopy->entries, &entry->node); nir_ssa_def_rewrite_uses(&phi->dest.ssa, diff --git a/src/compiler/nir/nir_gs_count_vertices.c b/src/compiler/nir/nir_gs_count_vertices.c index db15d16..3c1bd2a 100644 --- a/src/compiler/nir/nir_gs_count_vertices.c +++ b/src/compiler/nir/nir_gs_count_vertices.c @@ -77,13 +77,13 @@ nir_gs_count_vertices(const nir_shader *shader) return -1; if (count == -1) - count = val->i[0]; + count = val->i32[0]; /* We've found contradictory set_vertex_count intrinsics. * This can happen if there are early-returns in main() and * different paths emit different numbers of vertices. 
*/ - if (count != val->i[0]) + if (count != val->i32[0]) return -1; } } diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c index 159ded0..e244122 100644 --- a/src/compiler/nir/nir_instr_set.c +++ b/src/compiler/nir/nir_instr_set.c @@ -52,6 +52,7 @@ hash_alu(uint32_t hash, const nir_alu_instr *instr) { hash = HASH(hash, instr->op); hash = HASH(hash, instr->dest.dest.ssa.num_components); + /* We explicitly don't hash instr->exact. */ if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) { assert(nir_op_infos[instr->op].num_inputs == 2); @@ -81,9 +82,9 @@ hash_load_const(uint32_t hash, const nir_load_const_instr *instr) { hash = HASH(hash, instr->def.num_components); - hash = _mesa_fnv32_1a_accumulate_block(hash, instr->value.f, + hash = _mesa_fnv32_1a_accumulate_block(hash, instr->value.f32, instr->def.num_components - * sizeof(instr->value.f[0])); + * sizeof(instr->value.f32[0])); return hash; } @@ -267,6 +268,8 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2) if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components) return false; + /* We explicitly don't compare alu1->exact and alu2->exact. */ + if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) { assert(nir_op_infos[alu1->op].num_inputs == 2); return (nir_alu_srcs_equal(alu1, alu2, 0, 0) && @@ -322,8 +325,8 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2) if (load1->def.num_components != load2->def.num_components) return false; - return memcmp(load1->value.f, load2->value.f, - load1->def.num_components * sizeof(*load2->value.f)) == 0; + return memcmp(load1->value.f32, load2->value.f32, + load1->def.num_components * sizeof(*load2->value.f32)) == 0; } case nir_instr_type_phi: { nir_phi_instr *phi1 = nir_instr_as_phi(instr1); @@ -496,8 +499,17 @@ nir_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr) struct set_entry *entry = _mesa_set_search(instr_set, instr); if (entry) { nir_ssa_def *def = nir_instr_get_dest_ssa_def(instr); - nir_ssa_def *new_def = - nir_instr_get_dest_ssa_def((nir_instr *) entry->key); + nir_instr *match = (nir_instr *) entry->key; + nir_ssa_def *new_def = nir_instr_get_dest_ssa_def(match); + + /* It's safe to replace an exact instruction with an inexact one as + * long as we make it exact. If we got here, the two instructions are + * exactly identical in every other way so, once we've set the exact + * bit, they are the same. 
+ */ + if (instr->type == nir_instr_type_alu && nir_instr_as_alu(instr)->exact) + nir_instr_as_alu(match)->exact = true; + nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(new_def)); return true; } diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c index 312d2f9..e8ba640 100644 --- a/src/compiler/nir/nir_lower_alu_to_scalar.c +++ b/src/compiler/nir/nir_lower_alu_to_scalar.c @@ -31,9 +31,11 @@ */ static void -nir_alu_ssa_dest_init(nir_alu_instr *instr, unsigned num_components) +nir_alu_ssa_dest_init(nir_alu_instr *instr, unsigned num_components, + unsigned bit_size) { - nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL); + nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, + bit_size, NULL); instr->dest.write_mask = (1 << num_components) - 1; } @@ -46,7 +48,7 @@ lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op, nir_ssa_def *last = NULL; for (unsigned i = 0; i < num_components; i++) { nir_alu_instr *chan = nir_alu_instr_create(builder->shader, chan_op); - nir_alu_ssa_dest_init(chan, 1); + nir_alu_ssa_dest_init(chan, 1, instr->dest.dest.ssa.bit_size); nir_alu_src_copy(&chan->src[0], &instr->src[0], chan); chan->src[0].swizzle[0] = chan->src[0].swizzle[i]; if (nir_op_infos[chan_op].num_inputs > 1) { @@ -80,6 +82,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) assert(instr->dest.write_mask != 0); b->cursor = nir_before_instr(&instr->instr); + b->exact = instr->exact; #define LOWER_REDUCTION(name, chan, merge) \ case name##2: \ @@ -220,7 +223,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) lower->src[i].swizzle[j] = instr->src[i].swizzle[src_chan]; } - nir_alu_ssa_dest_init(lower, 1); + nir_alu_ssa_dest_init(lower, 1, instr->dest.dest.ssa.bit_size); lower->dest.saturate = instr->dest.saturate; comps[chan] = &lower->dest.dest.ssa; diff --git a/src/compiler/nir/nir_lower_atomics.c b/src/compiler/nir/nir_lower_atomics.c index eefcb55..70381a7 100644 --- a/src/compiler/nir/nir_lower_atomics.c +++ b/src/compiler/nir/nir_lower_atomics.c @@ -75,7 +75,7 @@ lower_instr(nir_intrinsic_instr *instr, state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index); nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1); - offset_const->value.u[0] = instr->variables[0]->var->data.offset; + offset_const->value.u32[0] = instr->variables[0]->var->data.offset; nir_instr_insert_before(&instr->instr, &offset_const->instr); @@ -90,17 +90,17 @@ lower_instr(nir_intrinsic_instr *instr, unsigned child_array_elements = tail->child != NULL ? 
glsl_get_aoa_size(tail->type) : 1; - offset_const->value.u[0] += deref_array->base_offset * + offset_const->value.u32[0] += deref_array->base_offset * child_array_elements * ATOMIC_COUNTER_SIZE; if (deref_array->deref_array_type == nir_deref_array_type_indirect) { nir_load_const_instr *atomic_counter_size = nir_load_const_instr_create(mem_ctx, 1); - atomic_counter_size->value.u[0] = child_array_elements * ATOMIC_COUNTER_SIZE; + atomic_counter_size->value.u32[0] = child_array_elements * ATOMIC_COUNTER_SIZE; nir_instr_insert_before(&instr->instr, &atomic_counter_size->instr); nir_alu_instr *mul = nir_alu_instr_create(mem_ctx, nir_op_imul); - nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL); + nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, 32, NULL); mul->dest.write_mask = 0x1; nir_src_copy(&mul->src[0].src, &deref_array->indirect, mul); mul->src[1].src.is_ssa = true; @@ -108,7 +108,7 @@ lower_instr(nir_intrinsic_instr *instr, nir_instr_insert_before(&instr->instr, &mul->instr); nir_alu_instr *add = nir_alu_instr_create(mem_ctx, nir_op_iadd); - nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL); + nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, 32, NULL); add->dest.write_mask = 0x1; add->src[0].src.is_ssa = true; add->src[0].src.ssa = &mul->dest.dest.ssa; @@ -125,7 +125,7 @@ lower_instr(nir_intrinsic_instr *instr, if (instr->dest.is_ssa) { nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, - instr->dest.ssa.num_components, NULL); + instr->dest.ssa.num_components, 32, NULL); nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_src_for_ssa(&new_instr->dest.ssa)); } else { diff --git a/src/compiler/nir/nir_lower_clip.c b/src/compiler/nir/nir_lower_clip.c index bcbad53..c711230 100644 --- a/src/compiler/nir/nir_lower_clip.c +++ b/src/compiler/nir/nir_lower_clip.c @@ -88,7 +88,7 @@ load_clipdist_input(nir_builder *b, nir_variable *in, nir_ssa_def **val) load->num_components = 4; nir_intrinsic_set_base(load, in->data.driver_location); load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); - nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL); + nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL); nir_builder_instr_insert(b, &load->instr); val[0] = nir_channel(b, &load->dest.ssa, 0); diff --git a/src/compiler/nir/nir_lower_indirect_derefs.c b/src/compiler/nir/nir_lower_indirect_derefs.c index a4affa7..62b8c84 100644 --- a/src/compiler/nir/nir_lower_indirect_derefs.c +++ b/src/compiler/nir/nir_lower_indirect_derefs.c @@ -75,8 +75,9 @@ emit_indirect_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr, if (src == NULL) { /* We're a load. 
We need to insert a phi node */ nir_phi_instr *phi = nir_phi_instr_create(b->shader); + unsigned bit_size = then_dest->bit_size; nir_ssa_dest_init(&phi->instr, &phi->dest, - then_dest->num_components, NULL); + then_dest->num_components, bit_size, NULL); nir_phi_src *src0 = ralloc(phi, nir_phi_src); src0->pred = nir_cf_node_as_block(nir_if_last_then_node(if_stmt)); @@ -125,8 +126,9 @@ emit_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr, load->num_components = orig_instr->num_components; load->variables[0] = nir_deref_as_var(nir_copy_deref(load, &deref->deref)); + unsigned bit_size = orig_instr->dest.ssa.bit_size; nir_ssa_dest_init(&load->instr, &load->dest, - load->num_components, NULL); + load->num_components, bit_size, NULL); nir_builder_instr_insert(b, &load->instr); *dest = &load->dest.ssa; } else { diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index 9d502ee..a30061d 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -289,7 +289,8 @@ nir_lower_io_block(nir_block *block, void *void_state) if (intrin->dest.is_ssa) { nir_ssa_dest_init(&load->instr, &load->dest, - intrin->num_components, NULL); + intrin->num_components, + intrin->dest.ssa.bit_size, NULL); nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(&load->dest.ssa)); } else { @@ -369,7 +370,8 @@ nir_lower_io_block(nir_block *block, void *void_state) if (intrin->dest.is_ssa) { nir_ssa_dest_init(&atomic->instr, &atomic->dest, - intrin->dest.ssa.num_components, NULL); + intrin->dest.ssa.num_components, + intrin->dest.ssa.bit_size, NULL); nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(&atomic->dest.ssa)); } else { diff --git a/src/compiler/nir/nir_lower_load_const_to_scalar.c b/src/compiler/nir/nir_lower_load_const_to_scalar.c index 1eeed13..b5df464 100644 --- a/src/compiler/nir/nir_lower_load_const_to_scalar.c +++ b/src/compiler/nir/nir_lower_load_const_to_scalar.c @@ -49,7 +49,7 @@ lower_load_const_instr_scalar(nir_load_const_instr *lower) nir_ssa_def *loads[4]; for (unsigned i = 0; i < lower->def.num_components; i++) { nir_load_const_instr *load_comp = nir_load_const_instr_create(b.shader, 1); - load_comp->value.u[0] = lower->value.u[i]; + load_comp->value.u32[0] = lower->value.u32[i]; nir_builder_instr_insert(&b, &load_comp->instr); loads[i] = &load_comp->def; } diff --git a/src/compiler/nir/nir_lower_locals_to_regs.c b/src/compiler/nir/nir_lower_locals_to_regs.c index 45036fa..0438802 100644 --- a/src/compiler/nir/nir_lower_locals_to_regs.c +++ b/src/compiler/nir/nir_lower_locals_to_regs.c @@ -161,7 +161,7 @@ get_deref_reg_src(nir_deref_var *deref, nir_instr *instr, if (src.reg.indirect) { nir_load_const_instr *load_const = nir_load_const_instr_create(state->shader, 1); - load_const->value.u[0] = glsl_get_length(parent_type); + load_const->value.u32[0] = glsl_get_length(parent_type); nir_instr_insert_before(instr, &load_const->instr); nir_alu_instr *mul = nir_alu_instr_create(state->shader, nir_op_imul); @@ -169,7 +169,7 @@ get_deref_reg_src(nir_deref_var *deref, nir_instr *instr, mul->src[1].src.is_ssa = true; mul->src[1].src.ssa = &load_const->def; mul->dest.write_mask = 1; - nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL); + nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, 32, NULL); nir_instr_insert_before(instr, &mul->instr); src.reg.indirect->is_ssa = true; @@ -187,7 +187,7 @@ get_deref_reg_src(nir_deref_var *deref, nir_instr *instr, add->src[0].src = *src.reg.indirect; nir_src_copy(&add->src[1].src, 
&deref_array->indirect, add); add->dest.write_mask = 1; - nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL); + nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, 32, NULL); nir_instr_insert_before(instr, &add->instr); src.reg.indirect->is_ssa = true; @@ -221,7 +221,8 @@ lower_locals_to_regs_block(nir_block *block, void *void_state) mov->dest.write_mask = (1 << intrin->num_components) - 1; if (intrin->dest.is_ssa) { nir_ssa_dest_init(&mov->instr, &mov->dest.dest, - intrin->num_components, NULL); + intrin->num_components, + intrin->dest.ssa.bit_size, NULL); nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(&mov->dest.dest.ssa)); } else { diff --git a/src/compiler/nir/nir_lower_phis_to_scalar.c b/src/compiler/nir/nir_lower_phis_to_scalar.c index dd2abcf..026c866 100644 --- a/src/compiler/nir/nir_lower_phis_to_scalar.c +++ b/src/compiler/nir/nir_lower_phis_to_scalar.c @@ -188,6 +188,8 @@ lower_phis_to_scalar_block(nir_block *block, void *void_state) if (!should_lower_phi(phi, state)) continue; + unsigned bit_size = phi->dest.ssa.bit_size; + /* Create a vecN operation to combine the results. Most of these * will be redundant, but copy propagation should clean them up for * us. No need to add the complexity here. @@ -202,12 +204,14 @@ lower_phis_to_scalar_block(nir_block *block, void *void_state) nir_alu_instr *vec = nir_alu_instr_create(state->mem_ctx, vec_op); nir_ssa_dest_init(&vec->instr, &vec->dest.dest, - phi->dest.ssa.num_components, NULL); + phi->dest.ssa.num_components, + bit_size, NULL); vec->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1; for (unsigned i = 0; i < phi->dest.ssa.num_components; i++) { nir_phi_instr *new_phi = nir_phi_instr_create(state->mem_ctx); - nir_ssa_dest_init(&new_phi->instr, &new_phi->dest, 1, NULL); + nir_ssa_dest_init(&new_phi->instr, &new_phi->dest, 1, + phi->dest.ssa.bit_size, NULL); vec->src[i].src = nir_src_for_ssa(&new_phi->dest.ssa); @@ -215,7 +219,7 @@ lower_phis_to_scalar_block(nir_block *block, void *void_state) /* We need to insert a mov to grab the i'th component of src */ nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, nir_op_imov); - nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, NULL); + nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, bit_size, NULL); mov->dest.write_mask = 1; nir_src_copy(&mov->src[0].src, &src->src, state->mem_ctx); mov->src[0].swizzle[0] = i; diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c index 79f6bed..c1cd139 100644 --- a/src/compiler/nir/nir_lower_system_values.c +++ b/src/compiler/nir/nir_lower_system_values.c @@ -65,9 +65,9 @@ convert_block(nir_block *block, void *void_state) */ nir_const_value local_size; - local_size.u[0] = b->shader->info.cs.local_size[0]; - local_size.u[1] = b->shader->info.cs.local_size[1]; - local_size.u[2] = b->shader->info.cs.local_size[2]; + local_size.u32[0] = b->shader->info.cs.local_size[0]; + local_size.u32[1] = b->shader->info.cs.local_size[1]; + local_size.u32[2] = b->shader->info.cs.local_size[2]; nir_ssa_def *group_id = nir_load_system_value(b, nir_intrinsic_load_work_group_id, 0); diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c index 806acd8..4999603 100644 --- a/src/compiler/nir/nir_lower_tex.c +++ b/src/compiler/nir/nir_lower_tex.c @@ -140,7 +140,7 @@ get_texture_size(nir_builder *b, nir_tex_instr *tex) txs->src[0].src = nir_src_for_ssa(nir_imm_int(b, 0)); txs->src[0].src_type = nir_tex_src_lod; - nir_ssa_dest_init(&txs->instr, &txs->dest, 2, NULL); + 
nir_ssa_dest_init(&txs->instr, &txs->dest, 2, 32, NULL); nir_builder_instr_insert(b, &txs->instr); return nir_i2f(b, &txs->dest.ssa); @@ -223,13 +223,13 @@ get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val) memset(&v, 0, sizeof(v)); if (swizzle_val == 4) { - v.u[0] = v.u[1] = v.u[2] = v.u[3] = 0; + v.u32[0] = v.u32[1] = v.u32[2] = v.u32[3] = 0; } else { assert(swizzle_val == 5); if (type == nir_type_float) - v.f[0] = v.f[1] = v.f[2] = v.f[3] = 1.0; + v.f32[0] = v.f32[1] = v.f32[2] = v.f32[3] = 1.0; else - v.u[0] = v.u[1] = v.u[2] = v.u[3] = 1; + v.u32[0] = v.u32[1] = v.u32[2] = v.u32[3] = 1; } return nir_build_imm(b, 4, v); diff --git a/src/compiler/nir/nir_lower_two_sided_color.c b/src/compiler/nir/nir_lower_two_sided_color.c index fe3507c..c7fb67e 100644 --- a/src/compiler/nir/nir_lower_two_sided_color.c +++ b/src/compiler/nir/nir_lower_two_sided_color.c @@ -74,7 +74,7 @@ load_input(nir_builder *b, nir_variable *in) load->num_components = 4; nir_intrinsic_set_base(load, in->data.driver_location); load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); - nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL); + nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL); nir_builder_instr_insert(b, &load->instr); return &load->dest.ssa; diff --git a/src/compiler/nir/nir_lower_var_copies.c b/src/compiler/nir/nir_lower_var_copies.c index 7db9839..c994f0f 100644 --- a/src/compiler/nir/nir_lower_var_copies.c +++ b/src/compiler/nir/nir_lower_var_copies.c @@ -116,12 +116,15 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr, assert(src_tail->type == dest_tail->type); unsigned num_components = glsl_get_vector_elements(src_tail->type); + unsigned bit_size = + glsl_get_bit_size(glsl_get_base_type(src_tail->type)); nir_intrinsic_instr *load = nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_load_var); load->num_components = num_components; load->variables[0] = nir_deref_as_var(nir_copy_deref(load, &src_head->deref)); - nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL); + nir_ssa_dest_init(&load->instr, &load->dest, num_components, bit_size, + NULL); nir_instr_insert_before(©_instr->instr, &load->instr); diff --git a/src/compiler/nir/nir_lower_vars_to_ssa.c b/src/compiler/nir/nir_lower_vars_to_ssa.c index a3f3fcf..9f9e454 100644 --- a/src/compiler/nir/nir_lower_vars_to_ssa.c +++ b/src/compiler/nir/nir_lower_vars_to_ssa.c @@ -505,6 +505,7 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state) nir_ssa_undef_instr *undef = nir_ssa_undef_instr_create(state->shader, intrin->num_components); + undef->def.bit_size = intrin->dest.ssa.bit_size; nir_instr_insert_before(&intrin->instr, &undef->instr); nir_instr_remove(&intrin->instr); @@ -528,7 +529,8 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state) mov->dest.write_mask = (1 << intrin->num_components) - 1; nir_ssa_dest_init(&mov->instr, &mov->dest.dest, - intrin->num_components, NULL); + intrin->num_components, + intrin->dest.ssa.bit_size, NULL); nir_instr_insert_before(&intrin->instr, &mov->instr); nir_instr_remove(&intrin->instr); @@ -719,6 +721,7 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl) node->pb_value = nir_phi_builder_add_value(state.phi_builder, glsl_get_vector_elements(node->type), + glsl_get_bit_size(glsl_get_base_type(node->type)), store_blocks); if (node->deref->var->constant_initializer) { diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py index 60ade4a..d6b658d 100644 --- a/src/compiler/nir/nir_opcodes.py +++ 
b/src/compiler/nir/nir_opcodes.py @@ -90,8 +90,12 @@ class Opcode(object): # helper variables for strings tfloat = "float" tint = "int" -tbool = "bool" +tbool = "bool32" tuint = "uint" +tfloat32 = "float32" +tint32 = "int32" +tuint32 = "uint32" +tfloat64 = "float64" commutative = "commutative " associative = "associative " @@ -155,57 +159,57 @@ unop("frsq", tfloat, "1.0f / sqrtf(src0)") unop("fsqrt", tfloat, "sqrtf(src0)") unop("fexp2", tfloat, "exp2f(src0)") unop("flog2", tfloat, "log2f(src0)") -unop_convert("f2i", tint, tfloat, "src0") # Float-to-integer conversion. -unop_convert("f2u", tuint, tfloat, "src0") # Float-to-unsigned conversion -unop_convert("i2f", tfloat, tint, "src0") # Integer-to-float conversion. +unop_convert("f2i", tint32, tfloat32, "src0") # Float-to-integer conversion. +unop_convert("f2u", tuint32, tfloat32, "src0") # Float-to-unsigned conversion +unop_convert("i2f", tfloat32, tint32, "src0") # Integer-to-float conversion. # Float-to-boolean conversion -unop_convert("f2b", tbool, tfloat, "src0 != 0.0f") +unop_convert("f2b", tbool, tfloat32, "src0 != 0.0f") # Boolean-to-float conversion -unop_convert("b2f", tfloat, tbool, "src0 ? 1.0f : 0.0f") +unop_convert("b2f", tfloat32, tbool, "src0 ? 1.0f : 0.0f") # Int-to-boolean conversion -unop_convert("i2b", tbool, tint, "src0 != 0") -unop_convert("b2i", tint, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion -unop_convert("u2f", tfloat, tuint, "src0") # Unsigned-to-float conversion. +unop_convert("i2b", tbool, tint32, "src0 != 0") +unop_convert("b2i", tint32, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion +unop_convert("u2f", tfloat32, tuint32, "src0") # Unsigned-to-float conversion. # Unary floating-point rounding operations. -unop("ftrunc", tfloat, "truncf(src0)") -unop("fceil", tfloat, "ceilf(src0)") -unop("ffloor", tfloat, "floorf(src0)") -unop("ffract", tfloat, "src0 - floorf(src0)") -unop("fround_even", tfloat, "_mesa_roundevenf(src0)") +unop("ftrunc", tfloat, "bit_size == 64 ? trunc(src0) : truncf(src0)") +unop("fceil", tfloat, "bit_size == 64 ? ceil(src0) : ceilf(src0)") +unop("ffloor", tfloat, "bit_size == 64 ? floor(src0) : floorf(src0)") +unop("ffract", tfloat, "src0 - (bit_size == 64 ? floor(src0) : floorf(src0))") +unop("fround_even", tfloat, "bit_size == 64 ? _mesa_roundeven(src0) : _mesa_roundevenf(src0)") unop("fquantize2f16", tfloat, "(fabs(src0) < ldexpf(1.0, -14)) ? copysignf(0.0f, src0) : _mesa_half_to_float(_mesa_float_to_half(src0))") # Trigonometric operations. -unop("fsin", tfloat, "sinf(src0)") -unop("fcos", tfloat, "cosf(src0)") +unop("fsin", tfloat, "bit_size == 64 ? sin(src0) : sinf(src0)") +unop("fcos", tfloat, "bit_size == 64 ? cos(src0) : cosf(src0)") # Partial derivatives. -unop("fddx", tfloat, "0.0f") # the derivative of a constant is 0. -unop("fddy", tfloat, "0.0f") -unop("fddx_fine", tfloat, "0.0f") -unop("fddy_fine", tfloat, "0.0f") -unop("fddx_coarse", tfloat, "0.0f") -unop("fddy_coarse", tfloat, "0.0f") +unop("fddx", tfloat, "0.0") # the derivative of a constant is 0. +unop("fddy", tfloat, "0.0") +unop("fddx_fine", tfloat, "0.0") +unop("fddy_fine", tfloat, "0.0") +unop("fddx_coarse", tfloat, "0.0") +unop("fddy_coarse", tfloat, "0.0") # Floating point pack and unpack operations. 
def pack_2x16(fmt): - unop_horiz("pack_" + fmt + "_2x16", 1, tuint, 2, tfloat, """ + unop_horiz("pack_" + fmt + "_2x16", 1, tuint32, 2, tfloat32, """ dst.x = (uint32_t) pack_fmt_1x16(src0.x); dst.x |= ((uint32_t) pack_fmt_1x16(src0.y)) << 16; """.replace("fmt", fmt)) def pack_4x8(fmt): - unop_horiz("pack_" + fmt + "_4x8", 1, tuint, 4, tfloat, """ + unop_horiz("pack_" + fmt + "_4x8", 1, tuint32, 4, tfloat32, """ dst.x = (uint32_t) pack_fmt_1x8(src0.x); dst.x |= ((uint32_t) pack_fmt_1x8(src0.y)) << 8; dst.x |= ((uint32_t) pack_fmt_1x8(src0.z)) << 16; @@ -213,13 +217,13 @@ dst.x |= ((uint32_t) pack_fmt_1x8(src0.w)) << 24; """.replace("fmt", fmt)) def unpack_2x16(fmt): - unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tuint, """ + unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat32, 1, tuint32, """ dst.x = unpack_fmt_1x16((uint16_t)(src0.x & 0xffff)); dst.y = unpack_fmt_1x16((uint16_t)(src0.x << 16)); """.replace("fmt", fmt)) def unpack_4x8(fmt): - unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tuint, """ + unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat32, 1, tuint32, """ dst.x = unpack_fmt_1x8((uint8_t)(src0.x & 0xff)); dst.y = unpack_fmt_1x8((uint8_t)((src0.x >> 8) & 0xff)); dst.z = unpack_fmt_1x8((uint8_t)((src0.x >> 16) & 0xff)); @@ -238,11 +242,11 @@ unpack_2x16("unorm") unpack_4x8("unorm") unpack_2x16("half") -unop_horiz("pack_uvec2_to_uint", 1, tuint, 2, tuint, """ +unop_horiz("pack_uvec2_to_uint", 1, tuint32, 2, tuint32, """ dst.x = (src0.x & 0xffff) | (src0.y >> 16); """) -unop_horiz("pack_uvec4_to_uint", 1, tuint, 4, tuint, """ +unop_horiz("pack_uvec4_to_uint", 1, tuint32, 4, tuint32, """ dst.x = (src0.x << 0) | (src0.y << 8) | (src0.z << 16) | @@ -252,22 +256,22 @@ dst.x = (src0.x << 0) | # Lowered floating point unpacking operations. -unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tuint, +unop_horiz("unpack_half_2x16_split_x", 1, tfloat32, 1, tuint32, "unpack_half_1x16((uint16_t)(src0.x & 0xffff))") -unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tuint, +unop_horiz("unpack_half_2x16_split_y", 1, tfloat32, 1, tuint32, "unpack_half_1x16((uint16_t)(src0.x >> 16))") # Bit operations, part of ARB_gpu_shader5. -unop("bitfield_reverse", tuint, """ +unop("bitfield_reverse", tuint32, """ /* we're not winning any awards for speed here, but that's ok */ dst = 0; for (unsigned bit = 0; bit < 32; bit++) dst |= ((src0 >> bit) & 1) << (31 - bit); """) -unop("bit_count", tuint, """ +unop("bit_count", tuint32, """ dst = 0; for (unsigned bit = 0; bit < 32; bit++) { if ((src0 >> bit) & 1) @@ -275,7 +279,7 @@ for (unsigned bit = 0; bit < 32; bit++) { } """) -unop_convert("ufind_msb", tint, tuint, """ +unop_convert("ufind_msb", tint32, tuint32, """ dst = -1; for (int bit = 31; bit > 0; bit--) { if ((src0 >> bit) & 1) { @@ -285,7 +289,7 @@ for (int bit = 31; bit > 0; bit--) { } """) -unop("ifind_msb", tint, """ +unop("ifind_msb", tint32, """ dst = -1; for (int bit = 31; bit >= 0; bit--) { /* If src0 < 0, we're looking for the first 0 bit. 
@@ -299,7 +303,7 @@ for (int bit = 31; bit >= 0; bit--) { } """) -unop("find_lsb", tint, """ +unop("find_lsb", tint32, """ dst = -1; for (unsigned bit = 0; bit < 32; bit++) { if ((src0 >> bit) & 1) { @@ -359,10 +363,10 @@ binop("fmul", tfloat, commutative + associative, "src0 * src1") # low 32-bits of signed/unsigned integer multiply binop("imul", tint, commutative + associative, "src0 * src1") # high 32-bits of signed integer multiply -binop("imul_high", tint, commutative, +binop("imul_high", tint32, commutative, "(int32_t)(((int64_t) src0 * (int64_t) src1) >> 32)") # high 32-bits of unsigned integer multiply -binop("umul_high", tuint, commutative, +binop("umul_high", tuint32, commutative, "(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)") binop("fdiv", tfloat, "", "src0 / src1") @@ -427,18 +431,18 @@ binop_reduce("bany_inequal", 1, tbool, tint, "{src0} != {src1}", # non-integer-aware GLSL-style comparisons that return 0.0 or 1.0 -binop_reduce("fall_equal", 1, tfloat, tfloat, "{src0} == {src1}", +binop_reduce("fall_equal", 1, tfloat32, tfloat32, "{src0} == {src1}", "{src0} && {src1}", "{src} ? 1.0f : 0.0f") -binop_reduce("fany_nequal", 1, tfloat, tfloat, "{src0} != {src1}", +binop_reduce("fany_nequal", 1, tfloat32, tfloat32, "{src0} != {src1}", "{src0} || {src1}", "{src} ? 1.0f : 0.0f") # These comparisons for integer-less hardware return 1.0 and 0.0 for true # and false respectively -binop("slt", tfloat, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than -binop("sge", tfloat, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal -binop("seq", tfloat, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal -binop("sne", tfloat, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal +binop("slt", tfloat32, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than +binop("sge", tfloat32, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal +binop("seq", tfloat32, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal +binop("sne", tfloat32, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal binop("ishl", tint, "", "src0 << src1") @@ -461,11 +465,11 @@ binop("ixor", tuint, commutative + associative, "src0 ^ src1") # These use (src != 0.0) for testing the truth of the input, and output 1.0 # for true and 0.0 for false -binop("fand", tfloat, commutative, +binop("fand", tfloat32, commutative, "((src0 != 0.0f) && (src1 != 0.0f)) ? 1.0f : 0.0f") -binop("for", tfloat, commutative, +binop("for", tfloat32, commutative, "((src0 != 0.0f) || (src1 != 0.0f)) ? 1.0f : 0.0f") -binop("fxor", tfloat, commutative, +binop("fxor", tfloat32, commutative, "(src0 != 0.0f && src1 == 0.0f) || (src0 == 0.0f && src1 != 0.0f) ? 1.0f : 0.0f") binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}", @@ -487,7 +491,7 @@ binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0") binop("umax", tuint, commutative + associative, "src1 > src0 ? src1 : src0") # Saturated vector add for 4 8bit ints. -binop("usadd_4x8", tint, commutative + associative, """ +binop("usadd_4x8", tint32, commutative + associative, """ dst = 0; for (int i = 0; i < 32; i += 8) { dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i; @@ -495,7 +499,7 @@ for (int i = 0; i < 32; i += 8) { """) # Saturated vector subtract for 4 8bit ints. 
-binop("ussub_4x8", tint, "", """ +binop("ussub_4x8", tint32, "", """ dst = 0; for (int i = 0; i < 32; i += 8) { int src0_chan = (src0 >> i) & 0xff; @@ -506,7 +510,7 @@ for (int i = 0; i < 32; i += 8) { """) # vector min for 4 8bit ints. -binop("umin_4x8", tint, commutative + associative, """ +binop("umin_4x8", tint32, commutative + associative, """ dst = 0; for (int i = 0; i < 32; i += 8) { dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i; @@ -514,7 +518,7 @@ for (int i = 0; i < 32; i += 8) { """) # vector max for 4 8bit ints. -binop("umax_4x8", tint, commutative + associative, """ +binop("umax_4x8", tint32, commutative + associative, """ dst = 0; for (int i = 0; i < 32; i += 8) { dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i; @@ -522,7 +526,7 @@ for (int i = 0; i < 32; i += 8) { """) # unorm multiply: (a * b) / 255. -binop("umul_unorm_4x8", tint, commutative + associative, """ +binop("umul_unorm_4x8", tint32, commutative + associative, """ dst = 0; for (int i = 0; i < 32; i += 8) { int src0_chan = (src0 >> i) & 0xff; @@ -531,15 +535,15 @@ for (int i = 0; i < 32; i += 8) { } """) -binop("fpow", tfloat, "", "powf(src0, src1)") +binop("fpow", tfloat, "", "bit_size == 64 ? powf(src0, src1) : pow(src0, src1)") -binop_horiz("pack_half_2x16_split", 1, tuint, 1, tfloat, 1, tfloat, +binop_horiz("pack_half_2x16_split", 1, tuint32, 1, tfloat32, 1, tfloat32, "pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)") # bfm implements the behavior of the first operation of the SM5 "bfi" assembly # and that of the "bfi1" i965 instruction. That is, it has undefined behavior # if either of its arguments are 32. -binop_convert("bfm", tuint, tint, "", """ +binop_convert("bfm", tuint32, tint32, "", """ int bits = src0, offset = src1; if (offset < 0 || bits < 0 || offset > 31 || bits > 31 || offset + bits > 32) dst = 0; /* undefined */ @@ -548,7 +552,7 @@ else """) opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint], "", """ -dst = ldexpf(src0, src1); +dst = (bit_size == 64) ? ldexp(src0, src1) : ldexpf(src0, src1); /* flush denormals to zero. */ if (!isnormal(dst)) dst = copysignf(0.0f, src0); @@ -588,12 +592,12 @@ triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2") # bools (0.0 vs 1.0) and one for integer bools (0 vs ~0). -triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2") +triop("fcsel", tfloat32, "(src0 != 0.0f) ? src1 : src2") opcode("bcsel", 0, tuint, [0, 0, 0], [tbool, tuint, tuint], "", "src0 ? 
src1 : src2") # SM5 bfi assembly -triop("bfi", tuint, """ +triop("bfi", tuint32, """ unsigned mask = src0, insert = src1, base = src2; if (mask == 0) { dst = base; @@ -608,8 +612,8 @@ if (mask == 0) { """) # SM5 ubfe/ibfe assembly -opcode("ubfe", 0, tuint, - [0, 0, 0], [tuint, tint, tint], "", """ +opcode("ubfe", 0, tuint32, + [0, 0, 0], [tuint32, tint32, tint32], "", """ unsigned base = src0; int offset = src1, bits = src2; if (bits == 0) { @@ -622,8 +626,8 @@ if (bits == 0) { dst = base >> offset; } """) -opcode("ibfe", 0, tint, - [0, 0, 0], [tint, tint, tint], "", """ +opcode("ibfe", 0, tint32, + [0, 0, 0], [tint32, tint32, tint32], "", """ int base = src0; int offset = src1, bits = src2; if (bits == 0) { @@ -638,8 +642,8 @@ if (bits == 0) { """) # GLSL bitfieldExtract() -opcode("ubitfield_extract", 0, tuint, - [0, 0, 0], [tuint, tint, tint], "", """ +opcode("ubitfield_extract", 0, tuint32, + [0, 0, 0], [tuint32, tint32, tint32], "", """ unsigned base = src0; int offset = src1, bits = src2; if (bits == 0) { @@ -650,8 +654,8 @@ if (bits == 0) { dst = (base >> offset) & ((1ull << bits) - 1); } """) -opcode("ibitfield_extract", 0, tint, - [0, 0, 0], [tint, tint, tint], "", """ +opcode("ibitfield_extract", 0, tint32, + [0, 0, 0], [tint32, tint32, tint32], "", """ int base = src0; int offset = src1, bits = src2; if (bits == 0) { @@ -678,8 +682,8 @@ def quadop_horiz(name, output_size, src1_size, src2_size, src3_size, [tuint, tuint, tuint, tuint], "", const_expr) -opcode("bitfield_insert", 0, tuint, [0, 0, 0, 0], - [tuint, tuint, tint, tint], "", """ +opcode("bitfield_insert", 0, tuint32, [0, 0, 0, 0], + [tuint32, tuint32, tint32, tint32], "", """ unsigned base = src0, insert = src1; int offset = src2, bits = src3; if (bits == 0) { diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 54f7d86..ed21c5d 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -35,10 +35,17 @@ d = 'd' # Written in the form (<search>, <replace>) where <search> is an expression # and <replace> is either an expression or a value. An expression is -# defined as a tuple of the form (<op>, <src0>, <src1>, <src2>, <src3>) +# defined as a tuple of the form ([~]<op>, <src0>, <src1>, <src2>, <src3>) # where each source is either an expression or a value. A value can be # either a numeric constant or a string representing a variable name. # +# If the opcode in a search expression is prefixed by a '~' character, this +# indicates that the operation is inexact. Such operations will only get +# applied to SSA values that do not have the exact bit set. This should be +# used by by any optimizations that are not bit-for-bit exact. It should not, +# however, be used for backend-requested lowering operations as those need to +# happen regardless of precision. 
+# # Variable names are specified as "[#]name[@type]" where "#" indicates that # the given variable will only match constants and the type indicates that # the given variable will only match values from ALU instructions with the # given output type. @@ -55,19 +62,19 @@ optimizations = [ (('fabs', ('fneg', a)), ('fabs', a)), (('iabs', ('iabs', a)), ('iabs', a)), (('iabs', ('ineg', a)), ('iabs', a)), - (('fadd', a, 0.0), a), + (('~fadd', a, 0.0), a), (('iadd', a, 0), a), (('usadd_4x8', a, 0), a), (('usadd_4x8', a, ~0), ~0), - (('fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))), + (('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))), (('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))), - (('fadd', ('fneg', a), a), 0.0), + (('~fadd', ('fneg', a), a), 0.0), (('iadd', ('ineg', a), a), 0), (('iadd', ('ineg', a), ('iadd', a, b)), b), (('iadd', a, ('iadd', ('ineg', a), b)), b), - (('fadd', ('fneg', a), ('fadd', a, b)), b), - (('fadd', a, ('fadd', ('fneg', a), b)), b), - (('fmul', a, 0.0), 0.0), + (('~fadd', ('fneg', a), ('fadd', a, b)), b), + (('~fadd', a, ('fadd', ('fneg', a), b)), b), + (('~fmul', a, 0.0), 0.0), (('imul', a, 0), 0), (('umul_unorm_4x8', a, 0), 0), (('umul_unorm_4x8', a, ~0), a), @@ -76,32 +83,48 @@ optimizations = [ (('fmul', a, -1.0), ('fneg', a)), (('imul', a, -1), ('ineg', a)), (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'), - (('ffma', 0.0, a, b), b), - (('ffma', a, 0.0, b), b), - (('ffma', a, b, 0.0), ('fmul', a, b)), + (('~ffma', 0.0, a, b), b), + (('~ffma', a, 0.0, b), b), + (('~ffma', a, b, 0.0), ('fmul', a, b)), (('ffma', a, 1.0, b), ('fadd', a, b)), (('ffma', 1.0, a, b), ('fadd', a, b)), - (('flrp', a, b, 0.0), a), - (('flrp', a, b, 1.0), b), - (('flrp', a, a, b), a), - (('flrp', 0.0, a, b), ('fmul', a, b)), + (('~flrp', a, b, 0.0), a), + (('~flrp', a, b, 1.0), b), + (('~flrp', a, a, b), a), + (('~flrp', 0.0, a, b), ('fmul', a, b)), + (('~flrp', a, b, ('b2f', c)), ('bcsel', c, b, a), 'options->lower_flrp'), (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'), (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'), - (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'), - (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'), + (('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg', ('b2f', c)))), ('fmul', b, ('b2f', c))), ('bcsel', c, b, a), 'options->lower_flrp'), + (('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c ))), ('fmul', b, c )), ('flrp', a, b, c), '!options->lower_flrp'), + (('~fadd', a, ('fmul', ('b2f', c), ('fadd', b, ('fneg', a)))), ('bcsel', c, b, a), 'options->lower_flrp'), + (('~fadd', a, ('fmul', c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'), (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'), - (('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'), + (('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'), # Comparison simplifications - (('inot', ('flt', a, b)), ('fge', a, b)), - (('inot', ('fge', a, b)), ('flt', a, b)), - (('inot', ('feq', a, b)), ('fne', a, b)), - (('inot', ('fne', a, b)), ('feq', a, b)), + (('~inot', ('flt', a, b)), ('fge', a, b)), + (('~inot', ('fge', a, b)), ('flt', a, b)), + (('~inot', ('feq', a, b)), ('fne', a, b)), + (('~inot', ('fne', a, b)), ('feq', a, b)), (('inot', ('ilt', a, b)), ('ige', a, b)), (('inot', ('ige', a, b)), ('ilt', a, b)), (('inot', 
('ieq', a, b)), ('ine', a, b)), (('inot', ('ine', a, b)), ('ieq', a, b)), + + # 0.0 >= b2f(a) + # b2f(a) <= 0.0 + # b2f(a) == 0.0 because b2f(a) can only be 0 or 1 + # inot(a) + (('fge', 0.0, ('b2f', a)), ('inot', a)), + + # 0.0 < fabs(a) + # fabs(a) > 0.0 + # fabs(a) != 0.0 because fabs(a) must be >= 0 + # a != 0.0 + (('flt', 0.0, ('fabs', a)), ('fne', a, 0.0)), + (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)), - (('bcsel', ('flt', a, b), a, b), ('fmin', a, b)), + (('bcsel', ('flt', b, a), b, a), ('fmin', a, b)), (('bcsel', ('flt', a, b), b, a), ('fmax', a, b)), (('bcsel', ('inot', 'a@bool'), b, c), ('bcsel', a, c, b)), (('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)), @@ -111,15 +134,19 @@ optimizations = [ (('imax', a, a), a), (('umin', a, a), a), (('umax', a, a), a), - (('fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'), - (('fmax', ('fmin', a, 1.0), 0.0), ('fsat', a), '!options->lower_fsat'), + (('~fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'), + (('~fmax', ('fmin', a, 1.0), 0.0), ('fsat', a), '!options->lower_fsat'), (('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'), (('fsat', ('fsat', a)), ('fsat', a)), (('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)), - (('ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))), - (('ior', ('flt', a, c), ('flt', b, c)), ('flt', ('fmin', a, b), c)), - (('ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))), - (('ior', ('fge', a, c), ('fge', b, c)), ('fge', ('fmax', a, b), c)), + (('~ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))), + (('~ior', ('flt', a, c), ('flt', b, c)), ('flt', ('fmin', a, b), c)), + (('~ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))), + (('~ior', ('fge', a, c), ('fge', b, c)), ('fge', ('fmax', a, b), c)), + (('fabs', ('slt', a, b)), ('slt', a, b)), + (('fabs', ('sge', a, b)), ('sge', a, b)), + (('fabs', ('seq', a, b)), ('seq', a, b)), + (('fabs', ('sne', a, b)), ('sne', a, b)), (('slt', a, b), ('b2f', ('flt', a, b)), 'options->lower_scmp'), (('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'), (('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'), @@ -151,7 +178,6 @@ optimizations = [ (('ior', a, 0), a), (('fxor', a, a), 0.0), (('ixor', a, a), 0), - (('fxor', a, 0.0), a), (('ixor', a, 0), a), (('inot', ('inot', a)), a), # DeMorgan's Laws @@ -167,35 +193,35 @@ optimizations = [ (('iand', 0xff, ('ushr', a, 24)), ('ushr', a, 24)), (('iand', 0xffff, ('ushr', a, 16)), ('ushr', a, 16)), # Exponential/logarithmic identities - (('fexp2', ('flog2', a)), a), # 2^lg2(a) = a - (('flog2', ('fexp2', a)), a), # lg2(2^a) = a + (('~fexp2', ('flog2', a)), a), # 2^lg2(a) = a + (('~flog2', ('fexp2', a)), a), # lg2(2^a) = a (('fpow', a, b), ('fexp2', ('fmul', ('flog2', a), b)), 'options->lower_fpow'), # a^b = 2^(lg2(a)*b) - (('fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b - (('fexp2', ('fadd', ('fmul', ('flog2', a), b), ('fmul', ('flog2', c), d))), - ('fmul', ('fpow', a, b), ('fpow', c, d)), '!options->lower_fpow'), # 2^(lg2(a) * b + lg2(c) + d) = a^b * c^d - (('fpow', a, 1.0), a), - (('fpow', a, 2.0), ('fmul', a, a)), - (('fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))), - (('fpow', 2.0, a), ('fexp2', a)), - (('fpow', ('fpow', a, 2.2), 0.454545), a), - (('fpow', ('fabs', ('fpow', a, 2.2)), 0.454545), ('fabs', a)), - (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))), - (('frcp', 
('fexp2', a)), ('fexp2', ('fneg', a))), - (('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))), - (('flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))), - (('flog2', ('frcp', a)), ('fneg', ('flog2', a))), - (('flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))), - (('flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))), - (('fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))), - (('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))), - (('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))), + (('~fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b + (('~fexp2', ('fadd', ('fmul', ('flog2', a), b), ('fmul', ('flog2', c), d))), + ('~fmul', ('fpow', a, b), ('fpow', c, d)), '!options->lower_fpow'), # 2^(lg2(a) * b + lg2(c) * d) = a^b * c^d + (('~fpow', a, 1.0), a), + (('~fpow', a, 2.0), ('fmul', a, a)), + (('~fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))), + (('~fpow', 2.0, a), ('fexp2', a)), + (('~fpow', ('fpow', a, 2.2), 0.454545), a), + (('~fpow', ('fabs', ('fpow', a, 2.2)), 0.454545), ('fabs', a)), + (('~fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))), + (('~frcp', ('fexp2', a)), ('fexp2', ('fneg', a))), + (('~frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))), + (('~flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))), + (('~flog2', ('frcp', a)), ('fneg', ('flog2', a))), + (('~flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))), + (('~flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))), + (('~fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))), + (('~fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))), + (('~fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))), # Division and reciprocal - (('fdiv', 1.0, a), ('frcp', a)), + (('~fdiv', 1.0, a), ('frcp', a)), (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'), - (('frcp', ('frcp', a)), a), - (('frcp', ('fsqrt', a)), ('frsq', a)), + (('~frcp', ('frcp', a)), a), + (('~frcp', ('fsqrt', a)), ('frsq', a)), (('fsqrt', a), ('frcp', ('frsq', a)), 'options->lower_fsqrt'), - (('frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'), + (('~frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'), # Boolean simplifications (('ieq', 'a@bool', True), a), (('ine', 'a@bool', True), ('inot', a)), @@ -216,6 +242,10 @@ optimizations = [ (('i2b', ('b2i', a)), a), (('f2i', ('ftrunc', a)), ('f2i', a)), (('f2u', ('ftrunc', a)), ('f2u', a)), + (('i2b', ('ineg', a)), ('i2b', a)), + (('i2b', ('iabs', a)), ('i2b', a)), + (('fabs', ('b2f', a)), ('b2f', a)), + (('iabs', ('b2i', a)), ('b2i', a)), # Byte extraction (('ushr', a, 24), ('extract_u8', a, 3), '!options->lower_extract_byte'), @@ -228,7 +258,7 @@ optimizations = [ (('iand', 0xffff, a), ('extract_u16', a, 0), '!options->lower_extract_word'), # Subtracts - (('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)), + (('~fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)), (('isub', a, ('isub', 0, b)), ('iadd', a, b)), (('ussub_4x8', a, 0), a), (('ussub_4x8', a, ~0), 0), @@ -236,7 +266,7 @@ optimizations = [ (('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'), (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'), (('ineg', a), ('isub', 0, a), 'options->lower_negate'), - (('fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)), + (('~fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)), (('iadd', a, ('isub', 0, b)), ('isub', a, b)), (('fabs', ('fsub', 0.0, a)), ('fabs', a)), (('iabs', ('isub', 0, a)), ('iabs', a)), @@ -368,10 +398,13 @@ for op in ['flt', 'fge', 
'feq', 'fne', # they help code generation but do not necessarily produce code that is # more easily optimizable. late_optimizations = [ + # Most of these optimizations aren't quite safe when you get infinity or + # NaN involved, but the first one should be fine. (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))), - (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))), - (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))), - (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))), + (('~fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))), + (('~feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))), + (('~fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))), + (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'), (('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'), (('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'), diff --git a/src/compiler/nir/nir_opt_constant_folding.c b/src/compiler/nir/nir_opt_constant_folding.c index 04876a4..e64ca36 100644 --- a/src/compiler/nir/nir_opt_constant_folding.c +++ b/src/compiler/nir/nir_opt_constant_folding.c @@ -46,10 +46,28 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx) if (!instr->dest.dest.is_ssa) return false; + /* If any outputs/inputs have unsized types, then we need to guess the + * bit-size. In this case, the validator ensures that all bit-sizes match + * so we can just take the bit-size from the first output/input with an + * unsized type. If all the outputs/inputs are sized, then we don't need + * to guess the bit-size at all because the code we generate for constant + * opcodes in this case already knows the sizes of the types involved and + * does not need the provided bit-size for anything (although it still + * must receive a valid bit-size). + */ + unsigned bit_size = 0; + if (!nir_alu_type_get_type_size(nir_op_infos[instr->op].output_type)) + bit_size = instr->dest.dest.ssa.bit_size; + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { if (!instr->src[i].src.is_ssa) return false; + if (bit_size == 0 && + !nir_alu_type_get_type_size(nir_op_infos[instr->op].input_types[i])) { + bit_size = instr->src[i].src.ssa->bit_size; + } + nir_instr *src_instr = instr->src[i].src.ssa->parent_instr; if (src_instr->type != nir_instr_type_load_const) @@ -58,24 +76,31 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx) for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(instr, i); j++) { - src[i].u[j] = load_const->value.u[instr->src[i].swizzle[j]]; + if (load_const->def.bit_size == 64) + src[i].u64[j] = load_const->value.u64[instr->src[i].swizzle[j]]; + else + src[i].u32[j] = load_const->value.u32[instr->src[i].swizzle[j]]; } /* We shouldn't have any source modifiers in the optimization loop. */ assert(!instr->src[i].abs && !instr->src[i].negate); } + if (bit_size == 0) + bit_size = 32; + /* We shouldn't have any saturate modifiers in the optimization loop. 
*/ assert(!instr->dest.saturate); nir_const_value dest = nir_eval_const_opcode(instr->op, instr->dest.dest.ssa.num_components, - src); + bit_size, src); nir_load_const_instr *new_instr = nir_load_const_instr_create(mem_ctx, instr->dest.dest.ssa.num_components); + new_instr->def.bit_size = instr->dest.dest.ssa.bit_size; new_instr->value = dest; nir_instr_insert_before(&instr->instr, &new_instr->instr); @@ -106,7 +131,7 @@ constant_fold_deref(nir_instr *instr, nir_deref_var *deref) nir_load_const_instr *indirect = nir_instr_as_load_const(arr->indirect.ssa->parent_instr); - arr->base_offset += indirect->value.u[0]; + arr->base_offset += indirect->value.u32[0]; /* Clear out the source */ nir_instr_rewrite_src(instr, &arr->indirect, nir_src_for_ssa(NULL)); diff --git a/src/compiler/nir/nir_opt_dead_cf.c b/src/compiler/nir/nir_opt_dead_cf.c index 4cc6798..4658b23 100644 --- a/src/compiler/nir/nir_opt_dead_cf.c +++ b/src/compiler/nir/nir_opt_dead_cf.c @@ -228,7 +228,7 @@ dead_cf_block(nir_block *block) if (!const_value) return false; - opt_constant_if(following_if, const_value->u[0] != 0); + opt_constant_if(following_if, const_value->u32[0] != 0); return true; } diff --git a/src/compiler/nir/nir_opt_peephole_select.c b/src/compiler/nir/nir_opt_peephole_select.c index 0fc658d..bad9dc4 100644 --- a/src/compiler/nir/nir_opt_peephole_select.c +++ b/src/compiler/nir/nir_opt_peephole_select.c @@ -210,7 +210,8 @@ nir_opt_peephole_select_block(nir_block *block, void *void_state) } nir_ssa_dest_init(&sel->instr, &sel->dest.dest, - phi->dest.ssa.num_components, phi->dest.ssa.name); + phi->dest.ssa.num_components, + phi->dest.ssa.bit_size, phi->dest.ssa.name); sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1; nir_ssa_def_rewrite_uses(&phi->dest.ssa, diff --git a/src/compiler/nir/nir_phi_builder.c b/src/compiler/nir/nir_phi_builder.c index 5429083..a39e360 100644 --- a/src/compiler/nir/nir_phi_builder.c +++ b/src/compiler/nir/nir_phi_builder.c @@ -52,6 +52,7 @@ struct nir_phi_builder_value { /* Needed so we can create phis and undefs */ unsigned num_components; + unsigned bit_size; /* The list of phi nodes associated with this value. Phi nodes are not * added directly. Instead, they are created, the instr->block pointer * @@ -61,8 +62,18 @@ struct nir_phi_builder_value { */ struct exec_list phis; - /* Array of SSA defs, indexed by block. If a phi needs to be inserted - * in a given block, it will have the magic value NEEDS_PHI. + /* Array of SSA defs, indexed by block. For each block, this array has + * one of three types of values: + * + * - NULL. Indicates that there is no known definition in this block. If + * you need to find one, look at the block's immediate dominator. + * + * - NEEDS_PHI. Indicates that the block may need a phi node but none has + * been created yet. If a def is requested for a block, a phi will need + * to be created. + * + * - A regular SSA def. This will be either the result of a phi node or + * one of the defs provided by nir_phi_builder_value_set_block_def(). 
*/ nir_ssa_def *defs[0]; }; @@ -101,7 +112,7 @@ nir_phi_builder_create(nir_function_impl *impl) struct nir_phi_builder_value * nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components, - const BITSET_WORD *defs) + unsigned bit_size, const BITSET_WORD *defs) { struct nir_phi_builder_value *val; unsigned i, w_start = 0, w_end = 0; @@ -109,6 +120,7 @@ nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components, val = rzalloc_size(pb, sizeof(*val) + sizeof(val->defs[0]) * pb->num_blocks); val->builder = pb; val->num_components = num_components; + val->bit_size = bit_size; exec_list_make_empty(&val->phis); exec_list_push_tail(&pb->values, &val->node); @@ -127,8 +139,7 @@ nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components, set_foreach(cur->dom_frontier, dom_entry) { nir_block *next = (nir_block *) dom_entry->key; - /* - * If there's more than one return statement, then the end block + /* If there's more than one return statement, then the end block * can be a join point for some definitions. However, there are * no instructions in the end block, so nothing would use those * phi nodes. Of course, we couldn't place those phi nodes @@ -139,6 +150,10 @@ nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components, continue; if (val->defs[next->index] == NULL) { + /* Instead of creating a phi node immediately, we simply set the + * value to the magic value NEEDS_PHI. Later, we create phi nodes + * on demand in nir_phi_builder_value_get_block_def(). + */ val->defs[next->index] = NEEDS_PHI; if (pb->work[next->index] < pb->iter_count) { @@ -163,7 +178,9 @@ nir_ssa_def * nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val, nir_block *block) { + /* For each block, we have one of three types of values */ if (val->defs[block->index] == NULL) { + /* NULL indicates that we have no SSA def for this block. */ if (block->imm_dom) { /* Grab it from our immediate dominator. We'll stash it here for * easy access later. @@ -185,17 +202,36 @@ nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val, return &undef->def; } } else if (val->defs[block->index] == NEEDS_PHI) { - /* If we need a phi instruction, go ahead and create one but don't - * add it to the program yet. Later, we'll go through and set up phi - * sources and add the instructions will be added at that time. + /* The magic value NEEDS_PHI indicates that the block needs a phi node + * but none has been created. We need to create one now so we can + * return it to the caller. + * + * Because a phi node may use SSA defs that it does not dominate (this + * happens in loops), we do not yet have enough information to fully + * fill out the phi node. Instead, the phi nodes we create here will be + * empty (have no sources) and won't actually be placed in the block's + * instruction list yet. Later, in nir_phi_builder_finish(), we walk + * over all of the phi instructions, fill out the sources lists, and + * place them at the top of their respective block's instruction list. + * + * Creating phi nodes on-demand allows us to avoid creating dead phi + * nodes that will just get deleted later. While this probably isn't a + * big win for a full into-SSA pass, other users may use the phi builder + * to make small SSA form repairs where most of the phi nodes will never + * be used. 
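(Editor's sketch, not part of the patch: the three-state lookup described above, modeled in Python. Block, get_block_def, and the string stand-ins for SSA defs are hypothetical simplifications of nir_block, nir_phi_builder_value_get_block_def(), and real nir_ssa_def pointers.)

    NEEDS_PHI = object()   # the magic marker stored in the defs[] array

    class Block:
        def __init__(self, index, imm_dom=None):
            self.index, self.imm_dom = index, imm_dom

    def get_block_def(defs, block):
        d = defs[block.index]
        if d is None:
            # No def here: inherit from the immediate dominator and cache
            # it, or fall back to an undef when there is no dominator.
            d = get_block_def(defs, block.imm_dom) if block.imm_dom else "undef"
            defs[block.index] = d
        elif d is NEEDS_PHI:
            # Create an empty phi on demand; its sources are filled in
            # later, as nir_phi_builder_finish() does.
            d = "phi@block%d" % block.index
            defs[block.index] = d
        return d

    b0 = Block(0); b1 = Block(1, b0); b2 = Block(2, b0)
    defs = {0: "def-in-b0", 1: None, 2: NEEDS_PHI}
    assert get_block_def(defs, b1) == "def-in-b0"   # walked up to b0
    assert get_block_def(defs, b2) == "phi@block2"  # phi created on demand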
*/ nir_phi_instr *phi = nir_phi_instr_create(val->builder->shader); - nir_ssa_dest_init(&phi->instr, &phi->dest, val->num_components, NULL); + nir_ssa_dest_init(&phi->instr, &phi->dest, val->num_components, + val->bit_size, NULL); phi->instr.block = block; exec_list_push_tail(&val->phis, &phi->instr.node); val->defs[block->index] = &phi->dest.ssa; return &phi->dest.ssa; } else { + /* In this case, we have an actual SSA def. It's either the result of a + * phi node created by the case above or one passed to us through + * nir_phi_builder_value_set_block_def(). + */ return val->defs[block->index]; } } @@ -216,9 +252,14 @@ nir_phi_builder_finish(struct nir_phi_builder *pb) NIR_VLA(nir_block *, preds, num_blocks); foreach_list_typed(struct nir_phi_builder_value, val, node, &pb->values) { - /* We can't iterate over the list of phis normally because we are - * removing them as we go and, in some cases, adding new phis as we - * build the source lists of others. + /* We treat the linked list of phi nodes like a worklist. The list is + * pre-populated by calls to nir_phi_builder_value_get_block_def() that + * create phi nodes. As we fill in the sources of phi nodes, more may + * be created and are added to the end of the list. + * + * Because we are adding and removing phi nodes from the list as we go, + * we can't iterate over it normally. Instead, we just iterate until + * the list is empty. */ while (!exec_list_is_empty(&val->phis)) { struct exec_node *head = exec_list_get_head(&val->phis); diff --git a/src/compiler/nir/nir_phi_builder.h b/src/compiler/nir/nir_phi_builder.h index 50251bf..edc5302 100644 --- a/src/compiler/nir/nir_phi_builder.h +++ b/src/compiler/nir/nir_phi_builder.h @@ -25,7 +25,38 @@ #include "nir.h" +/** A helper for placing phi nodes in a NIR shader + * + * Basic usage goes something like this: + * + * each variable, var, has: + * a bitset var.defs of blocks where the variable is defined + * a struct nir_phi_builder_value *pb_val + * + * // initialize bitsets + * foreach block: + * foreach def of variable var: + * var.defs[def.block] = true; + * + * // initialize phi builder + * pb = nir_phi_builder_create() + * foreach var: + * var.pb_val = nir_phi_builder_add_value(pb, var.defs) + * + * // Visit each block. This needs to visit dominators first; + * // nir_for_each_block() will be ok. + * foreach block: + * foreach instruction: + * foreach use of variable var: + * replace use with nir_phi_builder_get_block_def(var.pb_val) + * foreach def of variable var: + * create ssa def, register with + * nir_phi_builder_set_block_def(var.pb_val) + * + * nir_phi_builder_finish(pb) + */ struct nir_phi_builder; + struct nir_phi_builder_value; /* Create a new phi builder. @@ -43,7 +74,7 @@ struct nir_phi_builder *nir_phi_builder_create(nir_function_impl *impl); */ struct nir_phi_builder_value * nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components, - const BITSET_WORD *defs); + unsigned bit_size, const BITSET_WORD *defs); /* Register a definition for the given value and block. 
* diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 24d5281..60b74d1 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -207,6 +207,8 @@ print_alu_instr(nir_alu_instr *instr, print_state *state) print_alu_dest(&instr->dest, state); fprintf(fp, " = %s", nir_op_infos[instr->op].name); + if (instr->exact) + fprintf(fp, "!"); if (instr->dest.saturate) fprintf(fp, ".sat"); fprintf(fp, " "); @@ -714,7 +716,7 @@ print_load_const_instr(nir_load_const_instr *instr, print_state *state) * and then print the float in a comment for readability. */ - fprintf(fp, "0x%08x /* %f */", instr->value.u[i], instr->value.f[i]); + fprintf(fp, "0x%08x /* %f */", instr->value.u32[i], instr->value.f32[i]); } fprintf(fp, ")"); diff --git a/src/compiler/nir/nir_repair_ssa.c b/src/compiler/nir/nir_repair_ssa.c index 3ab4f0f..96c791c 100644 --- a/src/compiler/nir/nir_repair_ssa.c +++ b/src/compiler/nir/nir_repair_ssa.c @@ -85,7 +85,8 @@ repair_ssa_def(nir_ssa_def *def, void *void_state) BITSET_SET(state->def_set, def->parent_instr->block->index); struct nir_phi_builder_value *val = - nir_phi_builder_add_value(pb, def->num_components, state->def_set); + nir_phi_builder_add_value(pb, def->num_components, def->bit_size, + state->def_set); nir_phi_builder_value_set_block_def(val, def->parent_instr->block, def); diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c index 56d7e81..6e63063 100644 --- a/src/compiler/nir/nir_search.c +++ b/src/compiler/nir/nir_search.c @@ -62,7 +62,8 @@ alu_instr_is_bool(nir_alu_instr *instr) case nir_op_inot: return src_is_bool(instr->src[0].src); default: - return nir_op_infos[instr->op].output_type == nir_type_bool; + return (nir_alu_type_get_base_type(nir_op_infos[instr->op].output_type) + == nir_type_bool); } } @@ -125,8 +126,10 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src, nir_alu_instr *src_alu = nir_instr_as_alu(instr->src[src].src.ssa->parent_instr); - if (nir_op_infos[src_alu->op].output_type != var->type && - !(var->type == nir_type_bool && alu_instr_is_bool(src_alu))) + if (nir_alu_type_get_base_type(nir_op_infos[src_alu->op].output_type) != + var->type && + !(nir_alu_type_get_base_type(var->type) == nir_type_bool && + alu_instr_is_bool(src_alu))) return false; } @@ -158,21 +161,65 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src, nir_load_const_instr *load = nir_instr_as_load_const(instr->src[src].src.ssa->parent_instr); - switch (nir_op_infos[instr->op].input_types[src]) { + switch (const_val->type) { case nir_type_float: for (unsigned i = 0; i < num_components; ++i) { - if (load->value.f[new_swizzle[i]] != const_val->data.f) + double val; + switch (load->def.bit_size) { + case 32: + val = load->value.f32[new_swizzle[i]]; + break; + case 64: + val = load->value.f64[new_swizzle[i]]; + break; + default: + unreachable("unknown bit size"); + } + + if (val != const_val->data.d) return false; } return true; + case nir_type_int: + for (unsigned i = 0; i < num_components; ++i) { + int64_t val; + switch (load->def.bit_size) { + case 32: + val = load->value.i32[new_swizzle[i]]; + break; + case 64: + val = load->value.i64[new_swizzle[i]]; + break; + default: + unreachable("unknown bit size"); + } + + if (val != const_val->data.i) + return false; + } + return true; + case nir_type_uint: - case nir_type_bool: + case nir_type_bool32: for (unsigned i = 0; i < num_components; ++i) { - if (load->value.i[new_swizzle[i]] != const_val->data.i) + uint64_t 
val; + switch (load->def.bit_size) { + case 32: + val = load->value.u32[new_swizzle[i]]; + break; + case 64: + val = load->value.u64[new_swizzle[i]]; + break; + default: + unreachable("unknown bit size"); + } + + if (val != const_val->data.u) return false; } return true; + default: unreachable("Invalid alu source type"); } @@ -191,6 +238,10 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr, if (instr->op != expr->opcode) return false; + assert(instr->dest.dest.is_ssa); + if (expr->inexact && instr->exact) + return false; + assert(!instr->dest.saturate); assert(nir_op_infos[instr->op].num_inputs > 0); @@ -244,9 +295,123 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr, } } +typedef struct bitsize_tree { + unsigned num_srcs; + struct bitsize_tree *srcs[4]; + + unsigned common_size; + bool is_src_sized[4]; + bool is_dest_sized; + + unsigned dest_size; + unsigned src_size[4]; +} bitsize_tree; + +static bitsize_tree * +build_bitsize_tree(void *mem_ctx, struct match_state *state, + const nir_search_value *value) +{ + bitsize_tree *tree = rzalloc(mem_ctx, bitsize_tree); + + switch (value->type) { + case nir_search_value_expression: { + nir_search_expression *expr = nir_search_value_as_expression(value); + nir_op_info info = nir_op_infos[expr->opcode]; + tree->num_srcs = info.num_inputs; + tree->common_size = 0; + for (unsigned i = 0; i < info.num_inputs; i++) { + tree->is_src_sized[i] = !!nir_alu_type_get_type_size(info.input_types[i]); + if (tree->is_src_sized[i]) + tree->src_size[i] = nir_alu_type_get_type_size(info.input_types[i]); + tree->srcs[i] = build_bitsize_tree(mem_ctx, state, expr->srcs[i]); + } + tree->is_dest_sized = !!nir_alu_type_get_type_size(info.output_type); + if (tree->is_dest_sized) + tree->dest_size = nir_alu_type_get_type_size(info.output_type); + break; + } + + case nir_search_value_variable: { + nir_search_variable *var = nir_search_value_as_variable(value); + tree->num_srcs = 0; + tree->is_dest_sized = true; + tree->dest_size = nir_src_bit_size(state->variables[var->variable].src); + break; + } + + case nir_search_value_constant: { + tree->num_srcs = 0; + tree->is_dest_sized = false; + tree->common_size = 0; + break; + } + } + + return tree; +} + +static unsigned +bitsize_tree_filter_up(bitsize_tree *tree) +{ + for (unsigned i = 0; i < tree->num_srcs; i++) { + unsigned src_size = bitsize_tree_filter_up(tree->srcs[i]); + if (src_size == 0) + continue; + + if (tree->is_src_sized[i]) { + assert(src_size == tree->src_size[i]); + } else if (tree->common_size != 0) { + assert(src_size == tree->common_size); + tree->src_size[i] = src_size; + } else { + tree->common_size = src_size; + tree->src_size[i] = src_size; + } + } + + if (tree->num_srcs && tree->common_size) { + if (tree->dest_size == 0) + tree->dest_size = tree->common_size; + else if (!tree->is_dest_sized) + assert(tree->dest_size == tree->common_size); + + for (unsigned i = 0; i < tree->num_srcs; i++) { + if (!tree->src_size[i]) + tree->src_size[i] = tree->common_size; + } + } + + return tree->dest_size; +} + +static void +bitsize_tree_filter_down(bitsize_tree *tree, unsigned size) +{ + if (tree->dest_size) + assert(tree->dest_size == size); + else + tree->dest_size = size; + + if (!tree->is_dest_sized) { + if (tree->common_size) + assert(tree->common_size == size); + else + tree->common_size = size; + } + + for (unsigned i = 0; i < tree->num_srcs; i++) { + if (!tree->src_size[i]) { + assert(tree->common_size); + tree->src_size[i] = tree->common_size; + } + 
bitsize_tree_filter_down(tree->srcs[i], tree->src_size[i]); } } static nir_alu_src -construct_value(const nir_search_value *value, nir_alu_type type, - unsigned num_components, struct match_state *state, +construct_value(const nir_search_value *value, + unsigned num_components, bitsize_tree *bitsize, bool exact, + struct match_state *state, nir_instr *instr, void *mem_ctx) { switch (value->type) { @@ -257,7 +422,9 @@ construct_value(const nir_search_value *value, nir_alu_type type, num_components = nir_op_infos[expr->opcode].output_size; nir_alu_instr *alu = nir_alu_instr_create(mem_ctx, expr->opcode); - nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components, NULL); + nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components, + bitsize->dest_size, NULL); + alu->exact = exact; alu->dest.write_mask = (1 << num_components) - 1; alu->dest.saturate = false; @@ -269,8 +436,7 @@ construct_value(const nir_search_value *value, nir_alu_type type, num_components = nir_op_infos[alu->op].input_sizes[i]; alu->src[i] = construct_value(expr->srcs[i], - nir_op_infos[alu->op].input_types[i], - num_components, + num_components, bitsize->srcs[i], exact, state, instr, mem_ctx); } @@ -301,23 +467,57 @@ construct_value(const nir_search_value *value, nir_alu_type type, const nir_search_constant *c = nir_search_value_as_constant(value); nir_load_const_instr *load = nir_load_const_instr_create(mem_ctx, 1); - switch (type) { + switch (c->type) { case nir_type_float: - load->def.name = ralloc_asprintf(mem_ctx, "%f", c->data.f); - load->value.f[0] = c->data.f; + load->def.name = ralloc_asprintf(load, "%f", c->data.d); + switch (bitsize->dest_size) { + case 32: + load->value.f32[0] = c->data.d; + break; + case 64: + load->value.f64[0] = c->data.d; + break; + default: + unreachable("unknown bit size"); + } break; + case nir_type_int: - load->def.name = ralloc_asprintf(mem_ctx, "%d", c->data.i); - load->value.i[0] = c->data.i; + load->def.name = ralloc_asprintf(load, "%" PRIi64, c->data.i); + switch (bitsize->dest_size) { + case 32: + load->value.i32[0] = c->data.i; + break; + case 64: + load->value.i64[0] = c->data.i; + break; + default: + unreachable("unknown bit size"); + } break; + case nir_type_uint: - case nir_type_bool: - load->value.u[0] = c->data.u; + load->def.name = ralloc_asprintf(load, "%" PRIu64, c->data.u); + switch (bitsize->dest_size) { + case 32: + load->value.u32[0] = c->data.u; + break; + case 64: + load->value.u64[0] = c->data.u; + break; + default: + unreachable("unknown bit size"); + } + break; + + case nir_type_bool32: + load->value.u32[0] = c->data.u; break; default: unreachable("Invalid alu source type"); } + load->def.bit_size = bitsize->dest_size; + nir_instr_insert_before(instr, &load->instr); nir_alu_src val; @@ -352,6 +552,11 @@ nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search, swizzle, &state)) return NULL; + void *bitsize_ctx = ralloc_context(NULL); + bitsize_tree *tree = build_bitsize_tree(bitsize_ctx, &state, replace); + bitsize_tree_filter_up(tree); + bitsize_tree_filter_down(tree, instr->dest.dest.ssa.bit_size); + /* Inserting a mov may be unnecessary. However, it's much easier to * simply let copy propagation clean this up than to try to go through * and rewrite swizzles ourselves. 
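(Editor's sketch, not part of the patch: a toy Python model of the two-pass inference performed by bitsize_tree_filter_up() and bitsize_tree_filter_down() above. It simplifies the real C structure to a single size field per node, where 0 means unknown; the real tree also tracks per-source fixed sizes from nir_op_infos.)

    def filter_up(node):
        # Pull known sizes from the leaves toward the root.
        common = 0
        for src in node["srcs"]:
            s = filter_up(src)
            if s:
                assert common in (0, s)   # unsized operands must agree
                common = s
        if node["size"] == 0:
            node["size"] = common
        return node["size"]

    def filter_down(node, size):
        # Push the destination size back down to anything still unsized.
        if node["size"] == 0:
            node["size"] = size
        for src in node["srcs"]:
            filter_down(src, node["size"])

    # Replacement ('fadd', a, b) where the matched 'a' is 64-bit: both 'b'
    # and the fadd result are inferred to be 64-bit.
    expr = {"size": 0, "srcs": [{"size": 64, "srcs": []}, {"size": 0, "srcs": []}]}
    filter_down(expr, filter_up(expr))
    assert expr["size"] == 64 and expr["srcs"][1]["size"] == 64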
@@ -359,11 +564,12 @@ nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search, nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov); mov->dest.write_mask = instr->dest.write_mask; nir_ssa_dest_init(&mov->instr, &mov->dest.dest, - instr->dest.dest.ssa.num_components, NULL); + instr->dest.dest.ssa.num_components, + instr->dest.dest.ssa.bit_size, NULL); - mov->src[0] = construct_value(replace, nir_op_infos[instr->op].output_type, - instr->dest.dest.ssa.num_components, &state, - &instr->instr, mem_ctx); + mov->src[0] = construct_value(replace, + instr->dest.dest.ssa.num_components, tree, + instr->exact, &state, &instr->instr, mem_ctx); nir_instr_insert_before(&instr->instr, &mov->instr); nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, @@ -375,5 +581,7 @@ nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search, */ nir_instr_remove(&instr->instr); + ralloc_free(bitsize_ctx); + return mov; } diff --git a/src/compiler/nir/nir_search.h b/src/compiler/nir/nir_search.h index 7d47792..61742f1 100644 --- a/src/compiler/nir/nir_search.h +++ b/src/compiler/nir/nir_search.h @@ -71,16 +71,24 @@ typedef struct { typedef struct { nir_search_value value; + nir_alu_type type; + union { - uint32_t u; - int32_t i; - float f; + uint64_t u; + int64_t i; + double d; } data; } nir_search_constant; typedef struct { nir_search_value value; + /* When set on a search expression, the expression will only match an SSA + * value that does *not* have the exact bit set. If unset, the exact bit + * on the SSA value is ignored. + */ + bool inexact; + nir_op opcode; const nir_search_value *srcs[4]; } nir_search_expression; diff --git a/src/compiler/nir/nir_to_ssa.c b/src/compiler/nir/nir_to_ssa.c index 44a5054..d588d7d 100644 --- a/src/compiler/nir/nir_to_ssa.c +++ b/src/compiler/nir/nir_to_ssa.c @@ -219,7 +219,9 @@ rewrite_def_forwards(nir_dest *dest, void *_state) state->states[index].num_defs); list_del(&dest->reg.def_link); - nir_ssa_dest_init(state->parent_instr, dest, reg->num_components, name); + nir_ssa_dest_init(state->parent_instr, dest, reg->num_components, + reg->bit_size, name); + ralloc_free(name); /* push our SSA destination on the stack */ state->states[index].index++; @@ -271,7 +273,9 @@ rewrite_alu_instr_forward(nir_alu_instr *instr, rewrite_state *state) instr->dest.write_mask = (1 << num_components) - 1; list_del(&instr->dest.dest.reg.def_link); - nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, name); + nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, + reg->bit_size, name); + ralloc_free(name); if (nir_op_infos[instr->op].output_size == 0) { /* diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c index 0c32d5f..9f18d1c 100644 --- a/src/compiler/nir/nir_validate.c +++ b/src/compiler/nir/nir_validate.c @@ -179,9 +179,12 @@ validate_alu_src(nir_alu_instr *instr, unsigned index, validate_state *state) nir_alu_src *src = &instr->src[index]; unsigned num_components; - if (src->src.is_ssa) + unsigned src_bit_size; + if (src->src.is_ssa) { + src_bit_size = src->src.ssa->bit_size; num_components = src->src.ssa->num_components; - else { + } else { + src_bit_size = src->src.reg.reg->bit_size; if (src->src.reg.reg->is_packed) num_components = 4; /* can't check anything */ else @@ -194,6 +197,24 @@ validate_alu_src(nir_alu_instr *instr, unsigned index, validate_state *state) assert(src->swizzle[i] < num_components); } + nir_alu_type src_type = nir_op_infos[instr->op].input_types[index]; + + /* 8-bit float 
isn't a thing */ + if (nir_alu_type_get_base_type(src_type) == nir_type_float) + assert(src_bit_size == 16 || src_bit_size == 32 || src_bit_size == 64); + + if (nir_alu_type_get_type_size(src_type)) { + /* This source has an explicit bit size */ + assert(nir_alu_type_get_type_size(src_type) == src_bit_size); + } else { + if (!nir_alu_type_get_type_size(nir_op_infos[instr->op].output_type)) { + unsigned dest_bit_size = + instr->dest.dest.is_ssa ? instr->dest.dest.ssa.bit_size + : instr->dest.dest.reg.reg->bit_size; + assert(dest_bit_size == src_bit_size); + } + } + validate_src(&src->src, state); } @@ -263,8 +284,10 @@ validate_dest(nir_dest *dest, validate_state *state) } static void -validate_alu_dest(nir_alu_dest *dest, validate_state *state) +validate_alu_dest(nir_alu_instr *instr, validate_state *state) { + nir_alu_dest *dest = &instr->dest; + unsigned dest_size = dest->dest.is_ssa ? dest->dest.ssa.num_components : dest->dest.reg.reg->num_components; @@ -282,6 +305,17 @@ validate_alu_dest(nir_alu_dest *dest, validate_state *state) assert(nir_op_infos[alu->op].output_type == nir_type_float || !dest->saturate); + unsigned bit_size = dest->dest.is_ssa ? dest->dest.ssa.bit_size + : dest->dest.reg.reg->bit_size; + nir_alu_type type = nir_op_infos[instr->op].output_type; + + /* 8-bit float isn't a thing */ + if (nir_alu_type_get_base_type(type) == nir_type_float) + assert(bit_size == 16 || bit_size == 32 || bit_size == 64); + + assert(nir_alu_type_get_type_size(type) == 0 || + nir_alu_type_get_type_size(type) == bit_size); + validate_dest(&dest->dest, state); } @@ -294,7 +328,7 @@ validate_alu_instr(nir_alu_instr *instr, validate_state *state) validate_alu_src(instr, i, state); } - validate_alu_dest(&instr->dest, state); + validate_alu_dest(instr, state); } static void diff --git a/src/compiler/nir/spirv/spirv_to_nir.c b/src/compiler/nir/spirv/spirv_to_nir.c index 5a7184a..42a1f95 100644 --- a/src/compiler/nir/spirv/spirv_to_nir.c +++ b/src/compiler/nir/spirv/spirv_to_nir.c @@ -92,7 +92,7 @@ vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant, nir_load_const_instr_create(b->shader, num_components); for (unsigned i = 0; i < num_components; i++) - load->value.u[i] = constant->value.u[i]; + load->value.u32[i] = constant->value.u[i]; nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); val->def = &load->def; @@ -109,7 +109,7 @@ vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant, nir_load_const_instr_create(b->shader, rows); for (unsigned j = 0; j < rows; j++) - load->value.u[j] = constant->value.u[rows * i + j]; + load->value.u32[j] = constant->value.u[rows * i + j]; nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); col_val->def = &load->def; @@ -1035,6 +1035,8 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap); unsigned num_components = glsl_get_vector_elements(val->const_type); + unsigned bit_size = + glsl_get_bit_size(glsl_get_base_type(val->const_type)); nir_const_value src[3]; assert(count <= 7); @@ -1043,14 +1045,16 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, vtn_value(b, w[4 + i], vtn_value_type_constant)->constant; unsigned j = swap ? 
1 - i : i; + assert(bit_size == 32); for (unsigned k = 0; k < num_components; k++) - src[j].u[k] = c->value.u[k]; + src[j].u32[k] = c->value.u[k]; } - nir_const_value res = nir_eval_const_opcode(op, num_components, src); + nir_const_value res = nir_eval_const_opcode(op, num_components, + bit_size, src); for (unsigned k = 0; k < num_components; k++) - val->constant->value.u[k] = res.u[k]; + val->constant->value.u[k] = res.u32[k]; return; } /* default */ @@ -1414,7 +1418,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, } nir_ssa_dest_init(&instr->instr, &instr->dest, - nir_tex_instr_dest_size(instr), NULL); + nir_tex_instr_dest_size(instr), 32, NULL); assert(glsl_get_vector_elements(ret_type->type) == nir_tex_instr_dest_size(instr)); @@ -1600,7 +1604,7 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, if (opcode != SpvOpImageWrite) { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; - nir_ssa_dest_init(&intrin->instr, &intrin->dest, 4, NULL); + nir_ssa_dest_init(&intrin->instr, &intrin->dest, 4, 32, NULL); nir_builder_instr_insert(&b->nb, &intrin->instr); @@ -1738,7 +1742,7 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode, fill_common_atomic_sources(b, opcode, w, &atomic->src[2]); } - nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, NULL); + nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, 32, NULL); struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); @@ -1750,7 +1754,7 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode, } static nir_alu_instr * -create_vec(nir_shader *shader, unsigned num_components) +create_vec(nir_shader *shader, unsigned num_components, unsigned bit_size) { nir_op op; switch (num_components) { @@ -1762,7 +1766,8 @@ create_vec(nir_shader *shader, unsigned num_components) } nir_alu_instr *vec = nir_alu_instr_create(shader, op); - nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL); + nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, + bit_size, NULL); vec->dest.write_mask = (1 << num_components) - 1; return vec; @@ -1779,7 +1784,8 @@ vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src) for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) { nir_alu_instr *vec = create_vec(b->shader, - glsl_get_matrix_columns(src->type)); + glsl_get_matrix_columns(src->type), + glsl_get_bit_size(glsl_get_base_type(src->type))); if (glsl_type_is_vector_or_scalar(src->type)) { vec->src[0].src = nir_src_for_ssa(src->def); vec->src[0].swizzle[0] = i; @@ -1809,7 +1815,8 @@ nir_ssa_def * vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert, unsigned index) { - nir_alu_instr *vec = create_vec(b->shader, src->num_components); + nir_alu_instr *vec = create_vec(b->shader, src->num_components, + src->bit_size); for (unsigned i = 0; i < src->num_components; i++) { if (i == index) { @@ -1854,7 +1861,7 @@ vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components, nir_ssa_def *src0, nir_ssa_def *src1, const uint32_t *indices) { - nir_alu_instr *vec = create_vec(b->shader, num_components); + nir_alu_instr *vec = create_vec(b->shader, num_components, src0->bit_size); nir_ssa_undef_instr *undef = nir_ssa_undef_instr_create(b->shader, 1); nir_builder_instr_insert(&b->nb, &undef->instr); @@ -1884,7 +1891,8 @@ static nir_ssa_def * vtn_vector_construct(struct vtn_builder *b, 
unsigned num_components, unsigned num_srcs, nir_ssa_def **srcs) { - nir_alu_instr *vec = create_vec(b->shader, num_components); + nir_alu_instr *vec = create_vec(b->shader, num_components, + srcs[0]->bit_size); unsigned dest_idx = 0; for (unsigned i = 0; i < num_srcs; i++) { diff --git a/src/compiler/nir/spirv/vtn_glsl450.c b/src/compiler/nir/spirv/vtn_glsl450.c index 6b649fd..3360fda 100644 --- a/src/compiler/nir/spirv/vtn_glsl450.c +++ b/src/compiler/nir/spirv/vtn_glsl450.c @@ -627,7 +627,9 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint, nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); nir_ssa_dest_init(&instr->instr, &instr->dest.dest, - glsl_get_vector_elements(val->ssa->type), val->name); + glsl_get_vector_elements(val->ssa->type), + glsl_get_bit_size(glsl_get_base_type(val->ssa->type)), + val->name); instr->dest.write_mask = (1 << instr->dest.dest.ssa.num_components) - 1; val->ssa->def = &instr->dest.dest.ssa; diff --git a/src/compiler/nir/spirv/vtn_variables.c b/src/compiler/nir/spirv/vtn_variables.c index 31bf416..3cbac1e 100644 --- a/src/compiler/nir/spirv/vtn_variables.c +++ b/src/compiler/nir/spirv/vtn_variables.c @@ -190,7 +190,9 @@ _vtn_local_load_store(struct vtn_builder *b, bool load, nir_deref_var *deref, if (load) { nir_ssa_dest_init(&intrin->instr, &intrin->dest, - intrin->num_components, NULL); + intrin->num_components, + glsl_get_bit_size(glsl_get_base_type(tail->type)), + NULL); inout->def = &intrin->dest.ssa; } else { nir_intrinsic_set_write_mask(intrin, (1 << intrin->num_components) - 1); @@ -322,7 +324,7 @@ get_vulkan_resource_index(struct vtn_builder *b, struct vtn_access_chain *chain, nir_intrinsic_set_desc_set(instr, chain->var->descriptor_set); nir_intrinsic_set_binding(instr, chain->var->binding); - nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL); nir_builder_instr_insert(&b->nb, &instr->instr); return &instr->dest.ssa; @@ -411,7 +413,8 @@ _vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load, if (load) { nir_ssa_dest_init(&instr->instr, &instr->dest, - instr->num_components, NULL); + instr->num_components, + glsl_get_bit_size(glsl_get_base_type(type)), NULL); (*inout)->def = &instr->dest.ssa; } @@ -1385,7 +1388,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_get_buffer_size); instr->src[0] = nir_src_for_ssa(index); - nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL); nir_builder_instr_insert(&b->nb, &instr->instr); nir_ssa_def *buf_size = &instr->dest.ssa;
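(Editor's closing sketch, illustrative only: the whole exact/inexact interaction added by this series reduces to one predicate, mirroring the "if (expr->inexact && instr->exact) return false;" check added to match_expression() earlier in the diff. Exact patterns may always fire; '~' patterns are refused only on instructions whose results are marked exact.)

    def may_apply(pattern_inexact, instr_exact):
        # An inexact ('~') pattern must never rewrite an instruction whose
        # result is exact; everything else is unaffected by the exact bit.
        return not (pattern_inexact and instr_exact)

    assert may_apply(False, True)       # exact pattern, exact instruction
    assert may_apply(True, False)       # inexact pattern, ordinary instruction
    assert not may_apply(True, True)    # inexact pattern, exact instruction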