diff options
Diffstat (limited to 'src/mesa/drivers')
33 files changed, 305 insertions, 181 deletions
diff --git a/src/mesa/drivers/common/meta_blit.c b/src/mesa/drivers/common/meta_blit.c index 0066f7f..6761238 100644 --- a/src/mesa/drivers/common/meta_blit.c +++ b/src/mesa/drivers/common/meta_blit.c @@ -597,6 +597,7 @@ blitframebuffer_texture(struct gl_context *ctx, GLenum filter, GLint flipX, GLint flipY, GLboolean glsl_version, GLboolean do_depth) { + struct save_state *save = &ctx->Meta->Save[ctx->Meta->SaveStackDepth - 1]; int att_index = do_depth ? BUFFER_DEPTH : readFb->_ColorReadBufferIndex; const struct gl_renderbuffer_attachment *readAtt = &readFb->Attachment[att_index]; @@ -709,7 +710,7 @@ blitframebuffer_texture(struct gl_context *ctx, fb_tex_blit.samp_obj = _mesa_meta_setup_sampler(ctx, texObj, target, filter, srcLevel); - /* Always do our blits with no net sRGB decode or encode. + /* For desktop GL, we do our blits with no net sRGB decode or encode. * * However, if both the src and dst can be srgb decode/encoded, enable them * so that we do any blending (from scaling or from MSAA resolves) in the @@ -723,18 +724,42 @@ blitframebuffer_texture(struct gl_context *ctx, * scissor test." * * The GL 4.4 specification disagrees and says that the sRGB part of the - * fragment pipeline applies, but this was found to break applications. + * fragment pipeline applies, but this was found to break applications + * (such as Left 4 Dead 2). + * + * However, for ES 3.0, we follow the specification and perform sRGB + * decoding and encoding. The specification has always been clear in + * the ES world, and hasn't changed over time. 
*/ if (ctx->Extensions.EXT_texture_sRGB_decode) { - if (_mesa_get_format_color_encoding(rb->Format) == GL_SRGB && - drawFb->Visual.sRGBCapable) { + bool src_srgb = _mesa_get_format_color_encoding(rb->Format) == GL_SRGB; + if (save->API == API_OPENGLES2 && ctx->Version >= 30) { + /* From the ES 3.0.4 specification, page 198: + * "When values are taken from the read buffer, if the value of + * FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING for the framebuffer + * attachment corresponding to the read buffer is SRGB (see section + * 6.1.13), the red, green, and blue components are converted from + * the non-linear sRGB color space according to equation 3.24. + * + * When values are written to the draw buffers, blit operations + * bypass the fragment pipeline. The only fragment operations which + * affect a blit are the pixel ownership test, the scissor test, + * and sRGB conversion (see section 4.1.8)." + */ _mesa_set_sampler_srgb_decode(ctx, fb_tex_blit.samp_obj, - GL_DECODE_EXT); - _mesa_set_framebuffer_srgb(ctx, GL_TRUE); + src_srgb ? GL_DECODE_EXT + : GL_SKIP_DECODE_EXT); + _mesa_set_framebuffer_srgb(ctx, drawFb->Visual.sRGBCapable); } else { - _mesa_set_sampler_srgb_decode(ctx, fb_tex_blit.samp_obj, - GL_SKIP_DECODE_EXT); - /* set_framebuffer_srgb was set by _mesa_meta_begin(). */ + if (src_srgb && drawFb->Visual.sRGBCapable) { + _mesa_set_sampler_srgb_decode(ctx, fb_tex_blit.samp_obj, + GL_DECODE_EXT); + _mesa_set_framebuffer_srgb(ctx, GL_TRUE); + } else { + _mesa_set_sampler_srgb_decode(ctx, fb_tex_blit.samp_obj, + GL_SKIP_DECODE_EXT); + /* set_framebuffer_srgb was set by _mesa_meta_begin(). 
*/ + } } } diff --git a/src/mesa/drivers/common/meta_copy_image.c b/src/mesa/drivers/common/meta_copy_image.c index 18b9681..9402a46 100644 --- a/src/mesa/drivers/common/meta_copy_image.c +++ b/src/mesa/drivers/common/meta_copy_image.c @@ -269,6 +269,9 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx, if (status != GL_FRAMEBUFFER_COMPLETE) goto meta_end; + /* Explicitly disable sRGB encoding */ + ctx->DrawBuffer->Visual.sRGBCapable = false; + /* Since we've bound a new draw framebuffer, we need to update its * derived state -- _Xmin, etc -- for BlitFramebuffer's clipping to * be correct. diff --git a/src/mesa/drivers/common/meta_tex_subimage.c b/src/mesa/drivers/common/meta_tex_subimage.c index dfd3327..62c3fce 100644 --- a/src/mesa/drivers/common/meta_tex_subimage.c +++ b/src/mesa/drivers/common/meta_tex_subimage.c @@ -263,6 +263,9 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims, if (status != GL_FRAMEBUFFER_COMPLETE) goto fail; + /* Explicitly disable sRGB encoding */ + ctx->DrawBuffer->Visual.sRGBCapable = false; + _mesa_update_state(ctx); if (_mesa_meta_BlitFramebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer, @@ -420,6 +423,9 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims, if (status != GL_FRAMEBUFFER_COMPLETE) goto fail; + /* Explicitly disable sRGB encoding */ + ctx->DrawBuffer->Visual.sRGBCapable = false; + _mesa_update_state(ctx); if (_mesa_meta_BlitFramebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer, diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp index 4497eab..38a3236 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp @@ -115,12 +115,11 @@ brw_blorp_surface_info::set(struct brw_context *brw, this->brw_surfaceformat = BRW_SURFACEFORMAT_R16_UNORM; break; default: { - mesa_format linear_format = _mesa_get_srgb_format_linear(format); if (is_render_target) { - 
assert(brw->format_supported_as_render_target[linear_format]); - this->brw_surfaceformat = brw->render_target_format[linear_format]; + assert(brw->format_supported_as_render_target[format]); + this->brw_surfaceformat = brw->render_target_format[format]; } else { - this->brw_surfaceformat = brw_format_for_mesa_format(linear_format); + this->brw_surfaceformat = brw_format_for_mesa_format(format); } break; } diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h index a04a1df..f04e196 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.h +++ b/src/mesa/drivers/dri/i965/brw_blorp.h @@ -46,7 +46,8 @@ brw_blorp_blit_miptrees(struct brw_context *brw, float src_x1, float src_y1, float dst_x0, float dst_y0, float dst_x1, float dst_y1, - GLenum filter, bool mirror_x, bool mirror_y); + GLenum filter, bool mirror_x, bool mirror_y, + bool decode_srgb, bool encode_srgb); #ifdef __cplusplus } /* end extern "C" */ diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index 05fff91..5fd25f1 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -21,6 +21,7 @@ * IN THE SOFTWARE. */ +#include "main/context.h" #include "main/teximage.h" #include "main/fbobject.h" @@ -63,7 +64,8 @@ brw_blorp_blit_miptrees(struct brw_context *brw, float src_x1, float src_y1, float dst_x0, float dst_y0, float dst_x1, float dst_y1, - GLenum filter, bool mirror_x, bool mirror_y) + GLenum filter, bool mirror_x, bool mirror_y, + bool decode_srgb, bool encode_srgb) { /* Get ready to blit. This includes depth resolving the src and dst * buffers if necessary. 
Note: it's not necessary to do a color resolve on @@ -89,6 +91,12 @@ brw_blorp_blit_miptrees(struct brw_context *brw, dst_level, dst_layer, dst_x0, dst_y0, dst_x1, dst_y1, mirror_x, mirror_y); + if (!decode_srgb && _mesa_get_format_color_encoding(src_format) == GL_SRGB) + src_format = _mesa_get_srgb_format_linear(src_format); + + if (!encode_srgb && _mesa_get_format_color_encoding(dst_format) == GL_SRGB) + dst_format = _mesa_get_srgb_format_linear(dst_format); + brw_blorp_blit_params params(brw, src_mt, src_level, src_layer, src_format, dst_mt, dst_level, dst_layer, dst_format, @@ -114,6 +122,8 @@ do_blorp_blit(struct brw_context *brw, GLbitfield buffer_bit, struct intel_mipmap_tree *src_mt = find_miptree(buffer_bit, src_irb); struct intel_mipmap_tree *dst_mt = find_miptree(buffer_bit, dst_irb); + const bool es3 = _mesa_is_gles3(&brw->ctx); + /* Do the blit */ brw_blorp_blit_miptrees(brw, src_mt, src_irb->mt_level, src_irb->mt_layer, @@ -122,7 +132,8 @@ do_blorp_blit(struct brw_context *brw, GLbitfield buffer_bit, dst_format, srcX0, srcY0, srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, - filter, mirror_x, mirror_y); + filter, mirror_x, mirror_y, + es3, es3); dst_irb->need_downsample = true; } @@ -289,7 +300,8 @@ brw_blorp_copytexsubimage(struct brw_context *brw, dst_image->TexFormat, srcX0, srcY0, srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, - GL_NEAREST, false, mirror_y); + GL_NEAREST, false, mirror_y, + false, false); /* If we're copying to a packed depth stencil texture and the source * framebuffer has separate stencil, we need to also copy the stencil data @@ -314,7 +326,8 @@ brw_blorp_copytexsubimage(struct brw_context *brw, dst_mt->format, srcX0, srcY0, srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, - GL_NEAREST, false, mirror_y); + GL_NEAREST, false, mirror_y, + false, false); } } diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c b/src/mesa/drivers/dri/i965/brw_compiler.c index a95f51b..b32252f 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.c +++ 
b/src/mesa/drivers/dri/i965/brw_compiler.c @@ -108,6 +108,26 @@ static const struct nir_shader_compiler_options vector_nir_options = { */ .fdot_replicates = true, + /* Prior to Gen6, there are no three source operations for SIMD4x2. */ + .lower_flrp = true, + + .lower_pack_snorm_2x16 = true, + .lower_pack_unorm_2x16 = true, + .lower_unpack_snorm_2x16 = true, + .lower_unpack_unorm_2x16 = true, + .lower_extract_byte = true, + .lower_extract_word = true, +}; + +static const struct nir_shader_compiler_options vector_nir_options_gen6 = { + COMMON_OPTIONS, + + /* In the vec4 backend, our dpN instruction replicates its result to all the + * components of a vec4. We would like NIR to give us replicated fdot + * instructions because it can optimize better for us. + */ + .fdot_replicates = true, + .lower_pack_snorm_2x16 = true, .lower_pack_unorm_2x16 = true, .lower_unpack_snorm_2x16 = true, @@ -160,8 +180,12 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) if (devinfo->gen < 7) compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true; - compiler->glsl_compiler_options[i].NirOptions = - is_scalar ? &scalar_nir_options : &vector_nir_options; + if (is_scalar) { + compiler->glsl_compiler_options[i].NirOptions = &scalar_nir_options; + } else { + compiler->glsl_compiler_options[i].NirOptions = + devinfo->gen < 6 ? 
&vector_nir_options : &vector_nir_options_gen6; + } compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true; } diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 0b99356..5b14252 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1294,7 +1294,7 @@ pop_if_stack(struct brw_codegen *p) static void push_loop_stack(struct brw_codegen *p, brw_inst *inst) { - if (p->loop_stack_array_size < p->loop_stack_depth) { + if (p->loop_stack_array_size <= (p->loop_stack_depth + 1)) { p->loop_stack_array_size *= 2; p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int, p->loop_stack_array_size); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 874053c..33c4adc 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2307,17 +2307,6 @@ fs_visitor::opt_algebraic() progress = true; } break; - case SHADER_OPCODE_RCP: { - fs_inst *prev = (fs_inst *)inst->prev; - if (prev->opcode == SHADER_OPCODE_SQRT) { - if (inst->src[0].equals(prev->dst)) { - inst->opcode = SHADER_OPCODE_RSQ; - inst->src[0] = prev->src[0]; - progress = true; - } - } - break; - } case SHADER_OPCODE_BROADCAST: if (is_uniform(inst->src[0])) { inst->opcode = BRW_OPCODE_MOV; diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 2616e65..ffab0a8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -654,21 +654,6 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) } break; - case SHADER_OPCODE_RCP: - /* The hardware doesn't do math on immediate values - * (because why are you doing that, seriously?), but - * the correct answer is to just constant fold it - * anyway. 
- */ - assert(i == 0); - if (inst->src[0].f != 0.0f) { - inst->opcode = BRW_OPCODE_MOV; - inst->src[0] = val; - inst->src[0].f = 1.0f / inst->src[0].f; - progress = true; - } - break; - case SHADER_OPCODE_UNTYPED_ATOMIC: case SHADER_OPCODE_UNTYPED_SURFACE_READ: case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 29ef609..aa4c745 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -518,10 +518,10 @@ fs_visitor::optimize_extract_to_float(nir_alu_instr *instr, enum opcode extract_op; if (src0->op == nir_op_extract_u16 || src0->op == nir_op_extract_i16) { - assert(element->u[0] <= 1); + assert(element->u32[0] <= 1); extract_op = SHADER_OPCODE_EXTRACT_WORD; } else { - assert(element->u[0] <= 3); + assert(element->u32[0] <= 3); extract_op = SHADER_OPCODE_EXTRACT_BYTE; } @@ -530,7 +530,7 @@ fs_visitor::optimize_extract_to_float(nir_alu_instr *instr, op0 = offset(op0, bld, src0->src[0].swizzle[0]); set_saturate(instr->dest.saturate, - bld.emit(extract_op, result, op0, brw_imm_ud(element->u[0]))); + bld.emit(extract_op, result, op0, brw_imm_ud(element->u32[0]))); return true; } @@ -549,11 +549,11 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr, return false; nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src); - if (!value1 || fabsf(value1->f[0]) != 1.0f) + if (!value1 || fabsf(value1->f32[0]) != 1.0f) return false; nir_const_value *value2 = nir_src_as_const_value(instr->src[2].src); - if (!value2 || fabsf(value2->f[0]) != 1.0f) + if (!value2 || fabsf(value2->f32[0]) != 1.0f) return false; fs_reg tmp = vgrf(glsl_type::int_type); @@ -573,7 +573,7 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr, * surely be TRIANGLES */ - if (value1->f[0] == -1.0f) { + if (value1->f32[0] == -1.0f) { g0.negate = true; } @@ -601,7 +601,7 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr, 
* surely be TRIANGLES */ - if (value1->f[0] == -1.0f) { + if (value1->f32[0] == -1.0f) { g1_6.negate = true; } @@ -1180,7 +1180,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) case nir_op_extract_i8: { nir_const_value *byte = nir_src_as_const_value(instr->src[1].src); bld.emit(SHADER_OPCODE_EXTRACT_BYTE, - result, op[0], brw_imm_ud(byte->u[0])); + result, op[0], brw_imm_ud(byte->u32[0])); break; } @@ -1188,7 +1188,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) case nir_op_extract_i16: { nir_const_value *word = nir_src_as_const_value(instr->src[1].src); bld.emit(SHADER_OPCODE_EXTRACT_WORD, - result, op[0], brw_imm_ud(word->u[0])); + result, op[0], brw_imm_ud(word->u32[0])); break; } @@ -1215,7 +1215,7 @@ fs_visitor::nir_emit_load_const(const fs_builder &bld, fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_D, instr->def.num_components); for (unsigned i = 0; i < instr->def.num_components; i++) - bld.MOV(offset(reg, bld, i), brw_imm_d(instr->value.i[i])); + bld.MOV(offset(reg, bld, i), brw_imm_d(instr->value.i32[i])); nir_ssa_values[instr->def.index] = reg; } @@ -1769,9 +1769,9 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst, const bool is_point_size = (base_offset == 0); if (offset_const != NULL && vertex_const != NULL && - 4 * (base_offset + offset_const->u[0]) < push_reg_count) { - int imm_offset = (base_offset + offset_const->u[0]) * 4 + - vertex_const->u[0] * push_reg_count; + 4 * (base_offset + offset_const->u32[0]) < push_reg_count) { + int imm_offset = (base_offset + offset_const->u32[0]) * 4 + + vertex_const->u32[0] * push_reg_count; /* This input was pushed into registers. */ if (is_point_size) { /* gl_PointSize comes in .w */ @@ -1793,7 +1793,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst, if (vertex_const) { /* The vertex index is constant; just select the proper URB handle. 
*/ icp_handle = - retype(brw_vec8_grf(first_icp_handle + vertex_const->i[0], 0), + retype(brw_vec8_grf(first_icp_handle + vertex_const->i32[0], 0), BRW_REGISTER_TYPE_UD); } else { /* The vertex index is non-constant. We need to use indirect @@ -1837,7 +1837,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst, if (offset_const) { /* Constant indexing - use global offset. */ inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, icp_handle); - inst->offset = base_offset + offset_const->u[0]; + inst->offset = base_offset + offset_const->u32[0]; inst->base_mrf = -1; inst->mlen = 1; inst->regs_written = num_components; @@ -1875,7 +1875,7 @@ fs_visitor::get_indirect_offset(nir_intrinsic_instr *instr) * add_const_offset_to_base() will fold other constant offsets * into instr->const_index[0]. */ - assert(const_value->u[0] == 0); + assert(const_value->u32[0] == 0); return fs_reg(); } @@ -2193,7 +2193,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]); if (const_sample) { - unsigned msg_data = const_sample->i[0] << 4; + unsigned msg_data = const_sample->i32[0] << 4; emit_pixel_interpolater_send(bld, FS_OPCODE_INTERPOLATE_AT_SAMPLE, @@ -2260,8 +2260,8 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); if (const_offset) { - unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf; - unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf; + unsigned off_x = MIN2((int)(const_offset->f32[0] * 16), 7) & 0xf; + unsigned off_y = MIN2((int)(const_offset->f32[1] * 16), 7) & 0xf; emit_pixel_interpolater_send(bld, FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, @@ -2420,7 +2420,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld, fs_reg offset_reg; nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); if (const_offset) { - offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0]); + 
offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]); } else { offset_reg = vgrf(glsl_type::uint_type); bld.ADD(offset_reg, @@ -2464,7 +2464,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld, nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); if (const_offset) { - offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0] + + offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0] + 4 * first_component); } else { offset_reg = vgrf(glsl_type::uint_type); @@ -2695,8 +2695,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); if (const_offset) { /* Offsets are in bytes but they should always be multiples of 4 */ - assert(const_offset->u[0] % 4 == 0); - src.reg_offset = const_offset->u[0] / 4; + assert(const_offset->u32[0] % 4 == 0); + src.reg_offset = const_offset->u32[0] / 4; for (unsigned j = 0; j < instr->num_components; j++) { bld.MOV(offset(dest, bld, j), offset(src, bld, j)); @@ -2729,7 +2729,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr if (const_index) { const unsigned index = stage_prog_data->binding_table.ubo_start + - const_index->u[0]; + const_index->u32[0]; surf_index = brw_imm_ud(index); brw_mark_surface_used(prog_data, index); } else { @@ -2762,12 +2762,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr fs_reg packed_consts = vgrf(glsl_type::float_type); packed_consts.type = dest.type; - struct brw_reg const_offset_reg = brw_imm_ud(const_offset->u[0] & ~15); + struct brw_reg const_offset_reg = brw_imm_ud(const_offset->u32[0] & ~15); bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts, surf_index, const_offset_reg); for (unsigned i = 0; i < instr->num_components; i++) { - packed_consts.set_smear(const_offset->u[0] % 16 / 4 + i); + packed_consts.set_smear(const_offset->u32[0] % 16 / 4 + i); /* The 
std140 packing rules don't allow vectors to cross 16-byte * boundaries, and a reg is 32 bytes. @@ -2790,7 +2790,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr fs_reg surf_index; if (const_uniform_block) { unsigned index = stage_prog_data->binding_table.ssbo_start + - const_uniform_block->u[0]; + const_uniform_block->u32[0]; surf_index = brw_imm_ud(index); brw_mark_surface_used(prog_data, index); } else { @@ -2809,7 +2809,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr fs_reg offset_reg; nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); if (const_offset) { - offset_reg = brw_imm_ud(const_offset->u[0]); + offset_reg = brw_imm_ud(const_offset->u32[0]); } else { offset_reg = get_nir_src(instr->src[1]); } @@ -2837,7 +2837,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); assert(const_offset && "Indirect input loads not allowed"); - src = offset(src, bld, const_offset->u[0]); + src = offset(src, bld, const_offset->u32[0]); for (unsigned j = 0; j < instr->num_components; j++) { bld.MOV(offset(dest, bld, j), offset(src, bld, j)); @@ -2854,7 +2854,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr nir_src_as_const_value(instr->src[1]); if (const_uniform_block) { unsigned index = stage_prog_data->binding_table.ssbo_start + - const_uniform_block->u[0]; + const_uniform_block->u32[0]; surf_index = brw_imm_ud(index); brw_mark_surface_used(prog_data, index); } else { @@ -2885,7 +2885,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr fs_reg offset_reg; nir_const_value *const_offset = nir_src_as_const_value(instr->src[2]); if (const_offset) { - offset_reg = brw_imm_ud(const_offset->u[0] + 4 * first_component); + offset_reg = brw_imm_ud(const_offset->u32[0] + 4 * first_component); } else { offset_reg = 
vgrf(glsl_type::uint_type); bld.ADD(offset_reg, @@ -2913,7 +2913,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); assert(const_offset && "Indirect output stores not allowed"); - new_dest = offset(new_dest, bld, const_offset->u[0]); + new_dest = offset(new_dest, bld, const_offset->u32[0]); for (unsigned j = 0; j < instr->num_components; j++) { bld.MOV(offset(new_dest, bld, j), offset(src, bld, j)); @@ -2954,7 +2954,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr case nir_intrinsic_get_buffer_size: { nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]); - unsigned ssbo_index = const_uniform_block ? const_uniform_block->u[0] : 0; + unsigned ssbo_index = const_uniform_block ? const_uniform_block->u32[0] : 0; int reg_width = dispatch_width / 8; /* Set LOD = 0 */ @@ -3005,7 +3005,7 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld, nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]); if (const_surface) { unsigned surf_index = stage_prog_data->binding_table.ssbo_start + - const_surface->u[0]; + const_surface->u32[0]; surface = brw_imm_ud(surf_index); brw_mark_surface_used(prog_data, surf_index); } else { @@ -3134,7 +3134,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) nir_const_value *const_offset = nir_src_as_const_value(instr->src[i].src); if (const_offset) { - tex_offset = brw_imm_ud(brw_texture_offset(const_offset->i, 3)); + tex_offset = brw_imm_ud(brw_texture_offset(const_offset->i32, 3)); } else { tex_offset = retype(src, BRW_REGISTER_TYPE_D); } diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 941920a..ab6000b 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -77,7 +77,7 @@ add_const_offset_to_base_block(nir_block *block, void *closure) nir_const_value 
*const_offset = nir_src_as_const_value(*offset); if (const_offset) { - intrin->const_index[0] += const_offset->u[0]; + intrin->const_index[0] += const_offset->u32[0]; b->cursor = nir_before_instr(&intrin->instr); nir_instr_rewrite_src(&intrin->instr, offset, nir_src_for_ssa(nir_imm_int(b, 0))); @@ -175,7 +175,7 @@ remap_patch_urb_offsets(nir_block *block, void *closure) if (vertex) { nir_const_value *const_vertex = nir_src_as_const_value(*vertex); if (const_vertex) { - intrin->const_index[0] += const_vertex->u[0] * + intrin->const_index[0] += const_vertex->u32[0] * state->vue_map->num_per_vertex_slots; } else { state->b.cursor = nir_before_instr(&intrin->instr); @@ -623,12 +623,24 @@ brw_type_for_nir_type(nir_alu_type type) { switch (type) { case nir_type_uint: + case nir_type_uint32: return BRW_REGISTER_TYPE_UD; case nir_type_bool: case nir_type_int: + case nir_type_bool32: + case nir_type_int32: return BRW_REGISTER_TYPE_D; case nir_type_float: + case nir_type_float32: return BRW_REGISTER_TYPE_F; + case nir_type_float64: + return BRW_REGISTER_TYPE_DF; + case nir_type_int64: + case nir_type_uint64: + /* TODO we should only see these in moves, so for now it's ok, but when + * we add actual 64-bit integer support we should fix this. 
+ */ + return BRW_REGISTER_TYPE_DF; default: unreachable("unknown type"); } @@ -644,12 +656,18 @@ brw_glsl_base_type_for_nir_type(nir_alu_type type) { switch (type) { case nir_type_float: + case nir_type_float32: return GLSL_TYPE_FLOAT; + case nir_type_float64: + return GLSL_TYPE_DOUBLE; + case nir_type_int: + case nir_type_int32: return GLSL_TYPE_INT; case nir_type_uint: + case nir_type_uint32: return GLSL_TYPE_UINT; default: diff --git a/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c b/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c index 56e15ef..22eeb1a 100644 --- a/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c +++ b/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c @@ -165,7 +165,7 @@ analyze_boolean_resolves_block(nir_block *block, void *void_state) } default: - if (nir_op_infos[alu->op].output_type == nir_type_bool) { + if (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) == nir_type_bool) { /* This instructions will turn into a CMP when we actually emit * them so the result will have to be resolved before it can be * used. @@ -225,7 +225,7 @@ analyze_boolean_resolves_block(nir_block *block, void *void_state) * have to worry about resolving them. */ instr->pass_flags &= ~BRW_NIR_BOOLEAN_MASK; - if (load->value.u[0] == NIR_TRUE || load->value.u[0] == NIR_FALSE) { + if (load->value.u32[0] == NIR_TRUE || load->value.u32[0] == NIR_FALSE) { instr->pass_flags |= BRW_NIR_BOOLEAN_NO_RESOLVE; } else { instr->pass_flags |= BRW_NIR_NON_BOOLEAN; diff --git a/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c b/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c index 5ff2cba..6e8b1f9 100644 --- a/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c +++ b/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c @@ -168,7 +168,9 @@ brw_nir_opt_peephole_ffma_block(nir_block *block, void *void_state) if (add->op != nir_op_fadd) continue; - /* TODO: Maybe bail if this expression is considered "precise"? 
*/ + assert(add->dest.dest.is_ssa); + if (add->exact) + continue; assert(add->src[0].src.is_ssa && add->src[1].src.is_ssa); @@ -201,6 +203,8 @@ brw_nir_opt_peephole_ffma_block(nir_block *block, void *void_state) if (mul == NULL) continue; + unsigned bit_size = add->dest.dest.ssa.bit_size; + nir_ssa_def *mul_src[2]; mul_src[0] = mul->src[0].src.ssa; mul_src[1] = mul->src[1].src.ssa; @@ -220,7 +224,7 @@ brw_nir_opt_peephole_ffma_block(nir_block *block, void *void_state) nir_op_fabs); abs->src[0].src = nir_src_for_ssa(mul_src[i]); nir_ssa_dest_init(&abs->instr, &abs->dest.dest, - mul_src[i]->num_components, NULL); + mul_src[i]->num_components, bit_size, NULL); abs->dest.write_mask = (1 << mul_src[i]->num_components) - 1; nir_instr_insert_before(&add->instr, &abs->instr); mul_src[i] = &abs->dest.dest.ssa; @@ -232,7 +236,7 @@ brw_nir_opt_peephole_ffma_block(nir_block *block, void *void_state) nir_op_fneg); neg->src[0].src = nir_src_for_ssa(mul_src[0]); nir_ssa_dest_init(&neg->instr, &neg->dest.dest, - mul_src[0]->num_components, NULL); + mul_src[0]->num_components, bit_size, NULL); neg->dest.write_mask = (1 << mul_src[0]->num_components) - 1; nir_instr_insert_before(&add->instr, &neg->instr); mul_src[0] = &neg->dest.dest.ssa; @@ -253,6 +257,7 @@ brw_nir_opt_peephole_ffma_block(nir_block *block, void *void_state) nir_ssa_dest_init(&ffma->instr, &ffma->dest.dest, add->dest.dest.ssa.num_components, + bit_size, add->dest.dest.ssa.name); nir_ssa_def_rewrite_uses(&add->dest.dest.ssa, nir_src_for_ssa(&ffma->dest.dest.ssa)); diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 6b85eac..783af78 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -34,6 +34,7 @@ #define BRW_STATE_H #include "brw_context.h" +#include "brw_defines.h" #ifdef __cplusplus extern "C" { @@ -406,6 +407,59 @@ void gen7_reset_hw_bt_pool_offsets(struct brw_context *brw); void gen7_restore_default_l3_config(struct 
brw_context *brw); +static inline bool +is_drawing_points(const struct brw_context *brw) +{ + /* Determine if the primitives *reaching the SF* are points */ + /* _NEW_POLYGON */ + if (brw->ctx.Polygon.FrontMode == GL_POINT || + brw->ctx.Polygon.BackMode == GL_POINT) { + return true; + } + + if (brw->geometry_program) { + /* BRW_NEW_GEOMETRY_PROGRAM */ + return brw->geometry_program->OutputType == GL_POINTS; + } else if (brw->tes.prog_data) { + /* BRW_NEW_TES_PROG_DATA */ + return brw->tes.prog_data->output_topology == + BRW_TESS_OUTPUT_TOPOLOGY_POINT; + } else { + /* BRW_NEW_PRIMITIVE */ + return brw->primitive == _3DPRIM_POINTLIST; + } +} + +static inline bool +is_drawing_lines(const struct brw_context *brw) +{ + /* Determine if the primitives *reaching the SF* are lines */ + /* _NEW_POLYGON */ + if (brw->ctx.Polygon.FrontMode == GL_LINE || + brw->ctx.Polygon.BackMode == GL_LINE) { + return true; + } + + if (brw->geometry_program) { + /* BRW_NEW_GEOMETRY_PROGRAM */ + return brw->geometry_program->OutputType == GL_LINE_STRIP; + } else if (brw->tes.prog_data) { + /* BRW_NEW_TES_PROG_DATA */ + return brw->tes.prog_data->output_topology == + BRW_TESS_OUTPUT_TOPOLOGY_LINE; + } else { + /* BRW_NEW_PRIMITIVE */ + switch (brw->primitive) { + case _3DPRIM_LINELIST: + case _3DPRIM_LINESTRIP: + case _3DPRIM_LINELOOP: + return true; + } + } + return false; +} + + #ifdef __cplusplus } #endif diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 4666788..b7b0a86 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -423,11 +423,12 @@ static void gen7_dump_sampler_state(struct brw_context *brw, GET_BITS(samp[1], 15, 8) ); batch_out(brw, name, offset, i+2, "Border Color\n"); /* FINISHME: gen8+ */ - batch_out(brw, name, offset, i+3, "Max aniso: RATIO %d:1, TC[XYZ] Address Control: %s|%s|%s\n", + batch_out(brw, name, offset, i+3, "Max aniso: RATIO %d:1, TC[XYZ] Address
Control: %s|%s|%s, %snormalized coords\n", (GET_FIELD(samp[3], BRW_SAMPLER_MAX_ANISOTROPY) + 1) * 2, sampler_addr_mode[GET_FIELD(samp[3], BRW_SAMPLER_TCX_WRAP_MODE)], sampler_addr_mode[GET_FIELD(samp[3], BRW_SAMPLER_TCY_WRAP_MODE)], - sampler_addr_mode[GET_FIELD(samp[3], BRW_SAMPLER_TCZ_WRAP_MODE)] + sampler_addr_mode[GET_FIELD(samp[3], BRW_SAMPLER_TCZ_WRAP_MODE)], + (samp[3] & GEN7_SAMPLER_NON_NORMALIZED_COORDINATES) ? "non-" : "" ); samp += 4; diff --git a/src/mesa/drivers/dri/i965/brw_util.h b/src/mesa/drivers/dri/i965/brw_util.h index 1f27e98..3e9a6ee 100644 --- a/src/mesa/drivers/dri/i965/brw_util.h +++ b/src/mesa/drivers/dri/i965/brw_util.h @@ -34,6 +34,7 @@ #define BRW_UTIL_H #include "brw_context.h" +#include "main/framebuffer.h" extern GLuint brw_translate_blend_factor( GLenum factor ); extern GLuint brw_translate_blend_equation( GLenum mode ); @@ -49,13 +50,13 @@ brw_get_line_width(struct brw_context *brw) * implementation-dependent maximum non-antialiased line width." */ float line_width = - CLAMP(!brw->ctx.Multisample._Enabled && !brw->ctx.Line.SmoothFlag + CLAMP(!_mesa_is_multisample_enabled(&brw->ctx) && !brw->ctx.Line.SmoothFlag ? 
roundf(brw->ctx.Line.Width) : brw->ctx.Line.Width, 0.0f, brw->ctx.Const.MaxLineWidth); uint32_t line_width_u3_7 = U_FIXED(line_width, 7); /* Line width of 0 is not allowed when MSAA enabled */ - if (brw->ctx.Multisample._Enabled) { + if (_mesa_is_multisample_enabled(&brw->ctx)) { if (line_width_u3_7 == 0) line_width_u3_7 = 1; } else if (brw->ctx.Line.SmoothFlag && line_width < 1.5f) { diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 65e57ba..0025343 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -693,17 +693,6 @@ vec4_visitor::opt_algebraic() break; } break; - case SHADER_OPCODE_RCP: { - vec4_instruction *prev = (vec4_instruction *)inst->prev; - if (prev->opcode == SHADER_OPCODE_SQRT) { - if (inst->src[0].equals(src_reg(prev->dst))) { - inst->opcode = SHADER_OPCODE_RSQ; - inst->src[0] = prev->src[0]; - progress = true; - } - } - break; - } case SHADER_OPCODE_BROADCAST: if (is_uniform(inst->src[0]) || inst->src[1].is_zero()) { diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp index d9c048e..e915aee 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp @@ -70,8 +70,8 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) /* Make up a type...we have no way of knowing... 
*/ const glsl_type *const type = glsl_type::ivec(instr->num_components); - src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u[0] + - instr->const_index[0] + offset->u[0], + src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u32[0] + + instr->const_index[0] + offset->u32[0], type); /* gl_PointSize is passed in the .w component of the VUE header */ if (instr->const_index[0] == VARYING_SLOT_PSIZ) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 4686f20..7c06f92 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -343,7 +343,7 @@ vec4_visitor::get_indirect_offset(nir_intrinsic_instr *instr) * add_const_offset_to_base() will fold other constant offsets * into instr->const_index[0]. */ - assert(const_value->u[0] == 0); + assert(const_value->u32[0] == 0); return src_reg(); } @@ -369,13 +369,13 @@ vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr) continue; for (unsigned j = i; j < instr->def.num_components; j++) { - if (instr->value.u[i] == instr->value.u[j]) { + if (instr->value.u32[i] == instr->value.u32[j]) { writemask |= 1 << j; } } reg.writemask = writemask; - emit(MOV(reg, brw_imm_d(instr->value.i[i]))); + emit(MOV(reg, brw_imm_d(instr->value.i32[i]))); remaining &= ~writemask; } @@ -400,7 +400,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) /* We set EmitNoIndirectInput for VS */ assert(const_offset); - src = src_reg(ATTR, instr->const_index[0] + const_offset->u[0], + src = src_reg(ATTR, instr->const_index[0] + const_offset->u32[0], glsl_type::uvec4_type); dest = get_nir_dest(instr->dest, src.type); @@ -414,7 +414,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); assert(const_offset); - int varying = instr->const_index[0] + const_offset->u[0]; + int varying = instr->const_index[0] + const_offset->u32[0]; src = 
get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F, instr->num_components); @@ -425,7 +425,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) case nir_intrinsic_get_buffer_size: { nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]); - unsigned ssbo_index = const_uniform_block ? const_uniform_block->u[0] : 0; + unsigned ssbo_index = const_uniform_block ? const_uniform_block->u32[0] : 0; const unsigned index = prog_data->base.binding_table.ssbo_start + ssbo_index; @@ -458,7 +458,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) nir_src_as_const_value(instr->src[1]); if (const_uniform_block) { unsigned index = prog_data->base.binding_table.ssbo_start + - const_uniform_block->u[0]; + const_uniform_block->u32[0]; surf_index = brw_imm_ud(index); brw_mark_surface_used(&prog_data->base, index); } else { @@ -476,7 +476,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) src_reg offset_reg; nir_const_value *const_offset = nir_src_as_const_value(instr->src[2]); if (const_offset) { - offset_reg = brw_imm_ud(const_offset->u[0]); + offset_reg = brw_imm_ud(const_offset->u32[0]); } else { offset_reg = get_nir_src(instr->src[2], 1); } @@ -596,7 +596,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) src_reg surf_index; if (const_uniform_block) { unsigned index = prog_data->base.binding_table.ssbo_start + - const_uniform_block->u[0]; + const_uniform_block->u32[0]; surf_index = brw_imm_ud(index); brw_mark_surface_used(&prog_data->base, index); @@ -617,7 +617,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) src_reg offset_reg; nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); if (const_offset) { - offset_reg = brw_imm_ud(const_offset->u[0]); + offset_reg = brw_imm_ud(const_offset->u32[0]); } else { offset_reg = get_nir_src(instr->src[1], 1); } @@ -697,8 +697,8 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) nir_const_value *const_offset = 
nir_src_as_const_value(instr->src[0]); if (const_offset) { /* Offsets are in bytes but they should always be multiples of 16 */ - assert(const_offset->u[0] % 16 == 0); - src.reg_offset = const_offset->u[0] / 16; + assert(const_offset->u32[0] % 16 == 0); + src.reg_offset = const_offset->u32[0] / 16; emit(MOV(dest, src)); } else { @@ -760,7 +760,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) * as an immediate. */ const unsigned index = prog_data->base.binding_table.ubo_start + - const_block_index->u[0]; + const_block_index->u32[0]; surf_index = brw_imm_ud(index); brw_mark_surface_used(&prog_data->base, index); } else { @@ -785,7 +785,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) src_reg offset; nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); if (const_offset) { - offset = brw_imm_ud(const_offset->u[0] & ~15); + offset = brw_imm_ud(const_offset->u32[0] & ~15); } else { offset = get_nir_src(instr->src[1], nir_type_int, 1); } @@ -800,10 +800,10 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) packed_consts.swizzle = brw_swizzle_for_size(instr->num_components); if (const_offset) { - packed_consts.swizzle += BRW_SWIZZLE4(const_offset->u[0] % 16 / 4, - const_offset->u[0] % 16 / 4, - const_offset->u[0] % 16 / 4, - const_offset->u[0] % 16 / 4); + packed_consts.swizzle += BRW_SWIZZLE4(const_offset->u32[0] % 16 / 4, + const_offset->u32[0] % 16 / 4, + const_offset->u32[0] % 16 / 4, + const_offset->u32[0] % 16 / 4); } emit(MOV(dest, packed_consts)); @@ -845,7 +845,7 @@ vec4_visitor::nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr) nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]); if (const_surface) { unsigned surf_index = prog_data->base.binding_table.ssbo_start + - const_surface->u[0]; + const_surface->u32[0]; surface = brw_imm_ud(surf_index); brw_mark_surface_used(&prog_data->base, surf_index); } else { @@ -1042,12 +1042,12 @@ vec4_visitor::nir_emit_alu(nir_alu_instr 
*instr) * operand. If we can determine that one of the args is in the low * 16 bits, though, we can just emit a single MUL. */ - if (value0 && value0->u[0] < (1 << 16)) { + if (value0 && value0->u32[0] < (1 << 16)) { if (devinfo->gen < 7) emit(MUL(dst, op[0], op[1])); else emit(MUL(dst, op[1], op[0])); - } else if (value1 && value1->u[0] < (1 << 16)) { + } else if (value1 && value1->u32[0] < (1 << 16)) { if (devinfo->gen < 7) emit(MUL(dst, op[1], op[0])); else @@ -1793,7 +1793,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr) nir_const_value *const_offset = nir_src_as_const_value(instr->src[i].src); if (const_offset) { - constant_offset = brw_texture_offset(const_offset->i, 3); + constant_offset = brw_texture_offset(const_offset->i32, 3); } else { offset_value = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 2); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp index f344eaa..0ce48b8 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp @@ -353,7 +353,7 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) nir_const_value *vertex_const = nir_src_as_const_value(instr->src[0]); src_reg vertex_index = - vertex_const ? src_reg(brw_imm_ud(vertex_const->u[0])) + vertex_const ? src_reg(brw_imm_ud(vertex_const->u32[0])) : get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1); dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); @@ -400,6 +400,7 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) } } else if (imm_offset == 1 && indirect_offset.file == BAD_FILE) { dst.type = BRW_REGISTER_TYPE_F; + unsigned swiz = BRW_SWIZZLE_WZYX; /* This is a read of gl_TessLevelOuter[], which lives in the * high 4 DWords of the Patch URB header, in reverse order. 
@@ -412,6 +413,8 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) dst.writemask = WRITEMASK_XYZ; break; case GL_ISOLINES: + /* Isolines are not reversed; swizzle .zw -> .xy */ + swiz = BRW_SWIZZLE_ZWZW; dst.writemask = WRITEMASK_XY; return; default: @@ -420,7 +423,7 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) dst_reg tmp(this, glsl_type::vec4_type); emit_output_urb_read(tmp, 1, src_reg()); - emit(MOV(dst, swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX))); + emit(MOV(dst, swizzle(src_reg(tmp), swiz))); } else { emit_output_urb_read(dst, imm_offset, indirect_offset); } @@ -473,8 +476,15 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) * Patch URB Header at DWords 4-7. However, it's reversed, so * instead of .xyzw we have .wzyx. */ - swiz = BRW_SWIZZLE_WZYX; - mask = writemask_for_backwards_vector(mask); + if (key->tes_primitive_mode == GL_ISOLINES) { + /* Isolines .xy should be stored in .zw, in order. */ + swiz = BRW_SWIZZLE4(0, 0, 0, 1); + mask <<= 2; + } else { + /* Other domains are reversed; store .wzyx instead of .xyzw. 
*/ + swiz = BRW_SWIZZLE_WZYX; + mask = writemask_for_backwards_vector(mask); + } } emit_urb_write(swizzle(value, swiz), mask, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp index e3c23f1..7ba494f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp @@ -149,9 +149,15 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) src_reg(brw_vec8_grf(1, 0)))); break; case nir_intrinsic_load_tess_level_outer: - emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), - swizzle(src_reg(ATTR, 1, glsl_type::vec4_type), - BRW_SWIZZLE_WZYX))); + if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) { + emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), + swizzle(src_reg(ATTR, 1, glsl_type::vec4_type), + BRW_SWIZZLE_ZWZW))); + } else { + emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), + swizzle(src_reg(ATTR, 1, glsl_type::vec4_type), + BRW_SWIZZLE_WZYX))); + } break; case nir_intrinsic_load_tess_level_inner: if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) { diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c index cee139b..f5a7d4d 100644 --- a/src/mesa/drivers/dri/i965/gen6_cc.c +++ b/src/mesa/drivers/dri/i965/gen6_cc.c @@ -198,14 +198,14 @@ gen6_upload_blend_state(struct brw_context *brw) if(!is_buffer_zero_integer_format) { /* _NEW_MULTISAMPLE */ blend[b].blend1.alpha_to_coverage = - ctx->Multisample._Enabled && ctx->Multisample.SampleAlphaToCoverage; + _mesa_is_multisample_enabled(ctx) && ctx->Multisample.SampleAlphaToCoverage; /* From SandyBridge PRM, volume 2 Part 1, section 8.2.3, BLEND_STATE: * DWord 1, Bit 30 (AlphaToOne Enable): * "If Dual Source Blending is enabled, this bit must be disabled" */ WARN_ONCE(ctx->Color.Blend[b]._UsesDualSrc && - ctx->Multisample._Enabled && + _mesa_is_multisample_enabled(ctx) && ctx->Multisample.SampleAlphaToOne, "HW workaround: disabling alpha to one with dual src " 
"blending\n"); @@ -213,7 +213,7 @@ gen6_upload_blend_state(struct brw_context *brw) blend[b].blend1.alpha_to_one = false; else blend[b].blend1.alpha_to_one = - ctx->Multisample._Enabled && ctx->Multisample.SampleAlphaToOne; + _mesa_is_multisample_enabled(ctx) && ctx->Multisample.SampleAlphaToOne; blend[b].blend1.alpha_to_coverage_dither = (brw->gen >= 7); } diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index 9a29366..004eceb 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -174,12 +174,14 @@ upload_clip_state(struct brw_context *brw) else enable = GEN6_CLIP_ENABLE; + if (!is_drawing_points(brw) && !is_drawing_lines(brw)) + dw2 |= GEN6_CLIP_XY_TEST; + BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); OUT_BATCH(dw1); OUT_BATCH(enable | GEN6_CLIP_MODE_NORMAL | - GEN6_CLIP_XY_TEST | dw2); OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT | U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT | @@ -195,7 +197,9 @@ const struct brw_tracked_state gen6_clip_state = { _NEW_TRANSFORM, .brw = BRW_NEW_CONTEXT | BRW_NEW_FS_PROG_DATA | + BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_META_IN_PROGRESS | + BRW_NEW_PRIMITIVE | BRW_NEW_RASTERIZER_DISCARD, }, .emit = upload_clip_state, @@ -209,7 +213,9 @@ const struct brw_tracked_state gen7_clip_state = { _NEW_TRANSFORM, .brw = BRW_NEW_CONTEXT | BRW_NEW_FS_PROG_DATA | + BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_META_IN_PROGRESS | + BRW_NEW_PRIMITIVE | BRW_NEW_RASTERIZER_DISCARD, }, .emit = upload_clip_state, diff --git a/src/mesa/drivers/dri/i965/gen6_multisample_state.c b/src/mesa/drivers/dri/i965/gen6_multisample_state.c index 8eb620d..fcd313a 100644 --- a/src/mesa/drivers/dri/i965/gen6_multisample_state.c +++ b/src/mesa/drivers/dri/i965/gen6_multisample_state.c @@ -171,7 +171,7 @@ gen6_determine_sample_mask(struct brw_context *brw) /* BRW_NEW_NUM_SAMPLES */ unsigned num_samples = brw->num_samples; - if 
(ctx->Multisample._Enabled) { + if (_mesa_is_multisample_enabled(ctx)) { if (ctx->Multisample.SampleCoverage) { coverage = ctx->Multisample.SampleCoverageValue; coverage_invert = ctx->Multisample.SampleCoverageInvert; diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c index 17b4a7f..a206732 100644 --- a/src/mesa/drivers/dri/i965/gen6_scissor_state.c +++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c @@ -58,10 +58,10 @@ gen6_upload_scissor_state(struct brw_context *brw) for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) { int bbox[4]; - bbox[0] = 0; - bbox[1] = fb_width; - bbox[2] = 0; - bbox[3] = fb_height; + bbox[0] = MAX2(ctx->ViewportArray[i].X, 0); + bbox[1] = MIN2(bbox[0] + ctx->ViewportArray[i].Width, fb_width); + bbox[2] = MAX2(ctx->ViewportArray[i].Y, 0); + bbox[3] = MIN2(bbox[2] + ctx->ViewportArray[i].Height, fb_height); _mesa_intersect_scissor_bounding_box(ctx, i, bbox); if (bbox[0] == bbox[1] || bbox[2] == bbox[3]) { diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 2634e6b..42f9a5c 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -147,26 +147,6 @@ get_attr_override(const struct brw_vue_map *vue_map, int urb_entry_read_offset, } -static bool -is_drawing_points(const struct brw_context *brw) -{ - /* Determine if the primitives *reaching the SF* are points */ - /* _NEW_POLYGON */ - if (brw->ctx.Polygon.FrontMode == GL_POINT || - brw->ctx.Polygon.BackMode == GL_POINT) { - return true; - } - - if (brw->geometry_program) { - /* BRW_NEW_GEOMETRY_PROGRAM */ - return brw->geometry_program->OutputType == GL_POINTS; - } else { - /* BRW_NEW_PRIMITIVE */ - return brw->primitive == _3DPRIM_POINTLIST; - } -} - - /** * Create the mapping from the FS inputs we produce to the previous pipeline * stage (GS or VS) outputs they source from. 
@@ -216,8 +196,10 @@ calculate_attr_overrides(const struct brw_context *brw, * This is not required on Haswell, as the hardware ignores this state * when drawing non-points -- although we do still need to be careful to * correctly set the attr overrides. + * + * _NEW_POLYGON + * BRW_NEW_PRIMITIVE | BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_TES_PROG_DATA */ - /* BRW_NEW_PRIMITIVE | BRW_NEW_GEOMETRY_PROGRAM */ bool drawing_points = is_drawing_points(brw); /* Initialize all the attr_overrides to 0. In the loop below we'll modify @@ -369,8 +351,9 @@ upload_sf_state(struct brw_context *brw) unreachable("not reached"); } - /* _NEW_SCISSOR */ - if (ctx->Scissor.EnableFlags) + /* _NEW_SCISSOR _NEW_POLYGON BRW_NEW_GEOMETRY_PROGRAM BRW_NEW_PRIMITIVE */ + if (ctx->Scissor.EnableFlags || + is_drawing_points(brw) || is_drawing_lines(brw)) dw3 |= GEN6_SF_SCISSOR_ENABLE; /* _NEW_POLYGON */ @@ -484,6 +467,7 @@ const struct brw_tracked_state gen6_sf_state = { BRW_NEW_FS_PROG_DATA | BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_PRIMITIVE | + BRW_NEW_TES_PROG_DATA | BRW_NEW_VUE_MAP_GEOM_OUT, }, .emit = upload_sf_state, diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index b1f13ac..7c98c73 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -188,8 +188,9 @@ upload_sf_state(struct brw_context *brw) dw2 |= GEN6_SF_CULL_NONE; } - /* _NEW_SCISSOR */ - if (ctx->Scissor.EnableFlags) + /* _NEW_SCISSOR _NEW_POLYGON BRW_NEW_GEOMETRY_PROGRAM BRW_NEW_PRIMITIVE */ + if (ctx->Scissor.EnableFlags || + is_drawing_points(brw) || is_drawing_lines(brw)) dw2 |= GEN6_SF_SCISSOR_ENABLE; /* _NEW_LINE */ @@ -254,7 +255,8 @@ const struct brw_tracked_state gen7_sf_state = { _NEW_POLYGON | _NEW_PROGRAM | _NEW_SCISSOR, - .brw = BRW_NEW_CONTEXT, + .brw = BRW_NEW_CONTEXT | + BRW_NEW_PRIMITIVE, }, .emit = upload_sf_state, }; diff --git a/src/mesa/drivers/dri/i965/gen8_blend_state.c b/src/mesa/drivers/dri/i965/gen8_blend_state.c 
index 786c79a..63186bd 100644 --- a/src/mesa/drivers/dri/i965/gen8_blend_state.c +++ b/src/mesa/drivers/dri/i965/gen8_blend_state.c @@ -65,7 +65,7 @@ gen8_upload_blend_state(struct brw_context *brw) if (rb_zero_type != GL_INT && rb_zero_type != GL_UNSIGNED_INT) { /* _NEW_MULTISAMPLE */ - if (ctx->Multisample._Enabled) { + if (_mesa_is_multisample_enabled(ctx)) { if (ctx->Multisample.SampleAlphaToCoverage) { blend[0] |= GEN8_BLEND_ALPHA_TO_COVERAGE_ENABLE; blend[0] |= GEN8_BLEND_ALPHA_TO_COVERAGE_DITHER_ENABLE; @@ -183,7 +183,7 @@ gen8_upload_blend_state(struct brw_context *brw) * "If Dual Source Blending is enabled, this bit must be disabled." */ WARN_ONCE(ctx->Color.Blend[i]._UsesDualSrc && - ctx->Multisample._Enabled && + _mesa_is_multisample_enabled(ctx) && ctx->Multisample.SampleAlphaToOne, "HW workaround: disabling alpha to one with dual src " "blending\n"); @@ -226,7 +226,7 @@ gen8_upload_ps_blend(struct brw_context *brw) dw1 |= GEN8_PS_BLEND_ALPHA_TEST_ENABLE; /* _NEW_MULTISAMPLE */ - if (ctx->Multisample._Enabled && ctx->Multisample.SampleAlphaToCoverage) + if (_mesa_is_multisample_enabled(ctx) && ctx->Multisample.SampleAlphaToCoverage) dw1 |= GEN8_PS_BLEND_ALPHA_TO_COVERAGE_ENABLE; /* Used for implementing the following bit of GL_EXT_texture_integer: diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c index 93100a0..8aaa1a8 100644 --- a/src/mesa/drivers/dri/i965/gen8_depth_state.c +++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c @@ -29,6 +29,7 @@ #include "brw_state.h" #include "brw_defines.h" #include "brw_wm.h" +#include "main/framebuffer.h" /** * Helper function to emit depth related command packets. 
@@ -303,7 +304,7 @@ pma_fix_enable(const struct brw_context *brw) const bool kill_pixel = brw->wm.prog_data->uses_kill || brw->wm.prog_data->uses_omask || - (ctx->Multisample._Enabled && ctx->Multisample.SampleAlphaToCoverage) || + (_mesa_is_multisample_enabled(ctx) && ctx->Multisample.SampleAlphaToCoverage) || ctx->Color.AlphaEnabled; /* The big formula in CACHE_MODE_1::NP PMA FIX ENABLE. */ diff --git a/src/mesa/drivers/dri/i965/gen8_sf_state.c b/src/mesa/drivers/dri/i965/gen8_sf_state.c index 8b6f31f..2ac21f7 100644 --- a/src/mesa/drivers/dri/i965/gen8_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen8_sf_state.c @@ -178,7 +178,7 @@ upload_sf(struct brw_context *brw) dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH; /* _NEW_POINT | _NEW_MULTISAMPLE */ - if ((ctx->Point.SmoothFlag || ctx->Multisample._Enabled) && + if ((ctx->Point.SmoothFlag || _mesa_is_multisample_enabled(ctx)) && !ctx->Point.PointSprite) { dw3 |= GEN8_SF_SMOOTH_POINT_ENABLE; } @@ -249,7 +249,7 @@ upload_raster(struct brw_context *brw) if (ctx->Point.SmoothFlag) dw1 |= GEN8_RASTER_SMOOTH_POINT_ENABLE; - if (ctx->Multisample._Enabled) + if (_mesa_is_multisample_enabled(ctx)) dw1 |= GEN8_RASTER_API_MULTISAMPLE_ENABLE; if (ctx->Polygon.OffsetFill) diff --git a/src/mesa/drivers/dri/i965/intel_copy_image.c b/src/mesa/drivers/dri/i965/intel_copy_image.c index 08b7623..ccb82b6 100644 --- a/src/mesa/drivers/dri/i965/intel_copy_image.c +++ b/src/mesa/drivers/dri/i965/intel_copy_image.c @@ -140,9 +140,9 @@ copy_image_with_memcpy(struct brw_context *brw, _mesa_get_format_block_size(src_mt->format, &src_bw, &src_bh); assert(src_width % src_bw == 0); - assert(src_height % src_bw == 0); + assert(src_height % src_bh == 0); assert(src_x % src_bw == 0); - assert(src_y % src_bw == 0); + assert(src_y % src_bh == 0); /* If we are on the same miptree, same level, and same slice, then * intel_miptree_map won't let us map it twice. 
We have to do things a @@ -153,7 +153,7 @@ copy_image_with_memcpy(struct brw_context *brw, if (same_slice) { assert(dst_x % src_bw == 0); - assert(dst_y % src_bw == 0); + assert(dst_y % src_bh == 0); map_x1 = MIN2(src_x, dst_x); map_y1 = MIN2(src_y, dst_y); diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 6c233d8..9e84abb 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -2172,7 +2172,8 @@ intel_miptree_updownsample(struct brw_context *brw, src->logical_width0, src->logical_height0, 0, 0, dst->logical_width0, dst->logical_height0, - GL_NEAREST, false, false /*mirror x, y*/); + GL_NEAREST, false, false /*mirror x, y*/, + false, false); } else if (src->format == MESA_FORMAT_S_UINT8) { brw_meta_stencil_updownsample(brw, src, dst); } else { @@ -2194,7 +2195,8 @@ intel_miptree_updownsample(struct brw_context *brw, src->logical_width0, src->logical_height0, 0, 0, dst->logical_width0, dst->logical_height0, - GL_NEAREST, false, false /*mirror x, y*/); + GL_NEAREST, false, false /*mirror x, y*/, + false, false /* decode/encode srgb */); } } |