summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorJason Ekstrand <jason.ekstrand@intel.com>2016-03-24 17:30:14 -0700
committerJason Ekstrand <jason.ekstrand@intel.com>2016-03-24 17:30:14 -0700
commit2c3f95d6aaab38cd66dd3dee1b089d5c91928eea (patch)
tree43423daf0da9c45f4054c5763a87f33dbfc7c4d5 /src
parenta5dc3c0f02aa523d1d3d123b62b9a187821079fe (diff)
parent22b343a8ec75a08dae6a6badbb261eab8437475d (diff)
downloadexternal_mesa3d-2c3f95d6aaab38cd66dd3dee1b089d5c91928eea.zip
external_mesa3d-2c3f95d6aaab38cd66dd3dee1b089d5c91928eea.tar.gz
external_mesa3d-2c3f95d6aaab38cd66dd3dee1b089d5c91928eea.tar.bz2
Merge remote-tracking branch 'public/master' into vulkan
Diffstat (limited to 'src')
-rw-r--r--src/compiler/Makefile.sources1
-rw-r--r--src/compiler/glsl/Makefile.sources1
-rw-r--r--src/compiler/glsl/ast_to_hir.cpp4
-rw-r--r--src/compiler/glsl/glsl_parser_extras.cpp1
-rw-r--r--src/compiler/glsl/ir.h7
-rw-r--r--src/compiler/glsl/ir_optimization.h4
-rw-r--r--src/compiler/glsl/link_varyings.cpp210
-rw-r--r--src/compiler/glsl/lower_packed_varyings.cpp34
-rw-r--r--src/compiler/glsl/opt_algebraic.cpp19
-rw-r--r--src/compiler/glsl/opt_rebalance_tree.cpp16
-rw-r--r--src/compiler/glsl/propagate_invariance.cpp125
-rw-r--r--src/compiler/nir/glsl_to_nir.cpp25
-rw-r--r--src/compiler/nir/nir.c33
-rw-r--r--src/compiler/nir/nir.h77
-rw-r--r--src/compiler/nir/nir_algebraic.py31
-rw-r--r--src/compiler/nir/nir_builder.h62
-rw-r--r--src/compiler/nir/nir_clone.c20
-rw-r--r--src/compiler/nir/nir_constant_expressions.h2
-rw-r--r--src/compiler/nir/nir_constant_expressions.py246
-rw-r--r--src/compiler/nir/nir_from_ssa.c6
-rw-r--r--src/compiler/nir/nir_gs_count_vertices.c4
-rw-r--r--src/compiler/nir/nir_instr_set.c24
-rw-r--r--src/compiler/nir/nir_lower_alu_to_scalar.c11
-rw-r--r--src/compiler/nir/nir_lower_atomics.c12
-rw-r--r--src/compiler/nir/nir_lower_clip.c2
-rw-r--r--src/compiler/nir/nir_lower_indirect_derefs.c6
-rw-r--r--src/compiler/nir/nir_lower_io.c6
-rw-r--r--src/compiler/nir/nir_lower_load_const_to_scalar.c2
-rw-r--r--src/compiler/nir/nir_lower_locals_to_regs.c9
-rw-r--r--src/compiler/nir/nir_lower_phis_to_scalar.c10
-rw-r--r--src/compiler/nir/nir_lower_system_values.c6
-rw-r--r--src/compiler/nir/nir_lower_tex.c8
-rw-r--r--src/compiler/nir/nir_lower_two_sided_color.c2
-rw-r--r--src/compiler/nir/nir_lower_var_copies.c5
-rw-r--r--src/compiler/nir/nir_lower_vars_to_ssa.c5
-rw-r--r--src/compiler/nir/nir_opcodes.py138
-rw-r--r--src/compiler/nir/nir_opt_algebraic.py151
-rw-r--r--src/compiler/nir/nir_opt_constant_folding.c31
-rw-r--r--src/compiler/nir/nir_opt_dead_cf.c2
-rw-r--r--src/compiler/nir/nir_opt_peephole_select.c3
-rw-r--r--src/compiler/nir/nir_phi_builder.c65
-rw-r--r--src/compiler/nir/nir_phi_builder.h33
-rw-r--r--src/compiler/nir/nir_print.c4
-rw-r--r--src/compiler/nir/nir_repair_ssa.c3
-rw-r--r--src/compiler/nir/nir_search.c254
-rw-r--r--src/compiler/nir/nir_search.h14
-rw-r--r--src/compiler/nir/nir_to_ssa.c8
-rw-r--r--src/compiler/nir/nir_validate.c42
-rw-r--r--src/compiler/nir/spirv/spirv_to_nir.c36
-rw-r--r--src/compiler/nir/spirv/vtn_glsl450.c4
-rw-r--r--src/compiler/nir/spirv/vtn_variables.c11
-rw-r--r--src/compiler/nir_types.h21
-rw-r--r--src/egl/main/eglconfig.c1
-rw-r--r--src/egl/main/egldefines.h7
-rw-r--r--src/egl/main/eglsurface.c6
-rw-r--r--src/gallium/auxiliary/draw/draw_gs.c6
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aaline.c10
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_exec.c6
-rw-r--r--src/gallium/auxiliary/hud/hud_context.c1
-rw-r--r--src/gallium/auxiliary/nir/tgsi_to_nir.c16
-rw-r--r--src/gallium/auxiliary/nir/tgsi_to_nir.h2
-rw-r--r--src/gallium/auxiliary/postprocess/pp_colors.h3
-rw-r--r--src/gallium/auxiliary/postprocess/pp_mlaa.h6
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_build.c12
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.c9
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.h25
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.c25
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.h10
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_strings.c1
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_text.c14
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_transform.h51
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.c44
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.h4
-rw-r--r--src/gallium/auxiliary/util/u_pstipple.c10
-rw-r--r--src/gallium/auxiliary/util/u_simple_shaders.c3
-rw-r--r--src/gallium/docs/source/tgsi.rst8
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c10
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c2
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h3
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp4
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp4
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp93
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp18
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp39
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp63
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp9
-rw-r--r--src/gallium/drivers/nouveau/nouveau_compiler.c2
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_compute.c145
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_context.c34
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_context.h77
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_program.c1
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c10
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_shader_state.c37
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state.c58
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state_validate.c137
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_surface.c40
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_tex.c2
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_vbo.c14
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_winsys.h4
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_compute.c13
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_context.h25
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_program.c16
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_screen.c14
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_screen.h2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_state.c2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c28
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_tex.c9
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c14
-rw-r--r--src/gallium/drivers/r300/r300_context.c2
-rw-r--r--src/gallium/drivers/r300/r300_flush.c6
-rw-r--r--src/gallium/drivers/r300/r300_texture.c6
-rw-r--r--src/gallium/drivers/r600/Makefile.am8
-rw-r--r--src/gallium/drivers/r600/Makefile.sources4
-rw-r--r--src/gallium/drivers/r600/evergreen_compute.c65
-rw-r--r--src/gallium/drivers/r600/evergreen_compute_internal.h4
-rw-r--r--src/gallium/drivers/r600/r600_hw_context.c11
-rw-r--r--src/gallium/drivers/r600/r600_llvm.c943
-rw-r--r--src/gallium/drivers/r600/r600_llvm.h42
-rw-r--r--src/gallium/drivers/r600/r600_pipe.c9
-rw-r--r--src/gallium/drivers/r600/r600_pipe.h9
-rw-r--r--src/gallium/drivers/r600/r600_shader.c280
-rw-r--r--src/gallium/drivers/r600/r600_state_common.c23
-rw-r--r--src/gallium/drivers/r600/sb/sb_expr.cpp8
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.c21
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.h8
-rw-r--r--src/gallium/drivers/radeon/r600_texture.c38
-rw-r--r--src/gallium/drivers/radeon/radeon_llvm.h17
-rw-r--r--src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c3
-rw-r--r--src/gallium/drivers/radeon/radeon_uvd.c4
-rw-r--r--src/gallium/drivers/radeon/radeon_vce.c4
-rw-r--r--src/gallium/drivers/radeon/radeon_winsys.h13
-rw-r--r--src/gallium/drivers/radeonsi/si_blit.c36
-rw-r--r--src/gallium/drivers/radeonsi/si_descriptors.c226
-rw-r--r--src/gallium/drivers/radeonsi/si_hw_context.c3
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.c8
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.h7
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c675
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.h4
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c87
-rw-r--r--src/gallium/drivers/radeonsi/si_state.h19
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c41
-rw-r--r--src/gallium/drivers/svga/svga_context.c1
-rw-r--r--src/gallium/drivers/svga/svga_context.h52
-rw-r--r--src/gallium/drivers/svga/svga_draw.c8
-rw-r--r--src/gallium/drivers/svga/svga_pipe_misc.c7
-rw-r--r--src/gallium/drivers/svga/svga_pipe_query.c50
-rw-r--r--src/gallium/drivers/svga/svga_pipe_streamout.c19
-rw-r--r--src/gallium/drivers/svga/svga_resource_buffer.c2
-rw-r--r--src/gallium/drivers/svga/svga_resource_buffer_upload.c6
-rw-r--r--src/gallium/drivers/svga/svga_resource_texture.c4
-rw-r--r--src/gallium/drivers/svga/svga_screen.c10
-rw-r--r--src/gallium/drivers/svga/svga_shader.c16
-rw-r--r--src/gallium/drivers/svga/svga_shader.h3
-rw-r--r--src/gallium/drivers/svga/svga_state_constants.c21
-rw-r--r--src/gallium/drivers/svga/svga_streamout.h3
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_decl_sm30.c20
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_emit.h2
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_insn.c2
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_vgpu10.c78
-rw-r--r--src/gallium/drivers/svga/svga_winsys.h4
-rw-r--r--src/gallium/drivers/swr/swr_context.cpp16
-rw-r--r--src/gallium/drivers/swr/swr_resource.h18
-rw-r--r--src/gallium/drivers/swr/swr_screen.cpp8
-rw-r--r--src/gallium/drivers/swr/swr_screen.h1
-rw-r--r--src/gallium/drivers/swr/swr_state.cpp10
-rw-r--r--src/gallium/drivers/vc4/vc4_nir_lower_blend.c4
-rw-r--r--src/gallium/drivers/vc4/vc4_nir_lower_io.c12
-rw-r--r--src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c2
-rw-r--r--src/gallium/drivers/vc4/vc4_program.c16
-rw-r--r--src/gallium/include/pipe/p_defines.h8
-rw-r--r--src/gallium/include/pipe/p_shader_tokens.h22
-rw-r--r--src/gallium/include/state_tracker/drm_driver.h10
-rw-r--r--src/gallium/state_trackers/dri/dri2.c2
-rw-r--r--src/gallium/state_trackers/omx/vid_dec.c21
-rw-r--r--src/gallium/state_trackers/omx/vid_dec.h7
-rw-r--r--src/gallium/state_trackers/omx/vid_dec_h264.c26
-rw-r--r--src/gallium/state_trackers/omx/vid_dec_mpeg12.c6
-rw-r--r--src/gallium/state_trackers/omx/vid_enc.c44
-rw-r--r--src/gallium/tests/graw/quad-tex.c1
-rw-r--r--src/gallium/tests/graw/tex-srgb.c1
-rw-r--r--src/gallium/tests/graw/tex-swizzle.c1
-rw-r--r--src/gallium/winsys/amdgpu/drm/amdgpu_bo.c10
-rw-r--r--src/gallium/winsys/amdgpu/drm/amdgpu_cs.c6
-rw-r--r--src/gallium/winsys/radeon/drm/Makefile.am2
-rw-r--r--src/gallium/winsys/radeon/drm/Makefile.sources4
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_ctx.h205
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_bo.c11
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_cs.c13
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_cs.h5
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_cs_dump.c161
-rw-r--r--src/gallium/winsys/svga/drm/vmw_screen_dri.c1
-rw-r--r--src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c3
-rw-r--r--src/intel/vulkan/anv_meta_blit.c2
-rw-r--r--src/intel/vulkan/anv_meta_blit2d.c2
-rw-r--r--src/intel/vulkan/anv_meta_resolve.c2
-rw-r--r--src/intel/vulkan/anv_nir_apply_dynamic_offsets.c7
-rw-r--r--src/intel/vulkan/anv_nir_apply_pipeline_layout.c2
-rw-r--r--src/mesa/drivers/common/meta_blit.c43
-rw-r--r--src/mesa/drivers/common/meta_copy_image.c3
-rw-r--r--src/mesa/drivers/common/meta_tex_subimage.c6
-rw-r--r--src/mesa/drivers/dri/i965/brw_blorp.cpp7
-rw-r--r--src/mesa/drivers/dri/i965/brw_blorp.h3
-rw-r--r--src/mesa/drivers/dri/i965/brw_blorp_blit.cpp21
-rw-r--r--src/mesa/drivers/dri/i965/brw_compiler.c28
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp11
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp15
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_nir.cpp70
-rw-r--r--src/mesa/drivers/dri/i965/brw_nir.c22
-rw-r--r--src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c11
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h54
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_dump.c5
-rw-r--r--src/mesa/drivers/dri/i965/brw_util.h5
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.cpp11
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp4
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_nir.cpp44
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp18
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_tes.cpp12
-rw-r--r--src/mesa/drivers/dri/i965/gen6_cc.c6
-rw-r--r--src/mesa/drivers/dri/i965/gen6_clip_state.c8
-rw-r--r--src/mesa/drivers/dri/i965/gen6_multisample_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/gen6_scissor_state.c8
-rw-r--r--src/mesa/drivers/dri/i965/gen6_sf_state.c30
-rw-r--r--src/mesa/drivers/dri/i965/gen7_sf_state.c8
-rw-r--r--src/mesa/drivers/dri/i965/gen8_blend_state.c6
-rw-r--r--src/mesa/drivers/dri/i965/gen8_depth_state.c3
-rw-r--r--src/mesa/drivers/dri/i965/gen8_sf_state.c4
-rw-r--r--src/mesa/drivers/dri/i965/intel_copy_image.c6
-rw-r--r--src/mesa/drivers/dri/i965/intel_mipmap_tree.c6
-rw-r--r--src/mesa/main/debug_output.c12
-rw-r--r--src/mesa/main/fbobject.c17
-rw-r--r--src/mesa/main/framebuffer.c19
-rw-r--r--src/mesa/main/framebuffer.h3
-rw-r--r--src/mesa/main/genmipmap.c14
-rw-r--r--src/mesa/main/glformats.c83
-rw-r--r--src/mesa/main/glformats.h7
-rw-r--r--src/mesa/main/mtypes.h1
-rw-r--r--src/mesa/main/state.c17
-rw-r--r--src/mesa/program/ir_to_mesa.cpp2
-rw-r--r--src/mesa/program/prog_parameter.c265
-rw-r--r--src/mesa/program/prog_parameter.h36
-rw-r--r--src/mesa/program/prog_statevars.c2
-rw-r--r--src/mesa/program/prog_to_nir.c10
-rw-r--r--src/mesa/state_tracker/st_atom_rasterizer.c5
-rw-r--r--src/mesa/state_tracker/st_atom_shader.c2
-rw-r--r--src/mesa/state_tracker/st_cb_bitmap.h2
-rw-r--r--src/mesa/state_tracker/st_cb_bitmap_shader.c58
-rw-r--r--src/mesa/state_tracker/st_cb_drawpixels.c10
-rw-r--r--src/mesa/state_tracker/st_cb_drawpixels.h2
-rw-r--r--src/mesa/state_tracker/st_cb_drawpixels_shader.c101
-rw-r--r--src/mesa/state_tracker/st_cb_fbo.c22
-rw-r--r--src/mesa/state_tracker/st_cb_texture.c5
-rw-r--r--src/mesa/state_tracker/st_cb_texturebarrier.c44
-rw-r--r--src/mesa/state_tracker/st_extensions.c8
-rw-r--r--src/mesa/state_tracker/st_format.c83
-rw-r--r--src/mesa/state_tracker/st_glsl_to_tgsi.cpp38
-rw-r--r--src/mesa/state_tracker/st_mesa_to_tgsi.c91
-rw-r--r--src/mesa/state_tracker/st_program.c3
-rw-r--r--src/mesa/swrast/s_points.c4
-rw-r--r--src/mesa/swrast/s_texture.c4
261 files changed, 4704 insertions, 3431 deletions
diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index b0b8281..43377f1 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -129,6 +129,7 @@ LIBGLSL_FILES = \
glsl/opt_tree_grafting.cpp \
glsl/opt_vectorize.cpp \
glsl/program.h \
+ glsl/propagate_invariance.cpp \
glsl/s_expression.cpp \
glsl/s_expression.h
diff --git a/src/compiler/glsl/Makefile.sources b/src/compiler/glsl/Makefile.sources
index 3f537d5..970fab0 100644
--- a/src/compiler/glsl/Makefile.sources
+++ b/src/compiler/glsl/Makefile.sources
@@ -217,6 +217,7 @@ LIBGLSL_FILES = \
opt_tree_grafting.cpp \
opt_vectorize.cpp \
program.h \
+ propagate_invariance.cpp \
s_expression.cpp \
s_expression.h
diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index 5262bd8..35def8e 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -2125,7 +2125,9 @@ process_array_size(exec_node *node,
}
ir_constant *const size = ir->constant_expression_value();
- if (size == NULL || array_size->has_sequence_subexpression()) {
+ if (size == NULL ||
+ (state->is_version(120, 300) &&
+ array_size->has_sequence_subexpression())) {
_mesa_glsl_error(& loc, state, "array size must be a "
"constant valued expression");
return 0;
diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp
index 1ac8489..5d010fd 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -1887,6 +1887,7 @@ do_common_optimization(exec_list *ir, bool linked,
OPT(do_dead_functions, ir);
OPT(do_structure_splitting, ir);
}
+ propagate_invariance(ir);
OPT(do_if_simplification, ir);
OPT(opt_flatten_nested_if_blocks, ir);
OPT(opt_conditional_discard, ir);
diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h
index f451967..b74d68a 100644
--- a/src/compiler/glsl/ir.h
+++ b/src/compiler/glsl/ir.h
@@ -720,6 +720,13 @@ public:
unsigned is_unmatched_generic_inout:1;
/**
+ * Is this varying used only by transform feedback?
+ *
+ * This is used by the linker to decide if it's safe to pack the varying.
+ */
+ unsigned is_xfb_only:1;
+
+ /**
* If non-zero, then this variable may be packed along with other variables
* into a single varying slot, so this offset should be applied when
* accessing components. For example, an offset of 1 means that the x
diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
index b56413a..f9599a3 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -124,7 +124,8 @@ void lower_shared_reference(struct gl_shader *shader, unsigned *shared_size);
void lower_ubo_reference(struct gl_shader *shader);
void lower_packed_varyings(void *mem_ctx,
unsigned locations_used, ir_variable_mode mode,
- unsigned gs_input_vertices, gl_shader *shader);
+ unsigned gs_input_vertices, gl_shader *shader,
+ bool disable_varying_packing, bool xfb_enabled);
bool lower_vector_insert(exec_list *instructions, bool lower_nonconstant_index);
bool lower_vector_derefs(gl_shader *shader);
void lower_named_interface_blocks(void *mem_ctx, gl_shader *shader);
@@ -138,6 +139,7 @@ bool lower_tess_level(gl_shader *shader);
bool lower_vertex_id(gl_shader *shader);
bool lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state);
+void propagate_invariance(exec_list *instructions);
ir_rvalue *
compare_index_block(exec_list *instructions, ir_variable *index,
diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp
index 34eb848..44fc8f6 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -826,7 +826,7 @@ namespace {
class varying_matches
{
public:
- varying_matches(bool disable_varying_packing,
+ varying_matches(bool disable_varying_packing, bool xfb_enabled,
gl_shader_stage producer_stage,
gl_shader_stage consumer_stage);
~varying_matches();
@@ -836,14 +836,30 @@ public:
void store_locations() const;
private:
+ bool is_varying_packing_safe(const glsl_type *type,
+ const ir_variable *var);
+
/**
* If true, this driver disables varying packing, so all varyings need to
* be aligned on slot boundaries, and take up a number of slots equal to
* their number of matrix columns times their array size.
+ *
+ * Packing may also be disabled because our current packing method is not
+ * safe in SSO or versions of OpenGL where interpolation qualifiers are not
+ * guaranteed to match across stages.
*/
const bool disable_varying_packing;
/**
+ * If true, this driver has transform feedback enabled. The transform
+ * feedback code requires at least some packing be done even when varying
+ * packing is disabled, fortunately where transform feedback requires
+ * packing it's safe to override the disabled setting. See
+ * is_varying_packing_safe().
+ */
+ const bool xfb_enabled;
+
+ /**
* Enum representing the order in which varyings are packed within a
* packing class.
*
@@ -862,6 +878,7 @@ private:
static unsigned compute_packing_class(const ir_variable *var);
static packing_order_enum compute_packing_order(const ir_variable *var);
static int match_comparator(const void *x_generic, const void *y_generic);
+ static int xfb_comparator(const void *x_generic, const void *y_generic);
/**
* Structure recording the relationship between a single producer output
@@ -917,9 +934,11 @@ private:
} /* anonymous namespace */
varying_matches::varying_matches(bool disable_varying_packing,
+ bool xfb_enabled,
gl_shader_stage producer_stage,
gl_shader_stage consumer_stage)
: disable_varying_packing(disable_varying_packing),
+ xfb_enabled(xfb_enabled),
producer_stage(producer_stage),
consumer_stage(consumer_stage)
{
@@ -942,6 +961,24 @@ varying_matches::~varying_matches()
/**
+ * Packing is always safe on individual arrays, structures and matrices. It is
+ * also safe if the varying is only used for transform feedback.
+ */
+bool
+varying_matches::is_varying_packing_safe(const glsl_type *type,
+ const ir_variable *var)
+{
+ if (consumer_stage == MESA_SHADER_TESS_EVAL ||
+ consumer_stage == MESA_SHADER_TESS_CTRL ||
+ producer_stage == MESA_SHADER_TESS_CTRL)
+ return false;
+
+ return xfb_enabled && (type->is_array() || type->is_record() ||
+ type->is_matrix() || var->data.is_xfb_only);
+}
+
+
+/**
* Record the given producer/consumer variable pair in the list of variables
* that should later be assigned locations.
*
@@ -1020,7 +1057,7 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
= this->compute_packing_class(var);
this->matches[this->num_matches].packing_order
= this->compute_packing_order(var);
- if (this->disable_varying_packing) {
+ if (this->disable_varying_packing && !is_varying_packing_safe(type, var)) {
unsigned slots = type->count_attribute_slots(false);
this->matches[this->num_matches].num_components = slots * 4;
} else {
@@ -1046,37 +1083,28 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
uint64_t reserved_slots,
bool separate_shader)
{
- /* We disable varying sorting for separate shader programs for the
- * following reasons:
- *
- * 1/ All programs must sort the code in the same order to guarantee the
- * interface matching. However varying_matches::record() will change the
- * interpolation qualifier of some stages.
- *
- * 2/ GLSL version 4.50 removes the matching constrain on the interpolation
- * qualifier.
- *
- * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.40 spec:
- *
- * "The type and presence of interpolation qualifiers of variables with
- * the same name declared in all linked shaders for the same cross-stage
- * interface must match, otherwise the link command will fail.
- *
- * When comparing an output from one stage to an input of a subsequent
- * stage, the input and output don't match if their interpolation
- * qualifiers (or lack thereof) are not the same."
- *
- * "It is a link-time error if, within the same stage, the interpolation
- * qualifiers of variables of the same name do not match."
+ /* If packing has been disabled then we cannot safely sort the varyings by
+ * class as it may mean we are using a version of OpenGL where
+ * interpolation qualifiers are not guaranteed to be matching across
+ * shaders, sorting in this case could result in mismatching shader
+ * interfaces.
+ * When packing is disabled the sort orders varyings used by transform
+ * feedback first, but also depends on *undefined behaviour* of qsort to
+ * reverse the order of the varyings. See: xfb_comparator().
*/
- if (!separate_shader) {
+ if (!this->disable_varying_packing) {
/* Sort varying matches into an order that makes them easy to pack. */
qsort(this->matches, this->num_matches, sizeof(*this->matches),
&varying_matches::match_comparator);
+ } else {
+ /* Only sort varyings that are only used by transform feedback. */
+ qsort(this->matches, this->num_matches, sizeof(*this->matches),
+ &varying_matches::xfb_comparator);
}
unsigned generic_location = 0;
unsigned generic_patch_location = MAX_VARYING*4;
+ bool previous_var_xfb_only = false;
for (unsigned i = 0; i < this->num_matches; i++) {
unsigned *location = &generic_location;
@@ -1100,16 +1128,30 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
/* Advance to the next slot if this varying has a different packing
* class than the previous one, and we're not already on a slot
* boundary.
+ *
+ * Also advance to the next slot if packing is disabled. This makes sure
+ * we don't assign varyings the same locations which is possible
+ * because we still pack individual arrays, records and matrices even
+ * when packing is disabled. Note we don't advance to the next slot if
+ * we can pack varyings together that are only used for transform
+ * feedback.
*/
- if (i > 0 &&
- this->matches[i - 1].packing_class
- != this->matches[i].packing_class) {
+ if ((this->disable_varying_packing &&
+ !(previous_var_xfb_only && var->data.is_xfb_only)) ||
+ (i > 0 && this->matches[i - 1].packing_class
+ != this->matches[i].packing_class )) {
*location = ALIGN(*location, 4);
}
+ previous_var_xfb_only = var->data.is_xfb_only;
+
unsigned num_elements = type->count_attribute_slots(is_vertex_input);
- unsigned slot_end = this->disable_varying_packing ? 4 :
- type->without_array()->vector_elements;
+ unsigned slot_end;
+ if (this->disable_varying_packing &&
+ !is_varying_packing_safe(type, var))
+ slot_end = 4;
+ else
+ slot_end = type->without_array()->vector_elements;
slot_end += *location - 1;
/* FIXME: We could be smarter in the below code and loop back over
@@ -1133,7 +1175,8 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
/* Increase the slot to make sure there is enough room for next
* array element.
*/
- if (this->disable_varying_packing)
+ if (this->disable_varying_packing &&
+ !is_varying_packing_safe(type, var))
slot_end += 4;
else
slot_end += type->without_array()->vector_elements;
@@ -1259,6 +1302,32 @@ varying_matches::match_comparator(const void *x_generic, const void *y_generic)
/**
+ * Comparison function passed to qsort() to sort varyings used only by
+ * transform feedback when packing of other varyings is disabled.
+ */
+int
+varying_matches::xfb_comparator(const void *x_generic, const void *y_generic)
+{
+ const match *x = (const match *) x_generic;
+
+ if (x->producer_var != NULL && x->producer_var->data.is_xfb_only)
+ return match_comparator(x_generic, y_generic);
+
+ /* FIXME: When the comparator returns 0 it means the elements being
+ * compared are equivalent. However the qsort documentation says:
+ *
+ * "The order of equivalent elements is undefined."
+ *
+ * In practice the sort ends up reversing the order of the varyings which
+ * means locations are also assigned in this reversed order and happens to
+ * be what we want. This is also what's happening in
+ * varying_matches::match_comparator().
+ */
+ return 0;
+}
+
+
+/**
* Is the given variable a varying variable to be counted against the
* limit in ctx->Const.MaxVarying?
* This includes variables such as texcoords, colors and generic
@@ -1573,26 +1642,60 @@ assign_varying_locations(struct gl_context *ctx,
unsigned num_tfeedback_decls,
tfeedback_decl *tfeedback_decls)
{
- if (ctx->Const.DisableVaryingPacking) {
- /* Transform feedback code assumes varyings are packed, so if the driver
- * has disabled varying packing, make sure it does not support transform
- * feedback.
- */
- assert(!ctx->Extensions.EXT_transform_feedback);
- }
-
/* Tessellation shaders treat inputs and outputs as shared memory and can
* access inputs and outputs of other invocations.
* Therefore, they can't be lowered to temps easily (and definitely not
* efficiently).
*/
- bool disable_varying_packing =
- ctx->Const.DisableVaryingPacking ||
+ bool unpackable_tess =
(consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) ||
(consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) ||
(producer && producer->Stage == MESA_SHADER_TESS_CTRL);
- varying_matches matches(disable_varying_packing,
+ /* Transform feedback code assumes varying arrays are packed, so if the
+ * driver has disabled varying packing, make sure to at least enable
+ * packing required by transform feedback.
+ */
+ bool xfb_enabled =
+ ctx->Extensions.EXT_transform_feedback && !unpackable_tess;
+
+ /* Disable varying packing for GL 4.4+ as there is no guarantee
+ * that interpolation qualifiers will match between shaders in these
+ * versions. We also disable packing on outward facing interfaces for
+ * SSO because in ES we need to retain the unpacked varying information
+ * for draw time validation. For desktop GL we could allow packing for
+ * versions < 4.4 but it's just safer not to do packing.
+ *
+ * Packing is still enabled on individual arrays, structs, and matrices as
+ * these are required by the transform feedback code and it is still safe
+ * to do so. We also enable packing when a varying is only used for
+ * transform feedback and it's not an SSO.
+ *
+ * Varying packing currently only packs together varyings with matching
+ * interpolation qualifiers as the backends assume all packed components
+ * are to be processed in the same way. Therefore we cannot do packing in
+ * these versions of GL without the risk of mismatching interfaces.
+ *
+ * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec:
+ *
+ * "The type and presence of interpolation qualifiers of variables with
+ * the same name declared in all linked shaders for the same cross-stage
+ * interface must match, otherwise the link command will fail.
+ *
+ * When comparing an output from one stage to an input of a subsequent
+ * stage, the input and output don't match if their interpolation
+ * qualifiers (or lack thereof) are not the same."
+ *
+ * This text was also in at least revision 7 of the 4.40 spec but is no
+ * longer in revision 9 and not in the 4.50 spec.
+ */
+ bool disable_varying_packing =
+ ctx->Const.DisableVaryingPacking || unpackable_tess;
+ if ((ctx->API == API_OPENGL_CORE && ctx->Version >= 44) ||
+ (prog->SeparateShader && (producer == NULL || consumer == NULL)))
+ disable_varying_packing = true;
+
+ varying_matches matches(disable_varying_packing, xfb_enabled,
producer ? producer->Stage : (gl_shader_stage)-1,
consumer ? consumer->Stage : (gl_shader_stage)-1);
hash_table *tfeedback_candidates
@@ -1711,8 +1814,10 @@ assign_varying_locations(struct gl_context *ctx,
return false;
}
- if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout)
+ if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) {
+ matched_candidate->toplevel_var->data.is_xfb_only = 1;
matches.record(matched_candidate->toplevel_var, NULL);
+ }
}
const uint64_t reserved_slots =
@@ -1784,15 +1889,16 @@ assign_varying_locations(struct gl_context *ctx,
ir_var_shader_in);
}
- if (!disable_varying_packing) {
- if (producer) {
- lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out,
- 0, producer);
- }
- if (consumer) {
- lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in,
- consumer_vertices, consumer);
- }
+ if (producer) {
+ lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out,
+ 0, producer, disable_varying_packing,
+ xfb_enabled);
+ }
+
+ if (consumer) {
+ lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in,
+ consumer_vertices, consumer,
+ disable_varying_packing, xfb_enabled);
}
return true;
diff --git a/src/compiler/glsl/lower_packed_varyings.cpp b/src/compiler/glsl/lower_packed_varyings.cpp
index 8d1eb17..825cc9e 100644
--- a/src/compiler/glsl/lower_packed_varyings.cpp
+++ b/src/compiler/glsl/lower_packed_varyings.cpp
@@ -168,7 +168,9 @@ public:
ir_variable_mode mode,
unsigned gs_input_vertices,
exec_list *out_instructions,
- exec_list *out_variables);
+ exec_list *out_variables,
+ bool disable_varying_packing,
+ bool xfb_enabled);
void run(struct gl_shader *shader);
@@ -231,6 +233,9 @@ private:
* Exec list into which the visitor should insert any new variables.
*/
exec_list *out_variables;
+
+ bool disable_varying_packing;
+ bool xfb_enabled;
};
} /* anonymous namespace */
@@ -238,7 +243,8 @@ private:
lower_packed_varyings_visitor::lower_packed_varyings_visitor(
void *mem_ctx, unsigned locations_used, ir_variable_mode mode,
unsigned gs_input_vertices, exec_list *out_instructions,
- exec_list *out_variables)
+ exec_list *out_variables, bool disable_varying_packing,
+ bool xfb_enabled)
: mem_ctx(mem_ctx),
locations_used(locations_used),
packed_varyings((ir_variable **)
@@ -247,7 +253,9 @@ lower_packed_varyings_visitor::lower_packed_varyings_visitor(
mode(mode),
gs_input_vertices(gs_input_vertices),
out_instructions(out_instructions),
- out_variables(out_variables)
+ out_variables(out_variables),
+ disable_varying_packing(disable_varying_packing),
+ xfb_enabled(xfb_enabled)
{
}
@@ -656,7 +664,18 @@ lower_packed_varyings_visitor::needs_lowering(ir_variable *var)
if (var->data.explicit_location)
return false;
- const glsl_type *type = var->type->without_array();
+ /* Override disable_varying_packing if the var is only used by transform
+ * feedback. Also override it if transform feedback is enabled and the
+ * variable is an array, struct or matrix as the elements of these types
+ * will always have the same interpolation and are therefore safe to pack.
+ */
+ const glsl_type *type = var->type;
+ if (disable_varying_packing && !var->data.is_xfb_only &&
+ !((type->is_array() || type->is_record() || type->is_matrix()) &&
+ xfb_enabled))
+ return false;
+
+ type = type->without_array();
if (type->vector_elements == 4 && !type->is_double())
return false;
return true;
@@ -709,7 +728,8 @@ lower_packed_varyings_gs_splicer::visit_leave(ir_emit_vertex *ev)
void
lower_packed_varyings(void *mem_ctx, unsigned locations_used,
ir_variable_mode mode, unsigned gs_input_vertices,
- gl_shader *shader)
+ gl_shader *shader, bool disable_varying_packing,
+ bool xfb_enabled)
{
exec_list *instructions = shader->ir;
ir_function *main_func = shader->symbols->get_function("main");
@@ -720,7 +740,9 @@ lower_packed_varyings(void *mem_ctx, unsigned locations_used,
lower_packed_varyings_visitor visitor(mem_ctx, locations_used, mode,
gs_input_vertices,
&new_instructions,
- &new_variables);
+ &new_variables,
+ disable_varying_packing,
+ xfb_enabled);
visitor.run(shader);
if (mode == ir_var_shader_out) {
if (shader->Stage == MESA_SHADER_GEOMETRY) {
diff --git a/src/compiler/glsl/opt_algebraic.cpp b/src/compiler/glsl/opt_algebraic.cpp
index 1e58062..f5858c8 100644
--- a/src/compiler/glsl/opt_algebraic.cpp
+++ b/src/compiler/glsl/opt_algebraic.cpp
@@ -58,6 +58,8 @@ public:
{
}
+ virtual ir_visitor_status visit_enter(ir_assignment *ir);
+
ir_rvalue *handle_expression(ir_expression *ir);
void handle_rvalue(ir_rvalue **rvalue);
bool reassociate_constant(ir_expression *ir1,
@@ -80,6 +82,23 @@ public:
} /* unnamed namespace */
+ir_visitor_status
+ir_algebraic_visitor::visit_enter(ir_assignment *ir)
+{
+ ir_variable *var = ir->lhs->variable_referenced();
+ if (var->data.invariant || var->data.precise) {
+ /* If we're assigning to an invariant or precise variable, just bail.
+ * Most of the algebraic optimizations aren't precision-safe.
+ *
+ * FINISHME: Find out which optimizations are precision-safe and enable
+ * them only for invariant or precise trees.
+ */
+ return visit_continue_with_parent;
+ } else {
+ return visit_continue;
+ }
+}
+
static inline bool
is_vec_zero(ir_constant *ir)
{
diff --git a/src/compiler/glsl/opt_rebalance_tree.cpp b/src/compiler/glsl/opt_rebalance_tree.cpp
index 095f2d7..8045d51 100644
--- a/src/compiler/glsl/opt_rebalance_tree.cpp
+++ b/src/compiler/glsl/opt_rebalance_tree.cpp
@@ -131,6 +131,8 @@ public:
progress = false;
}
+ virtual ir_visitor_status visit_enter(ir_assignment *ir);
+
void handle_rvalue(ir_rvalue **rvalue);
bool progress;
@@ -146,6 +148,20 @@ struct is_reduction_data {
} /* anonymous namespace */
+ir_visitor_status
+ir_rebalance_visitor::visit_enter(ir_assignment *ir)
+{
+ ir_variable *var = ir->lhs->variable_referenced();
+ if (var->data.invariant || var->data.precise) {
+ /* If we're assigning to an invariant or precise variable, just bail.
+ * Tree rebalancing (reassociation) isn't precision-safe.
+ */
+ return visit_continue_with_parent;
+ } else {
+ return visit_continue;
+ }
+}
+
static bool
is_reduction_operation(ir_expression_operation operation)
{
diff --git a/src/compiler/glsl/propagate_invariance.cpp b/src/compiler/glsl/propagate_invariance.cpp
new file mode 100644
index 0000000..c137ff3
--- /dev/null
+++ b/src/compiler/glsl/propagate_invariance.cpp
@@ -0,0 +1,125 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file propagate_invariance.cpp
+ * Propagate the "invariant" and "precise" qualifiers to variables used to
+ * compute invariant or precise values.
+ *
+ * The GLSL spec (depending on what version you read) says, among the
+ * conditions for getting bit-for-bit the same values on an invariant output:
+ *
+ * "All operations in the consuming expressions and any intermediate
+ * expressions must be the same, with the same order of operands and same
+ * associativity, to give the same order of evaluation."
+ *
+ * This effectively means that if a variable is used to compute an invariant
+ * value then that variable becomes invariant. The same should apply to the
+ * "precise" qualifier.
+ */
+
+#include "ir.h"
+#include "ir_visitor.h"
+#include "ir_rvalue_visitor.h"
+#include "ir_optimization.h"
+#include "compiler/glsl_types.h"
+
+namespace {
+
+class ir_invariance_propagation_visitor : public ir_hierarchical_visitor {
+public:
+ ir_invariance_propagation_visitor()
+ {
+ this->progress = false;
+ this->dst_var = NULL;
+ }
+
+ virtual ~ir_invariance_propagation_visitor()
+ {
+ /* empty */
+ }
+
+ virtual ir_visitor_status visit_enter(ir_assignment *ir);
+ virtual ir_visitor_status visit_leave(ir_assignment *ir);
+ virtual ir_visitor_status visit(ir_dereference_variable *ir);
+
+ ir_variable *dst_var;
+ bool progress;
+};
+
+} /* unnamed namespace */
+
+ir_visitor_status
+ir_invariance_propagation_visitor::visit_enter(ir_assignment *ir)
+{
+ assert(this->dst_var == NULL);
+ ir_variable *var = ir->lhs->variable_referenced();
+ if (var->data.invariant || var->data.precise) {
+ this->dst_var = var;
+ return visit_continue;
+ } else {
+ return visit_continue_with_parent;
+ }
+}
+
+ir_visitor_status
+ir_invariance_propagation_visitor::visit_leave(ir_assignment *ir)
+{
+ this->dst_var = NULL;
+
+ return visit_continue;
+}
+
+ir_visitor_status
+ir_invariance_propagation_visitor::visit(ir_dereference_variable *ir)
+{
+ if (this->dst_var == NULL)
+ return visit_continue;
+
+ if (this->dst_var->data.invariant) {
+ if (!ir->var->data.invariant)
+ this->progress = true;
+
+ ir->var->data.invariant = true;
+ }
+
+ if (this->dst_var->data.precise) {
+ if (!ir->var->data.precise)
+ this->progress = true;
+
+ ir->var->data.precise = true;
+ }
+
+ return visit_continue;
+}
+
+void
+propagate_invariance(exec_list *instructions)
+{
+ ir_invariance_propagation_visitor visitor;
+
+ do {
+ visitor.progress = false;
+ visit_list_elements(&visitor, instructions);
+ } while (visitor.progress);
+}
diff --git a/src/compiler/nir/glsl_to_nir.cpp b/src/compiler/nir/glsl_to_nir.cpp
index da5d730..7b8b466 100644
--- a/src/compiler/nir/glsl_to_nir.cpp
+++ b/src/compiler/nir/glsl_to_nir.cpp
@@ -731,7 +731,7 @@ nir_visitor::visit(ir_call *ir)
ir_dereference *param =
(ir_dereference *) ir->actual_parameters.get_head();
instr->variables[0] = evaluate_deref(&instr->instr, param);
- nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
+ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
nir_builder_instr_insert(&b, &instr->instr);
break;
}
@@ -765,7 +765,7 @@ nir_visitor::visit(ir_call *ir)
const nir_intrinsic_info *info =
&nir_intrinsic_infos[instr->intrinsic];
nir_ssa_dest_init(&instr->instr, &instr->dest,
- info->dest_components, NULL);
+ info->dest_components, 32, NULL);
}
if (op == nir_intrinsic_image_size ||
@@ -826,7 +826,7 @@ nir_visitor::visit(ir_call *ir)
nir_builder_instr_insert(&b, &instr->instr);
break;
case nir_intrinsic_shader_clock:
- nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
+ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
nir_builder_instr_insert(&b, &instr->instr);
break;
case nir_intrinsic_store_ssbo: {
@@ -867,7 +867,7 @@ nir_visitor::visit(ir_call *ir)
/* Setup destination register */
nir_ssa_dest_init(&instr->instr, &instr->dest,
- type->vector_elements, NULL);
+ type->vector_elements, 32, NULL);
/* Insert the created nir instruction now since in the case of boolean
* result we will need to emit another instruction after it
@@ -890,7 +890,7 @@ nir_visitor::visit(ir_call *ir)
load_ssbo_compare->src[1].swizzle[i] = 0;
nir_ssa_dest_init(&load_ssbo_compare->instr,
&load_ssbo_compare->dest.dest,
- type->vector_elements, NULL);
+ type->vector_elements, 32, NULL);
load_ssbo_compare->dest.write_mask = (1 << type->vector_elements) - 1;
nir_builder_instr_insert(&b, &load_ssbo_compare->instr);
dest = &load_ssbo_compare->dest.dest;
@@ -936,7 +936,7 @@ nir_visitor::visit(ir_call *ir)
/* Atomic result */
assert(ir->return_deref);
nir_ssa_dest_init(&instr->instr, &instr->dest,
- ir->return_deref->type->vector_elements, NULL);
+ ir->return_deref->type->vector_elements, 32, NULL);
nir_builder_instr_insert(&b, &instr->instr);
break;
}
@@ -951,8 +951,9 @@ nir_visitor::visit(ir_call *ir)
instr->num_components = type->vector_elements;
/* Setup destination register */
+ unsigned bit_size = glsl_get_bit_size(type->base_type);
nir_ssa_dest_init(&instr->instr, &instr->dest,
- type->vector_elements, NULL);
+ type->vector_elements, bit_size, NULL);
nir_builder_instr_insert(&b, &instr->instr);
break;
@@ -1013,8 +1014,10 @@ nir_visitor::visit(ir_call *ir)
/* Atomic result */
assert(ir->return_deref);
+ unsigned bit_size = glsl_get_bit_size(ir->return_deref->type->base_type);
nir_ssa_dest_init(&instr->instr, &instr->dest,
- ir->return_deref->type->vector_elements, NULL);
+ ir->return_deref->type->vector_elements,
+ bit_size, NULL);
nir_builder_instr_insert(&b, &instr->instr);
break;
}
@@ -1061,6 +1064,9 @@ nir_visitor::visit(ir_assignment *ir)
{
unsigned num_components = ir->lhs->type->vector_elements;
+ b.exact = ir->lhs->variable_referenced()->data.invariant ||
+ ir->lhs->variable_referenced()->data.precise;
+
if ((ir->rhs->as_dereference() || ir->rhs->as_constant()) &&
(ir->write_mask == (1 << num_components) - 1 || ir->write_mask == 0)) {
/* We're doing a plain-as-can-be copy, so emit a copy_var */
@@ -1163,7 +1169,7 @@ nir_visitor::add_instr(nir_instr *instr, unsigned num_components)
nir_dest *dest = get_instr_dest(instr);
if (dest)
- nir_ssa_dest_init(instr, dest, num_components, NULL);
+ nir_ssa_dest_init(instr, dest, num_components, 32, NULL);
nir_builder_instr_insert(&b, instr);
@@ -1203,6 +1209,7 @@ nir_visitor::visit(ir_expression *ir)
nir_intrinsic_instr *load =
nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_ubo);
load->num_components = ir->type->vector_elements;
+ load->dest.ssa.bit_size = glsl_get_bit_size(ir->type->base_type);
load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0]));
load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1]));
add_instr(&load->instr, ir->type->vector_elements);
diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
index 7e41ed3..b67916d 100644
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -70,6 +70,7 @@ reg_create(void *mem_ctx, struct exec_list *list)
list_inithead(&reg->if_uses);
reg->num_components = 0;
+ reg->bit_size = 32;
reg->num_array_elems = 0;
reg->is_packed = false;
reg->name = NULL;
@@ -473,7 +474,7 @@ nir_load_const_instr_create(nir_shader *shader, unsigned num_components)
nir_load_const_instr *instr = ralloc(shader, nir_load_const_instr);
instr_init(&instr->instr, nir_instr_type_load_const);
- nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL);
+ nir_ssa_def_init(&instr->instr, &instr->def, num_components, 32, NULL);
return instr;
}
@@ -562,7 +563,7 @@ nir_ssa_undef_instr_create(nir_shader *shader, unsigned num_components)
nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
instr_init(&instr->instr, nir_instr_type_ssa_undef);
- nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL);
+ nir_ssa_def_init(&instr->instr, &instr->def, num_components, 32, NULL);
return instr;
}
@@ -699,10 +700,10 @@ nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref)
case GLSL_TYPE_FLOAT:
case GLSL_TYPE_INT:
case GLSL_TYPE_UINT:
- load->value.u[i] = constant->value.u[matrix_offset + i];
+ load->value.u32[i] = constant->value.u[matrix_offset + i];
break;
case GLSL_TYPE_BOOL:
- load->value.u[i] = constant->value.b[matrix_offset + i] ?
+ load->value.u32[i] = constant->value.b[matrix_offset + i] ?
NIR_TRUE : NIR_FALSE;
break;
default:
@@ -731,18 +732,11 @@ reduce_cursor(nir_cursor cursor)
{
switch (cursor.option) {
case nir_cursor_before_block:
+ assert(nir_cf_node_prev(&cursor.block->cf_node) == NULL ||
+ nir_cf_node_prev(&cursor.block->cf_node)->type != nir_cf_node_block);
if (exec_list_is_empty(&cursor.block->instr_list)) {
/* Empty block. After is as good as before. */
cursor.option = nir_cursor_after_block;
- } else {
- /* Try to switch to after the previous block if there is one.
- * (This isn't likely, but it can happen.)
- */
- nir_cf_node *prev_node = nir_cf_node_prev(&cursor.block->cf_node);
- if (prev_node && prev_node->type == nir_cf_node_block) {
- cursor.block = nir_cf_node_as_block(prev_node);
- cursor.option = nir_cursor_after_block;
- }
}
return cursor;
@@ -1379,15 +1373,18 @@ nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest)
src_add_all_uses(dest->reg.indirect, instr, NULL);
}
+/* note: does *not* take ownership of 'name' */
void
nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
- unsigned num_components, const char *name)
+ unsigned num_components,
+ unsigned bit_size, const char *name)
{
- def->name = name;
+ def->name = ralloc_strdup(instr, name);
def->parent_instr = instr;
list_inithead(&def->uses);
list_inithead(&def->if_uses);
def->num_components = num_components;
+ def->bit_size = bit_size;
if (instr->block) {
nir_function_impl *impl =
@@ -1399,12 +1396,14 @@ nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
}
}
+/* note: does *not* take ownership of 'name' */
void
nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
- unsigned num_components, const char *name)
+ unsigned num_components, unsigned bit_size,
+ const char *name)
{
dest->is_ssa = true;
- nir_ssa_def_init(instr, &dest->ssa, num_components, name);
+ nir_ssa_def_init(instr, &dest->ssa, num_components, bit_size, name);
}
void
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index ab1afdb..2fd75ec 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -101,6 +101,7 @@ union nir_constant_data {
int i[16];
float f[16];
bool b[16];
+ double d[16];
};
typedef struct nir_constant {
@@ -381,6 +382,9 @@ typedef struct nir_register {
unsigned num_components; /** < number of vector components */
unsigned num_array_elems; /** < size of array (0 for no array) */
+ /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */
+ uint8_t bit_size;
+
/** generic register index. */
unsigned index;
@@ -488,6 +492,9 @@ typedef struct nir_ssa_def {
struct list_head if_uses;
uint8_t num_components;
+
+ /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */
+ uint8_t bit_size;
} nir_ssa_def;
struct nir_src;
@@ -594,6 +601,18 @@ nir_dest_for_reg(nir_register *reg)
return dest;
}
+static inline unsigned
+nir_src_bit_size(nir_src src)
+{
+ return src.is_ssa ? src.ssa->bit_size : src.reg.reg->bit_size;
+}
+
+static inline unsigned
+nir_dest_bit_size(nir_dest dest)
+{
+ return dest.is_ssa ? dest.ssa.bit_size : dest.reg.reg->bit_size;
+}
+
void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if);
void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr);
@@ -649,9 +668,36 @@ typedef enum {
nir_type_float,
nir_type_int,
nir_type_uint,
- nir_type_bool
+ nir_type_bool,
+ nir_type_bool32 = 32 | nir_type_bool,
+ nir_type_int8 = 8 | nir_type_int,
+ nir_type_int16 = 16 | nir_type_int,
+ nir_type_int32 = 32 | nir_type_int,
+ nir_type_int64 = 64 | nir_type_int,
+ nir_type_uint8 = 8 | nir_type_uint,
+ nir_type_uint16 = 16 | nir_type_uint,
+ nir_type_uint32 = 32 | nir_type_uint,
+ nir_type_uint64 = 64 | nir_type_uint,
+ nir_type_float16 = 16 | nir_type_float,
+ nir_type_float32 = 32 | nir_type_float,
+ nir_type_float64 = 64 | nir_type_float,
} nir_alu_type;
+#define NIR_ALU_TYPE_SIZE_MASK 0xfffffff8
+#define NIR_ALU_TYPE_BASE_TYPE_MASK 0x00000007
+
+static inline unsigned
+nir_alu_type_get_type_size(nir_alu_type type)
+{
+ return type & NIR_ALU_TYPE_SIZE_MASK;
+}
+
+static inline unsigned
+nir_alu_type_get_base_type(nir_alu_type type)
+{
+ return type & NIR_ALU_TYPE_BASE_TYPE_MASK;
+}
+
typedef enum {
NIR_OP_IS_COMMUTATIVE = (1 << 0),
NIR_OP_IS_ASSOCIATIVE = (1 << 1),
@@ -708,6 +754,17 @@ extern const nir_op_info nir_op_infos[nir_num_opcodes];
typedef struct nir_alu_instr {
nir_instr instr;
nir_op op;
+
+ /** Indicates that this ALU instruction generates an exact value
+ *
+ * This is kind of a mixture of GLSL "precise" and "invariant" and not
+ * really equivalent to either. This indicates that the value generated by
+ * this operation is high-precision and any code transformations that touch
+ * it must ensure that the resulting value is bit-for-bit identical to the
+ * original.
+ */
+ bool exact;
+
nir_alu_dest dest;
nir_alu_src src[];
} nir_alu_instr;
@@ -1218,9 +1275,12 @@ nir_tex_instr_src_index(nir_tex_instr *instr, nir_tex_src_type type)
typedef struct {
union {
- float f[4];
- int32_t i[4];
- uint32_t u[4];
+ float f32[4];
+ double f64[4];
+ int32_t i32[4];
+ uint32_t u32[4];
+ int64_t i64[4];
+ uint64_t u64[4];
};
} nir_const_value;
@@ -2061,9 +2121,11 @@ void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest,
nir_dest new_dest);
void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
- unsigned num_components, const char *name);
+ unsigned num_components, unsigned bit_size,
+ const char *name);
void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
- unsigned num_components, const char *name);
+ unsigned num_components, unsigned bit_size,
+ const char *name);
void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src);
void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
nir_instr *after_me);
@@ -2094,9 +2156,10 @@ void nir_index_blocks(nir_function_impl *impl);
void nir_print_shader(nir_shader *shader, FILE *fp);
void nir_print_instr(const nir_instr *instr, FILE *fp);
-nir_shader * nir_shader_clone(void *mem_ctx, const nir_shader *s);
+nir_shader *nir_shader_clone(void *mem_ctx, const nir_shader *s);
nir_function_impl *nir_function_impl_clone(const nir_function_impl *fi);
nir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var);
+nir_variable *nir_variable_clone(const nir_variable *c, nir_shader *shader);
#ifdef DEBUG
void nir_validate_shader(nir_shader *shader);
diff --git a/src/compiler/nir/nir_algebraic.py b/src/compiler/nir/nir_algebraic.py
index 2357b57..d05564f 100644
--- a/src/compiler/nir/nir_algebraic.py
+++ b/src/compiler/nir/nir_algebraic.py
@@ -63,12 +63,13 @@ class Value(object):
static const ${val.c_type} ${val.name} = {
{ ${val.type_enum} },
% if isinstance(val, Constant):
- { ${hex(val)} /* ${val.value} */ },
+ ${val.type()}, { ${hex(val)} /* ${val.value} */ },
% elif isinstance(val, Variable):
${val.index}, /* ${val.var_name} */
${'true' if val.is_constant else 'false'},
- nir_type_${ val.required_type or 'invalid' },
+ ${val.type() or 'nir_type_invalid' },
% elif isinstance(val, Expression):
+ ${'true' if val.inexact else 'false'},
nir_op_${val.opcode},
{ ${', '.join(src.c_ptr for src in val.sources)} },
% endif
@@ -107,10 +108,18 @@ class Constant(Value):
if isinstance(self.value, (int, long)):
return hex(self.value)
elif isinstance(self.value, float):
- return hex(struct.unpack('I', struct.pack('f', self.value))[0])
+ return hex(struct.unpack('Q', struct.pack('d', self.value))[0])
else:
assert False
+ def type(self):
+ if isinstance(self.value, (bool)):
+ return "nir_type_bool32"
+ elif isinstance(self.value, (int, long)):
+ return "nir_type_int"
+ elif isinstance(self.value, float):
+ return "nir_type_float"
+
_var_name_re = re.compile(r"(?P<const>#)?(?P<name>\w+)(?:@(?P<type>\w+))?")
class Variable(Value):
@@ -129,12 +138,26 @@ class Variable(Value):
self.index = varset[self.var_name]
+ def type(self):
+ if self.required_type == 'bool':
+ return "nir_type_bool32"
+ elif self.required_type in ('int', 'unsigned'):
+ return "nir_type_int"
+ elif self.required_type == 'float':
+ return "nir_type_float"
+
+_opcode_re = re.compile(r"(?P<inexact>~)?(?P<opcode>\w+)")
+
class Expression(Value):
def __init__(self, expr, name_base, varset):
Value.__init__(self, name_base, "expression")
assert isinstance(expr, tuple)
- self.opcode = expr[0]
+ m = _opcode_re.match(expr[0])
+ assert m and m.group('opcode') is not None
+
+ self.opcode = m.group('opcode')
+ self.inexact = m.group('inexact') is not None
self.sources = [ Value.create(src, "{0}_{1}".format(name_base, i), varset)
for (i, src) in enumerate(expr[1:]) ]
diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index b4dde54..94f183c 100644
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -31,6 +31,9 @@ struct exec_list;
typedef struct nir_builder {
nir_cursor cursor;
+ /* Whether new ALU instructions will be marked "exact" */
+ bool exact;
+
nir_shader *shader;
nir_function_impl *impl;
} nir_builder;
@@ -39,6 +42,7 @@ static inline void
nir_builder_init(nir_builder *build, nir_function_impl *impl)
{
memset(build, 0, sizeof(*build));
+ build->exact = false;
build->impl = impl;
build->shader = impl->function->shader;
}
@@ -50,6 +54,7 @@ nir_builder_init_simple_shader(nir_builder *build, void *mem_ctx,
{
build->shader = nir_shader_create(mem_ctx, stage, options);
nir_function *func = nir_function_create(build->shader, "main");
+ build->exact = false;
build->impl = nir_function_impl_create(func);
build->cursor = nir_after_cf_list(&build->impl->body);
}
@@ -104,7 +109,7 @@ nir_imm_float(nir_builder *build, float x)
nir_const_value v;
memset(&v, 0, sizeof(v));
- v.f[0] = x;
+ v.f32[0] = x;
return nir_build_imm(build, 1, v);
}
@@ -115,10 +120,10 @@ nir_imm_vec4(nir_builder *build, float x, float y, float z, float w)
nir_const_value v;
memset(&v, 0, sizeof(v));
- v.f[0] = x;
- v.f[1] = y;
- v.f[2] = z;
- v.f[3] = w;
+ v.f32[0] = x;
+ v.f32[1] = y;
+ v.f32[2] = z;
+ v.f32[3] = w;
return nir_build_imm(build, 4, v);
}
@@ -129,7 +134,7 @@ nir_imm_int(nir_builder *build, int x)
nir_const_value v;
memset(&v, 0, sizeof(v));
- v.i[0] = x;
+ v.i32[0] = x;
return nir_build_imm(build, 1, v);
}
@@ -140,10 +145,10 @@ nir_imm_ivec4(nir_builder *build, int x, int y, int z, int w)
nir_const_value v;
memset(&v, 0, sizeof(v));
- v.i[0] = x;
- v.i[1] = y;
- v.i[2] = z;
- v.i[3] = w;
+ v.i32[0] = x;
+ v.i32[1] = y;
+ v.i32[2] = z;
+ v.i32[3] = w;
return nir_build_imm(build, 4, v);
}
@@ -157,6 +162,8 @@ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
if (!instr)
return NULL;
+ instr->exact = build->exact;
+
instr->src[0].src = nir_src_for_ssa(src0);
if (src1)
instr->src[1].src = nir_src_for_ssa(src1);
@@ -178,6 +185,25 @@ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
}
assert(num_components != 0);
+ /* Figure out the bitwidth based on the source bitwidth if the instruction
+ * is variable-width.
+ */
+ unsigned bit_size = nir_alu_type_get_type_size(op_info->output_type);
+ if (bit_size == 0) {
+ for (unsigned i = 0; i < op_info->num_inputs; i++) {
+ unsigned src_bit_size = instr->src[i].src.ssa->bit_size;
+ if (nir_alu_type_get_type_size(op_info->input_types[i]) == 0) {
+ if (bit_size)
+ assert(src_bit_size == bit_size);
+ else
+ bit_size = src_bit_size;
+ } else {
+ assert(src_bit_size ==
+ nir_alu_type_get_type_size(op_info->input_types[i]));
+ }
+ }
+ }
+
/* Make sure we don't swizzle from outside of our source vector (like if a
* scalar value was passed into a multiply with a vector).
*/
@@ -187,7 +213,8 @@ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
}
}
- nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL);
+ nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components,
+ bit_size, NULL);
instr->dest.write_mask = (1 << num_components) - 1;
nir_builder_instr_insert(build, &instr->instr);
@@ -252,7 +279,9 @@ static inline nir_ssa_def *
nir_fmov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
{
nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_fmov);
- nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL);
+ nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components,
+ nir_src_bit_size(src.src), NULL);
+ mov->exact = build->exact;
mov->dest.write_mask = (1 << num_components) - 1;
mov->src[0] = src;
nir_builder_instr_insert(build, &mov->instr);
@@ -264,7 +293,9 @@ static inline nir_ssa_def *
nir_imov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
{
nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_imov);
- nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL);
+ nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components,
+ nir_src_bit_size(src.src), NULL);
+ mov->exact = build->exact;
mov->dest.write_mask = (1 << num_components) - 1;
mov->src[0] = src;
nir_builder_instr_insert(build, &mov->instr);
@@ -360,7 +391,8 @@ nir_load_var(nir_builder *build, nir_variable *var)
nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_var);
load->num_components = num_components;
load->variables[0] = nir_deref_var_create(load, var);
- nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
+ nir_ssa_dest_init(&load->instr, &load->dest, num_components,
+ glsl_get_bit_size(glsl_get_base_type(var->type)), NULL);
nir_builder_instr_insert(build, &load->instr);
return &load->dest.ssa;
}
@@ -426,7 +458,7 @@ nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index)
load->num_components = nir_intrinsic_infos[op].dest_components;
load->const_index[0] = index;
nir_ssa_dest_init(&load->instr, &load->dest,
- nir_intrinsic_infos[op].dest_components, NULL);
+ nir_intrinsic_infos[op].dest_components, 32, NULL);
nir_builder_instr_insert(build, &load->instr);
return &load->dest.ssa;
}
diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c
index 3268deb..7d2e383 100644
--- a/src/compiler/nir/nir_clone.c
+++ b/src/compiler/nir/nir_clone.c
@@ -127,11 +127,10 @@ nir_constant_clone(const nir_constant *c, nir_variable *nvar)
/* NOTE: for cloning nir_variable's, bypass nir_variable_create to avoid
* having to deal with locals and globals separately:
*/
-static nir_variable *
-clone_variable(clone_state *state, const nir_variable *var)
+nir_variable *
+nir_variable_clone(const nir_variable *var, nir_shader *shader)
{
- nir_variable *nvar = rzalloc(state->ns, nir_variable);
- add_remap(state, nvar, var);
+ nir_variable *nvar = rzalloc(shader, nir_variable);
nvar->type = var->type;
nvar->name = ralloc_strdup(nvar, var->name);
@@ -149,6 +148,15 @@ clone_variable(clone_state *state, const nir_variable *var)
return nvar;
}
+static nir_variable *
+clone_variable(clone_state *state, const nir_variable *var)
+{
+ nir_variable *nvar = nir_variable_clone(var, state->ns);
+ add_remap(state, nvar, var);
+
+ return nvar;
+}
+
/* clone list of nir_variable: */
static void
clone_var_list(clone_state *state, struct exec_list *dst,
@@ -220,7 +228,8 @@ __clone_dst(clone_state *state, nir_instr *ninstr,
{
ndst->is_ssa = dst->is_ssa;
if (dst->is_ssa) {
- nir_ssa_dest_init(ninstr, ndst, dst->ssa.num_components, dst->ssa.name);
+ nir_ssa_dest_init(ninstr, ndst, dst->ssa.num_components,
+ dst->ssa.bit_size, dst->ssa.name);
add_remap(state, &ndst->ssa, &dst->ssa);
} else {
ndst->reg.reg = remap_reg(state, dst->reg.reg);
@@ -303,6 +312,7 @@ static nir_alu_instr *
clone_alu(clone_state *state, const nir_alu_instr *alu)
{
nir_alu_instr *nalu = nir_alu_instr_create(state->ns, alu->op);
+ nalu->exact = alu->exact;
__clone_dst(state, &nalu->instr, &nalu->dest.dest, &alu->dest.dest);
nalu->dest.saturate = alu->dest.saturate;
diff --git a/src/compiler/nir/nir_constant_expressions.h b/src/compiler/nir/nir_constant_expressions.h
index 97997f2..201f278 100644
--- a/src/compiler/nir/nir_constant_expressions.h
+++ b/src/compiler/nir/nir_constant_expressions.h
@@ -28,4 +28,4 @@
#include "nir.h"
nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components,
- nir_const_value *src);
+ unsigned bit_size, nir_const_value *src);
diff --git a/src/compiler/nir/nir_constant_expressions.py b/src/compiler/nir/nir_constant_expressions.py
index 32784f6..e36dc48 100644
--- a/src/compiler/nir/nir_constant_expressions.py
+++ b/src/compiler/nir/nir_constant_expressions.py
@@ -1,4 +1,43 @@
#! /usr/bin/python2
+
+def type_has_size(type_):
+ return type_[-1:].isdigit()
+
+def type_sizes(type_):
+ if type_.endswith("8"):
+ return [8]
+ elif type_.endswith("16"):
+ return [16]
+ elif type_.endswith("32"):
+ return [32]
+ elif type_.endswith("64"):
+ return [64]
+ else:
+ return [32, 64]
+
+def type_add_size(type_, size):
+ if type_has_size(type_):
+ return type_
+ return type_ + str(size)
+
+def get_const_field(type_):
+ if type_ == "int32":
+ return "i32"
+ if type_ == "uint32":
+ return "u32"
+ if type_ == "int64":
+ return "i64"
+ if type_ == "uint64":
+ return "u64"
+ if type_ == "bool32":
+ return "u32"
+ if type_ == "float32":
+ return "f32"
+ if type_ == "float64":
+ return "f64"
+ raise Exception(str(type_))
+ assert(0)
+
template = """\
/*
* Copyright (C) 2014 Intel Corporation
@@ -205,110 +244,140 @@ unpack_half_1x16(uint16_t u)
}
/* Some typed vector structures to make things like src0.y work */
-% for type in ["float", "int", "uint", "bool"]:
-struct ${type}_vec {
- ${type} x;
- ${type} y;
- ${type} z;
- ${type} w;
+typedef float float32_t;
+typedef double float64_t;
+typedef bool bool32_t;
+% for type in ["float", "int", "uint"]:
+% for width in [32, 64]:
+struct ${type}${width}_vec {
+ ${type}${width}_t x;
+ ${type}${width}_t y;
+ ${type}${width}_t z;
+ ${type}${width}_t w;
};
% endfor
+% endfor
+
+struct bool32_vec {
+ bool x;
+ bool y;
+ bool z;
+ bool w;
+};
% for name, op in sorted(opcodes.iteritems()):
static nir_const_value
-evaluate_${name}(unsigned num_components, nir_const_value *_src)
+evaluate_${name}(unsigned num_components, unsigned bit_size,
+ nir_const_value *_src)
{
nir_const_value _dst_val = { { {0, 0, 0, 0} } };
- ## For each non-per-component input, create a variable srcN that
- ## contains x, y, z, and w elements which are filled in with the
- ## appropriately-typed values.
- % for j in range(op.num_inputs):
- % if op.input_sizes[j] == 0:
- <% continue %>
- % elif "src" + str(j) not in op.const_expr:
- ## Avoid unused variable warnings
- <% continue %>
- %endif
-
- struct ${op.input_types[j]}_vec src${j} = {
- % for k in range(op.input_sizes[j]):
- % if op.input_types[j] == "bool":
- _src[${j}].u[${k}] != 0,
- % else:
- _src[${j}].${op.input_types[j][:1]}[${k}],
- % endif
- % endfor
- };
- % endfor
+ switch (bit_size) {
+ % for bit_size in [32, 64]:
+ case ${bit_size}: {
+ <%
+ output_type = type_add_size(op.output_type, bit_size)
+ input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
+ %>
+
+ ## For each non-per-component input, create a variable srcN that
+ ## contains x, y, z, and w elements which are filled in with the
+ ## appropriately-typed values.
+ % for j in range(op.num_inputs):
+ % if op.input_sizes[j] == 0:
+ <% continue %>
+ % elif "src" + str(j) not in op.const_expr:
+ ## Avoid unused variable warnings
+ <% continue %>
+ %endif
- % if op.output_size == 0:
- ## For per-component instructions, we need to iterate over the
- ## components and apply the constant expression one component
- ## at a time.
- for (unsigned _i = 0; _i < num_components; _i++) {
- ## For each per-component input, create a variable srcN that
- ## contains the value of the current (_i'th) component.
- % for j in range(op.num_inputs):
- % if op.input_sizes[j] != 0:
- <% continue %>
- % elif "src" + str(j) not in op.const_expr:
- ## Avoid unused variable warnings
- <% continue %>
- % elif op.input_types[j] == "bool":
- bool src${j} = _src[${j}].u[_i] != 0;
+ struct ${input_types[j]}_vec src${j} = {
+ % for k in range(op.input_sizes[j]):
+ % if input_types[j] == "bool32":
+ _src[${j}].u32[${k}] != 0,
% else:
- ${op.input_types[j]} src${j} = _src[${j}].${op.input_types[j][:1]}[_i];
+ _src[${j}].${get_const_field(input_types[j])}[${k}],
% endif
% endfor
+ };
+ % endfor
+
+ % if op.output_size == 0:
+ ## For per-component instructions, we need to iterate over the
+ ## components and apply the constant expression one component
+ ## at a time.
+ for (unsigned _i = 0; _i < num_components; _i++) {
+ ## For each per-component input, create a variable srcN that
+ ## contains the value of the current (_i'th) component.
+ % for j in range(op.num_inputs):
+ % if op.input_sizes[j] != 0:
+ <% continue %>
+ % elif "src" + str(j) not in op.const_expr:
+ ## Avoid unused variable warnings
+ <% continue %>
+ % elif input_types[j] == "bool32":
+ bool src${j} = _src[${j}].u32[_i] != 0;
+ % else:
+ ${input_types[j]}_t src${j} =
+ _src[${j}].${get_const_field(input_types[j])}[_i];
+ % endif
+ % endfor
+
+ ## Create an appropriately-typed variable dst and assign the
+ ## result of the const_expr to it. If const_expr already contains
+ ## writes to dst, just include const_expr directly.
+ % if "dst" in op.const_expr:
+ ${output_type}_t dst;
+ ${op.const_expr}
+ % else:
+ ${output_type}_t dst = ${op.const_expr};
+ % endif
+
+ ## Store the current component of the actual destination to the
+ ## value of dst.
+ % if output_type == "bool32":
+ ## Sanitize the C value to a proper NIR bool
+ _dst_val.u32[_i] = dst ? NIR_TRUE : NIR_FALSE;
+ % else:
+ _dst_val.${get_const_field(output_type)}[_i] = dst;
+ % endif
+ }
+ % else:
+ ## In the non-per-component case, create a struct dst with
+ ## appropriately-typed elements x, y, z, and w and assign the result
+ ## of the const_expr to all components of dst, or include the
+ ## const_expr directly if it writes to dst already.
+ struct ${output_type}_vec dst;
- ## Create an appropriately-typed variable dst and assign the
- ## result of the const_expr to it. If const_expr already contains
- ## writes to dst, just include const_expr directly.
% if "dst" in op.const_expr:
- ${op.output_type} dst;
${op.const_expr}
% else:
- ${op.output_type} dst = ${op.const_expr};
+ ## Splat the value to all components. This way expressions which
+ ## write the same value to all components don't need to explicitly
+ ## write to dest. One such example is fnoise which has a
+ ## const_expr of 0.0f.
+ dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
% endif
- ## Store the current component of the actual destination to the
- ## value of dst.
- % if op.output_type == "bool":
- ## Sanitize the C value to a proper NIR bool
- _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE;
- % else:
- _dst_val.${op.output_type[:1]}[_i] = dst;
- % endif
- }
- % else:
- ## In the non-per-component case, create a struct dst with
- ## appropriately-typed elements x, y, z, and w and assign the result
- ## of the const_expr to all components of dst, or include the
- ## const_expr directly if it writes to dst already.
- struct ${op.output_type}_vec dst;
-
- % if "dst" in op.const_expr:
- ${op.const_expr}
- % else:
- ## Splat the value to all components. This way expressions which
- ## write the same value to all components don't need to explicitly
- ## write to dest. One such example is fnoise which has a
- ## const_expr of 0.0f.
- dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
+ ## For each component in the destination, copy the value of dst to
+ ## the actual destination.
+ % for k in range(op.output_size):
+ % if output_type == "bool32":
+ ## Sanitize the C value to a proper NIR bool
+ _dst_val.u32[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
+ % else:
+ _dst_val.${get_const_field(output_type)}[${k}] = dst.${"xyzw"[k]};
+ % endif
+ % endfor
% endif
- ## For each component in the destination, copy the value of dst to
- ## the actual destination.
- % for k in range(op.output_size):
- % if op.output_type == "bool":
- ## Sanitize the C value to a proper NIR bool
- _dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
- % else:
- _dst_val.${op.output_type[:1]}[${k}] = dst.${"xyzw"[k]};
- % endif
- % endfor
- % endif
+ break;
+ }
+ % endfor
+
+ default:
+ unreachable("unknown bit width");
+ }
return _dst_val;
}
@@ -316,12 +385,12 @@ evaluate_${name}(unsigned num_components, nir_const_value *_src)
nir_const_value
nir_eval_const_opcode(nir_op op, unsigned num_components,
- nir_const_value *src)
+ unsigned bit_width, nir_const_value *src)
{
switch (op) {
% for name in sorted(opcodes.iterkeys()):
case nir_op_${name}: {
- return evaluate_${name}(num_components, src);
+ return evaluate_${name}(num_components, bit_width, src);
break;
}
% endfor
@@ -333,4 +402,7 @@ nir_eval_const_opcode(nir_op op, unsigned num_components,
from nir_opcodes import opcodes
from mako.template import Template
-print Template(template).render(opcodes=opcodes)
+print Template(template).render(opcodes=opcodes, type_sizes=type_sizes,
+ type_has_size=type_has_size,
+ type_add_size=type_add_size,
+ get_const_field=get_const_field)
diff --git a/src/compiler/nir/nir_from_ssa.c b/src/compiler/nir/nir_from_ssa.c
index 8bc9f24..82317c2 100644
--- a/src/compiler/nir/nir_from_ssa.c
+++ b/src/compiler/nir/nir_from_ssa.c
@@ -342,7 +342,8 @@ isolate_phi_nodes_block(nir_block *block, void *void_state)
nir_parallel_copy_entry *entry = rzalloc(state->dead_ctx,
nir_parallel_copy_entry);
nir_ssa_dest_init(&pcopy->instr, &entry->dest,
- phi->dest.ssa.num_components, src->src.ssa->name);
+ phi->dest.ssa.num_components,
+ phi->dest.ssa.bit_size, src->src.ssa->name);
exec_list_push_tail(&pcopy->entries, &entry->node);
assert(src->src.is_ssa);
@@ -355,7 +356,8 @@ isolate_phi_nodes_block(nir_block *block, void *void_state)
nir_parallel_copy_entry *entry = rzalloc(state->dead_ctx,
nir_parallel_copy_entry);
nir_ssa_dest_init(&block_pcopy->instr, &entry->dest,
- phi->dest.ssa.num_components, phi->dest.ssa.name);
+ phi->dest.ssa.num_components, phi->dest.ssa.bit_size,
+ phi->dest.ssa.name);
exec_list_push_tail(&block_pcopy->entries, &entry->node);
nir_ssa_def_rewrite_uses(&phi->dest.ssa,
diff --git a/src/compiler/nir/nir_gs_count_vertices.c b/src/compiler/nir/nir_gs_count_vertices.c
index db15d16..3c1bd2a 100644
--- a/src/compiler/nir/nir_gs_count_vertices.c
+++ b/src/compiler/nir/nir_gs_count_vertices.c
@@ -77,13 +77,13 @@ nir_gs_count_vertices(const nir_shader *shader)
return -1;
if (count == -1)
- count = val->i[0];
+ count = val->i32[0];
/* We've found contradictory set_vertex_count intrinsics.
* This can happen if there are early-returns in main() and
* different paths emit different numbers of vertices.
*/
- if (count != val->i[0])
+ if (count != val->i32[0])
return -1;
}
}
diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c
index 159ded0..e244122 100644
--- a/src/compiler/nir/nir_instr_set.c
+++ b/src/compiler/nir/nir_instr_set.c
@@ -52,6 +52,7 @@ hash_alu(uint32_t hash, const nir_alu_instr *instr)
{
hash = HASH(hash, instr->op);
hash = HASH(hash, instr->dest.dest.ssa.num_components);
+ /* We explicitly don't hash instr->exact */
if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
assert(nir_op_infos[instr->op].num_inputs == 2);
@@ -81,9 +82,9 @@ hash_load_const(uint32_t hash, const nir_load_const_instr *instr)
{
hash = HASH(hash, instr->def.num_components);
- hash = _mesa_fnv32_1a_accumulate_block(hash, instr->value.f,
+ hash = _mesa_fnv32_1a_accumulate_block(hash, instr->value.f32,
instr->def.num_components
- * sizeof(instr->value.f[0]));
+ * sizeof(instr->value.f32[0]));
return hash;
}
@@ -267,6 +268,8 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components)
return false;
+ /* We explicitly don't compare the exact flags (alu1->exact, alu2->exact) */
+
if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
assert(nir_op_infos[alu1->op].num_inputs == 2);
return (nir_alu_srcs_equal(alu1, alu2, 0, 0) &&
@@ -322,8 +325,8 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
if (load1->def.num_components != load2->def.num_components)
return false;
- return memcmp(load1->value.f, load2->value.f,
- load1->def.num_components * sizeof(*load2->value.f)) == 0;
+ return memcmp(load1->value.f32, load2->value.f32,
+ load1->def.num_components * sizeof(*load2->value.f32)) == 0;
}
case nir_instr_type_phi: {
nir_phi_instr *phi1 = nir_instr_as_phi(instr1);
@@ -496,8 +499,17 @@ nir_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr)
struct set_entry *entry = _mesa_set_search(instr_set, instr);
if (entry) {
nir_ssa_def *def = nir_instr_get_dest_ssa_def(instr);
- nir_ssa_def *new_def =
- nir_instr_get_dest_ssa_def((nir_instr *) entry->key);
+ nir_instr *match = (nir_instr *) entry->key;
+ nir_ssa_def *new_def = nir_instr_get_dest_ssa_def(match);
+
+ /* It's safe to replace an exact instruction with an inexact one as
+ * long as we make it exact. If we got here, the two instructions are
+ * exactly identical in every other way so, once we've set the exact
+ * bit, they are the same.
+ */
+ if (instr->type == nir_instr_type_alu && nir_instr_as_alu(instr)->exact)
+ nir_instr_as_alu(match)->exact = true;
+
nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(new_def));
return true;
}
diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c
index 312d2f9..e8ba640 100644
--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -31,9 +31,11 @@
*/
static void
-nir_alu_ssa_dest_init(nir_alu_instr *instr, unsigned num_components)
+nir_alu_ssa_dest_init(nir_alu_instr *instr, unsigned num_components,
+ unsigned bit_size)
{
- nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL);
+ nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components,
+ bit_size, NULL);
instr->dest.write_mask = (1 << num_components) - 1;
}
@@ -46,7 +48,7 @@ lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op,
nir_ssa_def *last = NULL;
for (unsigned i = 0; i < num_components; i++) {
nir_alu_instr *chan = nir_alu_instr_create(builder->shader, chan_op);
- nir_alu_ssa_dest_init(chan, 1);
+ nir_alu_ssa_dest_init(chan, 1, instr->dest.dest.ssa.bit_size);
nir_alu_src_copy(&chan->src[0], &instr->src[0], chan);
chan->src[0].swizzle[0] = chan->src[0].swizzle[i];
if (nir_op_infos[chan_op].num_inputs > 1) {
@@ -80,6 +82,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
assert(instr->dest.write_mask != 0);
b->cursor = nir_before_instr(&instr->instr);
+ b->exact = instr->exact;
#define LOWER_REDUCTION(name, chan, merge) \
case name##2: \
@@ -220,7 +223,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
lower->src[i].swizzle[j] = instr->src[i].swizzle[src_chan];
}
- nir_alu_ssa_dest_init(lower, 1);
+ nir_alu_ssa_dest_init(lower, 1, instr->dest.dest.ssa.bit_size);
lower->dest.saturate = instr->dest.saturate;
comps[chan] = &lower->dest.dest.ssa;
diff --git a/src/compiler/nir/nir_lower_atomics.c b/src/compiler/nir/nir_lower_atomics.c
index eefcb55..70381a7 100644
--- a/src/compiler/nir/nir_lower_atomics.c
+++ b/src/compiler/nir/nir_lower_atomics.c
@@ -75,7 +75,7 @@ lower_instr(nir_intrinsic_instr *instr,
state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index);
nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1);
- offset_const->value.u[0] = instr->variables[0]->var->data.offset;
+ offset_const->value.u32[0] = instr->variables[0]->var->data.offset;
nir_instr_insert_before(&instr->instr, &offset_const->instr);
@@ -90,17 +90,17 @@ lower_instr(nir_intrinsic_instr *instr,
unsigned child_array_elements = tail->child != NULL ?
glsl_get_aoa_size(tail->type) : 1;
- offset_const->value.u[0] += deref_array->base_offset *
+ offset_const->value.u32[0] += deref_array->base_offset *
child_array_elements * ATOMIC_COUNTER_SIZE;
if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
nir_load_const_instr *atomic_counter_size =
nir_load_const_instr_create(mem_ctx, 1);
- atomic_counter_size->value.u[0] = child_array_elements * ATOMIC_COUNTER_SIZE;
+ atomic_counter_size->value.u32[0] = child_array_elements * ATOMIC_COUNTER_SIZE;
nir_instr_insert_before(&instr->instr, &atomic_counter_size->instr);
nir_alu_instr *mul = nir_alu_instr_create(mem_ctx, nir_op_imul);
- nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
+ nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, 32, NULL);
mul->dest.write_mask = 0x1;
nir_src_copy(&mul->src[0].src, &deref_array->indirect, mul);
mul->src[1].src.is_ssa = true;
@@ -108,7 +108,7 @@ lower_instr(nir_intrinsic_instr *instr,
nir_instr_insert_before(&instr->instr, &mul->instr);
nir_alu_instr *add = nir_alu_instr_create(mem_ctx, nir_op_iadd);
- nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
+ nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, 32, NULL);
add->dest.write_mask = 0x1;
add->src[0].src.is_ssa = true;
add->src[0].src.ssa = &mul->dest.dest.ssa;
@@ -125,7 +125,7 @@ lower_instr(nir_intrinsic_instr *instr,
if (instr->dest.is_ssa) {
nir_ssa_dest_init(&new_instr->instr, &new_instr->dest,
- instr->dest.ssa.num_components, NULL);
+ instr->dest.ssa.num_components, 32, NULL);
nir_ssa_def_rewrite_uses(&instr->dest.ssa,
nir_src_for_ssa(&new_instr->dest.ssa));
} else {
diff --git a/src/compiler/nir/nir_lower_clip.c b/src/compiler/nir/nir_lower_clip.c
index bcbad53..c711230 100644
--- a/src/compiler/nir/nir_lower_clip.c
+++ b/src/compiler/nir/nir_lower_clip.c
@@ -88,7 +88,7 @@ load_clipdist_input(nir_builder *b, nir_variable *in, nir_ssa_def **val)
load->num_components = 4;
nir_intrinsic_set_base(load, in->data.driver_location);
load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
- nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
+ nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
nir_builder_instr_insert(b, &load->instr);
val[0] = nir_channel(b, &load->dest.ssa, 0);
diff --git a/src/compiler/nir/nir_lower_indirect_derefs.c b/src/compiler/nir/nir_lower_indirect_derefs.c
index a4affa7..62b8c84 100644
--- a/src/compiler/nir/nir_lower_indirect_derefs.c
+++ b/src/compiler/nir/nir_lower_indirect_derefs.c
@@ -75,8 +75,9 @@ emit_indirect_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr,
if (src == NULL) {
/* We're a load. We need to insert a phi node */
nir_phi_instr *phi = nir_phi_instr_create(b->shader);
+ unsigned bit_size = then_dest->bit_size;
nir_ssa_dest_init(&phi->instr, &phi->dest,
- then_dest->num_components, NULL);
+ then_dest->num_components, bit_size, NULL);
nir_phi_src *src0 = ralloc(phi, nir_phi_src);
src0->pred = nir_cf_node_as_block(nir_if_last_then_node(if_stmt));
@@ -125,8 +126,9 @@ emit_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr,
load->num_components = orig_instr->num_components;
load->variables[0] =
nir_deref_as_var(nir_copy_deref(load, &deref->deref));
+ unsigned bit_size = orig_instr->dest.ssa.bit_size;
nir_ssa_dest_init(&load->instr, &load->dest,
- load->num_components, NULL);
+ load->num_components, bit_size, NULL);
nir_builder_instr_insert(b, &load->instr);
*dest = &load->dest.ssa;
} else {
diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index 9d502ee..a30061d 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -289,7 +289,8 @@ nir_lower_io_block(nir_block *block, void *void_state)
if (intrin->dest.is_ssa) {
nir_ssa_dest_init(&load->instr, &load->dest,
- intrin->num_components, NULL);
+ intrin->num_components,
+ intrin->dest.ssa.bit_size, NULL);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
nir_src_for_ssa(&load->dest.ssa));
} else {
@@ -369,7 +370,8 @@ nir_lower_io_block(nir_block *block, void *void_state)
if (intrin->dest.is_ssa) {
nir_ssa_dest_init(&atomic->instr, &atomic->dest,
- intrin->dest.ssa.num_components, NULL);
+ intrin->dest.ssa.num_components,
+ intrin->dest.ssa.bit_size, NULL);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
nir_src_for_ssa(&atomic->dest.ssa));
} else {
diff --git a/src/compiler/nir/nir_lower_load_const_to_scalar.c b/src/compiler/nir/nir_lower_load_const_to_scalar.c
index 1eeed13..b5df464 100644
--- a/src/compiler/nir/nir_lower_load_const_to_scalar.c
+++ b/src/compiler/nir/nir_lower_load_const_to_scalar.c
@@ -49,7 +49,7 @@ lower_load_const_instr_scalar(nir_load_const_instr *lower)
nir_ssa_def *loads[4];
for (unsigned i = 0; i < lower->def.num_components; i++) {
nir_load_const_instr *load_comp = nir_load_const_instr_create(b.shader, 1);
- load_comp->value.u[0] = lower->value.u[i];
+ load_comp->value.u32[0] = lower->value.u32[i];
nir_builder_instr_insert(&b, &load_comp->instr);
loads[i] = &load_comp->def;
}
diff --git a/src/compiler/nir/nir_lower_locals_to_regs.c b/src/compiler/nir/nir_lower_locals_to_regs.c
index 45036fa..0438802 100644
--- a/src/compiler/nir/nir_lower_locals_to_regs.c
+++ b/src/compiler/nir/nir_lower_locals_to_regs.c
@@ -161,7 +161,7 @@ get_deref_reg_src(nir_deref_var *deref, nir_instr *instr,
if (src.reg.indirect) {
nir_load_const_instr *load_const =
nir_load_const_instr_create(state->shader, 1);
- load_const->value.u[0] = glsl_get_length(parent_type);
+ load_const->value.u32[0] = glsl_get_length(parent_type);
nir_instr_insert_before(instr, &load_const->instr);
nir_alu_instr *mul = nir_alu_instr_create(state->shader, nir_op_imul);
@@ -169,7 +169,7 @@ get_deref_reg_src(nir_deref_var *deref, nir_instr *instr,
mul->src[1].src.is_ssa = true;
mul->src[1].src.ssa = &load_const->def;
mul->dest.write_mask = 1;
- nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
+ nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, 32, NULL);
nir_instr_insert_before(instr, &mul->instr);
src.reg.indirect->is_ssa = true;
@@ -187,7 +187,7 @@ get_deref_reg_src(nir_deref_var *deref, nir_instr *instr,
add->src[0].src = *src.reg.indirect;
nir_src_copy(&add->src[1].src, &deref_array->indirect, add);
add->dest.write_mask = 1;
- nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
+ nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, 32, NULL);
nir_instr_insert_before(instr, &add->instr);
src.reg.indirect->is_ssa = true;
@@ -221,7 +221,8 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
mov->dest.write_mask = (1 << intrin->num_components) - 1;
if (intrin->dest.is_ssa) {
nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
- intrin->num_components, NULL);
+ intrin->num_components,
+ intrin->dest.ssa.bit_size, NULL);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
nir_src_for_ssa(&mov->dest.dest.ssa));
} else {
diff --git a/src/compiler/nir/nir_lower_phis_to_scalar.c b/src/compiler/nir/nir_lower_phis_to_scalar.c
index dd2abcf..026c866 100644
--- a/src/compiler/nir/nir_lower_phis_to_scalar.c
+++ b/src/compiler/nir/nir_lower_phis_to_scalar.c
@@ -188,6 +188,8 @@ lower_phis_to_scalar_block(nir_block *block, void *void_state)
if (!should_lower_phi(phi, state))
continue;
+ unsigned bit_size = phi->dest.ssa.bit_size;
+
/* Create a vecN operation to combine the results. Most of these
* will be redundant, but copy propagation should clean them up for
* us. No need to add the complexity here.
@@ -202,12 +204,14 @@ lower_phis_to_scalar_block(nir_block *block, void *void_state)
nir_alu_instr *vec = nir_alu_instr_create(state->mem_ctx, vec_op);
nir_ssa_dest_init(&vec->instr, &vec->dest.dest,
- phi->dest.ssa.num_components, NULL);
+ phi->dest.ssa.num_components,
+ bit_size, NULL);
vec->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
for (unsigned i = 0; i < phi->dest.ssa.num_components; i++) {
nir_phi_instr *new_phi = nir_phi_instr_create(state->mem_ctx);
- nir_ssa_dest_init(&new_phi->instr, &new_phi->dest, 1, NULL);
+ nir_ssa_dest_init(&new_phi->instr, &new_phi->dest, 1,
+ phi->dest.ssa.bit_size, NULL);
vec->src[i].src = nir_src_for_ssa(&new_phi->dest.ssa);
@@ -215,7 +219,7 @@ lower_phis_to_scalar_block(nir_block *block, void *void_state)
/* We need to insert a mov to grab the i'th component of src */
nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx,
nir_op_imov);
- nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, NULL);
+ nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, bit_size, NULL);
mov->dest.write_mask = 1;
nir_src_copy(&mov->src[0].src, &src->src, state->mem_ctx);
mov->src[0].swizzle[0] = i;
diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c
index 79f6bed..c1cd139 100644
--- a/src/compiler/nir/nir_lower_system_values.c
+++ b/src/compiler/nir/nir_lower_system_values.c
@@ -65,9 +65,9 @@ convert_block(nir_block *block, void *void_state)
*/
nir_const_value local_size;
- local_size.u[0] = b->shader->info.cs.local_size[0];
- local_size.u[1] = b->shader->info.cs.local_size[1];
- local_size.u[2] = b->shader->info.cs.local_size[2];
+ local_size.u32[0] = b->shader->info.cs.local_size[0];
+ local_size.u32[1] = b->shader->info.cs.local_size[1];
+ local_size.u32[2] = b->shader->info.cs.local_size[2];
nir_ssa_def *group_id =
nir_load_system_value(b, nir_intrinsic_load_work_group_id, 0);
diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c
index 806acd8..4999603 100644
--- a/src/compiler/nir/nir_lower_tex.c
+++ b/src/compiler/nir/nir_lower_tex.c
@@ -140,7 +140,7 @@ get_texture_size(nir_builder *b, nir_tex_instr *tex)
txs->src[0].src = nir_src_for_ssa(nir_imm_int(b, 0));
txs->src[0].src_type = nir_tex_src_lod;
- nir_ssa_dest_init(&txs->instr, &txs->dest, 2, NULL);
+ nir_ssa_dest_init(&txs->instr, &txs->dest, 2, 32, NULL);
nir_builder_instr_insert(b, &txs->instr);
return nir_i2f(b, &txs->dest.ssa);
@@ -223,13 +223,13 @@ get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
memset(&v, 0, sizeof(v));
if (swizzle_val == 4) {
- v.u[0] = v.u[1] = v.u[2] = v.u[3] = 0;
+ v.u32[0] = v.u32[1] = v.u32[2] = v.u32[3] = 0;
} else {
assert(swizzle_val == 5);
if (type == nir_type_float)
- v.f[0] = v.f[1] = v.f[2] = v.f[3] = 1.0;
+ v.f32[0] = v.f32[1] = v.f32[2] = v.f32[3] = 1.0;
else
- v.u[0] = v.u[1] = v.u[2] = v.u[3] = 1;
+ v.u32[0] = v.u32[1] = v.u32[2] = v.u32[3] = 1;
}
return nir_build_imm(b, 4, v);
diff --git a/src/compiler/nir/nir_lower_two_sided_color.c b/src/compiler/nir/nir_lower_two_sided_color.c
index fe3507c..c7fb67e 100644
--- a/src/compiler/nir/nir_lower_two_sided_color.c
+++ b/src/compiler/nir/nir_lower_two_sided_color.c
@@ -74,7 +74,7 @@ load_input(nir_builder *b, nir_variable *in)
load->num_components = 4;
nir_intrinsic_set_base(load, in->data.driver_location);
load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
- nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
+ nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
nir_builder_instr_insert(b, &load->instr);
return &load->dest.ssa;
diff --git a/src/compiler/nir/nir_lower_var_copies.c b/src/compiler/nir/nir_lower_var_copies.c
index 7db9839..c994f0f 100644
--- a/src/compiler/nir/nir_lower_var_copies.c
+++ b/src/compiler/nir/nir_lower_var_copies.c
@@ -116,12 +116,15 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
assert(src_tail->type == dest_tail->type);
unsigned num_components = glsl_get_vector_elements(src_tail->type);
+ unsigned bit_size =
+ glsl_get_bit_size(glsl_get_base_type(src_tail->type));
nir_intrinsic_instr *load =
nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_load_var);
load->num_components = num_components;
load->variables[0] = nir_deref_as_var(nir_copy_deref(load, &src_head->deref));
- nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
+ nir_ssa_dest_init(&load->instr, &load->dest, num_components, bit_size,
+ NULL);
nir_instr_insert_before(&copy_instr->instr, &load->instr);
diff --git a/src/compiler/nir/nir_lower_vars_to_ssa.c b/src/compiler/nir/nir_lower_vars_to_ssa.c
index a3f3fcf..9f9e454 100644
--- a/src/compiler/nir/nir_lower_vars_to_ssa.c
+++ b/src/compiler/nir/nir_lower_vars_to_ssa.c
@@ -505,6 +505,7 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state)
nir_ssa_undef_instr *undef =
nir_ssa_undef_instr_create(state->shader,
intrin->num_components);
+ undef->def.bit_size = intrin->dest.ssa.bit_size;
nir_instr_insert_before(&intrin->instr, &undef->instr);
nir_instr_remove(&intrin->instr);
@@ -528,7 +529,8 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state)
mov->dest.write_mask = (1 << intrin->num_components) - 1;
nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
- intrin->num_components, NULL);
+ intrin->num_components,
+ intrin->dest.ssa.bit_size, NULL);
nir_instr_insert_before(&intrin->instr, &mov->instr);
nir_instr_remove(&intrin->instr);
@@ -719,6 +721,7 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl)
node->pb_value =
nir_phi_builder_add_value(state.phi_builder,
glsl_get_vector_elements(node->type),
+ glsl_get_bit_size(glsl_get_base_type(node->type)),
store_blocks);
if (node->deref->var->constant_initializer) {
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 60ade4a..d6b658d 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -90,8 +90,12 @@ class Opcode(object):
# helper variables for strings
tfloat = "float"
tint = "int"
-tbool = "bool"
+tbool = "bool32"
tuint = "uint"
+tfloat32 = "float32"
+tint32 = "int32"
+tuint32 = "uint32"
+tfloat64 = "float64"
commutative = "commutative "
associative = "associative "
@@ -155,57 +159,57 @@ unop("frsq", tfloat, "1.0f / sqrtf(src0)")
unop("fsqrt", tfloat, "sqrtf(src0)")
unop("fexp2", tfloat, "exp2f(src0)")
unop("flog2", tfloat, "log2f(src0)")
-unop_convert("f2i", tint, tfloat, "src0") # Float-to-integer conversion.
-unop_convert("f2u", tuint, tfloat, "src0") # Float-to-unsigned conversion
-unop_convert("i2f", tfloat, tint, "src0") # Integer-to-float conversion.
+unop_convert("f2i", tint32, tfloat32, "src0") # Float-to-integer conversion.
+unop_convert("f2u", tuint32, tfloat32, "src0") # Float-to-unsigned conversion
+unop_convert("i2f", tfloat32, tint32, "src0") # Integer-to-float conversion.
# Float-to-boolean conversion
-unop_convert("f2b", tbool, tfloat, "src0 != 0.0f")
+unop_convert("f2b", tbool, tfloat32, "src0 != 0.0f")
# Boolean-to-float conversion
-unop_convert("b2f", tfloat, tbool, "src0 ? 1.0f : 0.0f")
+unop_convert("b2f", tfloat32, tbool, "src0 ? 1.0f : 0.0f")
# Int-to-boolean conversion
-unop_convert("i2b", tbool, tint, "src0 != 0")
-unop_convert("b2i", tint, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion
-unop_convert("u2f", tfloat, tuint, "src0") # Unsigned-to-float conversion.
+unop_convert("i2b", tbool, tint32, "src0 != 0")
+unop_convert("b2i", tint32, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion
+unop_convert("u2f", tfloat32, tuint32, "src0") # Unsigned-to-float conversion.
# Unary floating-point rounding operations.
-unop("ftrunc", tfloat, "truncf(src0)")
-unop("fceil", tfloat, "ceilf(src0)")
-unop("ffloor", tfloat, "floorf(src0)")
-unop("ffract", tfloat, "src0 - floorf(src0)")
-unop("fround_even", tfloat, "_mesa_roundevenf(src0)")
+unop("ftrunc", tfloat, "bit_size == 64 ? trunc(src0) : truncf(src0)")
+unop("fceil", tfloat, "bit_size == 64 ? ceil(src0) : ceilf(src0)")
+unop("ffloor", tfloat, "bit_size == 64 ? floor(src0) : floorf(src0)")
+unop("ffract", tfloat, "src0 - (bit_size == 64 ? floor(src0) : floorf(src0))")
+unop("fround_even", tfloat, "bit_size == 64 ? _mesa_roundeven(src0) : _mesa_roundevenf(src0)")
unop("fquantize2f16", tfloat, "(fabs(src0) < ldexpf(1.0, -14)) ? copysignf(0.0f, src0) : _mesa_half_to_float(_mesa_float_to_half(src0))")
# Trigonometric operations.
-unop("fsin", tfloat, "sinf(src0)")
-unop("fcos", tfloat, "cosf(src0)")
+unop("fsin", tfloat, "bit_size == 64 ? sin(src0) : sinf(src0)")
+unop("fcos", tfloat, "bit_size == 64 ? cos(src0) : cosf(src0)")
# Partial derivatives.
-unop("fddx", tfloat, "0.0f") # the derivative of a constant is 0.
-unop("fddy", tfloat, "0.0f")
-unop("fddx_fine", tfloat, "0.0f")
-unop("fddy_fine", tfloat, "0.0f")
-unop("fddx_coarse", tfloat, "0.0f")
-unop("fddy_coarse", tfloat, "0.0f")
+unop("fddx", tfloat, "0.0") # the derivative of a constant is 0.
+unop("fddy", tfloat, "0.0")
+unop("fddx_fine", tfloat, "0.0")
+unop("fddy_fine", tfloat, "0.0")
+unop("fddx_coarse", tfloat, "0.0")
+unop("fddy_coarse", tfloat, "0.0")
# Floating point pack and unpack operations.
def pack_2x16(fmt):
- unop_horiz("pack_" + fmt + "_2x16", 1, tuint, 2, tfloat, """
+ unop_horiz("pack_" + fmt + "_2x16", 1, tuint32, 2, tfloat32, """
dst.x = (uint32_t) pack_fmt_1x16(src0.x);
dst.x |= ((uint32_t) pack_fmt_1x16(src0.y)) << 16;
""".replace("fmt", fmt))
def pack_4x8(fmt):
- unop_horiz("pack_" + fmt + "_4x8", 1, tuint, 4, tfloat, """
+ unop_horiz("pack_" + fmt + "_4x8", 1, tuint32, 4, tfloat32, """
dst.x = (uint32_t) pack_fmt_1x8(src0.x);
dst.x |= ((uint32_t) pack_fmt_1x8(src0.y)) << 8;
dst.x |= ((uint32_t) pack_fmt_1x8(src0.z)) << 16;
@@ -213,13 +217,13 @@ dst.x |= ((uint32_t) pack_fmt_1x8(src0.w)) << 24;
""".replace("fmt", fmt))
def unpack_2x16(fmt):
- unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tuint, """
+ unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat32, 1, tuint32, """
dst.x = unpack_fmt_1x16((uint16_t)(src0.x & 0xffff));
-dst.y = unpack_fmt_1x16((uint16_t)(src0.x << 16));
+dst.y = unpack_fmt_1x16((uint16_t)(src0.x >> 16)); /* high 16 bits; "<< 16" always truncated to 0 */
""".replace("fmt", fmt))
def unpack_4x8(fmt):
- unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tuint, """
+ unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat32, 1, tuint32, """
dst.x = unpack_fmt_1x8((uint8_t)(src0.x & 0xff));
dst.y = unpack_fmt_1x8((uint8_t)((src0.x >> 8) & 0xff));
dst.z = unpack_fmt_1x8((uint8_t)((src0.x >> 16) & 0xff));
@@ -238,11 +242,11 @@ unpack_2x16("unorm")
unpack_4x8("unorm")
unpack_2x16("half")
-unop_horiz("pack_uvec2_to_uint", 1, tuint, 2, tuint, """
+unop_horiz("pack_uvec2_to_uint", 1, tuint32, 2, tuint32, """
-dst.x = (src0.x & 0xffff) | (src0.y >> 16);
+dst.x = (src0.x & 0xffff) | (src0.y << 16); /* src0.y supplies bits 16..31 of the packed word */
""")
-unop_horiz("pack_uvec4_to_uint", 1, tuint, 4, tuint, """
+unop_horiz("pack_uvec4_to_uint", 1, tuint32, 4, tuint32, """
dst.x = (src0.x << 0) |
(src0.y << 8) |
(src0.z << 16) |
@@ -252,22 +256,22 @@ dst.x = (src0.x << 0) |
# Lowered floating point unpacking operations.
-unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tuint,
+unop_horiz("unpack_half_2x16_split_x", 1, tfloat32, 1, tuint32,
"unpack_half_1x16((uint16_t)(src0.x & 0xffff))")
-unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tuint,
+unop_horiz("unpack_half_2x16_split_y", 1, tfloat32, 1, tuint32,
"unpack_half_1x16((uint16_t)(src0.x >> 16))")
# Bit operations, part of ARB_gpu_shader5.
-unop("bitfield_reverse", tuint, """
+unop("bitfield_reverse", tuint32, """
/* we're not winning any awards for speed here, but that's ok */
dst = 0;
for (unsigned bit = 0; bit < 32; bit++)
dst |= ((src0 >> bit) & 1) << (31 - bit);
""")
-unop("bit_count", tuint, """
+unop("bit_count", tuint32, """
dst = 0;
for (unsigned bit = 0; bit < 32; bit++) {
if ((src0 >> bit) & 1)
@@ -275,7 +279,7 @@ for (unsigned bit = 0; bit < 32; bit++) {
}
""")
-unop_convert("ufind_msb", tint, tuint, """
+unop_convert("ufind_msb", tint32, tuint32, """
dst = -1;
for (int bit = 31; bit > 0; bit--) {
if ((src0 >> bit) & 1) {
@@ -285,7 +289,7 @@ for (int bit = 31; bit > 0; bit--) {
}
""")
-unop("ifind_msb", tint, """
+unop("ifind_msb", tint32, """
dst = -1;
for (int bit = 31; bit >= 0; bit--) {
/* If src0 < 0, we're looking for the first 0 bit.
@@ -299,7 +303,7 @@ for (int bit = 31; bit >= 0; bit--) {
}
""")
-unop("find_lsb", tint, """
+unop("find_lsb", tint32, """
dst = -1;
for (unsigned bit = 0; bit < 32; bit++) {
if ((src0 >> bit) & 1) {
@@ -359,10 +363,10 @@ binop("fmul", tfloat, commutative + associative, "src0 * src1")
# low 32-bits of signed/unsigned integer multiply
binop("imul", tint, commutative + associative, "src0 * src1")
# high 32-bits of signed integer multiply
-binop("imul_high", tint, commutative,
+binop("imul_high", tint32, commutative,
"(int32_t)(((int64_t) src0 * (int64_t) src1) >> 32)")
# high 32-bits of unsigned integer multiply
-binop("umul_high", tuint, commutative,
+binop("umul_high", tuint32, commutative,
"(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)")
binop("fdiv", tfloat, "", "src0 / src1")
@@ -427,18 +431,18 @@ binop_reduce("bany_inequal", 1, tbool, tint, "{src0} != {src1}",
# non-integer-aware GLSL-style comparisons that return 0.0 or 1.0
-binop_reduce("fall_equal", 1, tfloat, tfloat, "{src0} == {src1}",
+binop_reduce("fall_equal", 1, tfloat32, tfloat32, "{src0} == {src1}",
"{src0} && {src1}", "{src} ? 1.0f : 0.0f")
-binop_reduce("fany_nequal", 1, tfloat, tfloat, "{src0} != {src1}",
+binop_reduce("fany_nequal", 1, tfloat32, tfloat32, "{src0} != {src1}",
"{src0} || {src1}", "{src} ? 1.0f : 0.0f")
# These comparisons for integer-less hardware return 1.0 and 0.0 for true
# and false respectively
-binop("slt", tfloat, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than
-binop("sge", tfloat, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal
-binop("seq", tfloat, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal
-binop("sne", tfloat, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal
+binop("slt", tfloat32, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than
+binop("sge", tfloat32, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal
+binop("seq", tfloat32, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal
+binop("sne", tfloat32, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal
binop("ishl", tint, "", "src0 << src1")
@@ -461,11 +465,11 @@ binop("ixor", tuint, commutative + associative, "src0 ^ src1")
# These use (src != 0.0) for testing the truth of the input, and output 1.0
# for true and 0.0 for false
-binop("fand", tfloat, commutative,
+binop("fand", tfloat32, commutative,
"((src0 != 0.0f) && (src1 != 0.0f)) ? 1.0f : 0.0f")
-binop("for", tfloat, commutative,
+binop("for", tfloat32, commutative,
"((src0 != 0.0f) || (src1 != 0.0f)) ? 1.0f : 0.0f")
-binop("fxor", tfloat, commutative,
+binop("fxor", tfloat32, commutative,
"(src0 != 0.0f && src1 == 0.0f) || (src0 == 0.0f && src1 != 0.0f) ? 1.0f : 0.0f")
binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
@@ -487,7 +491,7 @@ binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0")
binop("umax", tuint, commutative + associative, "src1 > src0 ? src1 : src0")
# Saturated vector add for 4 8bit ints.
-binop("usadd_4x8", tint, commutative + associative, """
+binop("usadd_4x8", tint32, commutative + associative, """
dst = 0;
for (int i = 0; i < 32; i += 8) {
dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i;
@@ -495,7 +499,7 @@ for (int i = 0; i < 32; i += 8) {
""")
# Saturated vector subtract for 4 8bit ints.
-binop("ussub_4x8", tint, "", """
+binop("ussub_4x8", tint32, "", """
dst = 0;
for (int i = 0; i < 32; i += 8) {
int src0_chan = (src0 >> i) & 0xff;
@@ -506,7 +510,7 @@ for (int i = 0; i < 32; i += 8) {
""")
# vector min for 4 8bit ints.
-binop("umin_4x8", tint, commutative + associative, """
+binop("umin_4x8", tint32, commutative + associative, """
dst = 0;
for (int i = 0; i < 32; i += 8) {
dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
@@ -514,7 +518,7 @@ for (int i = 0; i < 32; i += 8) {
""")
# vector max for 4 8bit ints.
-binop("umax_4x8", tint, commutative + associative, """
+binop("umax_4x8", tint32, commutative + associative, """
dst = 0;
for (int i = 0; i < 32; i += 8) {
dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
@@ -522,7 +526,7 @@ for (int i = 0; i < 32; i += 8) {
""")
# unorm multiply: (a * b) / 255.
-binop("umul_unorm_4x8", tint, commutative + associative, """
+binop("umul_unorm_4x8", tint32, commutative + associative, """
dst = 0;
for (int i = 0; i < 32; i += 8) {
int src0_chan = (src0 >> i) & 0xff;
@@ -531,15 +535,15 @@ for (int i = 0; i < 32; i += 8) {
}
""")
-binop("fpow", tfloat, "", "powf(src0, src1)")
+binop("fpow", tfloat, "", "bit_size == 64 ? pow(src0, src1) : powf(src0, src1)") # 64-bit sources are doubles: use pow(); 32-bit use powf()
-binop_horiz("pack_half_2x16_split", 1, tuint, 1, tfloat, 1, tfloat,
+binop_horiz("pack_half_2x16_split", 1, tuint32, 1, tfloat32, 1, tfloat32,
"pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)")
# bfm implements the behavior of the first operation of the SM5 "bfi" assembly
# and that of the "bfi1" i965 instruction. That is, it has undefined behavior
# if either of its arguments are 32.
-binop_convert("bfm", tuint, tint, "", """
+binop_convert("bfm", tuint32, tint32, "", """
int bits = src0, offset = src1;
if (offset < 0 || bits < 0 || offset > 31 || bits > 31 || offset + bits > 32)
dst = 0; /* undefined */
@@ -548,7 +552,7 @@ else
""")
opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint], "", """
-dst = ldexpf(src0, src1);
+dst = (bit_size == 64) ? ldexp(src0, src1) : ldexpf(src0, src1);
/* flush denormals to zero. */
if (!isnormal(dst))
dst = copysignf(0.0f, src0);
@@ -588,12 +592,12 @@ triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2")
# bools (0.0 vs 1.0) and one for integer bools (0 vs ~0).
-triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2")
+triop("fcsel", tfloat32, "(src0 != 0.0f) ? src1 : src2")
opcode("bcsel", 0, tuint, [0, 0, 0],
[tbool, tuint, tuint], "", "src0 ? src1 : src2")
# SM5 bfi assembly
-triop("bfi", tuint, """
+triop("bfi", tuint32, """
unsigned mask = src0, insert = src1, base = src2;
if (mask == 0) {
dst = base;
@@ -608,8 +612,8 @@ if (mask == 0) {
""")
# SM5 ubfe/ibfe assembly
-opcode("ubfe", 0, tuint,
- [0, 0, 0], [tuint, tint, tint], "", """
+opcode("ubfe", 0, tuint32,
+ [0, 0, 0], [tuint32, tint32, tint32], "", """
unsigned base = src0;
int offset = src1, bits = src2;
if (bits == 0) {
@@ -622,8 +626,8 @@ if (bits == 0) {
dst = base >> offset;
}
""")
-opcode("ibfe", 0, tint,
- [0, 0, 0], [tint, tint, tint], "", """
+opcode("ibfe", 0, tint32,
+ [0, 0, 0], [tint32, tint32, tint32], "", """
int base = src0;
int offset = src1, bits = src2;
if (bits == 0) {
@@ -638,8 +642,8 @@ if (bits == 0) {
""")
# GLSL bitfieldExtract()
-opcode("ubitfield_extract", 0, tuint,
- [0, 0, 0], [tuint, tint, tint], "", """
+opcode("ubitfield_extract", 0, tuint32,
+ [0, 0, 0], [tuint32, tint32, tint32], "", """
unsigned base = src0;
int offset = src1, bits = src2;
if (bits == 0) {
@@ -650,8 +654,8 @@ if (bits == 0) {
dst = (base >> offset) & ((1ull << bits) - 1);
}
""")
-opcode("ibitfield_extract", 0, tint,
- [0, 0, 0], [tint, tint, tint], "", """
+opcode("ibitfield_extract", 0, tint32,
+ [0, 0, 0], [tint32, tint32, tint32], "", """
int base = src0;
int offset = src1, bits = src2;
if (bits == 0) {
@@ -678,8 +682,8 @@ def quadop_horiz(name, output_size, src1_size, src2_size, src3_size,
[tuint, tuint, tuint, tuint],
"", const_expr)
-opcode("bitfield_insert", 0, tuint, [0, 0, 0, 0],
- [tuint, tuint, tint, tint], "", """
+opcode("bitfield_insert", 0, tuint32, [0, 0, 0, 0],
+ [tuint32, tuint32, tint32, tint32], "", """
unsigned base = src0, insert = src1;
int offset = src2, bits = src3;
if (bits == 0) {
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 54f7d86..ed21c5d 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -35,10 +35,17 @@ d = 'd'
# Written in the form (<search>, <replace>) where <search> is an expression
# and <replace> is either an expression or a value. An expression is
-# defined as a tuple of the form (<op>, <src0>, <src1>, <src2>, <src3>)
+# defined as a tuple of the form ([~]<op>, <src0>, <src1>, <src2>, <src3>)
# where each source is either an expression or a value. A value can be
# either a numeric constant or a string representing a variable name.
#
+# If the opcode in a search expression is prefixed by a '~' character, this
+# indicates that the operation is inexact. Such operations will only get
+# applied to SSA values that do not have the exact bit set. This should be
+# used by any optimizations that are not bit-for-bit exact. It should not,
+# however, be used for backend-requested lowering operations as those need to
+# happen regardless of precision.
+#
# Variable names are specified as "[#]name[@type]" where "#" inicates that
# the given variable will only match constants and the type indicates that
# the given variable will only match values from ALU instructions with the
@@ -55,19 +62,19 @@ optimizations = [
(('fabs', ('fneg', a)), ('fabs', a)),
(('iabs', ('iabs', a)), ('iabs', a)),
(('iabs', ('ineg', a)), ('iabs', a)),
- (('fadd', a, 0.0), a),
+ (('~fadd', a, 0.0), a),
(('iadd', a, 0), a),
(('usadd_4x8', a, 0), a),
(('usadd_4x8', a, ~0), ~0),
- (('fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
+ (('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
(('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
- (('fadd', ('fneg', a), a), 0.0),
+ (('~fadd', ('fneg', a), a), 0.0),
(('iadd', ('ineg', a), a), 0),
(('iadd', ('ineg', a), ('iadd', a, b)), b),
(('iadd', a, ('iadd', ('ineg', a), b)), b),
- (('fadd', ('fneg', a), ('fadd', a, b)), b),
- (('fadd', a, ('fadd', ('fneg', a), b)), b),
- (('fmul', a, 0.0), 0.0),
+ (('~fadd', ('fneg', a), ('fadd', a, b)), b),
+ (('~fadd', a, ('fadd', ('fneg', a), b)), b),
+ (('~fmul', a, 0.0), 0.0),
(('imul', a, 0), 0),
(('umul_unorm_4x8', a, 0), 0),
(('umul_unorm_4x8', a, ~0), a),
@@ -76,32 +83,48 @@ optimizations = [
(('fmul', a, -1.0), ('fneg', a)),
(('imul', a, -1), ('ineg', a)),
(('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
- (('ffma', 0.0, a, b), b),
- (('ffma', a, 0.0, b), b),
- (('ffma', a, b, 0.0), ('fmul', a, b)),
+ (('~ffma', 0.0, a, b), b),
+ (('~ffma', a, 0.0, b), b),
+ (('~ffma', a, b, 0.0), ('fmul', a, b)),
(('ffma', a, 1.0, b), ('fadd', a, b)),
(('ffma', 1.0, a, b), ('fadd', a, b)),
- (('flrp', a, b, 0.0), a),
- (('flrp', a, b, 1.0), b),
- (('flrp', a, a, b), a),
- (('flrp', 0.0, a, b), ('fmul', a, b)),
+ (('~flrp', a, b, 0.0), a),
+ (('~flrp', a, b, 1.0), b),
+ (('~flrp', a, a, b), a),
+ (('~flrp', 0.0, a, b), ('fmul', a, b)),
+ (('~flrp', a, b, ('b2f', c)), ('bcsel', c, b, a), 'options->lower_flrp'),
(('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'),
(('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
- (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'),
- (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
+ (('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg', ('b2f', c)))), ('fmul', b, ('b2f', c))), ('bcsel', c, b, a), 'options->lower_flrp'),
+ (('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c ))), ('fmul', b, c )), ('flrp', a, b, c), '!options->lower_flrp'),
+ (('~fadd', a, ('fmul', ('b2f', c), ('fadd', b, ('fneg', a)))), ('bcsel', c, b, a), 'options->lower_flrp'),
+ (('~fadd', a, ('fmul', c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
(('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
- (('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
+ (('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
# Comparison simplifications
- (('inot', ('flt', a, b)), ('fge', a, b)),
- (('inot', ('fge', a, b)), ('flt', a, b)),
- (('inot', ('feq', a, b)), ('fne', a, b)),
- (('inot', ('fne', a, b)), ('feq', a, b)),
+ (('~inot', ('flt', a, b)), ('fge', a, b)),
+ (('~inot', ('fge', a, b)), ('flt', a, b)),
+ (('~inot', ('feq', a, b)), ('fne', a, b)),
+ (('~inot', ('fne', a, b)), ('feq', a, b)),
(('inot', ('ilt', a, b)), ('ige', a, b)),
(('inot', ('ige', a, b)), ('ilt', a, b)),
(('inot', ('ieq', a, b)), ('ine', a, b)),
(('inot', ('ine', a, b)), ('ieq', a, b)),
+
+ # 0.0 >= b2f(a)
+ # b2f(a) <= 0.0
+ # b2f(a) == 0.0 because b2f(a) can only be 0 or 1
+ # inot(a)
+ (('fge', 0.0, ('b2f', a)), ('inot', a)),
+
+ # 0.0 < fabs(a)
+ # fabs(a) > 0.0
+ # fabs(a) != 0.0 because fabs(a) must be >= 0
+ # a != 0.0
+ (('flt', 0.0, ('fabs', a)), ('fne', a, 0.0)),
+
(('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
- (('bcsel', ('flt', a, b), a, b), ('fmin', a, b)),
+ (('bcsel', ('flt', b, a), b, a), ('fmin', a, b)),
(('bcsel', ('flt', a, b), b, a), ('fmax', a, b)),
(('bcsel', ('inot', 'a@bool'), b, c), ('bcsel', a, c, b)),
(('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)),
@@ -111,15 +134,19 @@ optimizations = [
(('imax', a, a), a),
(('umin', a, a), a),
(('umax', a, a), a),
- (('fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'),
- (('fmax', ('fmin', a, 1.0), 0.0), ('fsat', a), '!options->lower_fsat'),
+ (('~fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'),
+ (('~fmax', ('fmin', a, 1.0), 0.0), ('fsat', a), '!options->lower_fsat'),
(('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
(('fsat', ('fsat', a)), ('fsat', a)),
(('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)),
- (('ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))),
- (('ior', ('flt', a, c), ('flt', b, c)), ('flt', ('fmin', a, b), c)),
- (('ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))),
- (('ior', ('fge', a, c), ('fge', b, c)), ('fge', ('fmax', a, b), c)),
+ (('~ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))),
+ (('~ior', ('flt', a, c), ('flt', b, c)), ('flt', ('fmin', a, b), c)),
+ (('~ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))),
+ (('~ior', ('fge', a, c), ('fge', b, c)), ('fge', ('fmax', a, b), c)),
+ (('fabs', ('slt', a, b)), ('slt', a, b)),
+ (('fabs', ('sge', a, b)), ('sge', a, b)),
+ (('fabs', ('seq', a, b)), ('seq', a, b)),
+ (('fabs', ('sne', a, b)), ('sne', a, b)),
(('slt', a, b), ('b2f', ('flt', a, b)), 'options->lower_scmp'),
(('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'),
(('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'),
@@ -151,7 +178,6 @@ optimizations = [
(('ior', a, 0), a),
(('fxor', a, a), 0.0),
(('ixor', a, a), 0),
- (('fxor', a, 0.0), a),
(('ixor', a, 0), a),
(('inot', ('inot', a)), a),
# DeMorgan's Laws
@@ -167,35 +193,35 @@ optimizations = [
(('iand', 0xff, ('ushr', a, 24)), ('ushr', a, 24)),
(('iand', 0xffff, ('ushr', a, 16)), ('ushr', a, 16)),
# Exponential/logarithmic identities
- (('fexp2', ('flog2', a)), a), # 2^lg2(a) = a
- (('flog2', ('fexp2', a)), a), # lg2(2^a) = a
+ (('~fexp2', ('flog2', a)), a), # 2^lg2(a) = a
+ (('~flog2', ('fexp2', a)), a), # lg2(2^a) = a
(('fpow', a, b), ('fexp2', ('fmul', ('flog2', a), b)), 'options->lower_fpow'), # a^b = 2^(lg2(a)*b)
- (('fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b
- (('fexp2', ('fadd', ('fmul', ('flog2', a), b), ('fmul', ('flog2', c), d))),
- ('fmul', ('fpow', a, b), ('fpow', c, d)), '!options->lower_fpow'), # 2^(lg2(a) * b + lg2(c) + d) = a^b * c^d
- (('fpow', a, 1.0), a),
- (('fpow', a, 2.0), ('fmul', a, a)),
- (('fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))),
- (('fpow', 2.0, a), ('fexp2', a)),
- (('fpow', ('fpow', a, 2.2), 0.454545), a),
- (('fpow', ('fabs', ('fpow', a, 2.2)), 0.454545), ('fabs', a)),
- (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))),
- (('frcp', ('fexp2', a)), ('fexp2', ('fneg', a))),
- (('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))),
- (('flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))),
- (('flog2', ('frcp', a)), ('fneg', ('flog2', a))),
- (('flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))),
- (('flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))),
- (('fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))),
- (('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))),
- (('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))),
+ (('~fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b
+ (('~fexp2', ('fadd', ('fmul', ('flog2', a), b), ('fmul', ('flog2', c), d))),
+ ('~fmul', ('fpow', a, b), ('fpow', c, d)), '!options->lower_fpow'), # 2^(lg2(a) * b + lg2(c) * d) = a^b * c^d
+ (('~fpow', a, 1.0), a),
+ (('~fpow', a, 2.0), ('fmul', a, a)),
+ (('~fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))),
+ (('~fpow', 2.0, a), ('fexp2', a)),
+ (('~fpow', ('fpow', a, 2.2), 0.454545), a),
+ (('~fpow', ('fabs', ('fpow', a, 2.2)), 0.454545), ('fabs', a)),
+ (('~fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))),
+ (('~frcp', ('fexp2', a)), ('fexp2', ('fneg', a))),
+ (('~frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))),
+ (('~flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))),
+ (('~flog2', ('frcp', a)), ('fneg', ('flog2', a))),
+ (('~flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))),
+ (('~flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))),
+ (('~fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))),
+ (('~fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))),
+ (('~fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))),
# Division and reciprocal
- (('fdiv', 1.0, a), ('frcp', a)),
+ (('~fdiv', 1.0, a), ('frcp', a)),
(('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
- (('frcp', ('frcp', a)), a),
- (('frcp', ('fsqrt', a)), ('frsq', a)),
+ (('~frcp', ('frcp', a)), a),
+ (('~frcp', ('fsqrt', a)), ('frsq', a)),
(('fsqrt', a), ('frcp', ('frsq', a)), 'options->lower_fsqrt'),
- (('frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'),
+ (('~frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'),
# Boolean simplifications
(('ieq', 'a@bool', True), a),
(('ine', 'a@bool', True), ('inot', a)),
@@ -216,6 +242,10 @@ optimizations = [
(('i2b', ('b2i', a)), a),
(('f2i', ('ftrunc', a)), ('f2i', a)),
(('f2u', ('ftrunc', a)), ('f2u', a)),
+ (('i2b', ('ineg', a)), ('i2b', a)),
+ (('i2b', ('iabs', a)), ('i2b', a)),
+ (('fabs', ('b2f', a)), ('b2f', a)),
+ (('iabs', ('b2i', a)), ('b2i', a)),
# Byte extraction
(('ushr', a, 24), ('extract_u8', a, 3), '!options->lower_extract_byte'),
@@ -228,7 +258,7 @@ optimizations = [
(('iand', 0xffff, a), ('extract_u16', a, 0), '!options->lower_extract_word'),
# Subtracts
- (('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)),
+ (('~fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)),
(('isub', a, ('isub', 0, b)), ('iadd', a, b)),
(('ussub_4x8', a, 0), a),
(('ussub_4x8', a, ~0), 0),
@@ -236,7 +266,7 @@ optimizations = [
(('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'),
(('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
(('ineg', a), ('isub', 0, a), 'options->lower_negate'),
- (('fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)),
+ (('~fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)),
(('iadd', a, ('isub', 0, b)), ('isub', a, b)),
(('fabs', ('fsub', 0.0, a)), ('fabs', a)),
(('iabs', ('isub', 0, a)), ('iabs', a)),
@@ -368,10 +398,13 @@ for op in ['flt', 'fge', 'feq', 'fne',
# they help code generation but do not necessarily produce code that is
# more easily optimizable.
late_optimizations = [
+ # Most of these optimizations aren't quite safe when you get infinity or
+ # NaN involved but the first one should be fine.
(('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
- (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
- (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
- (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
+ (('~fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
+ (('~feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
+ (('~fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
+
(('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'),
(('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'),
(('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
diff --git a/src/compiler/nir/nir_opt_constant_folding.c b/src/compiler/nir/nir_opt_constant_folding.c
index 04876a4..e64ca36 100644
--- a/src/compiler/nir/nir_opt_constant_folding.c
+++ b/src/compiler/nir/nir_opt_constant_folding.c
@@ -46,10 +46,28 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
if (!instr->dest.dest.is_ssa)
return false;
+ /* In the case that any outputs/inputs have unsized types, then we need to
+ * guess the bit-size. In this case, the validator ensures that all
+ * bit-sizes match so we can just take the bit-size from the first
+ * output/input with an unsized type. If all the outputs/inputs are sized
+ * then we don't need to guess the bit-size at all because the code we
+ * generate for constant opcodes in this case already knows the sizes of
+ * the types involved and does not need the provided bit-size for anything
+ * (although a valid bit-size must still be passed in).
+ */
+ unsigned bit_size = 0;
+ if (!nir_alu_type_get_type_size(nir_op_infos[instr->op].output_type))
+ bit_size = instr->dest.dest.ssa.bit_size;
+
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
if (!instr->src[i].src.is_ssa)
return false;
+ if (bit_size == 0 &&
+ !nir_alu_type_get_type_size(nir_op_infos[instr->op].input_types[i])) {
+ bit_size = instr->src[i].src.ssa->bit_size; /* first unsized input type decides the bit-size */
+ }
+
nir_instr *src_instr = instr->src[i].src.ssa->parent_instr;
if (src_instr->type != nir_instr_type_load_const)
@@ -58,24 +76,31 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(instr, i);
j++) {
- src[i].u[j] = load_const->value.u[instr->src[i].swizzle[j]];
+ if (load_const->def.bit_size == 64)
+ src[i].u64[j] = load_const->value.u64[instr->src[i].swizzle[j]];
+ else
+ src[i].u32[j] = load_const->value.u32[instr->src[i].swizzle[j]];
}
/* We shouldn't have any source modifiers in the optimization loop. */
assert(!instr->src[i].abs && !instr->src[i].negate);
}
+ if (bit_size == 0)
+ bit_size = 32;
+
/* We shouldn't have any saturate modifiers in the optimization loop. */
assert(!instr->dest.saturate);
nir_const_value dest =
nir_eval_const_opcode(instr->op, instr->dest.dest.ssa.num_components,
- src);
+ bit_size, src);
nir_load_const_instr *new_instr =
nir_load_const_instr_create(mem_ctx,
instr->dest.dest.ssa.num_components);
+ new_instr->def.bit_size = instr->dest.dest.ssa.bit_size;
new_instr->value = dest;
nir_instr_insert_before(&instr->instr, &new_instr->instr);
@@ -106,7 +131,7 @@ constant_fold_deref(nir_instr *instr, nir_deref_var *deref)
nir_load_const_instr *indirect =
nir_instr_as_load_const(arr->indirect.ssa->parent_instr);
- arr->base_offset += indirect->value.u[0];
+ arr->base_offset += indirect->value.u32[0];
/* Clear out the source */
nir_instr_rewrite_src(instr, &arr->indirect, nir_src_for_ssa(NULL));
diff --git a/src/compiler/nir/nir_opt_dead_cf.c b/src/compiler/nir/nir_opt_dead_cf.c
index 4cc6798..4658b23 100644
--- a/src/compiler/nir/nir_opt_dead_cf.c
+++ b/src/compiler/nir/nir_opt_dead_cf.c
@@ -228,7 +228,7 @@ dead_cf_block(nir_block *block)
if (!const_value)
return false;
- opt_constant_if(following_if, const_value->u[0] != 0);
+ opt_constant_if(following_if, const_value->u32[0] != 0);
return true;
}
diff --git a/src/compiler/nir/nir_opt_peephole_select.c b/src/compiler/nir/nir_opt_peephole_select.c
index 0fc658d..bad9dc4 100644
--- a/src/compiler/nir/nir_opt_peephole_select.c
+++ b/src/compiler/nir/nir_opt_peephole_select.c
@@ -210,7 +210,8 @@ nir_opt_peephole_select_block(nir_block *block, void *void_state)
}
nir_ssa_dest_init(&sel->instr, &sel->dest.dest,
- phi->dest.ssa.num_components, phi->dest.ssa.name);
+ phi->dest.ssa.num_components,
+ phi->dest.ssa.bit_size, phi->dest.ssa.name);
sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
nir_ssa_def_rewrite_uses(&phi->dest.ssa,
diff --git a/src/compiler/nir/nir_phi_builder.c b/src/compiler/nir/nir_phi_builder.c
index 5429083..a39e360 100644
--- a/src/compiler/nir/nir_phi_builder.c
+++ b/src/compiler/nir/nir_phi_builder.c
@@ -52,6 +52,7 @@ struct nir_phi_builder_value {
/* Needed so we can create phis and undefs */
unsigned num_components;
+ unsigned bit_size;
/* The list of phi nodes associated with this value. Phi nodes are not
* added directly. Instead, they are created, the instr->block pointer
@@ -61,8 +62,18 @@ struct nir_phi_builder_value {
*/
struct exec_list phis;
- /* Array of SSA defs, indexed by block. If a phi needs to be inserted
- * in a given block, it will have the magic value NEEDS_PHI.
+ /* Array of SSA defs, indexed by block. For each block, this array has
+ * one of three types of values:
+ *
+ * - NULL. Indicates that there is no known definition in this block. If
+ * you need to find one, look at the block's immediate dominator.
+ *
+ * - NEEDS_PHI. Indicates that the block may need a phi node but none has
+ * been created yet. If a def is requested for a block, a phi will need
+ * to be created.
+ *
+ * - A regular SSA def. This will be either the result of a phi node or
+ * one of the defs provided by nir_phi_builder_value_set_block_def().
*/
nir_ssa_def *defs[0];
};
@@ -101,7 +112,7 @@ nir_phi_builder_create(nir_function_impl *impl)
struct nir_phi_builder_value *
nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components,
- const BITSET_WORD *defs)
+ unsigned bit_size, const BITSET_WORD *defs)
{
struct nir_phi_builder_value *val;
unsigned i, w_start = 0, w_end = 0;
@@ -109,6 +120,7 @@ nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components,
val = rzalloc_size(pb, sizeof(*val) + sizeof(val->defs[0]) * pb->num_blocks);
val->builder = pb;
val->num_components = num_components;
+ val->bit_size = bit_size;
exec_list_make_empty(&val->phis);
exec_list_push_tail(&pb->values, &val->node);
@@ -127,8 +139,7 @@ nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components,
set_foreach(cur->dom_frontier, dom_entry) {
nir_block *next = (nir_block *) dom_entry->key;
- /*
- * If there's more than one return statement, then the end block
+ /* If there's more than one return statement, then the end block
* can be a join point for some definitions. However, there are
* no instructions in the end block, so nothing would use those
* phi nodes. Of course, we couldn't place those phi nodes
@@ -139,6 +150,10 @@ nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components,
continue;
if (val->defs[next->index] == NULL) {
+ /* Instead of creating a phi node immediately, we simply set the
+ * value to the magic value NEEDS_PHI. Later, we create phi nodes
+ * on demand in nir_phi_builder_value_get_block_def().
+ */
val->defs[next->index] = NEEDS_PHI;
if (pb->work[next->index] < pb->iter_count) {
@@ -163,7 +178,9 @@ nir_ssa_def *
nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val,
nir_block *block)
{
+ /* For each block, we have one of three types of values */
if (val->defs[block->index] == NULL) {
+ /* NULL indicates that we have no SSA def for this block. */
if (block->imm_dom) {
/* Grab it from our immediate dominator. We'll stash it here for
* easy access later.
@@ -185,17 +202,36 @@ nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val,
return &undef->def;
}
} else if (val->defs[block->index] == NEEDS_PHI) {
- /* If we need a phi instruction, go ahead and create one but don't
- * add it to the program yet. Later, we'll go through and set up phi
- * sources and add the instructions will be added at that time.
+ /* The magic value NEEDS_PHI indicates that the block needs a phi node
+ * but none has been created. We need to create one now so we can
+ * return it to the caller.
+ *
+ * Because a phi node may use SSA defs that it does not dominate (this
+ * happens in loops), we do not yet have enough information to fully
+ * fill out the phi node. Instead, the phi nodes we create here will be
+ * empty (have no sources) and won't actually be placed in the block's
+ * instruction list yet. Later, in nir_phi_builder_finish(), we walk
+ * over all of the phi instructions, fill out the sources lists, and
+ * place them at the top of their respective block's instruction list.
+ *
+ * Creating phi nodes on-demand allows us to avoid creating dead phi
+ * nodes that will just get deleted later. While this probably isn't a
+ * big win for a full into-SSA pass, other users may use the phi builder
+ * to make small SSA form repairs where most of the phi nodes will never
+ * be used.
*/
nir_phi_instr *phi = nir_phi_instr_create(val->builder->shader);
- nir_ssa_dest_init(&phi->instr, &phi->dest, val->num_components, NULL);
+ nir_ssa_dest_init(&phi->instr, &phi->dest, val->num_components,
+ val->bit_size, NULL);
phi->instr.block = block;
exec_list_push_tail(&val->phis, &phi->instr.node);
val->defs[block->index] = &phi->dest.ssa;
return &phi->dest.ssa;
} else {
+ /* In this case, we have an actual SSA def. It's either the result of a
+ * phi node created by the case above or one passed to us through
+ * nir_phi_builder_value_set_block_def().
+ */
return val->defs[block->index];
}
}
@@ -216,9 +252,14 @@ nir_phi_builder_finish(struct nir_phi_builder *pb)
NIR_VLA(nir_block *, preds, num_blocks);
foreach_list_typed(struct nir_phi_builder_value, val, node, &pb->values) {
- /* We can't iterate over the list of phis normally because we are
- * removing them as we go and, in some cases, adding new phis as we
- * build the source lists of others.
+ /* We treat the linked list of phi nodes like a worklist. The list is
+ * pre-populated by calls to nir_phi_builder_value_get_block_def() that
+ * create phi nodes. As we fill in the sources of phi nodes, more may
+ * be created and are added to the end of the list.
+ *
+ * Because we are adding and removing phi nodes from the list as we go,
+ * we can't iterate over it normally. Instead, we just iterate until
+ * the list is empty.
*/
while (!exec_list_is_empty(&val->phis)) {
struct exec_node *head = exec_list_get_head(&val->phis);
diff --git a/src/compiler/nir/nir_phi_builder.h b/src/compiler/nir/nir_phi_builder.h
index 50251bf..edc5302 100644
--- a/src/compiler/nir/nir_phi_builder.h
+++ b/src/compiler/nir/nir_phi_builder.h
@@ -25,7 +25,38 @@
#include "nir.h"
+/** A helper for placing phi nodes in a NIR shader
+ *
+ * Basic usage goes something like this:
+ *
+ * each variable, var, has:
+ * a bitset var.defs of blocks where the variable is defined
+ * a struct nir_phi_builder_value *pb_val
+ *
+ * // initialize bitsets
+ * foreach block:
+ * foreach def of variable var:
+ * var.defs[def.block] = true;
+ *
+ * // initialize phi builder
+ * pb = nir_phi_builder_create()
+ * foreach var:
+ * var.pb_val = nir_phi_builder_add_value(pb, var.defs)
+ *
+ * // Visit each block. This needs to visit dominators first;
+ * // nir_for_each_block() will be ok.
+ * foreach block:
+ * foreach instruction:
+ * foreach use of variable var:
+ * replace use with nir_phi_builder_value_get_block_def(var.pb_val)
+ * foreach def of variable var:
+ * create ssa def, register with
+ * nir_phi_builder_value_set_block_def(var.pb_val)
+ *
+ * nir_phi_builder_finish(pb)
+ */
struct nir_phi_builder;
+
struct nir_phi_builder_value;
/* Create a new phi builder.
@@ -43,7 +74,7 @@ struct nir_phi_builder *nir_phi_builder_create(nir_function_impl *impl);
*/
struct nir_phi_builder_value *
nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components,
- const BITSET_WORD *defs);
+ unsigned bit_size, const BITSET_WORD *defs);
/* Register a definition for the given value and block.
*
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index 24d5281..60b74d1 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -207,6 +207,8 @@ print_alu_instr(nir_alu_instr *instr, print_state *state)
print_alu_dest(&instr->dest, state);
fprintf(fp, " = %s", nir_op_infos[instr->op].name);
+ if (instr->exact)
+ fprintf(fp, "!");
if (instr->dest.saturate)
fprintf(fp, ".sat");
fprintf(fp, " ");
@@ -714,7 +716,7 @@ print_load_const_instr(nir_load_const_instr *instr, print_state *state)
* and then print the float in a comment for readability.
*/
- fprintf(fp, "0x%08x /* %f */", instr->value.u[i], instr->value.f[i]);
+ fprintf(fp, "0x%08x /* %f */", instr->value.u32[i], instr->value.f32[i]);
}
fprintf(fp, ")");
diff --git a/src/compiler/nir/nir_repair_ssa.c b/src/compiler/nir/nir_repair_ssa.c
index 3ab4f0f..96c791c 100644
--- a/src/compiler/nir/nir_repair_ssa.c
+++ b/src/compiler/nir/nir_repair_ssa.c
@@ -85,7 +85,8 @@ repair_ssa_def(nir_ssa_def *def, void *void_state)
BITSET_SET(state->def_set, def->parent_instr->block->index);
struct nir_phi_builder_value *val =
- nir_phi_builder_add_value(pb, def->num_components, state->def_set);
+ nir_phi_builder_add_value(pb, def->num_components, def->bit_size,
+ state->def_set);
nir_phi_builder_value_set_block_def(val, def->parent_instr->block, def);
diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c
index 56d7e81..6e63063 100644
--- a/src/compiler/nir/nir_search.c
+++ b/src/compiler/nir/nir_search.c
@@ -62,7 +62,8 @@ alu_instr_is_bool(nir_alu_instr *instr)
case nir_op_inot:
return src_is_bool(instr->src[0].src);
default:
- return nir_op_infos[instr->op].output_type == nir_type_bool;
+ return (nir_alu_type_get_base_type(nir_op_infos[instr->op].output_type)
+ == nir_type_bool);
}
}
@@ -125,8 +126,10 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
nir_alu_instr *src_alu =
nir_instr_as_alu(instr->src[src].src.ssa->parent_instr);
- if (nir_op_infos[src_alu->op].output_type != var->type &&
- !(var->type == nir_type_bool && alu_instr_is_bool(src_alu)))
+ if (nir_alu_type_get_base_type(nir_op_infos[src_alu->op].output_type) !=
+ var->type &&
+ !(nir_alu_type_get_base_type(var->type) == nir_type_bool &&
+ alu_instr_is_bool(src_alu)))
return false;
}
@@ -158,21 +161,65 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
nir_load_const_instr *load =
nir_instr_as_load_const(instr->src[src].src.ssa->parent_instr);
- switch (nir_op_infos[instr->op].input_types[src]) {
+ switch (const_val->type) {
case nir_type_float:
for (unsigned i = 0; i < num_components; ++i) {
- if (load->value.f[new_swizzle[i]] != const_val->data.f)
+ double val;
+ switch (load->def.bit_size) {
+ case 32:
+ val = load->value.f32[new_swizzle[i]];
+ break;
+ case 64:
+ val = load->value.f64[new_swizzle[i]];
+ break;
+ default:
+ unreachable("unknown bit size");
+ }
+
+ if (val != const_val->data.d)
return false;
}
return true;
+
case nir_type_int:
+ for (unsigned i = 0; i < num_components; ++i) {
+ int64_t val;
+ switch (load->def.bit_size) {
+ case 32:
+ val = load->value.i32[new_swizzle[i]];
+ break;
+ case 64:
+ val = load->value.i64[new_swizzle[i]];
+ break;
+ default:
+ unreachable("unknown bit size");
+ }
+
+ if (val != const_val->data.i)
+ return false;
+ }
+ return true;
+
case nir_type_uint:
- case nir_type_bool:
+ case nir_type_bool32:
for (unsigned i = 0; i < num_components; ++i) {
- if (load->value.i[new_swizzle[i]] != const_val->data.i)
+ uint64_t val;
+ switch (load->def.bit_size) {
+ case 32:
+ val = load->value.u32[new_swizzle[i]];
+ break;
+ case 64:
+ val = load->value.u64[new_swizzle[i]];
+ break;
+ default:
+ unreachable("unknown bit size");
+ }
+
+ if (val != const_val->data.u)
return false;
}
return true;
+
default:
unreachable("Invalid alu source type");
}
@@ -191,6 +238,10 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr,
if (instr->op != expr->opcode)
return false;
+ assert(instr->dest.dest.is_ssa);
+ if (expr->inexact && instr->exact)
+ return false;
+
assert(!instr->dest.saturate);
assert(nir_op_infos[instr->op].num_inputs > 0);
@@ -244,9 +295,123 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr,
}
}
+/* Per-node bit-size bookkeeping for a replacement expression tree.
+ *
+ * One node is built for every search value (expression, variable or
+ * constant). Throughout, a size of 0 is treated as "not yet known".
+ */
+typedef struct bitsize_tree {
+ /* Number of valid entries in srcs[]; 0 for variables and constants. */
+ unsigned num_srcs;
+ struct bitsize_tree *srcs[4];
+
+ /* Bit size shared by every source (and the destination) whose ALU type
+ * carries no explicit size.
+ */
+ unsigned common_size;
+ /* True when the opcode's type for that source/destination has an explicit
+ * bit size (nir_alu_type_get_type_size() is non-zero).
+ */
+ bool is_src_sized[4];
+ bool is_dest_sized;
+
+ /* Resolved bit sizes, filled in by the filter_up/filter_down passes. */
+ unsigned dest_size;
+ unsigned src_size[4];
+} bitsize_tree;
+
+/* Recursively build a bitsize_tree mirroring the shape of a search value.
+ *
+ * - Expressions record, per source and for the destination, whether the
+ *   opcode's ALU type has an explicit bit size and, if so, what it is.
+ * - Variables take their size from the source that was captured in
+ *   state->variables during matching.
+ * - Constants have no size of their own; it is inferred later.
+ *
+ * All nodes are allocated out of mem_ctx.
+ */
+static bitsize_tree *
+build_bitsize_tree(void *mem_ctx, struct match_state *state,
+ const nir_search_value *value)
+{
+ /* Must be zero-initialized: dest_size, src_size[] and common_size are not
+ * assigned on every path below, and the filter passes compare them
+ * against 0 to mean "unknown".
+ */
+ bitsize_tree *tree = rzalloc(mem_ctx, bitsize_tree);
+
+ switch (value->type) {
+ case nir_search_value_expression: {
+ nir_search_expression *expr = nir_search_value_as_expression(value);
+ nir_op_info info = nir_op_infos[expr->opcode];
+ tree->num_srcs = info.num_inputs;
+ tree->common_size = 0;
+ for (unsigned i = 0; i < info.num_inputs; i++) {
+ tree->is_src_sized[i] = !!nir_alu_type_get_type_size(info.input_types[i]);
+ if (tree->is_src_sized[i])
+ tree->src_size[i] = nir_alu_type_get_type_size(info.input_types[i]);
+ tree->srcs[i] = build_bitsize_tree(mem_ctx, state, expr->srcs[i]);
+ }
+ tree->is_dest_sized = !!nir_alu_type_get_type_size(info.output_type);
+ if (tree->is_dest_sized)
+ tree->dest_size = nir_alu_type_get_type_size(info.output_type);
+ break;
+ }
+
+ case nir_search_value_variable: {
+ nir_search_variable *var = nir_search_value_as_variable(value);
+ tree->num_srcs = 0;
+ tree->is_dest_sized = true;
+ tree->dest_size = nir_src_bit_size(state->variables[var->variable].src);
+ break;
+ }
+
+ case nir_search_value_constant: {
+ tree->num_srcs = 0;
+ tree->is_dest_sized = false;
+ tree->common_size = 0;
+ break;
+ }
+ }
+
+ return tree;
+}
+
+/* Bottom-up pass: propagate known bit sizes from the leaves towards the root.
+ *
+ * Each source subtree is visited first. A returned size of 0 means the
+ * subtree could not determine its size and is skipped. Otherwise the size
+ * must agree with the source's explicit size, or with the node's
+ * common_size, which it establishes if none was known yet.
+ *
+ * Returns the node's dest_size, which may still be 0.
+ */
+static unsigned
+bitsize_tree_filter_up(bitsize_tree *tree)
+{
+ for (unsigned i = 0; i < tree->num_srcs; i++) {
+ unsigned src_size = bitsize_tree_filter_up(tree->srcs[i]);
+ if (src_size == 0)
+ continue;
+
+ if (tree->is_src_sized[i]) {
+ assert(src_size == tree->src_size[i]);
+ } else if (tree->common_size != 0) {
+ assert(src_size == tree->common_size);
+ tree->src_size[i] = src_size;
+ } else {
+ tree->common_size = src_size;
+ tree->src_size[i] = src_size;
+ }
+ }
+
+ /* Once a common size is known, use it for the destination (if still
+ * unknown) and for any source whose size has not been set.
+ */
+ if (tree->num_srcs && tree->common_size) {
+ if (tree->dest_size == 0)
+ tree->dest_size = tree->common_size;
+ else if (!tree->is_dest_sized)
+ assert(tree->dest_size == tree->common_size);
+
+ for (unsigned i = 0; i < tree->num_srcs; i++) {
+ if (!tree->src_size[i])
+ tree->src_size[i] = tree->common_size;
+ }
+ }
+
+ return tree->dest_size;
+}
+
+/* Top-down pass: push the required destination size into the tree.
+ *
+ * size is the bit size this node's result must have. It is checked against
+ * an already-known dest_size, or recorded if none was known. When the
+ * destination has no explicit size, common_size is tied to size as well.
+ * Each source still lacking a size takes common_size before recursing.
+ */
+static void
+bitsize_tree_filter_down(bitsize_tree *tree, unsigned size)
+{
+ if (tree->dest_size)
+ assert(tree->dest_size == size);
+ else
+ tree->dest_size = size;
+
+ if (!tree->is_dest_sized) {
+ if (tree->common_size)
+ assert(tree->common_size == size);
+ else
+ tree->common_size = size;
+ }
+
+ for (unsigned i = 0; i < tree->num_srcs; i++) {
+ if (!tree->src_size[i]) {
+ /* An unsized source can only be resolved through common_size. */
+ assert(tree->common_size);
+ tree->src_size[i] = tree->common_size;
+ }
+ bitsize_tree_filter_down(tree->srcs[i], tree->src_size[i]);
+ }
+}
+
static nir_alu_src
-construct_value(const nir_search_value *value, nir_alu_type type,
- unsigned num_components, struct match_state *state,
+construct_value(const nir_search_value *value,
+ unsigned num_components, bitsize_tree *bitsize, bool exact,
+ struct match_state *state,
nir_instr *instr, void *mem_ctx)
{
switch (value->type) {
@@ -257,7 +422,9 @@ construct_value(const nir_search_value *value, nir_alu_type type,
num_components = nir_op_infos[expr->opcode].output_size;
nir_alu_instr *alu = nir_alu_instr_create(mem_ctx, expr->opcode);
- nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components, NULL);
+ nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components,
+ bitsize->dest_size, NULL);
+ alu->exact = exact;
alu->dest.write_mask = (1 << num_components) - 1;
alu->dest.saturate = false;
@@ -269,8 +436,7 @@ construct_value(const nir_search_value *value, nir_alu_type type,
num_components = nir_op_infos[alu->op].input_sizes[i];
alu->src[i] = construct_value(expr->srcs[i],
- nir_op_infos[alu->op].input_types[i],
- num_components,
+ num_components, bitsize->srcs[i], exact,
state, instr, mem_ctx);
}
@@ -301,23 +467,57 @@ construct_value(const nir_search_value *value, nir_alu_type type,
const nir_search_constant *c = nir_search_value_as_constant(value);
nir_load_const_instr *load = nir_load_const_instr_create(mem_ctx, 1);
- switch (type) {
+ /* The constant's own type selects the union member; the inferred
+ * destination bit size selects the width it is stored at.
+ */
+ switch (c->type) {
case nir_type_float:
- load->def.name = ralloc_asprintf(mem_ctx, "%f", c->data.f);
- load->value.f[0] = c->data.f;
+ load->def.name = ralloc_asprintf(load, "%f", c->data.d);
+ switch (bitsize->dest_size) {
+ case 32:
+ load->value.f32[0] = c->data.d;
+ break;
+ case 64:
+ load->value.f64[0] = c->data.d;
+ break;
+ default:
+ unreachable("unknown bit size");
+ }
break;
+
case nir_type_int:
- load->def.name = ralloc_asprintf(mem_ctx, "%d", c->data.i);
- load->value.i[0] = c->data.i;
+ /* data.i is int64_t; cast so the format matches even where long is
+ * only 32 bits wide.
+ */
+ load->def.name = ralloc_asprintf(load, "%lld", (long long)c->data.i);
+ switch (bitsize->dest_size) {
+ case 32:
+ load->value.i32[0] = c->data.i;
+ break;
+ case 64:
+ load->value.i64[0] = c->data.i;
+ break;
+ default:
+ unreachable("unknown bit size");
+ }
break;
+
case nir_type_uint:
- case nir_type_bool:
- load->value.u[0] = c->data.u;
+ /* data.u is uint64_t; see the cast note on the int case. */
+ load->def.name = ralloc_asprintf(load, "%llu",
+ (unsigned long long)c->data.u);
+ switch (bitsize->dest_size) {
+ case 32:
+ load->value.u32[0] = c->data.u;
+ break;
+ case 64:
+ load->value.u64[0] = c->data.u;
+ break;
+ default:
+ unreachable("unknown bit size");
+ }
+ /* Do not fall through: the bool32 case would rewrite u32[0] after a
+ * 64-bit store.
+ */
+ break;
+
+ case nir_type_bool32:
+ load->value.u32[0] = c->data.u;
break;
default:
unreachable("Invalid alu source type");
}
+ load->def.bit_size = bitsize->dest_size;
+
nir_instr_insert_before(instr, &load->instr);
nir_alu_src val;
@@ -352,6 +552,11 @@ nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,
swizzle, &state))
return NULL;
+ void *bitsize_ctx = ralloc_context(NULL);
+ bitsize_tree *tree = build_bitsize_tree(bitsize_ctx, &state, replace);
+ bitsize_tree_filter_up(tree);
+ bitsize_tree_filter_down(tree, instr->dest.dest.ssa.bit_size);
+
/* Inserting a mov may be unnecessary. However, it's much easier to
* simply let copy propagation clean this up than to try to go through
* and rewrite swizzles ourselves.
@@ -359,11 +564,12 @@ nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,
nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
mov->dest.write_mask = instr->dest.write_mask;
nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
- instr->dest.dest.ssa.num_components, NULL);
+ instr->dest.dest.ssa.num_components,
+ instr->dest.dest.ssa.bit_size, NULL);
- mov->src[0] = construct_value(replace, nir_op_infos[instr->op].output_type,
- instr->dest.dest.ssa.num_components, &state,
- &instr->instr, mem_ctx);
+ mov->src[0] = construct_value(replace,
+ instr->dest.dest.ssa.num_components, tree,
+ instr->exact, &state, &instr->instr, mem_ctx);
nir_instr_insert_before(&instr->instr, &mov->instr);
nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
@@ -375,5 +581,7 @@ nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,
*/
nir_instr_remove(&instr->instr);
+ ralloc_free(bitsize_ctx);
+
return mov;
}
diff --git a/src/compiler/nir/nir_search.h b/src/compiler/nir/nir_search.h
index 7d47792..61742f1 100644
--- a/src/compiler/nir/nir_search.h
+++ b/src/compiler/nir/nir_search.h
@@ -71,16 +71,24 @@ typedef struct {
typedef struct {
nir_search_value value;
+ /* Type of the constant; the matching and construction code switches on it
+ * to decide which member of data to read.
+ */
+ nir_alu_type type;
+
+ /* The value is stored at 64-bit width regardless of the bit size it is
+ * eventually matched against or emitted at.
+ */
union {
- uint32_t u;
- int32_t i;
- float f;
+ uint64_t u;
+ int64_t i;
+ double d;
} data;
} nir_search_constant;
typedef struct {
nir_search_value value;
+ /* When set on a search expression, the expression will only match an SSA
+ * value that does *not* have the exact bit set. If unset, the exact bit
+ * on the SSA value is ignored.
+ */
+ bool inexact;
+
nir_op opcode;
const nir_search_value *srcs[4];
} nir_search_expression;
diff --git a/src/compiler/nir/nir_to_ssa.c b/src/compiler/nir/nir_to_ssa.c
index 44a5054..d588d7d 100644
--- a/src/compiler/nir/nir_to_ssa.c
+++ b/src/compiler/nir/nir_to_ssa.c
@@ -219,7 +219,9 @@ rewrite_def_forwards(nir_dest *dest, void *_state)
state->states[index].num_defs);
list_del(&dest->reg.def_link);
- nir_ssa_dest_init(state->parent_instr, dest, reg->num_components, name);
+ nir_ssa_dest_init(state->parent_instr, dest, reg->num_components,
+ reg->bit_size, name);
+ ralloc_free(name);
/* push our SSA destination on the stack */
state->states[index].index++;
@@ -271,7 +273,9 @@ rewrite_alu_instr_forward(nir_alu_instr *instr, rewrite_state *state)
instr->dest.write_mask = (1 << num_components) - 1;
list_del(&instr->dest.dest.reg.def_link);
- nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, name);
+ nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components,
+ reg->bit_size, name);
+ ralloc_free(name);
if (nir_op_infos[instr->op].output_size == 0) {
/*
diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c
index 0c32d5f..9f18d1c 100644
--- a/src/compiler/nir/nir_validate.c
+++ b/src/compiler/nir/nir_validate.c
@@ -179,9 +179,12 @@ validate_alu_src(nir_alu_instr *instr, unsigned index, validate_state *state)
nir_alu_src *src = &instr->src[index];
unsigned num_components;
- if (src->src.is_ssa)
+ unsigned src_bit_size;
+ if (src->src.is_ssa) {
+ src_bit_size = src->src.ssa->bit_size;
num_components = src->src.ssa->num_components;
- else {
+ } else {
+ src_bit_size = src->src.reg.reg->bit_size;
if (src->src.reg.reg->is_packed)
num_components = 4; /* can't check anything */
else
@@ -194,6 +197,24 @@ validate_alu_src(nir_alu_instr *instr, unsigned index, validate_state *state)
assert(src->swizzle[i] < num_components);
}
+ nir_alu_type src_type = nir_op_infos[instr->op].input_types[index];
+
+ /* 8-bit float isn't a thing */
+ if (nir_alu_type_get_base_type(src_type) == nir_type_float)
+ assert(src_bit_size == 16 || src_bit_size == 32 || src_bit_size == 64);
+
+ if (nir_alu_type_get_type_size(src_type)) {
+ /* This source has an explicit bit size */
+ assert(nir_alu_type_get_type_size(src_type) == src_bit_size);
+ } else {
+ if (!nir_alu_type_get_type_size(nir_op_infos[instr->op].output_type)) {
+ unsigned dest_bit_size =
+ instr->dest.dest.is_ssa ? instr->dest.dest.ssa.bit_size
+ : instr->dest.dest.reg.reg->bit_size;
+ assert(dest_bit_size == src_bit_size);
+ }
+ }
+
validate_src(&src->src, state);
}
@@ -263,8 +284,10 @@ validate_dest(nir_dest *dest, validate_state *state)
}
static void
-validate_alu_dest(nir_alu_dest *dest, validate_state *state)
+validate_alu_dest(nir_alu_instr *instr, validate_state *state)
{
+ nir_alu_dest *dest = &instr->dest;
+
unsigned dest_size =
dest->dest.is_ssa ? dest->dest.ssa.num_components
: dest->dest.reg.reg->num_components;
@@ -282,6 +305,17 @@ validate_alu_dest(nir_alu_dest *dest, validate_state *state)
assert(nir_op_infos[alu->op].output_type == nir_type_float ||
!dest->saturate);
+ unsigned bit_size = dest->dest.is_ssa ? dest->dest.ssa.bit_size
+ : dest->dest.reg.reg->bit_size;
+ nir_alu_type type = nir_op_infos[instr->op].output_type;
+
+ /* 8-bit float isn't a thing */
+ if (nir_alu_type_get_base_type(type) == nir_type_float)
+ assert(bit_size == 16 || bit_size == 32 || bit_size == 64);
+
+ assert(nir_alu_type_get_type_size(type) == 0 ||
+ nir_alu_type_get_type_size(type) == bit_size);
+
validate_dest(&dest->dest, state);
}
@@ -294,7 +328,7 @@ validate_alu_instr(nir_alu_instr *instr, validate_state *state)
validate_alu_src(instr, i, state);
}
- validate_alu_dest(&instr->dest, state);
+ validate_alu_dest(instr, state);
}
static void
diff --git a/src/compiler/nir/spirv/spirv_to_nir.c b/src/compiler/nir/spirv/spirv_to_nir.c
index 5a7184a..42a1f95 100644
--- a/src/compiler/nir/spirv/spirv_to_nir.c
+++ b/src/compiler/nir/spirv/spirv_to_nir.c
@@ -92,7 +92,7 @@ vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,
nir_load_const_instr_create(b->shader, num_components);
for (unsigned i = 0; i < num_components; i++)
- load->value.u[i] = constant->value.u[i];
+ load->value.u32[i] = constant->value.u[i];
nir_instr_insert_before_cf_list(&b->impl->body, &load->instr);
val->def = &load->def;
@@ -109,7 +109,7 @@ vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,
nir_load_const_instr_create(b->shader, rows);
for (unsigned j = 0; j < rows; j++)
- load->value.u[j] = constant->value.u[rows * i + j];
+ load->value.u32[j] = constant->value.u[rows * i + j];
nir_instr_insert_before_cf_list(&b->impl->body, &load->instr);
col_val->def = &load->def;
@@ -1035,6 +1035,8 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap);
unsigned num_components = glsl_get_vector_elements(val->const_type);
+ unsigned bit_size =
+ glsl_get_bit_size(glsl_get_base_type(val->const_type));
nir_const_value src[3];
assert(count <= 7);
@@ -1043,14 +1045,16 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
vtn_value(b, w[4 + i], vtn_value_type_constant)->constant;
unsigned j = swap ? 1 - i : i;
+ assert(bit_size == 32);
for (unsigned k = 0; k < num_components; k++)
- src[j].u[k] = c->value.u[k];
+ src[j].u32[k] = c->value.u[k];
}
- nir_const_value res = nir_eval_const_opcode(op, num_components, src);
+ nir_const_value res = nir_eval_const_opcode(op, num_components,
+ bit_size, src);
for (unsigned k = 0; k < num_components; k++)
- val->constant->value.u[k] = res.u[k];
+ val->constant->value.u[k] = res.u32[k];
return;
} /* default */
@@ -1414,7 +1418,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
}
nir_ssa_dest_init(&instr->instr, &instr->dest,
- nir_tex_instr_dest_size(instr), NULL);
+ nir_tex_instr_dest_size(instr), 32, NULL);
assert(glsl_get_vector_elements(ret_type->type) ==
nir_tex_instr_dest_size(instr));
@@ -1600,7 +1604,7 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
if (opcode != SpvOpImageWrite) {
struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
- nir_ssa_dest_init(&intrin->instr, &intrin->dest, 4, NULL);
+ nir_ssa_dest_init(&intrin->instr, &intrin->dest, 4, 32, NULL);
nir_builder_instr_insert(&b->nb, &intrin->instr);
@@ -1738,7 +1742,7 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
fill_common_atomic_sources(b, opcode, w, &atomic->src[2]);
}
- nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, NULL);
+ nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, 32, NULL);
struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
@@ -1750,7 +1754,7 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
}
static nir_alu_instr *
-create_vec(nir_shader *shader, unsigned num_components)
+create_vec(nir_shader *shader, unsigned num_components, unsigned bit_size)
{
nir_op op;
switch (num_components) {
@@ -1762,7 +1766,8 @@ create_vec(nir_shader *shader, unsigned num_components)
}
nir_alu_instr *vec = nir_alu_instr_create(shader, op);
- nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL);
+ nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components,
+ bit_size, NULL);
vec->dest.write_mask = (1 << num_components) - 1;
return vec;
@@ -1779,7 +1784,8 @@ vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src)
for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) {
nir_alu_instr *vec = create_vec(b->shader,
- glsl_get_matrix_columns(src->type));
+ glsl_get_matrix_columns(src->type),
+ glsl_get_bit_size(glsl_get_base_type(src->type)));
if (glsl_type_is_vector_or_scalar(src->type)) {
vec->src[0].src = nir_src_for_ssa(src->def);
vec->src[0].swizzle[0] = i;
@@ -1809,7 +1815,8 @@ nir_ssa_def *
vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert,
unsigned index)
{
- nir_alu_instr *vec = create_vec(b->shader, src->num_components);
+ nir_alu_instr *vec = create_vec(b->shader, src->num_components,
+ src->bit_size);
for (unsigned i = 0; i < src->num_components; i++) {
if (i == index) {
@@ -1854,7 +1861,7 @@ vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components,
nir_ssa_def *src0, nir_ssa_def *src1,
const uint32_t *indices)
{
- nir_alu_instr *vec = create_vec(b->shader, num_components);
+ nir_alu_instr *vec = create_vec(b->shader, num_components, src0->bit_size);
nir_ssa_undef_instr *undef = nir_ssa_undef_instr_create(b->shader, 1);
nir_builder_instr_insert(&b->nb, &undef->instr);
@@ -1884,7 +1891,8 @@ static nir_ssa_def *
vtn_vector_construct(struct vtn_builder *b, unsigned num_components,
unsigned num_srcs, nir_ssa_def **srcs)
{
- nir_alu_instr *vec = create_vec(b->shader, num_components);
+ nir_alu_instr *vec = create_vec(b->shader, num_components,
+ srcs[0]->bit_size);
unsigned dest_idx = 0;
for (unsigned i = 0; i < num_srcs; i++) {
diff --git a/src/compiler/nir/spirv/vtn_glsl450.c b/src/compiler/nir/spirv/vtn_glsl450.c
index 6b649fd..3360fda 100644
--- a/src/compiler/nir/spirv/vtn_glsl450.c
+++ b/src/compiler/nir/spirv/vtn_glsl450.c
@@ -627,7 +627,9 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
nir_ssa_dest_init(&instr->instr, &instr->dest.dest,
- glsl_get_vector_elements(val->ssa->type), val->name);
+ glsl_get_vector_elements(val->ssa->type),
+ glsl_get_bit_size(glsl_get_base_type(val->ssa->type)),
+ val->name);
instr->dest.write_mask = (1 << instr->dest.dest.ssa.num_components) - 1;
val->ssa->def = &instr->dest.dest.ssa;
diff --git a/src/compiler/nir/spirv/vtn_variables.c b/src/compiler/nir/spirv/vtn_variables.c
index 31bf416..3cbac1e 100644
--- a/src/compiler/nir/spirv/vtn_variables.c
+++ b/src/compiler/nir/spirv/vtn_variables.c
@@ -190,7 +190,9 @@ _vtn_local_load_store(struct vtn_builder *b, bool load, nir_deref_var *deref,
if (load) {
nir_ssa_dest_init(&intrin->instr, &intrin->dest,
- intrin->num_components, NULL);
+ intrin->num_components,
+ glsl_get_bit_size(glsl_get_base_type(tail->type)),
+ NULL);
inout->def = &intrin->dest.ssa;
} else {
nir_intrinsic_set_write_mask(intrin, (1 << intrin->num_components) - 1);
@@ -322,7 +324,7 @@ get_vulkan_resource_index(struct vtn_builder *b, struct vtn_access_chain *chain,
nir_intrinsic_set_desc_set(instr, chain->var->descriptor_set);
nir_intrinsic_set_binding(instr, chain->var->binding);
- nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
+ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
nir_builder_instr_insert(&b->nb, &instr->instr);
return &instr->dest.ssa;
@@ -411,7 +413,8 @@ _vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load,
if (load) {
nir_ssa_dest_init(&instr->instr, &instr->dest,
- instr->num_components, NULL);
+ instr->num_components,
+ glsl_get_bit_size(glsl_get_base_type(type)), NULL);
(*inout)->def = &instr->dest.ssa;
}
@@ -1385,7 +1388,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
nir_intrinsic_instr_create(b->nb.shader,
nir_intrinsic_get_buffer_size);
instr->src[0] = nir_src_for_ssa(index);
- nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
+ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
nir_builder_instr_insert(&b->nb, &instr->instr);
nir_ssa_def *buf_size = &instr->dest.ssa;
diff --git a/src/compiler/nir_types.h b/src/compiler/nir_types.h
index d92605b..5efdd85 100644
--- a/src/compiler/nir_types.h
+++ b/src/compiler/nir_types.h
@@ -80,6 +80,27 @@ enum glsl_base_type glsl_get_sampler_result_type(const struct glsl_type *type);
unsigned glsl_get_record_location_offset(const struct glsl_type *type,
unsigned length);
+/* Return the bit size used for a value of the given GLSL base type.
+ *
+ * int, uint, bool, float and subroutine types are 32 bits; double is 64.
+ * Any other base type hits unreachable().
+ */
+static inline unsigned
+glsl_get_bit_size(enum glsl_base_type type)
+{
+ switch (type) {
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_BOOL:
+ case GLSL_TYPE_FLOAT: /* TODO handle mediump */
+ case GLSL_TYPE_SUBROUTINE:
+ return 32;
+
+ case GLSL_TYPE_DOUBLE:
+ return 64;
+
+ default:
+ unreachable("unknown base type");
+ }
+
+ /* NOTE(review): presumably only here to keep compilers from warning about
+ * a missing return; every switch path above returns or is unreachable.
+ */
+ return 0;
+}
+
bool glsl_type_is_void(const struct glsl_type *type);
bool glsl_type_is_error(const struct glsl_type *type);
bool glsl_type_is_vector(const struct glsl_type *type);
diff --git a/src/egl/main/eglconfig.c b/src/egl/main/eglconfig.c
index c445d9b..d79c0e1 100644
--- a/src/egl/main/eglconfig.c
+++ b/src/egl/main/eglconfig.c
@@ -44,7 +44,6 @@
#include "egllog.h"
-#define MIN2(A, B) (((A) < (B)) ? (A) : (B))
/**
diff --git a/src/egl/main/egldefines.h b/src/egl/main/egldefines.h
index a32cab2..13a7563 100644
--- a/src/egl/main/egldefines.h
+++ b/src/egl/main/egldefines.h
@@ -40,9 +40,16 @@ extern "C" {
#define _EGL_MAX_EXTENSIONS_LEN 1000
+/* Hardcoded, conservative maximum pbuffer dimensions. When
+ * EGL_LARGEST_PBUFFER is in use, the surface width and height are clamped
+ * to these values.
+ */
+#define _EGL_MAX_PBUFFER_WIDTH 4096
+#define _EGL_MAX_PBUFFER_HEIGHT 4096
+
#define _EGL_VENDOR_STRING "Mesa Project"
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+#define MIN2(A, B) (((A) < (B)) ? (A) : (B))
#ifdef __cplusplus
}
diff --git a/src/egl/main/eglsurface.c b/src/egl/main/eglsurface.c
index 4fa43f3..2971bb0 100644
--- a/src/egl/main/eglsurface.c
+++ b/src/egl/main/eglsurface.c
@@ -307,6 +307,12 @@ _eglInitSurface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type,
if (err != EGL_SUCCESS)
return _eglError(err, func);
+ /* if EGL_LARGEST_PBUFFER in use, clamp width and height */
+ if (surf->LargestPbuffer) {
+ surf->Width = MIN2(surf->Width, _EGL_MAX_PBUFFER_WIDTH);
+ surf->Height = MIN2(surf->Height, _EGL_MAX_PBUFFER_HEIGHT);
+ }
+
return EGL_TRUE;
}
diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c
index 6b33341..fcef31b 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -206,12 +206,6 @@ static unsigned tgsi_gs_run(struct draw_geometry_shader *shader,
{
struct tgsi_exec_machine *machine = shader->machine;
- tgsi_set_exec_mask(machine,
- 1,
- input_primitives > 1,
- input_primitives > 2,
- input_primitives > 3);
-
/* run interpreter */
tgsi_exec_machine_run(machine);
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index e85ae16..cd9ee54 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -264,11 +264,11 @@ aa_transform_epilog(struct tgsi_transform_context *ctx)
if (aactx->colorOutput != -1) {
/* insert texture sampling code for antialiasing. */
- /* TEX texTemp, input_coord, sampler */
- tgsi_transform_tex_2d_inst(ctx,
- TGSI_FILE_TEMPORARY, aactx->texTemp,
- TGSI_FILE_INPUT, aactx->maxInput + 1,
- aactx->freeSampler);
+ /* TEX texTemp, input_coord, sampler, 2D */
+ tgsi_transform_tex_inst(ctx,
+ TGSI_FILE_TEMPORARY, aactx->texTemp,
+ TGSI_FILE_INPUT, aactx->maxInput + 1,
+ TGSI_TEXTURE_2D, aactx->freeSampler);
/* MOV rgb */
tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index abd64f5..3fd8ef3 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -159,12 +159,6 @@ vs_exec_run_linear( struct draw_vertex_shader *shader,
input = (const float (*)[4])((const char *)input + input_stride);
}
- tgsi_set_exec_mask(machine,
- 1,
- max_vertices > 1,
- max_vertices > 2,
- max_vertices > 3);
-
/* run interpreter */
tgsi_exec_machine_run( machine );
diff --git a/src/gallium/auxiliary/hud/hud_context.c b/src/gallium/auxiliary/hud/hud_context.c
index fb99834..4673458 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -1191,6 +1191,7 @@ hud_create(struct pipe_context *pipe, struct cso_context *cso)
"FRAG\n"
"DCL IN[0], GENERIC[0], LINEAR\n"
"DCL SAMP[0]\n"
+ "DCL SVIEW[0], RECT, FLOAT\n"
"DCL OUT[0], COLOR[0]\n"
"DCL TEMP[0]\n"
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index b719176..7ec8b66 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -459,7 +459,7 @@ ttn_emit_immediate(struct ttn_compile *c)
c->next_imm++;
for (i = 0; i < 4; i++)
- load_const->value.u[i] = tgsi_imm->u[i].Uint;
+ load_const->value.u32[i] = tgsi_imm->u[i].Uint;
nir_builder_instr_insert(b, &load_const->instr);
}
@@ -515,8 +515,8 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
nir_intrinsic_load_var);
load->num_components = 4;
load->variables[0] = ttn_array_deref(c, load, var, offset, indirect);
-
- nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
+ nir_ssa_dest_init(&load->instr, &load->dest,
+ 4, 32, NULL);
nir_builder_instr_insert(b, &load->instr);
src = nir_src_for_ssa(&load->dest.ssa);
@@ -567,7 +567,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
load = nir_intrinsic_instr_create(b->shader, op);
load->num_components = ncomp;
- nir_ssa_dest_init(&load->instr, &load->dest, ncomp, NULL);
+ nir_ssa_dest_init(&load->instr, &load->dest, ncomp, 32, NULL);
nir_builder_instr_insert(b, &load->instr);
src = nir_src_for_ssa(&load->dest.ssa);
@@ -632,7 +632,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
}
load->src[srcn++] = nir_src_for_ssa(offset);
- nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
+ nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
nir_builder_instr_insert(b, &load->instr);
src = nir_src_for_ssa(&load->dest.ssa);
@@ -1425,7 +1425,7 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
assert(src_number == num_srcs);
- nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL);
+ nir_ssa_dest_init(&instr->instr, &instr->dest, 4, 32, NULL);
nir_builder_instr_insert(b, &instr->instr);
/* Resolve the writemask on the texture op. */
@@ -1464,10 +1464,10 @@ ttn_txq(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
txs->src[0].src = nir_src_for_ssa(ttn_channel(b, src[0], X));
txs->src[0].src_type = nir_tex_src_lod;
- nir_ssa_dest_init(&txs->instr, &txs->dest, 3, NULL);
+ nir_ssa_dest_init(&txs->instr, &txs->dest, 3, 32, NULL);
nir_builder_instr_insert(b, &txs->instr);
- nir_ssa_dest_init(&qlv->instr, &qlv->dest, 1, NULL);
+ nir_ssa_dest_init(&qlv->instr, &qlv->dest, 1, 32, NULL);
nir_builder_instr_insert(b, &qlv->instr);
ttn_move_dest_masked(b, dest, &txs->dest.ssa, TGSI_WRITEMASK_XYZ);
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.h b/src/gallium/auxiliary/nir/tgsi_to_nir.h
index 0651870..f480009 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.h
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.h
@@ -23,8 +23,6 @@
#include "compiler/nir/nir.h"
-struct nir_shader_compiler_options *options;
-
struct nir_shader *
tgsi_to_nir(const void *tgsi_tokens,
const struct nir_shader_compiler_options *options);
diff --git a/src/gallium/auxiliary/postprocess/pp_colors.h b/src/gallium/auxiliary/postprocess/pp_colors.h
index a79858e..76c4ab4 100644
--- a/src/gallium/auxiliary/postprocess/pp_colors.h
+++ b/src/gallium/auxiliary/postprocess/pp_colors.h
@@ -33,6 +33,7 @@ static const char nored[] = "FRAG\n"
"DCL IN[0], GENERIC[0], PERSPECTIVE\n"
"DCL OUT[0], COLOR\n"
"DCL SAMP[0]\n"
+ "DCL SVIEW[0], 2D, FLOAT\n"
"DCL TEMP[0]\n"
"IMM FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}\n"
" 0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D\n"
@@ -46,6 +47,7 @@ static const char nogreen[] = "FRAG\n"
"DCL IN[0], GENERIC[0], PERSPECTIVE\n"
"DCL OUT[0], COLOR\n"
"DCL SAMP[0]\n"
+ "DCL SVIEW[0], 2D, FLOAT\n"
"DCL TEMP[0]\n"
"IMM FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}\n"
" 0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D\n"
@@ -59,6 +61,7 @@ static const char noblue[] = "FRAG\n"
"DCL IN[0], GENERIC[0], PERSPECTIVE\n"
"DCL OUT[0], COLOR\n"
"DCL SAMP[0]\n"
+ "DCL SVIEW[0], 2D, FLOAT\n"
"DCL TEMP[0]\n"
"IMM FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}\n"
" 0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D\n"
diff --git a/src/gallium/auxiliary/postprocess/pp_mlaa.h b/src/gallium/auxiliary/postprocess/pp_mlaa.h
index 93a8a8a..0b2c363 100644
--- a/src/gallium/auxiliary/postprocess/pp_mlaa.h
+++ b/src/gallium/auxiliary/postprocess/pp_mlaa.h
@@ -50,6 +50,7 @@ static const char depth1fs[] = "FRAG\n"
"DCL IN[2], GENERIC[11], PERSPECTIVE\n"
"DCL OUT[0], COLOR\n"
"DCL SAMP[0]\n"
+ "DCL SVIEW[0], 2D, FLOAT\n"
"DCL TEMP[0..2]\n"
"IMM FLT32 { 0.0030, 0.0000, 1.0000, 0.0000}\n"
" 0: TEX TEMP[0].x, IN[1].xyyy, SAMP[0], 2D\n"
@@ -80,6 +81,7 @@ static const char color1fs[] = "FRAG\n"
"DCL IN[2], GENERIC[11], PERSPECTIVE\n"
"DCL OUT[0], COLOR\n"
"DCL SAMP[0]\n"
+ "DCL SVIEW[0], 2D, FLOAT\n"
"DCL TEMP[0..2]\n"
"IMM FLT32 { 0.2126, 0.7152, 0.0722, 0.1000}\n"
"IMM FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}\n"
@@ -112,6 +114,7 @@ static const char neigh3fs[] = "FRAG\n"
"DCL IN[2], GENERIC[11], PERSPECTIVE\n"
"DCL OUT[0], COLOR\n"
"DCL SAMP[0]\n"
+ "DCL SVIEW[0], 2D, FLOAT\n"
"DCL SAMP[1]\n"
"DCL TEMP[0..8]\n"
"IMM FLT32 { 1.0000, 0.00001, 0.0000, 0.0000}\n"
@@ -175,8 +178,11 @@ static const char blend2fs_1[] = "FRAG\n"
"DCL IN[0], GENERIC[0], PERSPECTIVE\n"
"DCL OUT[0], COLOR\n"
"DCL SAMP[0]\n"
+ "DCL SVIEW[0], 2D, FLOAT\n"
"DCL SAMP[1]\n"
+ "DCL SVIEW[1], 2D, FLOAT\n"
"DCL SAMP[2]\n"
+ "DCL SVIEW[2], 2D, FLOAT\n"
"DCL CONST[0]\n"
"DCL TEMP[0..6]\n"
"IMM FLT32 { 0.0000, -0.2500, 0.00609756, 0.5000}\n"
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index e5355f5..7e30bb6 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -111,7 +111,7 @@ tgsi_default_declaration( void )
declaration.Local = 0;
declaration.Array = 0;
declaration.Atomic = 0;
- declaration.Shared = 0;
+ declaration.MemType = TGSI_MEMORY_TYPE_GLOBAL;
declaration.Padding = 0;
return declaration;
@@ -127,6 +127,8 @@ tgsi_build_declaration(
unsigned invariant,
unsigned local,
unsigned array,
+ unsigned atomic,
+ unsigned mem_type,
struct tgsi_header *header )
{
struct tgsi_declaration declaration;
@@ -143,6 +145,8 @@ tgsi_build_declaration(
declaration.Invariant = invariant;
declaration.Local = local;
declaration.Array = array;
+ declaration.Atomic = atomic;
+ declaration.MemType = mem_type;
header_bodysize_grow( header );
return declaration;
@@ -401,6 +405,8 @@ tgsi_build_full_declaration(
full_decl->Declaration.Invariant,
full_decl->Declaration.Local,
full_decl->Declaration.Array,
+ full_decl->Declaration.Atomic,
+ full_decl->Declaration.MemType,
header );
if (maxsize <= size)
@@ -775,6 +781,8 @@ tgsi_default_instruction_memory( void )
struct tgsi_instruction_memory instruction_memory;
instruction_memory.Qualifier = 0;
+ instruction_memory.Texture = 0;
+ instruction_memory.Format = 0;
instruction_memory.Padding = 0;
return instruction_memory;
@@ -790,6 +798,8 @@ tgsi_build_instruction_memory(
struct tgsi_instruction_memory instruction_memory;
instruction_memory.Qualifier = qualifier;
+ instruction_memory.Texture = 0;
+ instruction_memory.Format = 0;
instruction_memory.Padding = 0;
instruction->Memory = 1;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index c8b91bb..6d39ef2 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -365,8 +365,13 @@ iter_declaration(
}
if (decl->Declaration.File == TGSI_FILE_MEMORY) {
- if (decl->Declaration.Shared)
- TXT(", SHARED");
+ switch (decl->Declaration.MemType) {
+ /* Note: ", GLOBAL" is optional when parsing, since it is the default */
+ case TGSI_MEMORY_TYPE_GLOBAL: TXT(", GLOBAL"); break;
+ case TGSI_MEMORY_TYPE_SHARED: TXT(", SHARED"); break;
+ case TGSI_MEMORY_TYPE_PRIVATE: TXT(", PRIVATE"); break;
+ case TGSI_MEMORY_TYPE_INPUT: TXT(", INPUT"); break;
+ }
}
if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 12a6875..991c3bf 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -196,10 +196,6 @@ struct tgsi_sampler
#define TGSI_EXEC_TEMP_HALF_I (TGSI_EXEC_NUM_TEMPS + 3)
#define TGSI_EXEC_TEMP_HALF_C 0
-/* execution mask, each value is either 0 or ~0 */
-#define TGSI_EXEC_MASK_I (TGSI_EXEC_NUM_TEMPS + 3)
-#define TGSI_EXEC_MASK_C 1
-
/* 4 register buffer for various purposes */
#define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4)
#define TGSI_EXEC_NUM_TEMP_R 4
@@ -397,27 +393,6 @@ boolean
tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst);
-static inline void
-tgsi_set_kill_mask(struct tgsi_exec_machine *mach, unsigned mask)
-{
- mach->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0] =
- mask;
-}
-
-
-/** Set execution mask values prior to executing the shader */
-static inline void
-tgsi_set_exec_mask(struct tgsi_exec_machine *mach,
- boolean ch0, boolean ch1, boolean ch2, boolean ch3)
-{
- int *mask = mach->Temps[TGSI_EXEC_MASK_I].xyzw[TGSI_EXEC_MASK_C].i;
- mask[0] = ch0 ? ~0 : 0;
- mask[1] = ch1 ? ~0 : 0;
- mask[2] = ch2 ? ~0 : 0;
- mask[3] = ch3 ? ~0 : 0;
-}
-
-
extern void
tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
unsigned num_bufs,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 8e24cc6..d32c3a1 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -38,6 +38,7 @@
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
+#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_scan.h"
@@ -192,8 +193,17 @@ scan_instruction(struct tgsi_shader_info *info,
}
}
- if (is_memory_file(src->Register.File))
+ if (is_memory_file(src->Register.File)) {
is_mem_inst = true;
+
+ if (tgsi_get_opcode_info(fullinst->Instruction.Opcode)->is_store) {
+ info->writes_memory = TRUE;
+
+ if (src->Register.File == TGSI_FILE_IMAGE &&
+ !src->Register.Indirect)
+ info->images_writemask |= 1 << src->Register.Index;
+ }
+ }
}
/* check for indirect register writes */
@@ -204,8 +214,16 @@ scan_instruction(struct tgsi_shader_info *info,
info->indirect_files_written |= (1 << dst->Register.File);
}
- if (is_memory_file(dst->Register.File))
+ if (is_memory_file(dst->Register.File)) {
+ assert(fullinst->Instruction.Opcode == TGSI_OPCODE_STORE);
+
is_mem_inst = true;
+ info->writes_memory = TRUE;
+
+ if (dst->Register.File == TGSI_FILE_IMAGE &&
+ !dst->Register.Indirect)
+ info->images_writemask |= 1 << dst->Register.Index;
+ }
}
if (is_mem_inst)
@@ -413,6 +431,9 @@ scan_declaration(struct tgsi_shader_info *info,
}
} else if (file == TGSI_FILE_SAMPLER) {
info->samplers_declared |= 1 << reg;
+ } else if (file == TGSI_FILE_IMAGE) {
+ if (fulldecl->Image.Resource == TGSI_TEXTURE_BUFFER)
+ info->images_buffers |= 1 << reg;
}
}
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index d65dec7..76d8925 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -111,6 +111,7 @@ struct tgsi_shader_info
boolean writes_clipvertex;
boolean writes_viewport_index;
boolean writes_layer;
+ boolean writes_memory; /**< contains stores or atomics to buffers or images */
boolean is_msaa_sampler[PIPE_MAX_SAMPLERS];
boolean uses_doubles; /**< uses any of the double instructions */
unsigned clipdist_writemask;
@@ -118,6 +119,15 @@ struct tgsi_shader_info
unsigned num_written_culldistance;
unsigned num_written_clipdistance;
/**
+ * Bitmask indicating which images are written to (STORE / ATOM*).
+ * Indirect image accesses are not reflected in this mask.
+ */
+ unsigned images_writemask;
+ /**
+ * Bitmask indicating which declared images are buffers.
+ */
+ unsigned images_buffers;
+ /**
* Bitmask indicating which register files are accessed with
* indirect addressing. The bits are (1 << TGSI_FILE_x), etc.
*/
diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c b/src/gallium/auxiliary/tgsi/tgsi_strings.c
index 6bd1a2e..ae779a8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_strings.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c
@@ -145,6 +145,7 @@ const char *tgsi_property_names[TGSI_PROPERTY_COUNT] =
"NUM_CLIPDIST_ENABLED",
"NUM_CULLDIST_ENABLED",
"FS_EARLY_DEPTH_STENCIL",
+ "NEXT_SHADER",
};
const char *tgsi_return_type_names[TGSI_RETURN_TYPE_COUNT] =
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index 77598d2..028633c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -1390,8 +1390,18 @@ static boolean parse_declaration( struct translate_ctx *ctx )
ctx->cur = cur;
}
} else if (file == TGSI_FILE_MEMORY) {
- if (str_match_nocase_whole(&cur, "SHARED")) {
- decl.Declaration.Shared = 1;
+ if (str_match_nocase_whole(&cur, "GLOBAL")) {
+ /* Note: this is a no-op, since GLOBAL is the default. */
+ decl.Declaration.MemType = TGSI_MEMORY_TYPE_GLOBAL;
+ ctx->cur = cur;
+ } else if (str_match_nocase_whole(&cur, "SHARED")) {
+ decl.Declaration.MemType = TGSI_MEMORY_TYPE_SHARED;
+ ctx->cur = cur;
+ } else if (str_match_nocase_whole(&cur, "PRIVATE")) {
+ decl.Declaration.MemType = TGSI_MEMORY_TYPE_PRIVATE;
+ ctx->cur = cur;
+ } else if (str_match_nocase_whole(&cur, "INPUT")) {
+ decl.Declaration.MemType = TGSI_MEMORY_TYPE_INPUT;
ctx->cur = cur;
}
} else {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.h b/src/gallium/auxiliary/tgsi/tgsi_transform.h
index 27e6179..c21ff95 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_transform.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_transform.h
@@ -302,6 +302,40 @@ tgsi_transform_op2_inst(struct tgsi_transform_context *ctx,
static inline void
+tgsi_transform_op3_inst(struct tgsi_transform_context *ctx,
+ unsigned opcode,
+ unsigned dst_file,
+ unsigned dst_index,
+ unsigned dst_writemask,
+ unsigned src0_file,
+ unsigned src0_index,
+ unsigned src1_file,
+ unsigned src1_index,
+ unsigned src2_file,
+ unsigned src2_index)
+{
+ struct tgsi_full_instruction inst;
+ /* Start from the default instruction; fill in one dst and three srcs. */
+ inst = tgsi_default_full_instruction();
+ inst.Instruction.Opcode = opcode;
+ inst.Instruction.NumDstRegs = 1;
+ inst.Dst[0].Register.File = dst_file;
+ inst.Dst[0].Register.Index = dst_index;
+ inst.Dst[0].Register.WriteMask = dst_writemask;
+ inst.Instruction.NumSrcRegs = 3;
+ inst.Src[0].Register.File = src0_file;
+ inst.Src[0].Register.Index = src0_index;
+ inst.Src[1].Register.File = src1_file;
+ inst.Src[1].Register.Index = src1_index;
+ inst.Src[2].Register.File = src2_file;
+ inst.Src[2].Register.Index = src2_index;
+ /* Hand the finished instruction to the context's emit callback. */
+ ctx->emit_instruction(ctx, &inst);
+}
+
+
+
+static inline void
tgsi_transform_op1_swz_inst(struct tgsi_transform_context *ctx,
unsigned opcode,
unsigned dst_file,
@@ -482,15 +516,18 @@ tgsi_transform_kill_inst(struct tgsi_transform_context *ctx,
static inline void
-tgsi_transform_tex_2d_inst(struct tgsi_transform_context *ctx,
- unsigned dst_file,
- unsigned dst_index,
- unsigned src_file,
- unsigned src_index,
- unsigned sampler_index)
+tgsi_transform_tex_inst(struct tgsi_transform_context *ctx,
+ unsigned dst_file,
+ unsigned dst_index,
+ unsigned src_file,
+ unsigned src_index,
+ unsigned tex_target,
+ unsigned sampler_index)
{
struct tgsi_full_instruction inst;
+ assert(tex_target < TGSI_TEXTURE_COUNT);
+
inst = tgsi_default_full_instruction();
inst.Instruction.Opcode = TGSI_OPCODE_TEX;
inst.Instruction.NumDstRegs = 1;
@@ -498,7 +535,7 @@ tgsi_transform_tex_2d_inst(struct tgsi_transform_context *ctx,
inst.Dst[0].Register.Index = dst_index;
inst.Instruction.NumSrcRegs = 2;
inst.Instruction.Texture = TRUE;
- inst.Texture.Texture = TGSI_TEXTURE_2D;
+ inst.Texture.Texture = tex_target;
inst.Src[0].Register.File = src_file;
inst.Src[0].Register.Index = src_index;
inst.Src[1].Register.File = TGSI_FILE_SAMPLER;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index ab1d034..297e257 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -101,6 +101,7 @@ struct ureg_program
{
unsigned processor;
bool supports_any_inout_decl_range;
+ int next_shader_processor;
struct {
unsigned semantic_name;
@@ -190,7 +191,7 @@ struct ureg_program
struct ureg_tokens domain[2];
- bool use_shared_memory;
+ bool use_memory[TGSI_MEMORY_TYPE_COUNT];
};
static union tgsi_any_token error_tokens[32];
@@ -729,13 +730,14 @@ struct ureg_src ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr,
return reg;
}
-/* Allocate a shared memory area.
+/* Allocate a memory area.
*/
-struct ureg_src ureg_DECL_shared_memory(struct ureg_program *ureg)
+struct ureg_src ureg_DECL_memory(struct ureg_program *ureg,
+ unsigned memory_type)
{
- struct ureg_src reg = ureg_src_register(TGSI_FILE_MEMORY, 0);
+ struct ureg_src reg = ureg_src_register(TGSI_FILE_MEMORY, memory_type);
- ureg->use_shared_memory = true;
+ ureg->use_memory[memory_type] = true;
return reg;
}
@@ -1672,7 +1674,7 @@ emit_decl_buffer(struct ureg_program *ureg,
}
static void
-emit_decl_shared_memory(struct ureg_program *ureg)
+emit_decl_memory(struct ureg_program *ureg, unsigned memory_type)
{
union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2);
@@ -1681,11 +1683,11 @@ emit_decl_shared_memory(struct ureg_program *ureg)
out[0].decl.NrTokens = 2;
out[0].decl.File = TGSI_FILE_MEMORY;
out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
- out[0].decl.Shared = true;
+ out[0].decl.MemType = memory_type;
out[1].value = 0;
- out[1].decl_range.First = 0;
- out[1].decl_range.Last = 0;
+ out[1].decl_range.First = memory_type;
+ out[1].decl_range.Last = memory_type;
}
static void
@@ -1860,8 +1862,10 @@ static void emit_decls( struct ureg_program *ureg )
emit_decl_buffer(ureg, ureg->buffer[i].index, ureg->buffer[i].atomic);
}
- if (ureg->use_shared_memory)
- emit_decl_shared_memory(ureg);
+ for (i = 0; i < TGSI_MEMORY_TYPE_COUNT; i++) {
+ if (ureg->use_memory[i])
+ emit_decl_memory(ureg, i);
+ }
if (ureg->const_decls.nr_constant_ranges) {
for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) {
@@ -1966,6 +1970,16 @@ const struct tgsi_token *ureg_finalize( struct ureg_program *ureg )
{
const struct tgsi_token *tokens;
+ switch (ureg->processor) {
+ case TGSI_PROCESSOR_VERTEX:
+ case TGSI_PROCESSOR_TESS_EVAL:
+ ureg_property(ureg, TGSI_PROPERTY_NEXT_SHADER,
+ ureg->next_shader_processor == -1 ?
+ TGSI_PROCESSOR_FRAGMENT :
+ ureg->next_shader_processor);
+ break;
+ }
+
emit_header( ureg );
emit_decls( ureg );
copy_instructions( ureg );
@@ -2079,6 +2093,7 @@ ureg_create_with_screen(unsigned processor, struct pipe_screen *screen)
screen->get_shader_param(screen,
util_pipe_shader_from_tgsi_processor(processor),
PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE) != 0;
+ ureg->next_shader_processor = -1;
for (i = 0; i < Elements(ureg->properties); i++)
ureg->properties[i] = ~0;
@@ -2108,6 +2123,13 @@ no_ureg:
}
+void
+ureg_set_next_shader_processor(struct ureg_program *ureg, unsigned processor)
+{
+ ureg->next_shader_processor = processor;
+}
+
+
unsigned
ureg_get_nr_outputs( const struct ureg_program *ureg )
{
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index 04a62a6..b4258fd 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -114,6 +114,8 @@ ureg_create_shader( struct ureg_program *,
struct pipe_context *pipe,
const struct pipe_stream_output_info *so );
+void
+ureg_set_next_shader_processor(struct ureg_program *ureg, unsigned processor);
/* Alternately, return the built token stream and hand ownership of
* that memory to the caller:
@@ -338,7 +340,7 @@ struct ureg_src
ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr, bool atomic);
struct ureg_src
-ureg_DECL_shared_memory(struct ureg_program *ureg);
+ureg_DECL_memory(struct ureg_program *ureg, unsigned memory_type);
static inline struct ureg_src
ureg_imm4f( struct ureg_program *ureg,
diff --git a/src/gallium/auxiliary/util/u_pstipple.c b/src/gallium/auxiliary/util/u_pstipple.c
index 74e6f99..bcbe2a2 100644
--- a/src/gallium/auxiliary/util/u_pstipple.c
+++ b/src/gallium/auxiliary/util/u_pstipple.c
@@ -344,11 +344,11 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx)
pctx->wincoordFile, wincoordInput,
TGSI_FILE_IMMEDIATE, pctx->numImmed);
- /* TEX texTemp, texTemp, sampler; */
- tgsi_transform_tex_2d_inst(ctx,
- TGSI_FILE_TEMPORARY, texTemp,
- TGSI_FILE_TEMPORARY, texTemp,
- sampIdx);
+ /* TEX texTemp, texTemp, sampler, 2D; */
+ tgsi_transform_tex_inst(ctx,
+ TGSI_FILE_TEMPORARY, texTemp,
+ TGSI_FILE_TEMPORARY, texTemp,
+ TGSI_TEXTURE_2D, sampIdx);
/* KILL_IF -texTemp; # if -texTemp < 0, kill fragment */
tgsi_transform_kill_inst(ctx,
diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c
index 7ffb271..76950a1 100644
--- a/src/gallium/auxiliary/util/u_simple_shaders.c
+++ b/src/gallium/auxiliary/util/u_simple_shaders.c
@@ -646,6 +646,7 @@ util_make_fs_blit_msaa_depthstencil(struct pipe_context *pipe,
"FRAG\n"
"DCL IN[0], GENERIC[0], LINEAR\n"
"DCL SAMP[0..1]\n"
+ "DCL SVIEW[0..1], %s, FLOAT\n"
"DCL OUT[0], POSITION\n"
"DCL OUT[1], STENCIL\n"
"DCL TEMP[0]\n"
@@ -663,7 +664,7 @@ util_make_fs_blit_msaa_depthstencil(struct pipe_context *pipe,
assert(tgsi_tex == TGSI_TEXTURE_2D_MSAA ||
tgsi_tex == TGSI_TEXTURE_2D_ARRAY_MSAA);
- sprintf(text, shader_templ, type, type);
+ sprintf(text, shader_templ, type, type, type);
if (!tgsi_text_translate(text, tokens, Elements(tokens))) {
assert(0);
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index af2df22..6366f7e 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -3213,6 +3213,14 @@ Whether depth test, stencil test, and occlusion query should run before
the fragment shader (regardless of fragment shader side effects). Corresponds
to GLSL early_fragment_tests.
+NEXT_SHADER
+"""""""""""
+
+Which shader stage will MOST LIKELY follow after this shader when the shader
+is bound. This is only a hint to the driver and doesn't have to be precise.
+Only set for VS and TES.
+
+
Texture Sampling and Texture Formats
------------------------------------
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 7a1812f..54315d2 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1017,7 +1017,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
const_offset = nir_src_as_const_value(intr->src[1]);
if (const_offset) {
- off += const_offset->u[0];
+ off += const_offset->u32[0];
} else {
/* For load_ubo_indirect, second src is indirect offset: */
src1 = get_src(ctx, &intr->src[1])[0];
@@ -1159,7 +1159,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
idx = nir_intrinsic_base(intr);
const_offset = nir_src_as_const_value(intr->src[0]);
if (const_offset) {
- idx += const_offset->u[0];
+ idx += const_offset->u32[0];
for (int i = 0; i < intr->num_components; i++) {
unsigned n = idx * 4 + i;
dst[i] = create_uniform(ctx, n);
@@ -1186,7 +1186,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
idx = nir_intrinsic_base(intr);
const_offset = nir_src_as_const_value(intr->src[0]);
if (const_offset) {
- idx += const_offset->u[0];
+ idx += const_offset->u32[0];
for (int i = 0; i < intr->num_components; i++) {
unsigned n = idx * 4 + i;
dst[i] = ctx->ir->inputs[n];
@@ -1213,7 +1213,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
idx = nir_intrinsic_base(intr);
const_offset = nir_src_as_const_value(intr->src[1]);
compile_assert(ctx, const_offset != NULL);
- idx += const_offset->u[0];
+ idx += const_offset->u32[0];
src = get_src(ctx, &intr->src[0]);
for (int i = 0; i < intr->num_components; i++) {
@@ -1301,7 +1301,7 @@ emit_load_const(struct ir3_compile *ctx, nir_load_const_instr *instr)
struct ir3_instruction **dst = get_dst_ssa(ctx, &instr->def,
instr->def.num_components);
for (int i = 0; i < instr->def.num_components; i++)
- dst[i] = create_immed(ctx->block, instr->value.u[i]);
+ dst[i] = create_immed(ctx->block, instr->value.u32[i]);
}
static void
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c
index 8815ac9..ec76b0b 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c
@@ -290,7 +290,7 @@ lower_if_else_block(nir_block *block, void *void_state)
}
nir_ssa_dest_init(&sel->instr, &sel->dest.dest,
- phi->dest.ssa.num_components, phi->dest.ssa.name);
+ phi->dest.ssa.num_components, 32, phi->dest.ssa.name);
sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
nir_ssa_def_rewrite_uses(&phi->dest.ssa,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 9f7d257..21523a2 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -160,7 +160,7 @@ struct nv50_ir_prog_info
uint8_t clipDistances; /* number of clip distance outputs */
uint8_t cullDistances; /* number of cull distance outputs */
int8_t genUserClip; /* request user clip planes for ClipVertex */
- uint8_t auxCBSlot; /* constant buffer index of UCP/draw data */
+ uint8_t auxCBSlot; /* driver constant buffer slot */
uint16_t ucpBase; /* base address for UCPs */
uint16_t drawInfoBase; /* base address for draw parameters */
uint8_t pointSize; /* output index for PointSize */
@@ -175,7 +175,6 @@ struct nv50_ir_prog_info
uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */
bool fp64; /* program uses fp64 math */
bool nv50styleSurfaces; /* generate gX[] access for raw buffers */
- uint8_t resInfoCBSlot; /* cX[] used for tex handles, surface info */
uint16_t texBindBase; /* base address for tex handles (nve4) */
uint16_t suInfoBase; /* base address for surface info (nve4) */
uint16_t sampleInfoBase; /* base address for sample positions */
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 0d7d95e..70f3c3f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -1655,10 +1655,8 @@ CodeEmitterGK110::emitSTORE(const Instruction *i)
break;
}
- if (i->src(0).getFile() != FILE_MEMORY_GLOBAL)
- offset &= 0xffffff;
-
if (code[0] & 0x2) {
+ offset &= 0xffffff;
emitLoadStoreType(i->dType, 0x33);
if (i->src(0).getFile() == FILE_MEMORY_LOCAL)
emitCachingMode(i->cache, 0x2f);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
index 682a19d..bd62006 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -1634,7 +1634,9 @@ CodeEmitterNV50::emitTEX(const TexInstruction *i)
code[1] |= (i->tex.mask & 0xc) << 12;
if (i->tex.liveOnly)
- code[1] |= 4;
+ code[1] |= 1 << 2;
+ if (i->tex.derivAll)
+ code[1] |= 1 << 3;
defId(i->def(0), 2);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index d284446..611d5f9 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -856,15 +856,17 @@ public:
};
std::vector<TextureView> textureViews;
+ /*
struct Resource {
uint8_t target; // TGSI_TEXTURE_*
bool raw;
uint8_t slot; // $surface index
};
std::vector<Resource> resources;
+ */
struct MemoryFile {
- bool shared;
+ uint8_t mem_type; // TGSI_MEMORY_TYPE_*
};
std::vector<MemoryFile> memoryFiles;
@@ -1037,6 +1039,9 @@ void Source::scanProperty(const struct tgsi_full_property *prop)
case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
info->io.cullDistances = prop->u[0].Data;
break;
+ case TGSI_PROPERTY_NEXT_SHADER:
+ /* Do not need to know the next shader stage. */
+ break;
default:
INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
break;
@@ -1222,7 +1227,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
break;
case TGSI_FILE_MEMORY:
for (i = first; i <= last; ++i)
- memoryFiles[i].shared = decl->Declaration.Shared;
+ memoryFiles[i].mem_type = decl->Declaration.MemType;
break;
case TGSI_FILE_NULL:
case TGSI_FILE_TEMPORARY:
@@ -1261,9 +1266,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
info->numBarriers = 1;
if (insn.dstCount()) {
- if (insn.getDst(0).getFile() == TGSI_FILE_OUTPUT) {
- Instruction::DstRegister dst = insn.getDst(0);
+ Instruction::DstRegister dst = insn.getDst(0);
+ if (dst.getFile() == TGSI_FILE_OUTPUT) {
if (dst.isIndirect(0))
for (unsigned i = 0; i < info->numOutputs; ++i)
info->out[i].mask = 0xf;
@@ -1280,11 +1285,11 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
if (isEdgeFlagPassthrough(insn))
info->io.edgeFlagIn = insn.getSrc(0).getIndex(0);
} else
- if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
- if (insn.getDst(0).isIndirect(0))
- indirectTempArrays.insert(insn.getDst(0).getArrayId());
+ if (dst.getFile() == TGSI_FILE_TEMPORARY) {
+ if (dst.isIndirect(0))
+ indirectTempArrays.insert(dst.getArrayId());
} else
- if (insn.getDst(0).getFile() == TGSI_FILE_BUFFER) {
+ if (dst.getFile() == TGSI_FILE_BUFFER) {
info->io.globalAccess |= 0x2;
}
}
@@ -1419,8 +1424,8 @@ private:
void handleLIT(Value *dst0[4]);
void handleUserClipPlanes();
- Symbol *getResourceBase(int r);
- void getResourceCoords(std::vector<Value *>&, int r, int s);
+ // Symbol *getResourceBase(int r);
+ // void getResourceCoords(std::vector<Value *>&, int r, int s);
void handleLOAD(Value *dst0[4]);
void handleSTORE();
@@ -1527,8 +1532,21 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
sym->reg.fileIndex = fileIdx;
- if (tgsiFile == TGSI_FILE_MEMORY && code->memoryFiles[fileIdx].shared)
- sym->setFile(FILE_MEMORY_SHARED);
+ if (tgsiFile == TGSI_FILE_MEMORY) {
+ switch (code->memoryFiles[fileIdx].mem_type) {
+ case TGSI_MEMORY_TYPE_SHARED:
+ sym->setFile(FILE_MEMORY_SHARED);
+ break;
+ case TGSI_MEMORY_TYPE_INPUT:
+ assert(prog->getType() == Program::TYPE_COMPUTE);
+ assert(idx == -1);
+ sym->setFile(FILE_SHADER_INPUT);
+ address += info->prop.cp.inputOffset;
+ break;
+ default:
+ assert(0); /* TODO: Add support for global and private memory */
+ }
+ }
if (idx >= 0) {
if (sym->reg.file == FILE_SHADER_INPUT)
@@ -1989,7 +2007,6 @@ Converter::loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask)
void
Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
{
- Value *val;
Value *arg[4], *src[8];
Value *lod = NULL, *shd = NULL;
unsigned int s, c, d;
@@ -2032,17 +2049,6 @@ Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
shd = src[n - 1];
}
- if (tgt.isCube()) {
- for (c = 0; c < 3; ++c)
- src[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]);
- val = getScratch();
- mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
- mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
- mkOp1(OP_RCP, TYPE_F32, val, val);
- for (c = 0; c < 3; ++c)
- src[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val);
- }
-
for (c = 0, d = 0; c < 4; ++c) {
if (dst[c]) {
texi->setDef(d++, dst[c]);
@@ -2148,6 +2154,7 @@ Converter::handleLIT(Value *dst0[4])
}
}
+/* Keep this around for now as reference when adding img support
static inline bool
isResourceSpecial(const int r)
{
@@ -2178,7 +2185,8 @@ Converter::getResourceBase(const int r)
switch (r) {
case TGSI_RESOURCE_GLOBAL:
- sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL, 15);
+ sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL,
+ info->io.auxCBSlot);
break;
case TGSI_RESOURCE_LOCAL:
assert(prog->getType() == Program::TYPE_COMPUTE);
@@ -2243,6 +2251,7 @@ partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask)
}
return n + 1;
}
+*/
// For raw loads, granularity is 4 byte.
// Usage of the texture read mask on OP_SULDP is not allowed.
@@ -2253,8 +2262,9 @@ Converter::handleLOAD(Value *dst0[4])
int c;
std::vector<Value *> off, src, ldv, def;
- if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER ||
- tgsi.getSrc(0).getFile() == TGSI_FILE_MEMORY) {
+ switch (tgsi.getSrc(0).getFile()) {
+ case TGSI_FILE_BUFFER:
+ case TGSI_FILE_MEMORY:
for (c = 0; c < 4; ++c) {
if (!dst0[c])
continue;
@@ -2274,9 +2284,12 @@ Converter::handleLOAD(Value *dst0[4])
if (tgsi.getSrc(0).isIndirect(0))
ld->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0));
}
- return;
+ break;
+ default:
+ assert(!"Unsupported srcFile for LOAD");
}
+/* Keep this around for now as reference when adding img support
getResourceCoords(off, r, 1);
if (isResourceRaw(code, r)) {
@@ -2342,6 +2355,7 @@ Converter::handleLOAD(Value *dst0[4])
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
if (dst0[c] != def[c])
mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
+*/
}
// For formatted stores, the write mask on OP_SUSTP can be used.
@@ -2353,8 +2367,9 @@ Converter::handleSTORE()
int c;
std::vector<Value *> off, src, dummy;
- if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER ||
- tgsi.getDst(0).getFile() == TGSI_FILE_MEMORY) {
+ switch (tgsi.getDst(0).getFile()) {
+ case TGSI_FILE_BUFFER:
+ case TGSI_FILE_MEMORY:
for (c = 0; c < 4; ++c) {
if (!(tgsi.getDst(0).getMask() & (1 << c)))
continue;
@@ -2375,9 +2390,12 @@ Converter::handleSTORE()
if (tgsi.getDst(0).isIndirect(0))
st->setIndirect(0, 1, fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0));
}
- return;
+ break;
+ default:
+ assert(!"Unsupported dstFile for STORE");
}
+/* Keep this around for now as reference when adding img support
getResourceCoords(off, r, 0);
src = off;
const int s = src.size();
@@ -2425,6 +2443,7 @@ Converter::handleSTORE()
mkTex(OP_SUSTP, getResourceTarget(code, r), code->resources[r].slot, 0,
dummy, src)->tex.mask = tgsi.getDst(0).getMask();
}
+*/
}
// XXX: These only work on resources with the single-component u32/s32 formats.
@@ -2439,8 +2458,9 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
std::vector<Value *> defv;
LValue *dst = getScratch();
- if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER ||
- tgsi.getSrc(0).getFile() == TGSI_FILE_MEMORY) {
+ switch (tgsi.getSrc(0).getFile()) {
+ case TGSI_FILE_BUFFER:
+ case TGSI_FILE_MEMORY:
for (int c = 0; c < 4; ++c) {
if (!dst0[c])
continue;
@@ -2468,10 +2488,12 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
for (int c = 0; c < 4; ++c)
if (dst0[c])
dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
- return;
+ break;
+ default:
+ assert(!"Unsupported srcFile for ATOM");
}
-
+/* Keep this around for now as reference when adding img support
getResourceCoords(srcv, r, 1);
if (isResourceSpecial(r)) {
@@ -2499,6 +2521,7 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
for (int c = 0; c < 4; ++c)
if (dst0[c])
dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
+*/
}
void
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
index 0b90378..a5deaef 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
@@ -67,6 +67,7 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i)
tmp = bld.getScratch();
for (l = 0; l < 4; ++l) {
+ Value *src[3], *val;
// mov coordinates from lane l to all lanes
bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
for (c = 0; c < dim; ++c) {
@@ -92,10 +93,25 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i)
add->lanes = 1; /* abused for .ndv */
}
+ // normalize cube coordinates if necessary
+ if (i->tex.target.isCube()) {
+ for (c = 0; c < 3; ++c)
+ src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]);
+ val = bld.getScratch();
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
+ bld.mkOp1(OP_RCP, TYPE_F32, val, val);
+ for (c = 0; c < 3; ++c)
+ src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val);
+ } else {
+ for (c = 0; c < dim; ++c)
+ src[c] = crd[c];
+ }
+
// texture
bld.insert(tex = cloneForward(func, i));
for (c = 0; c < dim; ++c)
- tex->setSrc(c + array, crd[c]);
+ tex->setSrc(c + array, src[c]);
bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);
// save results
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
index 12c5f69..02c4f1a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -682,7 +682,7 @@ void NV50LoweringPreSSA::loadTexMsInfo(uint32_t off, Value **ms,
Value **ms_x, Value **ms_y) {
// This loads the texture-indexed ms setting from the constant buffer
Value *tmp = new_LValue(func, FILE_GPR);
- uint8_t b = prog->driver->io.resInfoCBSlot;
+ uint8_t b = prog->driver->io.auxCBSlot;
off += prog->driver->io.suInfoBase;
if (prog->getType() > Program::TYPE_VERTEX)
off += 16 * 2 * 4;
@@ -724,6 +724,23 @@ NV50LoweringPreSSA::handleTEX(TexInstruction *i)
const int dref = arg;
const int lod = i->tex.target.isShadow() ? (arg + 1) : arg;
+ /* Only normalize in the non-explicit derivatives case.
+ */
+ if (i->tex.target.isCube() && i->op != OP_TXD) {
+ Value *src[3], *val;
+ int c;
+ for (c = 0; c < 3; ++c)
+ src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), i->getSrc(c));
+ val = bld.getScratch();
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
+ bld.mkOp1(OP_RCP, TYPE_F32, val, val);
+ for (c = 0; c < 3; ++c) {
+ i->setSrc(c, bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(),
+ i->getSrc(c), val));
+ }
+ }
+
// handle MS, which means looking up the MS params for this texture, and
// adjusting the input coordinates to point at the right sample.
if (i->tex.target.isMS()) {
@@ -934,12 +951,14 @@ NV50LoweringPreSSA::handleTXD(TexInstruction *i)
handleTEX(i);
i->op = OP_TEX; // no need to clone dPdx/dPdy later
+ i->tex.derivAll = true;
for (c = 0; c < dim; ++c)
crd[c] = bld.getScratch();
bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
for (l = 0; l < 4; ++l) {
+ Value *src[3], *val;
// mov coordinates from lane l to all lanes
for (c = 0; c < dim; ++c)
bld.mkQuadop(0x00, crd[c], l, i->getSrc(c), zero);
@@ -949,10 +968,24 @@ NV50LoweringPreSSA::handleTXD(TexInstruction *i)
// add dPdy from lane l to lanes dy
for (c = 0; c < dim; ++c)
bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]);
+ // normalize cube coordinates if necessary
+ if (i->tex.target.isCube()) {
+ for (c = 0; c < 3; ++c)
+ src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]);
+ val = bld.getScratch();
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
+ bld.mkOp1(OP_RCP, TYPE_F32, val, val);
+ for (c = 0; c < 3; ++c)
+ src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val);
+ } else {
+ for (c = 0; c < dim; ++c)
+ src[c] = crd[c];
+ }
// texture
bld.insert(tex = cloneForward(func, i));
for (c = 0; c < dim; ++c)
- tex->setSrc(c, crd[c]);
+ tex->setSrc(c, src[c]);
// save results
for (c = 0; i->defExists(c); ++c) {
Instruction *mov;
@@ -1174,7 +1207,7 @@ NV50LoweringPreSSA::handleRDSV(Instruction *i)
bld.mkLoad(TYPE_F32,
def,
bld.mkSymbol(
- FILE_MEMORY_CONST, prog->driver->io.resInfoCBSlot,
+ FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
TYPE_U32, prog->driver->io.sampleInfoBase + 4 * idx),
off);
break;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index d0936d8..e8f8e30 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -600,7 +600,7 @@ NVC0LoweringPass::visit(BasicBlock *bb)
inline Value *
NVC0LoweringPass::loadTexHandle(Value *ptr, unsigned int slot)
{
- uint8_t b = prog->driver->io.resInfoCBSlot;
+ uint8_t b = prog->driver->io.auxCBSlot;
uint32_t off = prog->driver->io.texBindBase + slot * 4;
return bld.
mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr);
@@ -615,6 +615,24 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
const int lyr = arg - (i->tex.target.isMS() ? 2 : 1);
const int chipset = prog->getTarget()->getChipset();
+ /* Only normalize in the non-explicit derivatives case. For explicit
+ * derivatives, this is handled in handleManualTXD.
+ */
+ if (i->tex.target.isCube() && i->dPdx[0].get() == NULL) {
+ Value *src[3], *val;
+ int c;
+ for (c = 0; c < 3; ++c)
+ src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), i->getSrc(c));
+ val = bld.getScratch();
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
+ bld.mkOp1(OP_RCP, TYPE_F32, val, val);
+ for (c = 0; c < 3; ++c) {
+ i->setSrc(c, bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(),
+ i->getSrc(c), val));
+ }
+ }
+
// Arguments to the TEX instruction are a little insane. Even though the
// encoding is identical between SM20 and SM30, the arguments mean
// different things between Fermi and Kepler+. A lot of arguments are
@@ -728,9 +746,13 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
}
Value *arrayIndex = i->tex.target.isArray() ? i->getSrc(lyr) : NULL;
- for (int s = dim; s >= 1; --s)
- i->setSrc(s, i->getSrc(s - 1));
- i->setSrc(0, arrayIndex);
+ if (arrayIndex) {
+ for (int s = dim; s >= 1; --s)
+ i->setSrc(s, i->getSrc(s - 1));
+ i->setSrc(0, arrayIndex);
+ } else {
+ i->moveSources(0, 1);
+ }
if (arrayIndex) {
int sat = (i->op == OP_TXF) ? 1 : 0;
@@ -861,6 +883,7 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i)
bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
for (l = 0; l < 4; ++l) {
+ Value *src[3], *val;
// mov coordinates from lane l to all lanes
for (c = 0; c < dim; ++c)
bld.mkQuadop(0x00, crd[c], l, i->getSrc(c + array), zero);
@@ -870,10 +893,24 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i)
// add dPdy from lane l to lanes dy
for (c = 0; c < dim; ++c)
bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]);
+ // normalize cube coordinates
+ if (i->tex.target.isCube()) {
+ for (c = 0; c < 3; ++c)
+ src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]);
+ val = bld.getScratch();
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
+ bld.mkOp1(OP_RCP, TYPE_F32, val, val);
+ for (c = 0; c < 3; ++c)
+ src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val);
+ } else {
+ for (c = 0; c < dim; ++c)
+ src[c] = crd[c];
+ }
// texture
bld.insert(tex = cloneForward(func, i));
for (c = 0; c < dim; ++c)
- tex->setSrc(c + array, crd[c]);
+ tex->setSrc(c + array, src[c]);
// save results
for (c = 0; i->defExists(c); ++c) {
Instruction *mov;
@@ -1098,6 +1135,7 @@ NVC0LoweringPass::handleSharedATOM(Instruction *atom)
break;
default:
assert(0);
+ return;
}
Instruction *i =
@@ -1204,7 +1242,7 @@ NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl)
inline Value *
NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off)
{
- uint8_t b = prog->driver->io.resInfoCBSlot;
+ uint8_t b = prog->driver->io.auxCBSlot;
off += prog->driver->io.suInfoBase;
return bld.
mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr);
@@ -1213,7 +1251,7 @@ NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off)
inline Value *
NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off)
{
- uint8_t b = prog->driver->io.resInfoCBSlot;
+ uint8_t b = prog->driver->io.auxCBSlot;
off += prog->driver->io.suInfoBase;
if (ptr)
@@ -1226,7 +1264,7 @@ NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off)
inline Value *
NVC0LoweringPass::loadResLength32(Value *ptr, uint32_t off)
{
- uint8_t b = prog->driver->io.resInfoCBSlot;
+ uint8_t b = prog->driver->io.auxCBSlot;
off += prog->driver->io.suInfoBase;
if (ptr)
@@ -1540,7 +1578,7 @@ NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su)
call->indirect = 1;
call->absolute = 1;
call->setSrc(0, bld.mkSymbol(FILE_MEMORY_CONST,
- prog->driver->io.resInfoCBSlot, TYPE_U32,
+ prog->driver->io.auxCBSlot, TYPE_U32,
prog->driver->io.suInfoBase + base));
call->setSrc(1, r[2]);
call->setSrc(2, r[4]);
@@ -1698,7 +1736,8 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
}
addr += prog->driver->prop.cp.gridInfoBase;
bld.mkLoad(TYPE_U32, i->getDef(0),
- bld.mkSymbol(FILE_MEMORY_CONST, 0, TYPE_U32, addr), NULL);
+ bld.mkSymbol(FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
+ TYPE_U32, addr), NULL);
break;
case SV_SAMPLE_INDEX:
// TODO: Properly pass source as an address in the PIX address space
@@ -1715,7 +1754,7 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
bld.mkLoad(TYPE_F32,
i->getDef(0),
bld.mkSymbol(
- FILE_MEMORY_CONST, prog->driver->io.resInfoCBSlot,
+ FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
TYPE_U32, prog->driver->io.sampleInfoBase +
4 * sym->reg.data.sv.index),
off);
@@ -1780,7 +1819,7 @@ NVC0LoweringPass::handleSQRT(Instruction *i)
{
if (i->dType == TYPE_F64) {
Value *pred = bld.getSSA(1, FILE_PREDICATE);
- Value *zero = bld.loadImm(NULL, 0.0d);
+ Value *zero = bld.loadImm(NULL, 0.0);
Value *dst = bld.getSSA(8);
bld.mkOp1(OP_RSQ, i->dType, dst, i->getSrc(0));
bld.mkCmp(OP_SET, CC_LE, i->dType, pred, i->dType, i->getSrc(0), zero);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index cfa85ec..066faa3 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -204,6 +204,11 @@ static const char *ldstSubOpStr[] =
"", "lock", "unlock"
};
+static const char *subfmOpStr[] =
+{
+ "", "3d"
+};
+
static const char *DataTypeStr[] =
{
"-",
@@ -548,6 +553,10 @@ void Instruction::print() const
if (subOp < Elements(ldstSubOpStr))
PRINT("%s ", ldstSubOpStr[subOp]);
break;
+ case OP_SUBFM:
+ if (subOp < Elements(subfmOpStr))
+ PRINT("%s ", subfmOpStr[subOp]);
+ break;
default:
if (subOp)
PRINT("(SUBOP:%u) ", subOp);
diff --git a/src/gallium/drivers/nouveau/nouveau_compiler.c b/src/gallium/drivers/nouveau/nouveau_compiler.c
index cd44aa1..ca73fd1 100644
--- a/src/gallium/drivers/nouveau/nouveau_compiler.c
+++ b/src/gallium/drivers/nouveau/nouveau_compiler.c
@@ -114,8 +114,6 @@ nouveau_codegen(int chipset, int type, struct tgsi_token tokens[],
info.io.auxCBSlot = 15;
info.io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
-
- info.io.resInfoCBSlot = 15;
info.io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET;
info.io.msInfoCBSlot = 15;
info.io.msInfoBase = NV50_CB_AUX_MS_OFFSET;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
index 04488d6..d781f6f 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_compute.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
@@ -67,122 +67,94 @@ nv50_screen_compute_setup(struct nv50_screen *screen,
if (ret)
return ret;
- BEGIN_NV04(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
+ BEGIN_NV04(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
PUSH_DATA (push, screen->compute->handle);
- BEGIN_NV04(push, NV50_COMPUTE(UNK02A0), 1);
+ BEGIN_NV04(push, NV50_CP(UNK02A0), 1);
PUSH_DATA (push, 1);
- BEGIN_NV04(push, NV50_COMPUTE(DMA_STACK), 1);
+ BEGIN_NV04(push, NV50_CP(DMA_STACK), 1);
PUSH_DATA (push, fifo->vram);
- BEGIN_NV04(push, NV50_COMPUTE(STACK_ADDRESS_HIGH), 2);
+ BEGIN_NV04(push, NV50_CP(STACK_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->stack_bo->offset);
PUSH_DATA (push, screen->stack_bo->offset);
- BEGIN_NV04(push, NV50_COMPUTE(STACK_SIZE_LOG), 1);
+ BEGIN_NV04(push, NV50_CP(STACK_SIZE_LOG), 1);
PUSH_DATA (push, 4);
- BEGIN_NV04(push, NV50_COMPUTE(UNK0290), 1);
+ BEGIN_NV04(push, NV50_CP(UNK0290), 1);
PUSH_DATA (push, 1);
- BEGIN_NV04(push, NV50_COMPUTE(LANES32_ENABLE), 1);
+ BEGIN_NV04(push, NV50_CP(LANES32_ENABLE), 1);
PUSH_DATA (push, 1);
- BEGIN_NV04(push, NV50_COMPUTE(REG_MODE), 1);
+ BEGIN_NV04(push, NV50_CP(REG_MODE), 1);
PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED);
- BEGIN_NV04(push, NV50_COMPUTE(UNK0384), 1);
+ BEGIN_NV04(push, NV50_CP(UNK0384), 1);
PUSH_DATA (push, 0x100);
- BEGIN_NV04(push, NV50_COMPUTE(DMA_GLOBAL), 1);
+ BEGIN_NV04(push, NV50_CP(DMA_GLOBAL), 1);
PUSH_DATA (push, fifo->vram);
for (i = 0; i < 15; i++) {
- BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(i)), 2);
+ BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(i)), 2);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
- BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(i)), 1);
+ BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(i)), 1);
PUSH_DATA (push, 0);
- BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(i)), 1);
+ BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(i)), 1);
PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
}
- BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(15)), 2);
+ BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(15)), 2);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
- BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(15)), 1);
+ BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(15)), 1);
PUSH_DATA (push, ~0);
- BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(15)), 1);
+ BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(15)), 1);
PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
- BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_LOG_ALLOC), 1);
+ BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_LOG_ALLOC), 1);
PUSH_DATA (push, 7);
- BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_NO_CLAMP), 1);
+ BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_NO_CLAMP), 1);
PUSH_DATA (push, 1);
- BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_LOG_ALLOC), 1);
+ BEGIN_NV04(push, NV50_CP(STACK_WARPS_LOG_ALLOC), 1);
PUSH_DATA (push, 7);
- BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_NO_CLAMP), 1);
+ BEGIN_NV04(push, NV50_CP(STACK_WARPS_NO_CLAMP), 1);
PUSH_DATA (push, 1);
- BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1);
+ BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1);
PUSH_DATA (push, 0);
- BEGIN_NV04(push, NV50_COMPUTE(DMA_TEXTURE), 1);
+ BEGIN_NV04(push, NV50_CP(DMA_TEXTURE), 1);
PUSH_DATA (push, fifo->vram);
- BEGIN_NV04(push, NV50_COMPUTE(TEX_LIMITS), 1);
+ BEGIN_NV04(push, NV50_CP(TEX_LIMITS), 1);
PUSH_DATA (push, 0x54);
- BEGIN_NV04(push, NV50_COMPUTE(LINKED_TSC), 1);
+ BEGIN_NV04(push, NV50_CP(LINKED_TSC), 1);
PUSH_DATA (push, 0);
- BEGIN_NV04(push, NV50_COMPUTE(DMA_TIC), 1);
+ BEGIN_NV04(push, NV50_CP(DMA_TIC), 1);
PUSH_DATA (push, fifo->vram);
- BEGIN_NV04(push, NV50_COMPUTE(TIC_ADDRESS_HIGH), 3);
+ BEGIN_NV04(push, NV50_CP(TIC_ADDRESS_HIGH), 3);
PUSH_DATAh(push, screen->txc->offset);
PUSH_DATA (push, screen->txc->offset);
PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);
- BEGIN_NV04(push, NV50_COMPUTE(DMA_TSC), 1);
+ BEGIN_NV04(push, NV50_CP(DMA_TSC), 1);
PUSH_DATA (push, fifo->vram);
- BEGIN_NV04(push, NV50_COMPUTE(TSC_ADDRESS_HIGH), 3);
+ BEGIN_NV04(push, NV50_CP(TSC_ADDRESS_HIGH), 3);
PUSH_DATAh(push, screen->txc->offset + 65536);
PUSH_DATA (push, screen->txc->offset + 65536);
PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);
- BEGIN_NV04(push, NV50_COMPUTE(DMA_CODE_CB), 1);
+ BEGIN_NV04(push, NV50_CP(DMA_CODE_CB), 1);
PUSH_DATA (push, fifo->vram);
- BEGIN_NV04(push, NV50_COMPUTE(DMA_LOCAL), 1);
+ BEGIN_NV04(push, NV50_CP(DMA_LOCAL), 1);
PUSH_DATA (push, fifo->vram);
- BEGIN_NV04(push, NV50_COMPUTE(LOCAL_ADDRESS_HIGH), 2);
+ BEGIN_NV04(push, NV50_CP(LOCAL_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->tls_bo->offset + 65536);
PUSH_DATA (push, screen->tls_bo->offset + 65536);
- BEGIN_NV04(push, NV50_COMPUTE(LOCAL_SIZE_LOG), 1);
+ BEGIN_NV04(push, NV50_CP(LOCAL_SIZE_LOG), 1);
PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2));
return 0;
}
-static bool
-nv50_compute_validate_program(struct nv50_context *nv50)
-{
- struct nv50_program *prog = nv50->compprog;
-
- if (prog->mem)
- return true;
-
- if (!prog->translated) {
- prog->translated = nv50_program_translate(
- prog, nv50->screen->base.device->chipset, &nv50->base.debug);
- if (!prog->translated)
- return false;
- }
- if (unlikely(!prog->code_size))
- return false;
-
- if (likely(prog->code_size)) {
- if (nv50_program_upload_code(nv50, prog)) {
- struct nouveau_pushbuf *push = nv50->base.pushbuf;
- BEGIN_NV04(push, NV50_COMPUTE(CODE_CB_FLUSH), 1);
- PUSH_DATA (push, 0);
- return true;
- }
- }
- return false;
-}
-
static void
nv50_compute_validate_globals(struct nv50_context *nv50)
{
@@ -198,26 +170,25 @@ nv50_compute_validate_globals(struct nv50_context *nv50)
}
}
+static struct nv50_state_validate
+validate_list_cp[] = {
+ { nv50_compprog_validate, NV50_NEW_CP_PROGRAM },
+ { nv50_compute_validate_globals, NV50_NEW_CP_GLOBALS },
+};
+
static bool
-nv50_compute_state_validate(struct nv50_context *nv50)
+nv50_state_validate_cp(struct nv50_context *nv50, uint32_t mask)
{
- if (!nv50_compute_validate_program(nv50))
- return false;
-
- if (nv50->dirty_cp & NV50_NEW_CP_GLOBALS)
- nv50_compute_validate_globals(nv50);
+ bool ret;
/* TODO: validate textures, samplers, surfaces */
+ ret = nv50_state_validate(nv50, mask, validate_list_cp,
+ ARRAY_SIZE(validate_list_cp), &nv50->dirty_cp,
+ nv50->bufctx_cp);
- nv50_bufctx_fence(nv50->bufctx_cp, false);
-
- nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_cp);
- if (unlikely(nouveau_pushbuf_validate(nv50->base.pushbuf)))
- return false;
if (unlikely(nv50->state.flushed))
nv50_bufctx_fence(nv50->bufctx_cp, true);
-
- return true;
+ return ret;
}
static void
@@ -227,7 +198,7 @@ nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input)
struct nouveau_pushbuf *push = screen->base.pushbuf;
unsigned size = align(nv50->compprog->parm_size, 0x4);
- BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1);
+ BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1);
PUSH_DATA (push, (size / 4) << 8);
if (size) {
@@ -245,7 +216,7 @@ nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input)
nouveau_pushbuf_bufctx(push, nv50->bufctx);
nouveau_pushbuf_validate(push);
- BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM(0)), size / 4);
+ BEGIN_NV04(push, NV50_CP(USER_PARAM(0)), size / 4);
nouveau_pushbuf_data(push, bo, offset, size);
nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm);
@@ -278,7 +249,7 @@ nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
struct nv50_program *cp = nv50->compprog;
bool ret;
- ret = !nv50_compute_state_validate(nv50);
+ ret = !nv50_state_validate_cp(nv50, ~0);
if (ret) {
NOUVEAU_ERR("Failed to launch grid !\n");
return;
@@ -286,33 +257,33 @@ nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
nv50_compute_upload_input(nv50, info->input);
- BEGIN_NV04(push, NV50_COMPUTE(CP_START_ID), 1);
+ BEGIN_NV04(push, NV50_CP(CP_START_ID), 1);
PUSH_DATA (push, nv50_compute_find_symbol(nv50, info->pc));
- BEGIN_NV04(push, NV50_COMPUTE(SHARED_SIZE), 1);
+ BEGIN_NV04(push, NV50_CP(SHARED_SIZE), 1);
PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40));
- BEGIN_NV04(push, NV50_COMPUTE(CP_REG_ALLOC_TEMP), 1);
+ BEGIN_NV04(push, NV50_CP(CP_REG_ALLOC_TEMP), 1);
PUSH_DATA (push, cp->max_gpr);
/* grid/block setup */
- BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_XY), 2);
+ BEGIN_NV04(push, NV50_CP(BLOCKDIM_XY), 2);
PUSH_DATA (push, info->block[1] << 16 | info->block[0]);
PUSH_DATA (push, info->block[2]);
- BEGIN_NV04(push, NV50_COMPUTE(BLOCK_ALLOC), 1);
+ BEGIN_NV04(push, NV50_CP(BLOCK_ALLOC), 1);
PUSH_DATA (push, 1 << 16 | block_size);
- BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_LATCH), 1);
+ BEGIN_NV04(push, NV50_CP(BLOCKDIM_LATCH), 1);
PUSH_DATA (push, 1);
- BEGIN_NV04(push, NV50_COMPUTE(GRIDDIM), 1);
+ BEGIN_NV04(push, NV50_CP(GRIDDIM), 1);
PUSH_DATA (push, info->grid[1] << 16 | info->grid[0]);
- BEGIN_NV04(push, NV50_COMPUTE(GRIDID), 1);
+ BEGIN_NV04(push, NV50_CP(GRIDID), 1);
PUSH_DATA (push, 1);
/* kernel launching */
- BEGIN_NV04(push, NV50_COMPUTE(LAUNCH), 1);
+ BEGIN_NV04(push, NV50_CP(LAUNCH), 1);
PUSH_DATA (push, 0);
- BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
+ BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
PUSH_DATA (push, 0);
/* bind a compute shader clobbers fragment shader state */
- nv50->dirty |= NV50_NEW_FRAGPROG;
+ nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG;
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c
index 4874b77..61a52c4 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -176,8 +176,8 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
for (i = 0; i < nv50->framebuffer.nr_cbufs; ++i) {
if (nv50->framebuffer.cbufs[i] &&
nv50->framebuffer.cbufs[i]->texture == res) {
- nv50->dirty |= NV50_NEW_FRAMEBUFFER;
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
+ nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER;
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB);
if (!--ref)
return ref;
}
@@ -186,8 +186,8 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
if (bind & PIPE_BIND_DEPTH_STENCIL) {
if (nv50->framebuffer.zsbuf &&
nv50->framebuffer.zsbuf->texture == res) {
- nv50->dirty |= NV50_NEW_FRAMEBUFFER;
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
+ nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER;
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB);
if (!--ref)
return ref;
}
@@ -202,8 +202,8 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS);
for (i = 0; i < nv50->num_vtxbufs; ++i) {
if (nv50->vtxbuf[i].buffer == res) {
- nv50->dirty |= NV50_NEW_ARRAYS;
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX);
+ nv50->dirty_3d |= NV50_NEW_3D_ARRAYS;
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_VERTEX);
if (!--ref)
return ref;
}
@@ -211,8 +211,8 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
if (nv50->idxbuf.buffer == res) {
/* Just rebind to the bufctx as there is no separate dirty bit */
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_INDEX);
- BCTX_REFN(nv50->bufctx_3d, INDEX, nv04_resource(res), RD);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_INDEX);
+ BCTX_REFN(nv50->bufctx_3d, 3D_INDEX, nv04_resource(res), RD);
if (!--ref)
return ref;
}
@@ -222,8 +222,8 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
for (i = 0; i < nv50->num_textures[s]; ++i) {
if (nv50->textures[s][i] &&
nv50->textures[s][i]->texture == res) {
- nv50->dirty |= NV50_NEW_TEXTURES;
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES);
+ nv50->dirty_3d |= NV50_NEW_3D_TEXTURES;
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES);
if (!--ref)
return ref;
}
@@ -236,9 +236,9 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
continue;
if (!nv50->constbuf[s][i].user &&
nv50->constbuf[s][i].u.buf == res) {
- nv50->dirty |= NV50_NEW_CONSTBUF;
+ nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF;
nv50->constbuf_dirty[s] |= 1 << i;
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_CB(s, i));
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_CB(s, i));
if (!--ref)
return ref;
}
@@ -345,10 +345,10 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
- BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->code);
- BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->uniforms);
- BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->txc);
- BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->stack_bo);
+ BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->code);
+ BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->uniforms);
+ BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->txc);
+ BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->stack_bo);
if (screen->compute) {
BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->code);
BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->txc);
@@ -357,7 +357,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR;
- BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->fence.bo);
+ BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->fence.bo);
BCTX_REFN_bo(nv50->bufctx, FENCE, flags, screen->fence.bo);
if (screen->compute)
BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->fence.bo);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h
index 2620d03..2317fa2 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -26,43 +26,43 @@
#include "nv50/nv50_3d.xml.h"
#include "nv50/nv50_2d.xml.h"
-#define NV50_NEW_BLEND (1 << 0)
-#define NV50_NEW_RASTERIZER (1 << 1)
-#define NV50_NEW_ZSA (1 << 2)
-#define NV50_NEW_VERTPROG (1 << 3)
-#define NV50_NEW_GMTYPROG (1 << 6)
-#define NV50_NEW_FRAGPROG (1 << 7)
-#define NV50_NEW_BLEND_COLOUR (1 << 8)
-#define NV50_NEW_STENCIL_REF (1 << 9)
-#define NV50_NEW_CLIP (1 << 10)
-#define NV50_NEW_SAMPLE_MASK (1 << 11)
-#define NV50_NEW_FRAMEBUFFER (1 << 12)
-#define NV50_NEW_STIPPLE (1 << 13)
-#define NV50_NEW_SCISSOR (1 << 14)
-#define NV50_NEW_VIEWPORT (1 << 15)
-#define NV50_NEW_ARRAYS (1 << 16)
-#define NV50_NEW_VERTEX (1 << 17)
-#define NV50_NEW_CONSTBUF (1 << 18)
-#define NV50_NEW_TEXTURES (1 << 19)
-#define NV50_NEW_SAMPLERS (1 << 20)
-#define NV50_NEW_STRMOUT (1 << 21)
-#define NV50_NEW_MIN_SAMPLES (1 << 22)
-#define NV50_NEW_CONTEXT (1 << 31)
+#define NV50_NEW_3D_BLEND (1 << 0)
+#define NV50_NEW_3D_RASTERIZER (1 << 1)
+#define NV50_NEW_3D_ZSA (1 << 2)
+#define NV50_NEW_3D_VERTPROG (1 << 3)
+#define NV50_NEW_3D_GMTYPROG (1 << 6)
+#define NV50_NEW_3D_FRAGPROG (1 << 7)
+#define NV50_NEW_3D_BLEND_COLOUR (1 << 8)
+#define NV50_NEW_3D_STENCIL_REF (1 << 9)
+#define NV50_NEW_3D_CLIP (1 << 10)
+#define NV50_NEW_3D_SAMPLE_MASK (1 << 11)
+#define NV50_NEW_3D_FRAMEBUFFER (1 << 12)
+#define NV50_NEW_3D_STIPPLE (1 << 13)
+#define NV50_NEW_3D_SCISSOR (1 << 14)
+#define NV50_NEW_3D_VIEWPORT (1 << 15)
+#define NV50_NEW_3D_ARRAYS (1 << 16)
+#define NV50_NEW_3D_VERTEX (1 << 17)
+#define NV50_NEW_3D_CONSTBUF (1 << 18)
+#define NV50_NEW_3D_TEXTURES (1 << 19)
+#define NV50_NEW_3D_SAMPLERS (1 << 20)
+#define NV50_NEW_3D_STRMOUT (1 << 21)
+#define NV50_NEW_3D_MIN_SAMPLES (1 << 22)
+#define NV50_NEW_3D_CONTEXT (1 << 31)
#define NV50_NEW_CP_PROGRAM (1 << 0)
#define NV50_NEW_CP_GLOBALS (1 << 1)
/* 3d bufctx (during draw_vbo, blit_3d) */
-#define NV50_BIND_FB 0
-#define NV50_BIND_VERTEX 1
-#define NV50_BIND_VERTEX_TMP 2
-#define NV50_BIND_INDEX 3
-#define NV50_BIND_TEXTURES 4
-#define NV50_BIND_CB(s, i) (5 + 16 * (s) + (i))
-#define NV50_BIND_SO 53
-#define NV50_BIND_SCREEN 54
-#define NV50_BIND_TLS 55
-#define NV50_BIND_3D_COUNT 56
+#define NV50_BIND_3D_FB 0
+#define NV50_BIND_3D_VERTEX 1
+#define NV50_BIND_3D_VERTEX_TMP 2
+#define NV50_BIND_3D_INDEX 3
+#define NV50_BIND_3D_TEXTURES 4
+#define NV50_BIND_3D_CB(s, i) (5 + 16 * (s) + (i))
+#define NV50_BIND_3D_SO 53
+#define NV50_BIND_3D_SCREEN 54
+#define NV50_BIND_3D_TLS 55
+#define NV50_BIND_3D_COUNT 56
/* compute bufctx (during launch_grid) */
#define NV50_BIND_CP_GLOBAL 0
@@ -115,7 +115,7 @@ struct nv50_context {
struct nouveau_bufctx *bufctx;
struct nouveau_bufctx *bufctx_cp;
- uint32_t dirty;
+ uint32_t dirty_3d; /* dirty flags for 3d state */
uint32_t dirty_cp; /* dirty flags for compute state */
bool cb_dirty;
@@ -221,6 +221,7 @@ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);
void nv50_vertprog_validate(struct nv50_context *);
void nv50_gmtyprog_validate(struct nv50_context *);
void nv50_fragprog_validate(struct nv50_context *);
+void nv50_compprog_validate(struct nv50_context *);
void nv50_fp_linkage_validate(struct nv50_context *);
void nv50_gp_linkage_validate(struct nv50_context *);
void nv50_constbufs_validate(struct nv50_context *);
@@ -231,7 +232,15 @@ void nv50_stream_output_validate(struct nv50_context *);
extern void nv50_init_state_functions(struct nv50_context *);
/* nv50_state_validate.c */
-bool nv50_state_validate(struct nv50_context *, uint32_t state_mask);
+struct nv50_state_validate {
+ void (*func)(struct nv50_context *);
+ uint32_t states;
+};
+
+bool nv50_state_validate(struct nv50_context *, uint32_t,
+ struct nv50_state_validate *, int, uint32_t *,
+ struct nouveau_bufctx *);
+bool nv50_state_validate_3d(struct nv50_context *, uint32_t);
/* nv50_surface.c */
extern void nv50_clear(struct pipe_context *, unsigned buffers,
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index a67ef28..3444b31 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -335,7 +335,6 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
info->io.genUserClip = prog->vp.clpd_nr;
- info->io.resInfoCBSlot = 15;
info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET;
info->io.sampleInfoBase = NV50_CB_AUX_SAMPLE_OFFSET;
info->io.msInfoCBSlot = 15;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
index be19c0f..0a73090 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
@@ -202,10 +202,10 @@ nv50_hw_sm_begin_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
func = nv50_hw_sm_get_func(c);
/* configure and reset the counter(s) */
- BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1);
+ BEGIN_NV04(push, NV50_CP(MP_PM_CONTROL(c)), 1);
PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8)
| cfg->ctr[i].unit | cfg->ctr[i].mode);
- BEGIN_NV04(push, NV50_COMPUTE(MP_PM_SET(c)), 1);
+ BEGIN_NV04(push, NV50_CP(MP_PM_SET(c)), 1);
PUSH_DATA (push, 0);
}
return true;
@@ -240,7 +240,7 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
PUSH_SPACE(push, 8);
for (c = 0; c < 4; c++) {
if (screen->pm.mp_counter[c]) {
- BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1);
+ BEGIN_NV04(push, NV50_CP(MP_PM_CONTROL(c)), 1);
PUSH_DATA (push, 0);
}
}
@@ -257,7 +257,7 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
hq->bo);
PUSH_SPACE(push, 2);
- BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
+ BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
PUSH_DATA (push, 0);
pipe->bind_compute_state(pipe, screen->pm.prog);
@@ -295,7 +295,7 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
mask |= 1 << hsq->ctr[i];
func = nv50_hw_sm_get_func(hsq->ctr[i]);
- BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(hsq->ctr[i])), 1);
+ BEGIN_NV04(push, NV50_CP(MP_PM_CONTROL(hsq->ctr[i])), 1);
PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8)
| cfg->ctr[i].unit | cfg->ctr[i].mode);
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
index 8e4b2b4..3d2ebfb 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
@@ -29,6 +29,8 @@
#include "nv50/nv50_context.h"
#include "nv50/nv50_query_hw.h"
+#include "nv50/nv50_compute.xml.h"
+
void
nv50_constbufs_validate(struct nv50_context *nv50)
{
@@ -94,7 +96,7 @@ nv50_constbufs_validate(struct nv50_context *nv50)
BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
- BCTX_REFN(nv50->bufctx_3d, CB(s, i), res, RD);
+ BCTX_REFN(nv50->bufctx_3d, 3D_CB(s, i), res, RD);
nv50->cb_dirty = 1; /* Force cache flush for UBO. */
} else {
@@ -131,14 +133,14 @@ nv50_program_update_context_state(struct nv50_context *nv50,
if (prog && prog->tls_space) {
if (nv50->state.new_tls_space)
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TLS);
if (!nv50->state.tls_required || nv50->state.new_tls_space)
- BCTX_REFN_bo(nv50->bufctx_3d, TLS, flags, nv50->screen->tls_bo);
+ BCTX_REFN_bo(nv50->bufctx_3d, 3D_TLS, flags, nv50->screen->tls_bo);
nv50->state.new_tls_space = false;
nv50->state.tls_required |= 1 << stage;
} else {
if (nv50->state.tls_required == (1 << stage))
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TLS);
nv50->state.tls_required &= ~(1 << stage);
}
}
@@ -181,7 +183,7 @@ nv50_fragprog_validate(struct nv50_context *nv50)
fp->fp.force_persample_interp = rast->force_persample_interp;
}
- if (fp->mem && !(nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_MIN_SAMPLES)))
+ if (fp->mem && !(nv50->dirty_3d & (NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_MIN_SAMPLES)))
return;
if (!nv50_program_validate(nv50, fp))
@@ -238,6 +240,19 @@ nv50_gmtyprog_validate(struct nv50_context *nv50)
/* GP_ENABLE is updated in linkage validation */
}
+void
+nv50_compprog_validate(struct nv50_context *nv50) /* validate the bound compute program (if any), then emit CODE_CB_FLUSH */
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_program *cp = nv50->compprog; /* may be NULL: the check below tolerates no bound program */
+
+ if (cp && !nv50_program_validate(nv50, cp)) /* a program is bound but nv50_program_validate() reported failure */
+ return; /* bail out before the flush is emitted */
+
+ BEGIN_NV04(push, NV50_CP(CODE_CB_FLUSH), 1); /* reached on success and also when cp is NULL */
+ PUSH_DATA (push, 0);
+}
+
static void
nv50_sprite_coords_validate(struct nv50_context *nv50)
{
@@ -309,7 +324,7 @@ nv50_validate_derived_rs(struct nv50_context *nv50)
PUSH_DATA (push, !nv50->rast->pipe.rasterizer_discard);
}
- if (nv50->dirty & NV50_NEW_FRAGPROG)
+ if (nv50->dirty_3d & NV50_NEW_3D_FRAGPROG)
return;
psize = nv50->state.semantic_psize & ~NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK;
color = nv50->state.semantic_color & ~NV50_3D_SEMANTIC_COLOR_CLMP_EN;
@@ -378,9 +393,9 @@ nv50_fp_linkage_validate(struct nv50_context *nv50)
uint8_t map[64];
uint8_t so_map[64];
- if (!(nv50->dirty & (NV50_NEW_VERTPROG |
- NV50_NEW_FRAGPROG |
- NV50_NEW_GMTYPROG))) {
+ if (!(nv50->dirty_3d & (NV50_NEW_3D_VERTPROG |
+ NV50_NEW_3D_FRAGPROG |
+ NV50_NEW_3D_GMTYPROG))) {
uint8_t bfc, ffc;
ffc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_FFC0_ID__MASK);
bfc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_BFC0_ID__MASK)
@@ -633,8 +648,6 @@ nv50_stream_output_validate(struct nv50_context *nv50)
BEGIN_NV04(push, NV50_3D(STRMOUT_BUFFERS_CTRL), 1);
PUSH_DATA (push, ctrl);
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_SO);
-
for (i = 0; i < nv50->num_so_targets; ++i) {
struct nv50_so_target *targ = nv50_so_target(nv50->so_target[i]);
struct nv04_resource *buf = nv04_resource(targ->pipe.buffer);
@@ -664,7 +677,7 @@ nv50_stream_output_validate(struct nv50_context *nv50)
prims = MIN2(prims, limit);
}
targ->stride = so->stride[i];
- BCTX_REFN(nv50->bufctx_3d, SO, buf, WR);
+ BCTX_REFN(nv50->bufctx_3d, 3D_SO, buf, WR);
}
if (prims != ~0) {
BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index 8504ba4..86e74d6 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -200,7 +200,7 @@ nv50_blend_state_bind(struct pipe_context *pipe, void *hwcso)
struct nv50_context *nv50 = nv50_context(pipe);
nv50->blend = hwcso;
- nv50->dirty |= NV50_NEW_BLEND;
+ nv50->dirty_3d |= NV50_NEW_3D_BLEND;
}
static void
@@ -337,7 +337,7 @@ nv50_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
struct nv50_context *nv50 = nv50_context(pipe);
nv50->rast = hwcso;
- nv50->dirty |= NV50_NEW_RASTERIZER;
+ nv50->dirty_3d |= NV50_NEW_3D_RASTERIZER;
}
static void
@@ -426,7 +426,7 @@ nv50_zsa_state_bind(struct pipe_context *pipe, void *hwcso)
struct nv50_context *nv50 = nv50_context(pipe);
nv50->zsa = hwcso;
- nv50->dirty |= NV50_NEW_ZSA;
+ nv50->dirty_3d |= NV50_NEW_3D_ZSA;
}
static void
@@ -605,7 +605,7 @@ nv50_stage_sampler_states_bind(struct nv50_context *nv50, int s,
nv50->num_samplers[s] = nr;
- nv50->dirty |= NV50_NEW_SAMPLERS;
+ nv50->dirty_3d |= NV50_NEW_3D_SAMPLERS;
}
static void
@@ -698,9 +698,9 @@ nv50_stage_set_sampler_views(struct nv50_context *nv50, int s,
nv50->num_textures[s] = nr;
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES);
- nv50->dirty |= NV50_NEW_TEXTURES;
+ nv50->dirty_3d |= NV50_NEW_3D_TEXTURES;
}
static void
@@ -776,7 +776,7 @@ nv50_vp_state_bind(struct pipe_context *pipe, void *hwcso)
struct nv50_context *nv50 = nv50_context(pipe);
nv50->vertprog = hwcso;
- nv50->dirty |= NV50_NEW_VERTPROG;
+ nv50->dirty_3d |= NV50_NEW_3D_VERTPROG;
}
static void *
@@ -792,7 +792,7 @@ nv50_fp_state_bind(struct pipe_context *pipe, void *hwcso)
struct nv50_context *nv50 = nv50_context(pipe);
nv50->fragprog = hwcso;
- nv50->dirty |= NV50_NEW_FRAGPROG;
+ nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG;
}
static void *
@@ -808,7 +808,7 @@ nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso)
struct nv50_context *nv50 = nv50_context(pipe);
nv50->gmtyprog = hwcso;
- nv50->dirty |= NV50_NEW_GMTYPROG;
+ nv50->dirty_3d |= NV50_NEW_3D_GMTYPROG;
}
static void *
@@ -857,7 +857,7 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
nv50->constbuf[s][i].u.buf = NULL;
else
if (nv50->constbuf[s][i].u.buf)
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_CB(s, i));
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_CB(s, i));
pipe_resource_reference(&nv50->constbuf[s][i].u.buf, res);
@@ -882,7 +882,7 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
}
nv50->constbuf_dirty[s] |= 1 << i;
- nv50->dirty |= NV50_NEW_CONSTBUF;
+ nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF;
}
/* =============================================================================
@@ -895,7 +895,7 @@ nv50_set_blend_color(struct pipe_context *pipe,
struct nv50_context *nv50 = nv50_context(pipe);
nv50->blend_colour = *bcol;
- nv50->dirty |= NV50_NEW_BLEND_COLOUR;
+ nv50->dirty_3d |= NV50_NEW_3D_BLEND_COLOUR;
}
static void
@@ -905,7 +905,7 @@ nv50_set_stencil_ref(struct pipe_context *pipe,
struct nv50_context *nv50 = nv50_context(pipe);
nv50->stencil_ref = *sr;
- nv50->dirty |= NV50_NEW_STENCIL_REF;
+ nv50->dirty_3d |= NV50_NEW_3D_STENCIL_REF;
}
static void
@@ -916,7 +916,7 @@ nv50_set_clip_state(struct pipe_context *pipe,
memcpy(nv50->clip.ucp, clip->ucp, sizeof(clip->ucp));
- nv50->dirty |= NV50_NEW_CLIP;
+ nv50->dirty_3d |= NV50_NEW_3D_CLIP;
}
static void
@@ -925,7 +925,7 @@ nv50_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
struct nv50_context *nv50 = nv50_context(pipe);
nv50->sample_mask = sample_mask;
- nv50->dirty |= NV50_NEW_SAMPLE_MASK;
+ nv50->dirty_3d |= NV50_NEW_3D_SAMPLE_MASK;
}
static void
@@ -935,7 +935,7 @@ nv50_set_min_samples(struct pipe_context *pipe, unsigned min_samples)
if (nv50->min_samples != min_samples) {
nv50->min_samples = min_samples;
- nv50->dirty |= NV50_NEW_MIN_SAMPLES;
+ nv50->dirty_3d |= NV50_NEW_3D_MIN_SAMPLES;
}
}
@@ -945,11 +945,11 @@ nv50_set_framebuffer_state(struct pipe_context *pipe,
{
struct nv50_context *nv50 = nv50_context(pipe);
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB);
util_copy_framebuffer_state(&nv50->framebuffer, fb);
- nv50->dirty |= NV50_NEW_FRAMEBUFFER;
+ nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER;
}
static void
@@ -959,7 +959,7 @@ nv50_set_polygon_stipple(struct pipe_context *pipe,
struct nv50_context *nv50 = nv50_context(pipe);
nv50->stipple = *stipple;
- nv50->dirty |= NV50_NEW_STIPPLE;
+ nv50->dirty_3d |= NV50_NEW_3D_STIPPLE;
}
static void
@@ -977,7 +977,7 @@ nv50_set_scissor_states(struct pipe_context *pipe,
continue;
nv50->scissors[start_slot + i] = scissor[i];
nv50->scissors_dirty |= 1 << (start_slot + i);
- nv50->dirty |= NV50_NEW_SCISSOR;
+ nv50->dirty_3d |= NV50_NEW_3D_SCISSOR;
}
}
@@ -996,7 +996,7 @@ nv50_set_viewport_states(struct pipe_context *pipe,
continue;
nv50->viewports[start_slot + i] = vpt[i];
nv50->viewports_dirty |= 1 << (start_slot + i);
- nv50->dirty |= NV50_NEW_VIEWPORT;
+ nv50->dirty_3d |= NV50_NEW_3D_VIEWPORT;
}
}
@@ -1008,8 +1008,8 @@ nv50_set_vertex_buffers(struct pipe_context *pipe,
struct nv50_context *nv50 = nv50_context(pipe);
unsigned i;
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX);
- nv50->dirty |= NV50_NEW_ARRAYS;
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_VERTEX);
+ nv50->dirty_3d |= NV50_NEW_3D_ARRAYS;
util_set_vertex_buffers_count(nv50->vtxbuf, &nv50->num_vtxbufs, vb,
start_slot, count);
@@ -1051,14 +1051,14 @@ nv50_set_index_buffer(struct pipe_context *pipe,
struct nv50_context *nv50 = nv50_context(pipe);
if (nv50->idxbuf.buffer)
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_INDEX);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_INDEX);
if (ib) {
pipe_resource_reference(&nv50->idxbuf.buffer, ib->buffer);
nv50->idxbuf.index_size = ib->index_size;
if (ib->buffer) {
nv50->idxbuf.offset = ib->offset;
- BCTX_REFN(nv50->bufctx_3d, INDEX, nv04_resource(ib->buffer), RD);
+ BCTX_REFN(nv50->bufctx_3d, 3D_INDEX, nv04_resource(ib->buffer), RD);
} else {
nv50->idxbuf.user_buffer = ib->user_buffer;
}
@@ -1073,7 +1073,7 @@ nv50_vertex_state_bind(struct pipe_context *pipe, void *hwcso)
struct nv50_context *nv50 = nv50_context(pipe);
nv50->vertex = hwcso;
- nv50->dirty |= NV50_NEW_VERTEX;
+ nv50->dirty_3d |= NV50_NEW_3D_VERTEX;
}
static struct pipe_stream_output_target *
@@ -1180,8 +1180,10 @@ nv50_set_stream_output_targets(struct pipe_context *pipe,
}
nv50->num_so_targets = num_targets;
- if (nv50->so_targets_dirty)
- nv50->dirty |= NV50_NEW_STRMOUT;
+ if (nv50->so_targets_dirty) {
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_SO);
+ nv50->dirty_3d |= NV50_NEW_3D_STRMOUT;
+ }
}
static void
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
index 5536978..5120493 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -25,7 +25,7 @@ nv50_validate_fb(struct nv50_context *nv50)
unsigned ms_mode = NV50_3D_MULTISAMPLE_MODE_MS1;
uint32_t array_size = 0xffff, array_mode = 0;
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB);
BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
PUSH_DATA (push, (076543210 << 4) | fb->nr_cbufs);
@@ -90,7 +90,7 @@ nv50_validate_fb(struct nv50_context *nv50)
mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
/* only register for writing, otherwise we'd always serialize here */
- BCTX_REFN(nv50->bufctx_3d, FB, &mt->base, WR);
+ BCTX_REFN(nv50->bufctx_3d, 3D_FB, &mt->base, WR);
}
if (fb->zsbuf) {
@@ -118,7 +118,7 @@ nv50_validate_fb(struct nv50_context *nv50)
mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
- BCTX_REFN(nv50->bufctx_3d, FB, &mt->base, WR);
+ BCTX_REFN(nv50->bufctx_3d, 3D_FB, &mt->base, WR);
} else {
BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
PUSH_DATA (push, 0);
@@ -187,8 +187,8 @@ nv50_validate_scissor(struct nv50_context *nv50)
#ifdef NV50_SCISSORS_CLIPPING
int minx, maxx, miny, maxy, i;
- if (!(nv50->dirty &
- (NV50_NEW_SCISSOR | NV50_NEW_VIEWPORT | NV50_NEW_FRAMEBUFFER)) &&
+ if (!(nv50->dirty_3d &
+ (NV50_NEW_3D_SCISSOR | NV50_NEW_3D_VIEWPORT | NV50_NEW_3D_FRAMEBUFFER)) &&
nv50->state.scissor == nv50->rast->pipe.scissor)
return;
@@ -197,7 +197,7 @@ nv50_validate_scissor(struct nv50_context *nv50)
nv50->state.scissor = nv50->rast->pipe.scissor;
- if ((nv50->dirty & NV50_NEW_FRAMEBUFFER) && !nv50->state.scissor)
+ if ((nv50->dirty_3d & NV50_NEW_3D_FRAMEBUFFER) && !nv50->state.scissor)
nv50->scissors_dirty = (1 << NV50_MAX_VIEWPORTS) - 1;
for (i = 0; i < NV50_MAX_VIEWPORTS; i++) {
@@ -290,10 +290,10 @@ nv50_check_program_ucps(struct nv50_context *nv50,
vp->vp.clpd_nr = n;
if (likely(vp == nv50->vertprog)) {
- nv50->dirty |= NV50_NEW_VERTPROG;
+ nv50->dirty_3d |= NV50_NEW_3D_VERTPROG;
nv50_vertprog_validate(nv50);
} else {
- nv50->dirty |= NV50_NEW_GMTYPROG;
+ nv50->dirty_3d |= NV50_NEW_3D_GMTYPROG;
nv50_gmtyprog_validate(nv50);
}
nv50_fp_linkage_validate(nv50);
@@ -342,7 +342,7 @@ nv50_validate_clip(struct nv50_context *nv50)
struct nv50_program *vp;
uint8_t clip_enable;
- if (nv50->dirty & NV50_NEW_CLIP) {
+ if (nv50->dirty_3d & NV50_NEW_3D_CLIP) {
BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
PUSH_DATA (push, (NV50_CB_AUX_UCP_OFFSET << 8) | NV50_CB_AUX);
BEGIN_NI04(push, NV50_3D(CB_DATA(0)), PIPE_MAX_CLIP_PLANES * 4);
@@ -436,7 +436,8 @@ nv50_switch_pipe_context(struct nv50_context *ctx_to)
else
ctx_to->state = ctx_to->screen->save_state;
- ctx_to->dirty = ~0;
+ ctx_to->dirty_3d = ~0;
+ ctx_to->dirty_cp = ~0;
ctx_to->viewports_dirty = ~0;
ctx_to->scissors_dirty = ~0;
@@ -445,71 +446,71 @@ nv50_switch_pipe_context(struct nv50_context *ctx_to)
ctx_to->constbuf_dirty[2] = (1 << NV50_MAX_PIPE_CONSTBUFS) - 1;
if (!ctx_to->vertex)
- ctx_to->dirty &= ~(NV50_NEW_VERTEX | NV50_NEW_ARRAYS);
+ ctx_to->dirty_3d &= ~(NV50_NEW_3D_VERTEX | NV50_NEW_3D_ARRAYS);
if (!ctx_to->vertprog)
- ctx_to->dirty &= ~NV50_NEW_VERTPROG;
+ ctx_to->dirty_3d &= ~NV50_NEW_3D_VERTPROG;
if (!ctx_to->fragprog)
- ctx_to->dirty &= ~NV50_NEW_FRAGPROG;
+ ctx_to->dirty_3d &= ~NV50_NEW_3D_FRAGPROG;
if (!ctx_to->blend)
- ctx_to->dirty &= ~NV50_NEW_BLEND;
+ ctx_to->dirty_3d &= ~NV50_NEW_3D_BLEND;
if (!ctx_to->rast)
#ifdef NV50_SCISSORS_CLIPPING
- ctx_to->dirty &= ~(NV50_NEW_RASTERIZER | NV50_NEW_SCISSOR);
+ ctx_to->dirty_3d &= ~(NV50_NEW_3D_RASTERIZER | NV50_NEW_3D_SCISSOR);
#else
- ctx_to->dirty &= ~NV50_NEW_RASTERIZER;
+ ctx_to->dirty_3d &= ~NV50_NEW_3D_RASTERIZER;
#endif
if (!ctx_to->zsa)
- ctx_to->dirty &= ~NV50_NEW_ZSA;
+ ctx_to->dirty_3d &= ~NV50_NEW_3D_ZSA;
ctx_to->screen->cur_ctx = ctx_to;
}
-static struct state_validate {
- void (*func)(struct nv50_context *);
- uint32_t states;
-} validate_list[] = {
- { nv50_validate_fb, NV50_NEW_FRAMEBUFFER },
- { nv50_validate_blend, NV50_NEW_BLEND },
- { nv50_validate_zsa, NV50_NEW_ZSA },
- { nv50_validate_sample_mask, NV50_NEW_SAMPLE_MASK },
- { nv50_validate_rasterizer, NV50_NEW_RASTERIZER },
- { nv50_validate_blend_colour, NV50_NEW_BLEND_COLOUR },
- { nv50_validate_stencil_ref, NV50_NEW_STENCIL_REF },
- { nv50_validate_stipple, NV50_NEW_STIPPLE },
+static struct nv50_state_validate
+validate_list_3d[] = {
+ { nv50_validate_fb, NV50_NEW_3D_FRAMEBUFFER },
+ { nv50_validate_blend, NV50_NEW_3D_BLEND },
+ { nv50_validate_zsa, NV50_NEW_3D_ZSA },
+ { nv50_validate_sample_mask, NV50_NEW_3D_SAMPLE_MASK },
+ { nv50_validate_rasterizer, NV50_NEW_3D_RASTERIZER },
+ { nv50_validate_blend_colour, NV50_NEW_3D_BLEND_COLOUR },
+ { nv50_validate_stencil_ref, NV50_NEW_3D_STENCIL_REF },
+ { nv50_validate_stipple, NV50_NEW_3D_STIPPLE },
#ifdef NV50_SCISSORS_CLIPPING
- { nv50_validate_scissor, NV50_NEW_SCISSOR | NV50_NEW_VIEWPORT |
- NV50_NEW_RASTERIZER |
- NV50_NEW_FRAMEBUFFER },
+ { nv50_validate_scissor, NV50_NEW_3D_SCISSOR | NV50_NEW_3D_VIEWPORT |
+ NV50_NEW_3D_RASTERIZER |
+ NV50_NEW_3D_FRAMEBUFFER },
#else
- { nv50_validate_scissor, NV50_NEW_SCISSOR },
+ { nv50_validate_scissor, NV50_NEW_3D_SCISSOR },
#endif
- { nv50_validate_viewport, NV50_NEW_VIEWPORT },
- { nv50_vertprog_validate, NV50_NEW_VERTPROG },
- { nv50_gmtyprog_validate, NV50_NEW_GMTYPROG },
- { nv50_fragprog_validate, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
- NV50_NEW_MIN_SAMPLES },
- { nv50_fp_linkage_validate, NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
- NV50_NEW_GMTYPROG | NV50_NEW_RASTERIZER },
- { nv50_gp_linkage_validate, NV50_NEW_GMTYPROG | NV50_NEW_VERTPROG },
- { nv50_validate_derived_rs, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
- NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
- { nv50_validate_derived_2, NV50_NEW_ZSA | NV50_NEW_FRAMEBUFFER },
- { nv50_validate_derived_3, NV50_NEW_BLEND | NV50_NEW_FRAMEBUFFER },
- { nv50_validate_clip, NV50_NEW_CLIP | NV50_NEW_RASTERIZER |
- NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
- { nv50_constbufs_validate, NV50_NEW_CONSTBUF },
- { nv50_validate_textures, NV50_NEW_TEXTURES },
- { nv50_validate_samplers, NV50_NEW_SAMPLERS },
- { nv50_stream_output_validate, NV50_NEW_STRMOUT |
- NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
- { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS },
- { nv50_validate_min_samples, NV50_NEW_MIN_SAMPLES },
+ { nv50_validate_viewport, NV50_NEW_3D_VIEWPORT },
+ { nv50_vertprog_validate, NV50_NEW_3D_VERTPROG },
+ { nv50_gmtyprog_validate, NV50_NEW_3D_GMTYPROG },
+ { nv50_fragprog_validate, NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_RASTERIZER |
+ NV50_NEW_3D_MIN_SAMPLES },
+ { nv50_fp_linkage_validate, NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_VERTPROG |
+ NV50_NEW_3D_GMTYPROG | NV50_NEW_3D_RASTERIZER },
+ { nv50_gp_linkage_validate, NV50_NEW_3D_GMTYPROG | NV50_NEW_3D_VERTPROG },
+ { nv50_validate_derived_rs, NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_RASTERIZER |
+ NV50_NEW_3D_VERTPROG | NV50_NEW_3D_GMTYPROG },
+ { nv50_validate_derived_2, NV50_NEW_3D_ZSA | NV50_NEW_3D_FRAMEBUFFER },
+ { nv50_validate_derived_3, NV50_NEW_3D_BLEND | NV50_NEW_3D_FRAMEBUFFER },
+ { nv50_validate_clip, NV50_NEW_3D_CLIP | NV50_NEW_3D_RASTERIZER |
+ NV50_NEW_3D_VERTPROG | NV50_NEW_3D_GMTYPROG },
+ { nv50_constbufs_validate, NV50_NEW_3D_CONSTBUF },
+ { nv50_validate_textures, NV50_NEW_3D_TEXTURES },
+ { nv50_validate_samplers, NV50_NEW_3D_SAMPLERS },
+ { nv50_stream_output_validate, NV50_NEW_3D_STRMOUT |
+ NV50_NEW_3D_VERTPROG | NV50_NEW_3D_GMTYPROG },
+ { nv50_vertex_arrays_validate, NV50_NEW_3D_VERTEX | NV50_NEW_3D_ARRAYS },
+ { nv50_validate_min_samples, NV50_NEW_3D_MIN_SAMPLES },
};
bool
-nv50_state_validate(struct nv50_context *nv50, uint32_t mask)
+nv50_state_validate(struct nv50_context *nv50, uint32_t mask,
+ struct nv50_state_validate *validate_list, int size,
+ uint32_t *dirty, struct nouveau_bufctx *bufctx)
{
uint32_t state_mask;
int ret;
@@ -518,16 +519,16 @@ nv50_state_validate(struct nv50_context *nv50, uint32_t mask)
if (nv50->screen->cur_ctx != nv50)
nv50_switch_pipe_context(nv50);
- state_mask = nv50->dirty & mask;
+ state_mask = *dirty & mask;
if (state_mask) {
- for (i = 0; i < ARRAY_SIZE(validate_list); ++i) {
- struct state_validate *validate = &validate_list[i];
+ for (i = 0; i < size; i++) {
+ struct nv50_state_validate *validate = &validate_list[i];
if (state_mask & validate->states)
validate->func(nv50);
}
- nv50->dirty &= ~state_mask;
+ *dirty &= ~state_mask;
if (nv50->state.rt_serialize) {
nv50->state.rt_serialize = false;
@@ -535,14 +536,26 @@ nv50_state_validate(struct nv50_context *nv50, uint32_t mask)
PUSH_DATA (nv50->base.pushbuf, 0);
}
- nv50_bufctx_fence(nv50->bufctx_3d, false);
+ nv50_bufctx_fence(bufctx, false);
}
- nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_3d);
+ nouveau_pushbuf_bufctx(nv50->base.pushbuf, bufctx);
ret = nouveau_pushbuf_validate(nv50->base.pushbuf);
+ return !ret;
+}
+
+bool
+nv50_state_validate_3d(struct nv50_context *nv50, uint32_t mask)
+{
+ bool ret;
+
+ ret = nv50_state_validate(nv50, mask, validate_list_3d,
+ ARRAY_SIZE(validate_list_3d), &nv50->dirty_3d,
+ nv50->bufctx_3d);
+
if (unlikely(nv50->state.flushed)) {
nv50->state.flushed = false;
nv50_bufctx_fence(nv50->bufctx_3d, true);
}
- return !ret;
+ return ret;
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index 84646f6..68b0e18 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -353,7 +353,7 @@ nv50_clear_render_target(struct pipe_context *pipe,
BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
PUSH_DATA (push, nv50->cond_condmode);
- nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
+ nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_SCISSOR;
}
static void
@@ -436,7 +436,7 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
PUSH_DATA (push, nv50->cond_condmode);
- nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
+ nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_SCISSOR;
}
void
@@ -525,7 +525,7 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
uint32_t mode = 0;
/* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */
- if (!nv50_state_validate(nv50, NV50_NEW_FRAMEBUFFER))
+ if (!nv50_state_validate_3d(nv50, NV50_NEW_3D_FRAMEBUFFER))
return;
/* We have to clear ALL of the layers, not up to the min number of layers
@@ -798,7 +798,7 @@ nv50_clear_buffer(struct pipe_context *pipe,
data, data_size);
}
- nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
+ nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_SCISSOR;
}
/* =============================== BLIT CODE ===================================
@@ -834,7 +834,7 @@ struct nv50_blitctx
struct pipe_sampler_view *texture[2];
struct nv50_tsc_entry *sampler[2];
unsigned min_samples;
- uint32_t dirty;
+ uint32_t dirty_3d;
} saved;
struct nv50_rasterizer_stateobj rast;
};
@@ -1253,15 +1253,15 @@ nv50_blitctx_pre_blit(struct nv50_blitctx *ctx)
nv50->min_samples = 1;
- ctx->saved.dirty = nv50->dirty;
+ ctx->saved.dirty_3d = nv50->dirty_3d;
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES);
- nv50->dirty =
- NV50_NEW_FRAMEBUFFER | NV50_NEW_MIN_SAMPLES |
- NV50_NEW_VERTPROG | NV50_NEW_FRAGPROG | NV50_NEW_GMTYPROG |
- NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS;
+ nv50->dirty_3d =
+ NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_MIN_SAMPLES |
+ NV50_NEW_3D_VERTPROG | NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_GMTYPROG |
+ NV50_NEW_3D_TEXTURES | NV50_NEW_3D_SAMPLERS;
}
static void
@@ -1302,14 +1302,14 @@ nv50_blitctx_post_blit(struct nv50_blitctx *blit)
nv50->base.pipe.render_condition(&nv50->base.pipe, nv50->cond_query,
nv50->cond_cond, nv50->cond_mode);
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES);
- nv50->dirty = blit->saved.dirty |
- (NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR | NV50_NEW_SAMPLE_MASK |
- NV50_NEW_RASTERIZER | NV50_NEW_ZSA | NV50_NEW_BLEND |
- NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS |
- NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG | NV50_NEW_FRAGPROG);
+ nv50->dirty_3d = blit->saved.dirty_3d |
+ (NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_SCISSOR | NV50_NEW_3D_SAMPLE_MASK |
+ NV50_NEW_3D_RASTERIZER | NV50_NEW_3D_ZSA | NV50_NEW_3D_BLEND |
+ NV50_NEW_3D_TEXTURES | NV50_NEW_3D_SAMPLERS |
+ NV50_NEW_3D_VERTPROG | NV50_NEW_3D_GMTYPROG | NV50_NEW_3D_FRAGPROG);
nv50->scissors_dirty |= 1;
nv50->base.pipe.set_min_samples(&nv50->base.pipe, blit->saved.min_samples);
@@ -1344,7 +1344,7 @@ nv50_blit_3d(struct nv50_context *nv50, const struct pipe_blit_info *info)
nv50_blitctx_prepare_state(blit);
- nv50_state_validate(nv50, ~0);
+ nv50_state_validate_3d(nv50, ~0);
x_range = (float)info->src.box.width / (float)info->dst.box.width;
y_range = (float)info->src.box.height / (float)info->dst.box.height;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_tex.c b/src/gallium/drivers/nouveau/nv50/nv50_tex.c
index 4b69c3b..414d326 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_tex.c
@@ -299,7 +299,7 @@ nv50_validate_tic(struct nv50_context *nv50, int s)
res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
- BCTX_REFN(nv50->bufctx_3d, TEXTURES, res, RD);
+ BCTX_REFN(nv50->bufctx_3d, 3D_TEXTURES, res, RD);
BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1);
PUSH_DATA (push, (tic->id << 9) | (i << 1) | 1);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index 6f60445..a11cdf8 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -230,7 +230,7 @@ nv50_upload_user_buffers(struct nv50_context *nv50,
addrs[b] = nouveau_scratch_data(&nv50->base, vb->user_buffer, base, size,
&bo);
if (addrs[b])
- BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, NOUVEAU_BO_GART |
+ BCTX_REFN_bo(nv50->bufctx_3d, 3D_VERTEX_TMP, NOUVEAU_BO_GART |
NOUVEAU_BO_RD, bo);
}
nv50->base.vbo_dirty = true;
@@ -269,7 +269,7 @@ nv50_update_user_vbufs(struct nv50_context *nv50)
address[b] = nouveau_scratch_data(&nv50->base, vb->user_buffer,
base, size, &bo);
if (address[b])
- BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, bo_flags, bo);
+ BCTX_REFN_bo(nv50->bufctx_3d, 3D_VERTEX_TMP, bo_flags, bo);
}
BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
@@ -286,7 +286,7 @@ static inline void
nv50_release_user_vbufs(struct nv50_context *nv50)
{
if (nv50->vbo_user) {
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX_TMP);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_VERTEX_TMP);
nouveau_scratch_done(&nv50->base);
}
}
@@ -394,7 +394,7 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
struct nv04_resource *buf = nv04_resource(vb->buffer);
if (!(refd & (1 << b))) {
refd |= 1 << b;
- BCTX_REFN(nv50->bufctx_3d, VERTEX, buf, RD);
+ BCTX_REFN(nv50->bufctx_3d, 3D_VERTEX, buf, RD);
}
address = buf->address + vb->buffer_offset + ve->pipe.src_offset;
limit = buf->address + buf->base.width0 - 1;
@@ -779,9 +779,9 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
nv50->vbo_push_hint = /* the 64 is heuristic */
!(info->indexed && ((nv50->vb_elt_limit + 64) < info->count));
- if (nv50->vbo_user && !(nv50->dirty & (NV50_NEW_ARRAYS | NV50_NEW_VERTEX))) {
+ if (nv50->vbo_user && !(nv50->dirty_3d & (NV50_NEW_3D_ARRAYS | NV50_NEW_3D_VERTEX))) {
if (!!nv50->vbo_fifo != nv50->vbo_push_hint)
- nv50->dirty |= NV50_NEW_ARRAYS;
+ nv50->dirty_3d |= NV50_NEW_3D_ARRAYS;
else
if (!nv50->vbo_fifo)
nv50_update_user_vbufs(nv50);
@@ -790,7 +790,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (unlikely(nv50->num_so_targets && !nv50->gmtyprog))
nv50->state.prim_size = nv50_pipe_prim_to_prim_size[info->mode];
- nv50_state_validate(nv50, ~0);
+ nv50_state_validate_3d(nv50, ~0);
push->kick_notify = nv50_draw_vbo_kick_notify;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
index 6800230..7056258 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
@@ -58,8 +58,8 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
#define SUBC_M2MF(m) 5, (m)
#define NV50_M2MF(n) SUBC_M2MF(NV50_M2MF_##n)
-#define SUBC_COMPUTE(m) 6, (m)
-#define NV50_COMPUTE(n) SUBC_COMPUTE(NV50_COMPUTE_##n)
+#define SUBC_CP(m) 6, (m)
+#define NV50_CP(n) SUBC_CP(NV50_COMPUTE_##n)
static inline uint32_t
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
index ffbb16f..6aaa7ce 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -153,7 +153,7 @@ nvc0_compute_validate_constbufs(struct nvc0_context *nvc0)
if (nvc0->constbuf[s][i].user) {
struct nouveau_bo *bo = nvc0->screen->uniform_bo;
- const unsigned base = s << 16;
+ const unsigned base = NVC0_CB_USR_INFO(s);
const unsigned size = nvc0->constbuf[s][0].size;
assert(i == 0); /* we really only want OpenGL uniforms here */
assert(nvc0->constbuf[s][0].u.data);
@@ -207,8 +207,8 @@ nvc0_compute_validate_driverconst(struct nvc0_context *nvc0)
BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
PUSH_DATA (push, 1024);
- PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (5 << 10));
- PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (5 << 10));
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
PUSH_DATA (push, (15 << 8) | 1);
@@ -219,15 +219,16 @@ static void
nvc0_compute_validate_buffers(struct nvc0_context *nvc0)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_screen *screen = nvc0->screen;
const int s = 5;
int i;
BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
PUSH_DATA (push, 1024);
- PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10));
- PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10));
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
- PUSH_DATA (push, 512);
+ PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0));
for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
if (nvc0->buffers[s][i].buffer) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 54afe88..31e1272 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -98,6 +98,31 @@
#define NVC0_BIND_M2MF 0
#define NVC0_BIND_FENCE 1
+/* 6 user uniform buffers, at 64K each (offsets are relative to uniform_bo) */
+#define NVC0_CB_USR_INFO(s) ((s) << 16)
+#define NVC0_CB_USR_SIZE (6 << 16)
+/* 6 driver constbufs, at 1K each, placed right after the user uniform buffers */
+#define NVC0_CB_AUX_INFO(s) (NVC0_CB_USR_SIZE + ((s) << 10))
+#define NVC0_CB_AUX_SIZE (6 << 10)
+/* XXX: Figure out what this UNK data is. */
+#define NVC0_CB_AUX_UNK_INFO 0x000
+#define NVC0_CB_AUX_UNK_SIZE (8 * 4)
+/* 32 texture handles, at one 32-bit integer each */
+#define NVC0_CB_AUX_TEX_INFO(i) (0x020 + (i) * 4)
+#define NVC0_CB_AUX_TEX_SIZE (32 * 4)
+/* 8 user clip planes, at four 32-bit floats each */
+#define NVC0_CB_AUX_UCP_INFO 0x100
+#define NVC0_CB_AUX_UCP_SIZE (PIPE_MAX_CLIP_PLANES * 4 * 4)
+/* 8 sets of 32-bit integer pairs of sample offsets */
+#define NVC0_CB_AUX_SAMPLE_INFO 0x180 /* FP */
+#define NVC0_CB_AUX_SAMPLE_SIZE (8 * 4 * 2)
+/* draw parameters (index bias, base instance, drawid); same offset as SAMPLE_INFO */
+#define NVC0_CB_AUX_DRAW_INFO 0x180 /* VP */
+/* 32 user buffers, at four 32-bit integers each */
+#define NVC0_CB_AUX_BUF_INFO(i) (0x200 + (i) * 4 * 4)
+#define NVC0_CB_AUX_BUF_SIZE (NVC0_MAX_BUFFERS * 4 * 4)
+/* four 32-bit floats for the vertex runout, put at the end; whole expansion parenthesized like the other offsets */
+#define NVC0_CB_AUX_RUNOUT_INFO (NVC0_CB_USR_SIZE + NVC0_CB_AUX_SIZE)
struct nvc0_blitctx;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index bc884d6..b7c6faf 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -535,29 +535,27 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
info->io.genUserClip = prog->vp.num_ucps;
info->io.auxCBSlot = 15;
- info->io.ucpBase = 256;
- info->io.drawInfoBase = 256 + 128;
+ info->io.ucpBase = NVC0_CB_AUX_UCP_INFO;
+ info->io.drawInfoBase = NVC0_CB_AUX_DRAW_INFO;
if (prog->type == PIPE_SHADER_COMPUTE) {
if (chipset >= NVISA_GK104_CHIPSET) {
- info->io.resInfoCBSlot = 0;
+ info->io.auxCBSlot = 0;
info->io.texBindBase = NVE4_CP_INPUT_TEX(0);
info->io.suInfoBase = NVE4_CP_INPUT_SUF(0);
info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0);
} else {
- info->io.resInfoCBSlot = 15;
- info->io.suInfoBase = 512;
+ info->io.suInfoBase = NVC0_CB_AUX_BUF_INFO(0);
}
info->io.msInfoCBSlot = 0;
info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS;
} else {
if (chipset >= NVISA_GK104_CHIPSET) {
- info->io.texBindBase = 0x20;
+ info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
info->io.suInfoBase = 0; /* TODO */
}
- info->io.resInfoCBSlot = 15;
- info->io.sampleInfoBase = 256 + 128;
- info->io.suInfoBase = 512;
+ info->io.sampleInfoBase = NVC0_CB_AUX_SAMPLE_INFO;
+ info->io.suInfoBase = NVC0_CB_AUX_BUF_INFO(0);
info->io.msInfoCBSlot = 15;
info->io.msInfoBase = 0; /* TODO */
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 3c5b1da..553c001 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -922,14 +922,14 @@ nvc0_screen_create(struct nouveau_device *dev)
/* auxiliary constants (6 user clip planes, base instance id) */
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 1024);
- PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (i << 10));
- PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (i << 10));
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
PUSH_DATA (push, (15 << 4) | 1);
if (screen->eng3d->oclass >= NVE4_3D_CLASS) {
unsigned j;
BEGIN_1IC0(push, NVC0_3D(CB_POS), 9);
- PUSH_DATA (push, 0);
+ PUSH_DATA (push, NVC0_CB_AUX_UNK_INFO);
for (j = 0; j < 8; ++j)
PUSH_DATA(push, j);
} else {
@@ -943,8 +943,8 @@ nvc0_screen_create(struct nouveau_device *dev)
/* return { 0.0, 0.0, 0.0, 0.0 } for out-of-bounds vtxbuf access */
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 256);
- PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (6 << 10));
- PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (6 << 10));
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_RUNOUT_INFO);
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_RUNOUT_INFO);
BEGIN_1IC0(push, NVC0_3D(CB_POS), 5);
PUSH_DATA (push, 0);
PUSH_DATAf(push, 0.0f);
@@ -952,8 +952,8 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATAf(push, 0.0f);
PUSH_DATAf(push, 0.0f);
BEGIN_NVC0(push, NVC0_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (6 << 10));
- PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (6 << 10));
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_RUNOUT_INFO);
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_RUNOUT_INFO);
if (screen->base.drm->version >= 0x01000101) {
ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
index 8487abc..46b692d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -66,7 +66,7 @@ struct nvc0_screen {
struct nouveau_bo *text;
struct nouveau_bo *parm; /* for COMPUTE */
- struct nouveau_bo *uniform_bo; /* for 3D */
+ struct nouveau_bo *uniform_bo;
struct nouveau_bo *tls;
struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
struct nouveau_bo *poly_cache;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index 090a039..a100fc4 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -413,7 +413,7 @@ nvc0_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
{
unsigned s, i;
- for (s = 0; s < 5; ++s)
+ for (s = 0; s < 6; ++s)
for (i = 0; i < nvc0_context(pipe)->num_samplers[s]; ++i)
if (nvc0_context(pipe)->samplers[s][i] == hwcso)
nvc0_context(pipe)->samplers[s][i] = NULL;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index c0ed5c0..9c64482 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -72,6 +72,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
+ struct nvc0_screen *screen = nvc0->screen;
unsigned i, ms;
unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;
bool serialize = false;
@@ -183,10 +184,10 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
ms = 1 << ms_mode;
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 1024);
- PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (4 << 10));
- PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (4 << 10));
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
- PUSH_DATA (push, 256 + 128);
+ PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
for (i = 0; i < ms; i++) {
float xy[2];
nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy);
@@ -313,14 +314,14 @@ static inline void
nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
- struct nouveau_bo *bo = nvc0->screen->uniform_bo;
+ struct nvc0_screen *screen = nvc0->screen;
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 1024);
- PUSH_DATAh(push, bo->offset + (6 << 16) + (s << 10));
- PUSH_DATA (push, bo->offset + (6 << 16) + (s << 10));
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1);
- PUSH_DATA (push, 256);
+ PUSH_DATA (push, NVC0_CB_AUX_UCP_INFO);
PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
}
@@ -424,7 +425,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
if (nvc0->constbuf[s][i].user) {
struct nouveau_bo *bo = nvc0->screen->uniform_bo;
- const unsigned base = s << 16;
+ const unsigned base = NVC0_CB_USR_INFO(s);
const unsigned size = nvc0->constbuf[s][0].size;
assert(i == 0); /* we really only want OpenGL uniforms here */
assert(nvc0->constbuf[s][0].u.data);
@@ -478,15 +479,16 @@ static void
nvc0_validate_buffers(struct nvc0_context *nvc0)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_screen *screen = nvc0->screen;
int i, s;
for (s = 0; s < 5; s++) {
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 1024);
- PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10));
- PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10));
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
- PUSH_DATA (push, 512);
+ PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0));
for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
if (nvc0->buffers[s][i].buffer) {
struct nv04_resource *res =
@@ -550,8 +552,8 @@ nvc0_validate_driverconst(struct nvc0_context *nvc0)
for (i = 0; i < 5; ++i) {
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 1024);
- PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (i << 10));
- PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (i << 10));
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
PUSH_DATA (push, (15 << 4) | 1);
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index 5333240..ce6a6dc 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -707,21 +707,20 @@ void
nve4_set_tex_handles(struct nvc0_context *nvc0)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
- uint64_t address;
+ struct nvc0_screen *screen = nvc0->screen;
unsigned s;
if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)
return;
- address = nvc0->screen->uniform_bo->offset + (6 << 16);
- for (s = 0; s < 5; ++s, address += (1 << 10)) {
+ for (s = 0; s < 5; ++s) {
uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
if (!dirty)
continue;
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 1024);
- PUSH_DATAh(push, address);
- PUSH_DATA (push, address);
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
do {
int i = ffs(dirty) - 1;
dirty &= ~(1 << i);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index e0e0ad2..4d9cd57 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -820,6 +820,7 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
struct nv04_resource *buf_count = nv04_resource(info->indirect_params);
unsigned size, macro, count = info->indirect_count, drawid = info->drawid;
uint32_t offset = buf->offset + info->indirect_offset;
+ struct nvc0_screen *screen = nvc0->screen;
PUSH_SPACE(push, 7);
@@ -833,10 +834,10 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
/* Queue things up to let the macros write params to the driver constbuf */
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 512);
- PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9));
- PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9));
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
BEGIN_NVC0(push, NVC0_3D(CB_POS), 1);
- PUSH_DATA (push, 256 + 128);
+ PUSH_DATA (push, NVC0_CB_AUX_DRAW_INFO);
if (info->indexed) {
assert(nvc0->idxbuf.buffer);
@@ -934,6 +935,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_screen *screen = nvc0->screen;
int s;
/* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
@@ -975,11 +977,11 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
PUSH_SPACE(push, 9);
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 512);
- PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9));
- PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9));
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
if (!info->indirect) {
BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 3);
- PUSH_DATA (push, 256 + 128);
+ PUSH_DATA (push, NVC0_CB_AUX_DRAW_INFO);
PUSH_DATA (push, info->index_bias);
PUSH_DATA (push, info->start_instance);
PUSH_DATA (push, info->drawid);
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 6fa8920..d100a9d 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -385,7 +385,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
if (!r300->ctx)
goto fail;
- r300->cs = rws->cs_create(r300->ctx, RING_GFX, r300_flush_callback, r300, NULL);
+ r300->cs = rws->cs_create(r300->ctx, RING_GFX, r300_flush_callback, r300);
if (r300->cs == NULL)
goto fail;
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index 7a75b43..63182cb 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -53,7 +53,7 @@ static void r300_flush_and_cleanup(struct r300_context *r300, unsigned flags,
}
r300->flush_counter++;
- r300->rws->cs_flush(r300->cs, flags, fence, 0);
+ r300->rws->cs_flush(r300->cs, flags, fence);
r300->dirty_hw = 0;
/* New kitchen sink, baby. */
@@ -88,11 +88,11 @@ void r300_flush(struct pipe_context *pipe,
* and we cannot emit an empty CS. Let's write to some reg. */
CS_LOCALS(r300);
OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0);
- r300->rws->cs_flush(r300->cs, flags, fence, 0);
+ r300->rws->cs_flush(r300->cs, flags, fence);
} else {
/* Even if hw is not dirty, we should at least reset the CS in case
* the space checking failed for the first draw operation. */
- r300->rws->cs_flush(r300->cs, flags, NULL, 0);
+ r300->rws->cs_flush(r300->cs, flags, NULL);
}
}
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 57456c6..709345a 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -981,8 +981,8 @@ boolean r300_resource_get_handle(struct pipe_screen* screen,
return FALSE;
}
- return rws->buffer_get_handle(tex->buf,
- tex->tex.stride_in_bytes[0], whandle);
+ return rws->buffer_get_handle(tex->buf, tex->tex.stride_in_bytes[0],
+ 0, 0, whandle);
}
static const struct u_resource_vtbl r300_texture_vtbl =
@@ -1116,7 +1116,7 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen,
return NULL;
}
- buffer = rws->buffer_from_handle(rws, whandle, &stride);
+ buffer = rws->buffer_from_handle(rws, whandle, &stride, NULL);
if (!buffer)
return NULL;
diff --git a/src/gallium/drivers/r600/Makefile.am b/src/gallium/drivers/r600/Makefile.am
index 8317da7..f3bb03e 100644
--- a/src/gallium/drivers/r600/Makefile.am
+++ b/src/gallium/drivers/r600/Makefile.am
@@ -21,14 +21,6 @@ AM_CFLAGS += \
$(LLVM_CFLAGS) \
-I$(top_srcdir)/src/gallium/drivers/radeon/
-libr600_la_SOURCES += \
- $(LLVM_C_SOURCES)
-
-endif
-
-if USE_R600_LLVM_COMPILER
-AM_CFLAGS += \
- -DR600_USE_LLVM
endif
if HAVE_GALLIUM_COMPUTE
diff --git a/src/gallium/drivers/r600/Makefile.sources b/src/gallium/drivers/r600/Makefile.sources
index 024dea3..8bf8083 100644
--- a/src/gallium/drivers/r600/Makefile.sources
+++ b/src/gallium/drivers/r600/Makefile.sources
@@ -64,7 +64,3 @@ CXX_SOURCES = \
sb/sb_shader.h \
sb/sb_ssa_builder.cpp \
sb/sb_valtable.cpp
-
-LLVM_C_SOURCES = \
- r600_llvm.c \
- r600_llvm.h
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 2a1b251..f4b6690 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -192,6 +192,69 @@ static const struct u_resource_vtbl r600_global_buffer_vtbl =
r600_compute_global_transfer_inline_write /* transfer_inline_write */
};
+/* We need to define these R600 registers here, because we can't include
+ * evergreend.h and r600d.h.  They are the R600 / R700 case labels matched
+ * by r600_shader_binary_read_config() next to the Evergreen register names.
+ */
+#define R_028868_SQ_PGM_RESOURCES_VS 0x028868
+#define R_028850_SQ_PGM_RESOURCES_PS 0x028850
+
+#ifdef HAVE_OPENCL
+
+/**
+ * Scan the config section of a compiled shader binary and copy the values
+ * the bytecode state needs.
+ *
+ * The config data for the symbol at \p symbol_offset is read as a list of
+ * 8-byte records: a little-endian 32-bit register offset followed by a
+ * little-endian 32-bit value.
+ *
+ * \param binary         shader binary whose config section is read
+ * \param bc             bytecode state; ngpr and nstack are raised to the
+ *                       largest value seen, nlds_dw is overwritten
+ * \param symbol_offset  passed to radeon_shader_binary_config_start() to
+ *                       locate the records
+ * \param use_kill       set from the KILL_ENABLE field when a
+ *                       DB_SHADER_CONTROL record is present, otherwise
+ *                       left untouched
+ */
+static void r600_shader_binary_read_config(const struct radeon_shader_binary *binary,
+					   struct r600_bytecode *bc,
+					   uint64_t symbol_offset,
+					   boolean *use_kill)
+{
+	const unsigned char *config =
+		radeon_shader_binary_config_start(binary, symbol_offset);
+	unsigned i;
+
+	/* Only consume complete records, so a size that is not a multiple of
+	 * 8 cannot make us read beyond config_size_per_symbol bytes. */
+	for (i = 0; i + 8 <= binary->config_size_per_symbol; i += 8) {
+		uint32_t raw_reg, raw_value;
+		unsigned reg, value;
+
+		/* memcpy instead of a pointer cast: the byte buffer is const
+		 * and carries no alignment guarantee for 32-bit loads. */
+		memcpy(&raw_reg, config + i, sizeof(raw_reg));
+		memcpy(&raw_value, config + i + 4, sizeof(raw_value));
+		reg = util_le32_to_cpu(raw_reg);
+		value = util_le32_to_cpu(raw_value);
+
+		switch (reg) {
+		/* R600 / R700 */
+		case R_028850_SQ_PGM_RESOURCES_PS:
+		case R_028868_SQ_PGM_RESOURCES_VS:
+		/* Evergreen / Northern Islands */
+		case R_028844_SQ_PGM_RESOURCES_PS:
+		case R_028860_SQ_PGM_RESOURCES_VS:
+		case R_0288D4_SQ_PGM_RESOURCES_LS:
+			/* All of these are decoded with the 028844 field
+			 * accessors; keep the maximum over every record. */
+			bc->ngpr = MAX2(bc->ngpr, G_028844_NUM_GPRS(value));
+			bc->nstack = MAX2(bc->nstack, G_028844_STACK_SIZE(value));
+			break;
+		case R_02880C_DB_SHADER_CONTROL:
+			*use_kill = G_02880C_KILL_ENABLE(value);
+			break;
+		case R_0288E8_SQ_LDS_ALLOC:
+			bc->nlds_dw = value;
+			break;
+		default:
+			/* Other registers are not needed here. */
+			break;
+		}
+	}
+}
+
+/**
+ * Fill \p bc from a compiled shader binary: copy the machine code into a
+ * freshly allocated buffer and apply the settings found in the binary's
+ * config section (read at symbol offset 0).
+ *
+ * \param bc        bytecode state to fill; bytecode and ndw are overwritten
+ * \param binary    compiled shader; code_size must be a multiple of 4
+ * \param use_kill  forwarded to r600_shader_binary_read_config()
+ * \return 0 on success, non-zero if the bytecode copy could not be
+ *         allocated (bc->bytecode is then NULL and bc->ndw is 0).
+ */
+static unsigned r600_create_shader(struct r600_bytecode *bc,
+				   const struct radeon_shader_binary *binary,
+				   boolean *use_kill)
+{
+	assert(binary->code_size % 4 == 0);
+	bc->bytecode = CALLOC(1, binary->code_size);
+	if (binary->code_size) {
+		/* Do not hand a failed allocation to memcpy. */
+		if (!bc->bytecode) {
+			bc->ndw = 0;
+			return 1;
+		}
+		memcpy(bc->bytecode, binary->code, binary->code_size);
+	}
+	/* ndw counts 32-bit dwords, code_size counts bytes. */
+	bc->ndw = binary->code_size / 4;
+
+	r600_shader_binary_read_config(binary, bc, 0, use_kill);
+	return 0;
+}
+
+#endif
+
+/**
+ * Release the bytecode buffer held by \p bc.
+ */
+static void r600_destroy_shader(struct r600_bytecode *bc)
+{
+	FREE(bc->bytecode);
+	/* Clear the pointer so a stale bc cannot be used or freed again. */
+	bc->bytecode = NULL;
+}
void *evergreen_create_compute_state(
struct pipe_context *ctx_,
@@ -236,13 +299,11 @@ void evergreen_delete_compute_state(struct pipe_context *ctx_, void* state)
if (!shader)
return;
-#ifdef HAVE_OPENCL
radeon_shader_binary_clean(&shader->binary);
r600_destroy_shader(&shader->bc);
/* TODO destroy shader->code_bo, shader->const_bo
* we'll need something like r600_buffer_free */
-#endif
FREE(shader);
}
diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.h b/src/gallium/drivers/r600/evergreen_compute_internal.h
index c8998d0..e6ff760 100644
--- a/src/gallium/drivers/r600/evergreen_compute_internal.h
+++ b/src/gallium/drivers/r600/evergreen_compute_internal.h
@@ -26,6 +26,10 @@
#define EVERGREEN_COMPUTE_INTERNAL_H
#include "r600_asm.h"
+#ifdef HAVE_OPENCL
+#include "radeon/radeon_llvm.h"
+#include <llvm-c/Core.h>
+#endif
struct r600_pipe_compute {
struct r600_context *ctx;
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 4951297..7a6f957 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -57,18 +57,11 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
/* The number of dwords all the dirty states would take. */
mask = ctx->dirty_atoms;
- while (mask != 0) {
+ while (mask != 0)
num_dw += ctx->atoms[u_bit_scan64(&mask)]->num_dw;
- if (ctx->screen->b.trace_bo) {
- num_dw += R600_TRACE_CS_DWORDS;
- }
- }
/* The upper-bound of how much space a draw command would take. */
num_dw += R600_MAX_FLUSH_CS_DWORDS + R600_MAX_DRAW_CS_DWORDS;
- if (ctx->screen->b.trace_bo) {
- num_dw += R600_TRACE_CS_DWORDS;
- }
}
/* Count in queries_suspend. */
@@ -273,7 +266,7 @@ void r600_context_gfx_flush(void *context, unsigned flags,
flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
/* Flush the CS. */
- ctx->b.ws->cs_flush(cs, flags, fence, ctx->screen->b.cs_count++);
+ ctx->b.ws->cs_flush(cs, flags, fence);
r600_begin_new_cs(ctx);
}
diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
deleted file mode 100644
index 7eab29c..0000000
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ /dev/null
@@ -1,943 +0,0 @@
-#include "r600_llvm.h"
-
-#include "gallivm/lp_bld_const.h"
-#include "gallivm/lp_bld_intr.h"
-#include "gallivm/lp_bld_gather.h"
-#include "tgsi/tgsi_parse.h"
-#include "util/list.h"
-#include "util/u_memory.h"
-
-#include "evergreend.h"
-#include "r600_asm.h"
-#include "r600_sq.h"
-#include "r600_opcodes.h"
-#include "r600_shader.h"
-#include "r600_pipe.h"
-#include "radeon_llvm.h"
-#include "radeon_llvm_emit.h"
-#include "radeon_elf_util.h"
-
-#include <stdio.h>
-
-#if defined R600_USE_LLVM || defined HAVE_OPENCL
-
-#define CONSTANT_BUFFER_0_ADDR_SPACE 8
-#define CONSTANT_BUFFER_1_ADDR_SPACE (CONSTANT_BUFFER_0_ADDR_SPACE + R600_BUFFER_INFO_CONST_BUFFER)
-#define LLVM_R600_BUFFER_INFO_CONST_BUFFER \
- (CONSTANT_BUFFER_0_ADDR_SPACE + R600_BUFFER_INFO_CONST_BUFFER)
-
-static LLVMValueRef llvm_load_const_buffer(
- struct lp_build_tgsi_context * bld_base,
- LLVMValueRef OffsetValue,
- unsigned ConstantAddressSpace)
-{
- LLVMValueRef offset[2] = {
- LLVMConstInt(LLVMInt64TypeInContext(bld_base->base.gallivm->context), 0, false),
- OffsetValue
- };
-
- LLVMTypeRef const_ptr_type = LLVMPointerType(LLVMArrayType(LLVMVectorType(bld_base->base.elem_type, 4), 1024),
- ConstantAddressSpace);
- LLVMValueRef const_ptr = LLVMBuildIntToPtr(bld_base->base.gallivm->builder, lp_build_const_int32(bld_base->base.gallivm, 0), const_ptr_type, "");
- LLVMValueRef ptr = LLVMBuildGEP(bld_base->base.gallivm->builder, const_ptr, offset, 2, "");
- return LLVMBuildLoad(bld_base->base.gallivm->builder, ptr, "");
-}
-
-static LLVMValueRef llvm_fetch_const(
- struct lp_build_tgsi_context * bld_base,
- const struct tgsi_full_src_register *reg,
- enum tgsi_opcode_type type,
- unsigned swizzle)
-{
- LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, reg->Register.Index);
- if (reg->Register.Indirect) {
- struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
- LLVMValueRef index = LLVMBuildLoad(bld_base->base.gallivm->builder, bld->addr[reg->Indirect.Index][reg->Indirect.Swizzle], "");
- offset = LLVMBuildAdd(bld_base->base.gallivm->builder, offset, index, "");
- }
- unsigned ConstantAddressSpace = CONSTANT_BUFFER_0_ADDR_SPACE ;
- if (reg->Register.Dimension) {
- ConstantAddressSpace += reg->Dimension.Index;
- }
- LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, offset, ConstantAddressSpace);
- LLVMValueRef cval = LLVMBuildExtractElement(bld_base->base.gallivm->builder, cvecval, lp_build_const_int32(bld_base->base.gallivm, swizzle), "");
- return bitcast(bld_base, type, cval);
-}
-
-static void llvm_load_system_value(
- struct radeon_llvm_context * ctx,
- unsigned index,
- const struct tgsi_full_declaration *decl)
-{
- unsigned chan;
-
- switch (decl->Semantic.Name) {
- case TGSI_SEMANTIC_INSTANCEID: chan = 3; break;
- case TGSI_SEMANTIC_VERTEXID: chan = 0; break;
- default: assert(!"unknown system value");
- }
-
- ctx->system_values[index] = LLVMBuildExtractElement(ctx->gallivm.builder,
- LLVMGetParam(ctx->main_fn, 0), lp_build_const_int32(&(ctx->gallivm), chan),
- "");
-}
-
-static LLVMValueRef
-llvm_load_input_vector(
- struct radeon_llvm_context * ctx, unsigned location, unsigned ijregs,
- boolean interp)
-{
- LLVMTypeRef VecType;
- LLVMValueRef Args[3] = {
- lp_build_const_int32(&(ctx->gallivm), location)
- };
- unsigned ArgCount = 1;
- if (interp) {
- VecType = LLVMVectorType(ctx->soa.bld_base.base.elem_type, 2);
- LLVMValueRef IJIndex = LLVMGetParam(ctx->main_fn, ijregs / 2);
- Args[ArgCount++] = LLVMBuildExtractElement(ctx->gallivm.builder, IJIndex,
- lp_build_const_int32(&(ctx->gallivm), 2 * (ijregs % 2)), "");
- Args[ArgCount++] = LLVMBuildExtractElement(ctx->gallivm.builder, IJIndex,
- lp_build_const_int32(&(ctx->gallivm), 2 * (ijregs % 2) + 1), "");
- LLVMValueRef HalfVec[2] = {
- lp_build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.xy",
- VecType, Args, ArgCount, LLVMReadNoneAttribute),
- lp_build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.zw",
- VecType, Args, ArgCount, LLVMReadNoneAttribute)
- };
- LLVMValueRef MaskInputs[4] = {
- lp_build_const_int32(&(ctx->gallivm), 0),
- lp_build_const_int32(&(ctx->gallivm), 1),
- lp_build_const_int32(&(ctx->gallivm), 2),
- lp_build_const_int32(&(ctx->gallivm), 3)
- };
- LLVMValueRef Mask = LLVMConstVector(MaskInputs, 4);
- return LLVMBuildShuffleVector(ctx->gallivm.builder, HalfVec[0], HalfVec[1],
- Mask, "");
- } else {
- VecType = LLVMVectorType(ctx->soa.bld_base.base.elem_type, 4);
- return lp_build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.const",
- VecType, Args, ArgCount, LLVMReadNoneAttribute);
- }
-}
-
-static LLVMValueRef
-llvm_face_select_helper(
- struct radeon_llvm_context * ctx,
- LLVMValueRef face, LLVMValueRef front_color, LLVMValueRef back_color)
-{
- const struct lp_build_context * bb = &ctx->soa.bld_base.base;
- LLVMValueRef is_front = LLVMBuildFCmp(
- bb->gallivm->builder, LLVMRealUGT, face,
- lp_build_const_float(bb->gallivm, 0.0f), "");
- return LLVMBuildSelect(bb->gallivm->builder, is_front,
- front_color, back_color, "");
-}
-
-static void llvm_load_input(
- struct radeon_llvm_context * ctx,
- unsigned input_index,
- const struct tgsi_full_declaration *decl)
-{
- const struct r600_shader_io * input = &ctx->r600_inputs[input_index];
- unsigned chan;
- int two_side = (ctx->two_side && input->name == TGSI_SEMANTIC_COLOR);
- LLVMValueRef v;
- boolean require_interp_intrinsic = ctx->chip_class >= EVERGREEN &&
- ctx->type == TGSI_PROCESSOR_FRAGMENT;
-
- if (require_interp_intrinsic && input->spi_sid) {
- v = llvm_load_input_vector(ctx, input->lds_pos, input->ij_index,
- (input->interpolate > 0));
- } else
- v = LLVMGetParam(ctx->main_fn, input->gpr);
-
- if (two_side) {
- struct r600_shader_io * back_input =
- &ctx->r600_inputs[input->back_color_input];
- LLVMValueRef v2;
- LLVMValueRef face = LLVMGetParam(ctx->main_fn, ctx->face_gpr);
- face = LLVMBuildExtractElement(ctx->gallivm.builder, face,
- lp_build_const_int32(&(ctx->gallivm), 0), "");
-
- if (require_interp_intrinsic && back_input->spi_sid)
- v2 = llvm_load_input_vector(ctx, back_input->lds_pos,
- back_input->ij_index, (back_input->interpolate > 0));
- else
- v2 = LLVMGetParam(ctx->main_fn, back_input->gpr);
- v = llvm_face_select_helper(ctx, face, v, v2);
- }
-
- for (chan = 0; chan < 4; chan++) {
- unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
-
- ctx->inputs[soa_index] = LLVMBuildExtractElement(ctx->gallivm.builder, v,
- lp_build_const_int32(&(ctx->gallivm), chan), "");
-
- if (input->name == TGSI_SEMANTIC_POSITION &&
- ctx->type == TGSI_PROCESSOR_FRAGMENT && chan == 3) {
- /* RCP for fragcoord.w */
- ctx->inputs[soa_index] = LLVMBuildFDiv(ctx->gallivm.builder,
- lp_build_const_float(&(ctx->gallivm), 1.0f),
- ctx->inputs[soa_index], "");
- }
- }
-}
-
-static void llvm_emit_prologue(struct lp_build_tgsi_context * bld_base)
-{
- struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
- radeon_llvm_shader_type(ctx->main_fn, ctx->type);
-
-}
-
-static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
-{
- struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
- struct lp_build_context * base = &bld_base->base;
- struct pipe_stream_output_info * so = ctx->stream_outputs;
- unsigned i;
- unsigned next_pos = 60;
- unsigned next_param = 0;
-
- unsigned color_count = 0;
- boolean has_color = false;
-
- if (ctx->type == TGSI_PROCESSOR_VERTEX && so->num_outputs) {
- for (i = 0; i < so->num_outputs; i++) {
- unsigned register_index = so->output[i].register_index;
- unsigned start_component = so->output[i].start_component;
- unsigned num_components = so->output[i].num_components;
- unsigned dst_offset = so->output[i].dst_offset;
- unsigned chan;
- LLVMValueRef elements[4];
- if (dst_offset < start_component) {
- for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
- elements[chan] = LLVMBuildLoad(base->gallivm->builder,
- ctx->soa.outputs[register_index][(chan + start_component) % TGSI_NUM_CHANNELS], "");
- }
- start_component = 0;
- } else {
- for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
- elements[chan] = LLVMBuildLoad(base->gallivm->builder,
- ctx->soa.outputs[register_index][chan], "");
- }
- }
- LLVMValueRef output = lp_build_gather_values(base->gallivm, elements, 4);
- LLVMValueRef args[4];
- args[0] = output;
- args[1] = lp_build_const_int32(base->gallivm, dst_offset - start_component);
- args[2] = lp_build_const_int32(base->gallivm, so->output[i].output_buffer);
- args[3] = lp_build_const_int32(base->gallivm, ((1 << num_components) - 1) << start_component);
- lp_build_intrinsic(base->gallivm->builder, "llvm.R600.store.stream.output",
- LLVMVoidTypeInContext(base->gallivm->context), args, 4, 0);
- }
- }
-
- /* Add the necessary export instructions */
- for (i = 0; i < ctx->output_reg_count; i++) {
- unsigned chan;
- LLVMValueRef elements[4];
- for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
- elements[chan] = LLVMBuildLoad(base->gallivm->builder,
- ctx->soa.outputs[i][chan], "");
- }
- if (ctx->alpha_to_one && ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->r600_outputs[i].name == TGSI_SEMANTIC_COLOR)
- elements[3] = lp_build_const_float(base->gallivm, 1.0f);
- LLVMValueRef output = lp_build_gather_values(base->gallivm, elements, 4);
-
- if (ctx->type == TGSI_PROCESSOR_VERTEX) {
- switch (ctx->r600_outputs[i].name) {
- case TGSI_SEMANTIC_POSITION:
- case TGSI_SEMANTIC_PSIZE: {
- LLVMValueRef args[3];
- args[0] = output;
- args[1] = lp_build_const_int32(base->gallivm, next_pos++);
- args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
- lp_build_intrinsic(
- base->gallivm->builder,
- "llvm.R600.store.swizzle",
- LLVMVoidTypeInContext(base->gallivm->context),
- args, 3, 0);
- break;
- }
- case TGSI_SEMANTIC_CLIPVERTEX: {
- LLVMValueRef args[3];
- unsigned reg_index;
- LLVMValueRef adjusted_elements[4];
- for (reg_index = 0; reg_index < 2; reg_index ++) {
- for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
- LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, reg_index * 4 + chan);
- LLVMValueRef base_vector = llvm_load_const_buffer(bld_base, offset, CONSTANT_BUFFER_1_ADDR_SPACE);
- args[0] = output;
- args[1] = base_vector;
- adjusted_elements[chan] = lp_build_intrinsic(base->gallivm->builder,
- "llvm.AMDGPU.dp4", bld_base->base.elem_type,
- args, 2, LLVMReadNoneAttribute);
- }
- args[0] = lp_build_gather_values(base->gallivm,
- adjusted_elements, 4);
- args[1] = lp_build_const_int32(base->gallivm, next_pos++);
- args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
- lp_build_intrinsic(
- base->gallivm->builder,
- "llvm.R600.store.swizzle",
- LLVMVoidTypeInContext(base->gallivm->context),
- args, 3, 0);
- }
- break;
- }
- case TGSI_SEMANTIC_CLIPDIST : {
- LLVMValueRef args[3];
- args[0] = output;
- args[1] = lp_build_const_int32(base->gallivm, next_pos++);
- args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
- lp_build_intrinsic(
- base->gallivm->builder,
- "llvm.R600.store.swizzle",
- LLVMVoidTypeInContext(base->gallivm->context),
- args, 3, 0);
- args[1] = lp_build_const_int32(base->gallivm, next_param++);
- args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
- lp_build_intrinsic(
- base->gallivm->builder,
- "llvm.R600.store.swizzle",
- LLVMVoidTypeInContext(base->gallivm->context),
- args, 3, 0);
- break;
- }
- case TGSI_SEMANTIC_FOG: {
- elements[0] = LLVMBuildLoad(base->gallivm->builder,
- ctx->soa.outputs[i][0], "");
- elements[1] = elements[2] = lp_build_const_float(base->gallivm, 0.0f);
- elements[3] = lp_build_const_float(base->gallivm, 1.0f);
-
- LLVMValueRef args[3];
- args[0] = lp_build_gather_values(base->gallivm, elements, 4);
- args[1] = lp_build_const_int32(base->gallivm, next_param++);
- args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
- lp_build_intrinsic(
- base->gallivm->builder,
- "llvm.R600.store.swizzle",
- LLVMVoidTypeInContext(base->gallivm->context),
- args, 3, 0);
- break;
- }
- default: {
- LLVMValueRef args[3];
- args[0] = output;
- args[1] = lp_build_const_int32(base->gallivm, next_param++);
- args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
- lp_build_intrinsic(
- base->gallivm->builder,
- "llvm.R600.store.swizzle",
- LLVMVoidTypeInContext(base->gallivm->context),
- args, 3, 0);
- break;
- }
- }
- } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
- switch (ctx->r600_outputs[i].name) {
- case TGSI_SEMANTIC_COLOR:
- has_color = true;
- if ( color_count < ctx->color_buffer_count) {
- LLVMValueRef args[3];
- args[0] = output;
- if (ctx->fs_color_all) {
- for (unsigned j = 0; j < ctx->color_buffer_count; j++) {
- args[1] = lp_build_const_int32(base->gallivm, j);
- args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL);
- lp_build_intrinsic(
- base->gallivm->builder,
- "llvm.R600.store.swizzle",
- LLVMVoidTypeInContext(base->gallivm->context),
- args, 3, 0);
- }
- } else {
- args[1] = lp_build_const_int32(base->gallivm, color_count++);
- args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL);
- lp_build_intrinsic(
- base->gallivm->builder,
- "llvm.R600.store.swizzle",
- LLVMVoidTypeInContext(base->gallivm->context),
- args, 3, 0);
- }
- }
- break;
- case TGSI_SEMANTIC_POSITION:
- lp_build_intrinsic_unary(
- base->gallivm->builder,
- "llvm.R600.store.pixel.depth",
- LLVMVoidTypeInContext(base->gallivm->context),
- LLVMBuildLoad(base->gallivm->builder, ctx->soa.outputs[i][2], ""));
- break;
- case TGSI_SEMANTIC_STENCIL:
- lp_build_intrinsic_unary(
- base->gallivm->builder,
- "llvm.R600.store.pixel.stencil",
- LLVMVoidTypeInContext(base->gallivm->context),
- LLVMBuildLoad(base->gallivm->builder, ctx->soa.outputs[i][1], ""));
- break;
- }
- }
- }
- // Add dummy exports
- if (ctx->type == TGSI_PROCESSOR_VERTEX) {
- if (!next_param) {
- lp_build_intrinsic_unary(base->gallivm->builder, "llvm.R600.store.dummy",
- LLVMVoidTypeInContext(base->gallivm->context),
- lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM));
- }
- if (!(next_pos-60)) {
- lp_build_intrinsic_unary(base->gallivm->builder, "llvm.R600.store.dummy",
- LLVMVoidTypeInContext(base->gallivm->context),
- lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS));
- }
- }
- if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
- if (!has_color) {
- lp_build_intrinsic_unary(base->gallivm->builder, "llvm.R600.store.dummy",
- LLVMVoidTypeInContext(base->gallivm->context),
- lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL));
- }
- }
-
-}
-
-static void llvm_emit_tex(
- const struct lp_build_tgsi_action * action,
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- struct gallivm_state * gallivm = bld_base->base.gallivm;
- LLVMValueRef args[7];
- unsigned c, sampler_src;
- struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
-
- if (emit_data->inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
- switch (emit_data->inst->Instruction.Opcode) {
- case TGSI_OPCODE_TXQ: {
- struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
- ctx->uses_tex_buffers = true;
- bool isEgPlus = (ctx->chip_class >= EVERGREEN);
- LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm,
- isEgPlus ? 0 : 1);
- LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, offset,
- LLVM_R600_BUFFER_INFO_CONST_BUFFER);
- if (!isEgPlus) {
- LLVMValueRef maskval[4] = {
- lp_build_const_int32(gallivm, 1),
- lp_build_const_int32(gallivm, 2),
- lp_build_const_int32(gallivm, 3),
- lp_build_const_int32(gallivm, 0),
- };
- LLVMValueRef mask = LLVMConstVector(maskval, 4);
- cvecval = LLVMBuildShuffleVector(gallivm->builder, cvecval, cvecval,
- mask, "");
- }
- emit_data->output[0] = cvecval;
- return;
- }
- case TGSI_OPCODE_TXF: {
- args[0] = LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 0), "");
- args[1] = lp_build_const_int32(gallivm, R600_MAX_CONST_BUFFERS);
- emit_data->output[0] = lp_build_intrinsic(gallivm->builder,
- "llvm.R600.load.texbuf",
- emit_data->dst_type, args, 2, LLVMReadNoneAttribute);
- if (ctx->chip_class >= EVERGREEN)
- return;
- ctx->uses_tex_buffers = true;
- LLVMDumpValue(emit_data->output[0]);
- emit_data->output[0] = LLVMBuildBitCast(gallivm->builder,
- emit_data->output[0], LLVMVectorType(bld_base->base.int_elem_type, 4),
- "");
- LLVMValueRef Mask = llvm_load_const_buffer(bld_base,
- lp_build_const_int32(gallivm, 0),
- LLVM_R600_BUFFER_INFO_CONST_BUFFER);
- Mask = LLVMBuildBitCast(gallivm->builder, Mask,
- LLVMVectorType(bld_base->base.int_elem_type, 4), "");
- emit_data->output[0] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_AND,
- emit_data->output[0],
- Mask);
- LLVMValueRef WComponent = LLVMBuildExtractElement(gallivm->builder,
- emit_data->output[0], lp_build_const_int32(gallivm, 3), "");
- Mask = llvm_load_const_buffer(bld_base, lp_build_const_int32(gallivm, 1),
- LLVM_R600_BUFFER_INFO_CONST_BUFFER);
- Mask = LLVMBuildExtractElement(gallivm->builder, Mask,
- lp_build_const_int32(gallivm, 0), "");
- Mask = LLVMBuildBitCast(gallivm->builder, Mask,
- bld_base->base.int_elem_type, "");
- WComponent = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_OR,
- WComponent, Mask);
- emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder,
- emit_data->output[0], WComponent, lp_build_const_int32(gallivm, 3), "");
- emit_data->output[0] = LLVMBuildBitCast(gallivm->builder,
- emit_data->output[0], LLVMVectorType(bld_base->base.elem_type, 4), "");
- }
- return;
- default:
- break;
- }
- }
-
- if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TEX ||
- emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
- LLVMValueRef Vector[4] = {
- LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
- lp_build_const_int32(gallivm, 0), ""),
- LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
- lp_build_const_int32(gallivm, 1), ""),
- LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
- lp_build_const_int32(gallivm, 2), ""),
- LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
- lp_build_const_int32(gallivm, 3), ""),
- };
- switch (emit_data->inst->Texture.Texture) {
- case TGSI_TEXTURE_2D:
- case TGSI_TEXTURE_RECT:
- Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type);
- break;
- case TGSI_TEXTURE_1D:
- Vector[1] = Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type);
- break;
- default:
- break;
- }
- args[0] = lp_build_gather_values(gallivm, Vector, 4);
- } else {
- args[0] = emit_data->args[0];
- }
-
- assert(emit_data->arg_count + 2 <= Elements(args));
-
- for (c = 1; c < emit_data->arg_count; ++c)
- args[c] = emit_data->args[c];
-
- if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
- args[1] = LLVMBuildShl(gallivm->builder, args[1], lp_build_const_int32(gallivm, 1), "");
- args[2] = LLVMBuildShl(gallivm->builder, args[2], lp_build_const_int32(gallivm, 1), "");
- args[3] = LLVMBuildShl(gallivm->builder, args[3], lp_build_const_int32(gallivm, 1), "");
- }
-
- sampler_src = emit_data->inst->Instruction.NumSrcRegs-1;
-
- args[c++] = lp_build_const_int32(gallivm,
- emit_data->inst->Src[sampler_src].Register.Index + R600_MAX_CONST_BUFFERS);
- args[c++] = lp_build_const_int32(gallivm,
- emit_data->inst->Src[sampler_src].Register.Index);
- args[c++] = lp_build_const_int32(gallivm,
- emit_data->inst->Texture.Texture);
-
- if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
- (emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
- emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) {
-
- switch (emit_data->inst->Texture.Texture) {
- case TGSI_TEXTURE_2D_MSAA:
- args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D);
- break;
- case TGSI_TEXTURE_2D_ARRAY_MSAA:
- args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D_ARRAY);
- break;
- default:
- break;
- }
-
- if (ctx->has_compressed_msaa_texturing) {
- LLVMValueRef ldptr_args[10] = {
- args[0], // Coord
- args[1], // Offset X
- args[2], // Offset Y
- args[3], // Offset Z
- args[4],
- args[5],
- lp_build_const_int32(gallivm, 1),
- lp_build_const_int32(gallivm, 1),
- lp_build_const_int32(gallivm, 1),
- lp_build_const_int32(gallivm, 1)
- };
- LLVMValueRef ptr = lp_build_intrinsic(gallivm->builder,
- "llvm.R600.ldptr",
- emit_data->dst_type, ldptr_args, 10, LLVMReadNoneAttribute);
- LLVMValueRef Tmp = LLVMBuildExtractElement(gallivm->builder, args[0],
- lp_build_const_int32(gallivm, 3), "");
- Tmp = LLVMBuildMul(gallivm->builder, Tmp,
- lp_build_const_int32(gallivm, 4), "");
- LLVMValueRef ResX = LLVMBuildExtractElement(gallivm->builder, ptr,
- lp_build_const_int32(gallivm, 0), "");
- ResX = LLVMBuildBitCast(gallivm->builder, ResX,
- bld_base->base.int_elem_type, "");
- Tmp = LLVMBuildLShr(gallivm->builder, ResX, Tmp, "");
- Tmp = LLVMBuildAnd(gallivm->builder, Tmp,
- lp_build_const_int32(gallivm, 0xF), "");
- args[0] = LLVMBuildInsertElement(gallivm->builder, args[0], Tmp,
- lp_build_const_int32(gallivm, 3), "");
- args[c++] = lp_build_const_int32(gallivm,
- emit_data->inst->Texture.Texture);
- }
- }
-
- emit_data->output[0] = lp_build_intrinsic(gallivm->builder,
- action->intr_name,
- emit_data->dst_type, args, c, LLVMReadNoneAttribute);
-
- if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXQ &&
- ((emit_data->inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
- emit_data->inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY)))
- if (emit_data->inst->Dst[0].Register.WriteMask & 4) {
- LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, 0);
- LLVMValueRef ZLayer = LLVMBuildExtractElement(gallivm->builder,
- llvm_load_const_buffer(bld_base, offset, LLVM_R600_BUFFER_INFO_CONST_BUFFER),
- lp_build_const_int32(gallivm, 0), "");
-
- emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder, emit_data->output[0], ZLayer, lp_build_const_int32(gallivm, 2), "");
- struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
- ctx->has_txq_cube_array_z_comp = true;
- }
-}
-
-static void emit_cndlt(
- const struct lp_build_tgsi_action * action,
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- LLVMValueRef float_zero = lp_build_const_float(
- bld_base->base.gallivm, 0.0f);
- LLVMValueRef cmp = LLVMBuildFCmp(
- builder, LLVMRealULT, emit_data->args[0], float_zero, "");
- emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
- cmp, emit_data->args[1], emit_data->args[2], "");
-}
-
-static void dp_fetch_args(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- struct lp_build_context * base = &bld_base->base;
- unsigned chan;
- LLVMValueRef elements[2][4];
- unsigned opcode = emit_data->inst->Instruction.Opcode;
- unsigned dp_components = (opcode == TGSI_OPCODE_DP2 ? 2 :
- (opcode == TGSI_OPCODE_DP3 ? 3 : 4));
- for (chan = 0 ; chan < dp_components; chan++) {
- elements[0][chan] = lp_build_emit_fetch(bld_base,
- emit_data->inst, 0, chan);
- elements[1][chan] = lp_build_emit_fetch(bld_base,
- emit_data->inst, 1, chan);
- }
-
- for ( ; chan < 4; chan++) {
- elements[0][chan] = base->zero;
- elements[1][chan] = base->zero;
- }
-
- /* Fix up for DPH */
- if (opcode == TGSI_OPCODE_DPH) {
- elements[0][TGSI_CHAN_W] = base->one;
- }
-
- emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
- elements[0], 4);
- emit_data->args[1] = lp_build_gather_values(bld_base->base.gallivm,
- elements[1], 4);
- emit_data->arg_count = 2;
-
- emit_data->dst_type = base->elem_type;
-}
-
-static struct lp_build_tgsi_action dot_action = {
- .fetch_args = dp_fetch_args,
- .emit = build_tgsi_intrinsic_nomem,
- .intr_name = "llvm.AMDGPU.dp4"
-};
-
-static void txd_fetch_args(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- const struct tgsi_full_instruction * inst = emit_data->inst;
-
- LLVMValueRef coords[4];
- unsigned chan, src;
- for (src = 0; src < 3; src++) {
- for (chan = 0; chan < 4; chan++)
- coords[chan] = lp_build_emit_fetch(bld_base, inst, src, chan);
-
- emit_data->args[src] = lp_build_gather_values(bld_base->base.gallivm,
- coords, 4);
- }
- emit_data->arg_count = 3;
- emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
-}
-
-
-static void txp_fetch_args(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- const struct tgsi_full_instruction * inst = emit_data->inst;
- LLVMValueRef src_w;
- unsigned chan;
- LLVMValueRef coords[5];
-
- emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
- src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
-
- for (chan = 0; chan < 3; chan++ ) {
- LLVMValueRef arg = lp_build_emit_fetch(bld_base,
- emit_data->inst, 0, chan);
- coords[chan] = lp_build_emit_llvm_binary(bld_base,
- TGSI_OPCODE_DIV, arg, src_w);
- }
- coords[3] = bld_base->base.one;
-
- if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
- inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
- inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
- inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
- inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
- inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
- radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, NULL);
- }
-
- emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
- coords, 4);
- emit_data->arg_count = 1;
-}
-
-static void tex_fetch_args(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- const struct tgsi_full_instruction * inst = emit_data->inst;
-
- LLVMValueRef coords[5];
- unsigned chan;
- for (chan = 0; chan < 4; chan++) {
- coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan);
- }
-
- if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
- inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
- inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
- /* These instructions have additional operand that should be packed
- * into the cube coord vector by radeon_llvm_emit_prepare_cube_coords.
- * That operand should be passed as a float value in the args array
- * right after the coord vector. After packing it's not used anymore,
- * that's why arg_count is not increased */
- coords[4] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
- }
-
- if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
- inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
- inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
- inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
- inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
- inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
- radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, NULL);
- }
-
- emit_data->arg_count = 1;
- emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
- coords, 4);
- emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
-}
-
-static void txf_fetch_args(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- const struct tgsi_full_instruction * inst = emit_data->inst;
- struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
- const struct tgsi_texture_offset * off = inst->TexOffsets;
- LLVMTypeRef offset_type = bld_base->int_bld.elem_type;
-
- /* fetch tex coords */
- tex_fetch_args(bld_base, emit_data);
-
- /* fetch tex offsets */
- if (inst->Texture.NumOffsets) {
- assert(inst->Texture.NumOffsets == 1);
-
- emit_data->args[1] = LLVMConstBitCast(
- bld->immediates[off->Index][off->SwizzleX],
- offset_type);
- emit_data->args[2] = LLVMConstBitCast(
- bld->immediates[off->Index][off->SwizzleY],
- offset_type);
- emit_data->args[3] = LLVMConstBitCast(
- bld->immediates[off->Index][off->SwizzleZ],
- offset_type);
- } else {
- emit_data->args[1] = bld_base->int_bld.zero;
- emit_data->args[2] = bld_base->int_bld.zero;
- emit_data->args[3] = bld_base->int_bld.zero;
- }
-
- emit_data->arg_count = 4;
-}
-
-LLVMModuleRef r600_tgsi_llvm(
- struct radeon_llvm_context * ctx,
- const struct tgsi_token * tokens)
-{
- struct tgsi_shader_info shader_info;
- struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base;
- radeon_llvm_context_init(ctx, "r600--");
- LLVMTypeRef Arguments[32];
- unsigned ArgumentsCount = 0;
- for (unsigned i = 0; i < ctx->inputs_count; i++)
- Arguments[ArgumentsCount++] = LLVMVectorType(bld_base->base.elem_type, 4);
- radeon_llvm_create_func(ctx, NULL, 0, Arguments, ArgumentsCount);
- for (unsigned i = 0; i < ctx->inputs_count; i++) {
- LLVMValueRef P = LLVMGetParam(ctx->main_fn, i);
- LLVMAddAttribute(P, LLVMInRegAttribute);
- }
- tgsi_scan_shader(tokens, &shader_info);
-
- bld_base->info = &shader_info;
- bld_base->userdata = ctx;
- bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = llvm_fetch_const;
- bld_base->emit_prologue = llvm_emit_prologue;
- bld_base->emit_epilogue = llvm_emit_epilogue;
- ctx->load_input = llvm_load_input;
- ctx->load_system_value = llvm_load_system_value;
-
- bld_base->op_actions[TGSI_OPCODE_DP2] = dot_action;
- bld_base->op_actions[TGSI_OPCODE_DP3] = dot_action;
- bld_base->op_actions[TGSI_OPCODE_DP4] = dot_action;
- bld_base->op_actions[TGSI_OPCODE_DPH] = dot_action;
- bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx";
- bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_DDX].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy";
- bld_base->op_actions[TGSI_OPCODE_DDY].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_DDY].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TEX].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TEX].intr_name = "llvm.AMDGPU.tex";
- bld_base->op_actions[TGSI_OPCODE_TEX].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TEX2].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TEX2].intr_name = "llvm.AMDGPU.tex";
- bld_base->op_actions[TGSI_OPCODE_TEX2].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TXB].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXB].intr_name = "llvm.AMDGPU.txb";
- bld_base->op_actions[TGSI_OPCODE_TXB].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TXB2].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXB2].intr_name = "llvm.AMDGPU.txb";
- bld_base->op_actions[TGSI_OPCODE_TXB2].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TXD].fetch_args = txd_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXD].intr_name = "llvm.AMDGPU.txd";
- bld_base->op_actions[TGSI_OPCODE_TXD].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TXF].fetch_args = txf_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXF].intr_name = "llvm.AMDGPU.txf";
- bld_base->op_actions[TGSI_OPCODE_TXF].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TXL].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDGPU.txl";
- bld_base->op_actions[TGSI_OPCODE_TXL].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TXL2].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXL2].intr_name = "llvm.AMDGPU.txl";
- bld_base->op_actions[TGSI_OPCODE_TXL2].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXP].intr_name = "llvm.AMDGPU.tex";
- bld_base->op_actions[TGSI_OPCODE_TXP].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXQ].intr_name = "llvm.AMDGPU.txq";
- bld_base->op_actions[TGSI_OPCODE_TXQ].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cndlt;
-
- lp_build_tgsi_llvm(bld_base, tokens);
-
- LLVMBuildRetVoid(bld_base->base.gallivm->builder);
- radeon_llvm_finalize_module(ctx);
-
- return ctx->gallivm.module;
-}
-
-/* We need to define these R600 registers here, because we can't include
- * evergreend.h and r600d.h.
- */
-#define R_028868_SQ_PGM_RESOURCES_VS 0x028868
-#define R_028850_SQ_PGM_RESOURCES_PS 0x028850
-
-void r600_shader_binary_read_config(const struct radeon_shader_binary *binary,
- struct r600_bytecode *bc,
- uint64_t symbol_offset,
- boolean *use_kill)
-{
- unsigned i;
- const unsigned char *config =
- radeon_shader_binary_config_start(binary, symbol_offset);
-
- for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
- unsigned reg =
- util_le32_to_cpu(*(uint32_t*)(config + i));
- unsigned value =
- util_le32_to_cpu(*(uint32_t*)(config + i + 4));
- switch (reg) {
- /* R600 / R700 */
- case R_028850_SQ_PGM_RESOURCES_PS:
- case R_028868_SQ_PGM_RESOURCES_VS:
- /* Evergreen / Northern Islands */
- case R_028844_SQ_PGM_RESOURCES_PS:
- case R_028860_SQ_PGM_RESOURCES_VS:
- case R_0288D4_SQ_PGM_RESOURCES_LS:
- bc->ngpr = MAX2(bc->ngpr, G_028844_NUM_GPRS(value));
- bc->nstack = MAX2(bc->nstack, G_028844_STACK_SIZE(value));
- break;
- case R_02880C_DB_SHADER_CONTROL:
- *use_kill = G_02880C_KILL_ENABLE(value);
- break;
- case R_0288E8_SQ_LDS_ALLOC:
- bc->nlds_dw = value;
- break;
- }
- }
-
-}
-
-unsigned r600_create_shader(struct r600_bytecode *bc,
- const struct radeon_shader_binary *binary,
- boolean *use_kill)
-
-{
- assert(binary->code_size % 4 == 0);
- bc->bytecode = CALLOC(1, binary->code_size);
- memcpy(bc->bytecode, binary->code, binary->code_size);
- bc->ndw = binary->code_size / 4;
-
- r600_shader_binary_read_config(binary, bc, 0, use_kill);
-
- return 0;
-}
-
-void r600_destroy_shader(struct r600_bytecode *bc)
-{
- FREE(bc->bytecode);
-}
-
-unsigned r600_llvm_compile(
- LLVMModuleRef mod,
- enum radeon_family family,
- struct r600_bytecode *bc,
- boolean *use_kill,
- unsigned dump,
- struct pipe_debug_callback *debug)
-{
- unsigned r;
- struct radeon_shader_binary binary;
- const char * gpu_family = r600_get_llvm_processor_name(family);
-
- radeon_shader_binary_init(&binary);
- if (dump)
- LLVMDumpModule(mod);
- r = radeon_llvm_compile(mod, &binary, gpu_family, NULL, debug);
-
- r = r600_create_shader(bc, &binary, use_kill);
-
- radeon_shader_binary_clean(&binary);
-
- return r;
-}
-
-#endif
diff --git a/src/gallium/drivers/r600/r600_llvm.h b/src/gallium/drivers/r600/r600_llvm.h
deleted file mode 100644
index 3f7fc4b..0000000
--- a/src/gallium/drivers/r600/r600_llvm.h
+++ /dev/null
@@ -1,42 +0,0 @@
-
-#ifndef R600_LLVM_H
-#define R600_LLVM_H
-
-#if defined R600_USE_LLVM || defined HAVE_OPENCL
-
-#include "radeon/radeon_llvm.h"
-#include <llvm-c/Core.h>
-
-struct pipe_debug_callback;
-struct r600_bytecode;
-struct r600_shader_ctx;
-struct radeon_llvm_context;
-struct radeon_shader_binary;
-enum radeon_family;
-
-LLVMModuleRef r600_tgsi_llvm(
- struct radeon_llvm_context * ctx,
- const struct tgsi_token * tokens);
-
-unsigned r600_llvm_compile(
- LLVMModuleRef mod,
- enum radeon_family family,
- struct r600_bytecode *bc,
- boolean *use_kill,
- unsigned dump,
- struct pipe_debug_callback *debug);
-
-unsigned r600_create_shader(struct r600_bytecode *bc,
- const struct radeon_shader_binary *binary,
- boolean *use_kill);
-
-void r600_destroy_shader(struct r600_bytecode *bc);
-
-void r600_shader_binary_read_config(const struct radeon_shader_binary *binary,
- struct r600_bytecode *bc,
- uint64_t symbol_offset,
- boolean *use_kill);
-
-#endif /* defined R600_USE_LLVM || defined HAVE_OPENCL */
-
-#endif /* R600_LLVM_H */
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 7018088..b801191 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -43,9 +43,6 @@
static const struct debug_named_value r600_debug_options[] = {
/* features */
-#if defined(R600_USE_LLVM)
- { "llvm", DBG_LLVM, "Enable the LLVM shader compiler" },
-#endif
{ "nocpdma", DBG_NO_CP_DMA, "Disable CP DMA" },
/* shader backend */
@@ -187,9 +184,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen,
}
rctx->b.gfx.cs = ws->cs_create(rctx->b.ctx, RING_GFX,
- r600_context_gfx_flush, rctx,
- rscreen->b.trace_bo ?
- rscreen->b.trace_bo->buf : NULL);
+ r600_context_gfx_flush, rctx);
rctx->b.gfx.flush = r600_context_gfx_flush;
rctx->allocator_fetch_shader = u_suballocator_create(&rctx->b.b, 64 * 1024, 256,
@@ -622,8 +617,6 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
rscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS | DBG_TCS | DBG_TES;
if (!debug_get_bool_option("R600_HYPERZ", TRUE))
rscreen->b.debug_flags |= DBG_NO_HYPERZ;
- if (debug_get_bool_option("R600_LLVM", FALSE))
- rscreen->b.debug_flags |= DBG_LLVM;
if (rscreen->b.family == CHIP_UNKNOWN) {
fprintf(stderr, "r600: Unknown chipset 0x%04X\n", rscreen->b.info.pci_id);
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index f8a2039..cd0052a 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -28,8 +28,6 @@
#include "radeon/r600_pipe_common.h"
#include "radeon/r600_cs.h"
-
-#include "r600_llvm.h"
#include "r600_public.h"
#include "util/u_suballoc.h"
@@ -60,7 +58,6 @@
/* the number of CS dwords for flushing and drawing */
#define R600_MAX_FLUSH_CS_DWORDS 16
#define R600_MAX_DRAW_CS_DWORDS 58
-#define R600_TRACE_CS_DWORDS 7
#define R600_MAX_USER_CONST_BUFFERS 13
#define R600_MAX_DRIVER_CONST_BUFFERS 3
@@ -244,7 +241,6 @@ struct r600_gs_rings_state {
/* This must start from 16. */
/* features */
-#define DBG_LLVM (1 << 29)
#define DBG_NO_CP_DMA (1 << 30)
/* shader backend */
#define DBG_NO_SB (1 << 21)
@@ -571,15 +567,10 @@ static inline void r600_mark_atom_dirty(struct r600_context *rctx,
r600_set_atom_dirty(rctx, atom, true);
}
-void r600_trace_emit(struct r600_context *rctx);
-
static inline void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
{
atom->emit(&rctx->b, atom);
r600_set_atom_dirty(rctx, atom, false);
- if (rctx->screen->b.trace_bo) {
- r600_trace_emit(rctx);
- }
}
static inline void r600_set_cso_state(struct r600_context *rctx,
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index df40f94..77658f5 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -21,7 +21,6 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "r600_sq.h"
-#include "r600_llvm.h"
#include "r600_formats.h"
#include "r600_opcodes.h"
#include "r600_shader.h"
@@ -194,10 +193,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
/* disable SB for shaders using doubles */
use_sb &= !shader->shader.uses_doubles;
- /* Check if the bytecode has already been built. When using the llvm
- * backend, r600_shader_from_tgsi() will take care of building the
- * bytecode.
- */
+ /* Check if the bytecode has already been built. */
if (!shader->shader.bc.bytecode) {
r = r600_bytecode_build(&shader->shader.bc);
if (r) {
@@ -332,7 +328,6 @@ struct r600_shader_ctx {
uint32_t *literals;
uint32_t nliterals;
uint32_t max_driver_temp_used;
- boolean use_llvm;
/* needed for evergreen interpolation */
struct eg_interp eg_interpolators[6]; // indexed by Persp/Linear * 3 + sample/center/centroid
/* evergreen/cayman also store sample mask in face register */
@@ -661,11 +656,9 @@ static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index)
ctx->shader->input[index].lds_pos = ctx->shader->nlds++;
if (ctx->shader->input[index].interpolate > 0) {
evergreen_interp_assign_ij_index(ctx, index);
- if (!ctx->use_llvm)
- r = evergreen_interp_alu(ctx, index);
+ r = evergreen_interp_alu(ctx, index);
} else {
- if (!ctx->use_llvm)
- r = evergreen_interp_flat(ctx, index);
+ r = evergreen_interp_flat(ctx, index);
}
}
return r;
@@ -2936,22 +2929,16 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
int i, j, k, r = 0;
int next_param_base = 0, next_clip_base;
int max_color_exports = MAX2(key.ps.nr_cbufs, 1);
- /* Declarations used by llvm code */
- bool use_llvm = false;
bool indirect_gprs;
bool ring_outputs = false;
bool lds_outputs = false;
bool lds_inputs = false;
bool pos_emitted = false;
-#ifdef R600_USE_LLVM
- use_llvm = rscreen->b.debug_flags & DBG_LLVM;
-#endif
ctx.bc = &shader->bc;
ctx.shader = shader;
ctx.native_integers = true;
-
r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family,
rscreen->has_compressed_msaa_texturing);
ctx.tokens = tokens;
@@ -3043,19 +3030,9 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
ctx.file_offset[i] = 0;
}
-#ifdef R600_USE_LLVM
- if (use_llvm && ctx.info.indirect_files && (ctx.info.indirect_files & (1 << TGSI_FILE_CONSTANT)) != ctx.info.indirect_files) {
- fprintf(stderr, "Warning: R600 LLVM backend does not support "
- "indirect adressing. Falling back to TGSI "
- "backend.\n");
- use_llvm = 0;
- }
-#endif
if (ctx.type == TGSI_PROCESSOR_VERTEX) {
ctx.file_offset[TGSI_FILE_INPUT] = 1;
- if (!use_llvm) {
- r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS);
- }
+ r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS);
}
if (ctx.type == TGSI_PROCESSOR_FRAGMENT) {
if (ctx.bc->chip_class >= EVERGREEN)
@@ -3085,16 +3062,10 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
if (add_tess_inout)
ctx.file_offset[TGSI_FILE_INPUT]+=2;
}
- ctx.use_llvm = use_llvm;
- if (use_llvm) {
- ctx.file_offset[TGSI_FILE_OUTPUT] =
- ctx.file_offset[TGSI_FILE_INPUT];
- } else {
- ctx.file_offset[TGSI_FILE_OUTPUT] =
+ ctx.file_offset[TGSI_FILE_OUTPUT] =
ctx.file_offset[TGSI_FILE_INPUT] +
ctx.info.file_max[TGSI_FILE_INPUT] + 1;
- }
ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
@@ -3234,71 +3205,12 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
}
}
-/* LLVM backend setup */
-#ifdef R600_USE_LLVM
- if (use_llvm) {
- struct radeon_llvm_context radeon_llvm_ctx;
- LLVMModuleRef mod;
- bool dump = r600_can_dump_shader(&rscreen->b,
- tgsi_get_processor_type(tokens));
- boolean use_kill = false;
-
- memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx));
- radeon_llvm_ctx.type = ctx.type;
- radeon_llvm_ctx.two_side = shader->two_side;
- radeon_llvm_ctx.face_gpr = ctx.face_gpr;
- radeon_llvm_ctx.inputs_count = ctx.shader->ninput + 1;
- radeon_llvm_ctx.r600_inputs = ctx.shader->input;
- radeon_llvm_ctx.r600_outputs = ctx.shader->output;
- radeon_llvm_ctx.color_buffer_count = max_color_exports;
- radeon_llvm_ctx.chip_class = ctx.bc->chip_class;
- radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN);
- radeon_llvm_ctx.stream_outputs = &so;
- radeon_llvm_ctx.alpha_to_one = key.ps.alpha_to_one;
- radeon_llvm_ctx.has_compressed_msaa_texturing =
- ctx.bc->has_compressed_msaa_texturing;
- mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
- ctx.shader->has_txq_cube_array_z_comp = radeon_llvm_ctx.has_txq_cube_array_z_comp;
- ctx.shader->uses_tex_buffers = radeon_llvm_ctx.uses_tex_buffers;
-
- if (r600_llvm_compile(mod, rscreen->b.family, ctx.bc, &use_kill,
- dump, &rctx->b.debug)) {
- radeon_llvm_dispose(&radeon_llvm_ctx);
- use_llvm = 0;
- fprintf(stderr, "R600 LLVM backend failed to compile "
- "shader. Falling back to TGSI\n");
- } else {
- ctx.file_offset[TGSI_FILE_OUTPUT] =
- ctx.file_offset[TGSI_FILE_INPUT];
- }
- if (use_kill)
- ctx.shader->uses_kill = use_kill;
- radeon_llvm_dispose(&radeon_llvm_ctx);
- }
-#endif
-/* End of LLVM backend setup */
-
if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN)
shader->nr_ps_max_color_exports = 8;
- if (!use_llvm) {
- if (ctx.fragcoord_input >= 0) {
- if (ctx.bc->chip_class == CAYMAN) {
- for (j = 0 ; j < 4; j++) {
- struct r600_bytecode_alu alu;
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP1_RECIP_IEEE;
- alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
- alu.src[0].chan = 3;
-
- alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
- alu.dst.chan = j;
- alu.dst.write = (j == 3);
- alu.last = 1;
- if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
- return r;
- }
- } else {
+ if (ctx.fragcoord_input >= 0) {
+ if (ctx.bc->chip_class == CAYMAN) {
+ for (j = 0 ; j < 4; j++) {
struct r600_bytecode_alu alu;
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_RECIP_IEEE;
@@ -3306,87 +3218,100 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
alu.src[0].chan = 3;
alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
- alu.dst.chan = 3;
- alu.dst.write = 1;
+ alu.dst.chan = j;
+ alu.dst.write = (j == 3);
alu.last = 1;
if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
return r;
}
- }
-
- if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
+ } else {
struct r600_bytecode_alu alu;
- int r;
-
- /* GS thread with no output workaround - emit a cut at start of GS */
- if (ctx.bc->chip_class == R600)
- r600_bytecode_add_cfinst(ctx.bc, CF_OP_CUT_VERTEX);
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_RECIP_IEEE;
+ alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
+ alu.src[0].chan = 3;
- for (j = 0; j < 4; j++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP1_MOV;
- alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
- alu.src[0].value = 0;
- alu.dst.sel = ctx.gs_export_gpr_tregs[j];
- alu.dst.write = 1;
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx.bc, &alu);
- if (r)
- return r;
- }
+ alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
+ alu.dst.chan = 3;
+ alu.dst.write = 1;
+ alu.last = 1;
+ if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
+ return r;
}
+ }
+
+ if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
+ struct r600_bytecode_alu alu;
+ int r;
- if (ctx.type == TGSI_PROCESSOR_TESS_CTRL)
- r600_fetch_tess_io_info(&ctx);
+ /* GS thread with no output workaround - emit a cut at start of GS */
+ if (ctx.bc->chip_class == R600)
+ r600_bytecode_add_cfinst(ctx.bc, CF_OP_CUT_VERTEX);
- if (shader->two_side && ctx.colors_used) {
- if ((r = process_twoside_color_inputs(&ctx)))
+ for (j = 0; j < 4; j++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[0].value = 0;
+ alu.dst.sel = ctx.gs_export_gpr_tregs[j];
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx.bc, &alu);
+ if (r)
return r;
}
+ }
- tgsi_parse_init(&ctx.parse, tokens);
- while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
- tgsi_parse_token(&ctx.parse);
- switch (ctx.parse.FullToken.Token.Type) {
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- r = tgsi_is_supported(&ctx);
- if (r)
- goto out_err;
- ctx.max_driver_temp_used = 0;
- /* reserve first tmp for everyone */
- r600_get_temp(&ctx);
+ if (ctx.type == TGSI_PROCESSOR_TESS_CTRL)
+ r600_fetch_tess_io_info(&ctx);
+
+ if (shader->two_side && ctx.colors_used) {
+ if ((r = process_twoside_color_inputs(&ctx)))
+ return r;
+ }
- opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
- if ((r = tgsi_split_constant(&ctx)))
+ tgsi_parse_init(&ctx.parse, tokens);
+ while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
+ tgsi_parse_token(&ctx.parse);
+ switch (ctx.parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ r = tgsi_is_supported(&ctx);
+ if (r)
+ goto out_err;
+ ctx.max_driver_temp_used = 0;
+ /* reserve first tmp for everyone */
+ r600_get_temp(&ctx);
+
+ opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
+ if ((r = tgsi_split_constant(&ctx)))
+ goto out_err;
+ if ((r = tgsi_split_literal_constant(&ctx)))
+ goto out_err;
+ if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
+ if ((r = tgsi_split_gs_inputs(&ctx)))
goto out_err;
- if ((r = tgsi_split_literal_constant(&ctx)))
+ } else if (lds_inputs) {
+ if ((r = tgsi_split_lds_inputs(&ctx)))
goto out_err;
- if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
- if ((r = tgsi_split_gs_inputs(&ctx)))
- goto out_err;
- } else if (lds_inputs) {
- if ((r = tgsi_split_lds_inputs(&ctx)))
- goto out_err;
- }
- if (ctx.bc->chip_class == CAYMAN)
- ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
- else if (ctx.bc->chip_class >= EVERGREEN)
- ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
- else
- ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
- r = ctx.inst_info->process(&ctx);
+ }
+ if (ctx.bc->chip_class == CAYMAN)
+ ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
+ else if (ctx.bc->chip_class >= EVERGREEN)
+ ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
+ else
+ ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
+ r = ctx.inst_info->process(&ctx);
+ if (r)
+ goto out_err;
+
+ if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) {
+ r = r600_store_tcs_output(&ctx);
if (r)
goto out_err;
-
- if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) {
- r = r600_store_tcs_output(&ctx);
- if (r)
- goto out_err;
- }
- break;
- default:
- break;
}
+ break;
+ default:
+ break;
}
}
@@ -3437,8 +3362,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
alu.dst.write = (j == ochan);
if (j == 3)
alu.last = 1;
- if (!use_llvm)
- r = r600_bytecode_add_alu(ctx.bc, &alu);
+ r = r600_bytecode_add_alu(ctx.bc, &alu);
if (r)
return r;
}
@@ -3446,7 +3370,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
}
/* Add stream outputs. */
- if (!use_llvm && so.num_outputs) {
+ if (so.num_outputs) {
bool emit = false;
if (!lds_outputs && !ring_outputs && ctx.type == TGSI_PROCESSOR_VERTEX)
emit = true;
@@ -3709,31 +3633,27 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
}
}
/* add output to bytecode */
- if (!use_llvm) {
- for (i = 0; i < noutput; i++) {
- r = r600_bytecode_add_output(ctx.bc, &output[i]);
- if (r)
- goto out_err;
- }
+ for (i = 0; i < noutput; i++) {
+ r = r600_bytecode_add_output(ctx.bc, &output[i]);
+ if (r)
+ goto out_err;
}
}
/* add program end */
- if (!use_llvm) {
- if (ctx.bc->chip_class == CAYMAN)
- cm_bytecode_add_cf_end(ctx.bc);
- else {
- const struct cf_op_info *last = NULL;
+ if (ctx.bc->chip_class == CAYMAN)
+ cm_bytecode_add_cf_end(ctx.bc);
+ else {
+ const struct cf_op_info *last = NULL;
- if (ctx.bc->cf_last)
- last = r600_isa_cf(ctx.bc->cf_last->op);
+ if (ctx.bc->cf_last)
+ last = r600_isa_cf(ctx.bc->cf_last->op);
- /* alu clause instructions don't have EOP bit, so add NOP */
- if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS || ctx.bc->cf_last->op == CF_OP_POP || ctx.bc->cf_last->op == CF_OP_GDS)
- r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
+ /* alu clause instructions don't have EOP bit, so add NOP */
+ if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS || ctx.bc->cf_last->op == CF_OP_POP || ctx.bc->cf_last->op == CF_OP_GDS)
+ r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
- ctx.bc->cf_last->end_of_program = 1;
- }
+ ctx.bc->cf_last->end_of_program = 1;
}
/* check GPR limit - we have 124 = 128 - 4
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 2211e07..df41d3f 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -2029,10 +2029,6 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SQ_NON_EVENT);
}
- if (rctx->screen->b.trace_bo) {
- r600_trace_emit(rctx);
- }
-
/* Set the depth buffer as dirty. */
if (rctx->framebuffer.state.zsbuf) {
struct pipe_surface *surf = rctx->framebuffer.state.zsbuf;
@@ -2927,22 +2923,3 @@ void r600_init_common_state_functions(struct r600_context *rctx)
rctx->b.set_occlusion_query_state = r600_set_occlusion_query_state;
rctx->b.need_gfx_cs_space = r600_need_gfx_cs_space;
}
-
-void r600_trace_emit(struct r600_context *rctx)
-{
- struct r600_screen *rscreen = rctx->screen;
- struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
- uint64_t va;
- uint32_t reloc;
-
- va = rscreen->b.trace_bo->gpu_address;
- reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rscreen->b.trace_bo,
- RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
- radeon_emit(cs, PKT3(PKT3_MEM_WRITE, 3, 0));
- radeon_emit(cs, va & 0xFFFFFFFFUL);
- radeon_emit(cs, (va >> 32UL) & 0xFFUL);
- radeon_emit(cs, cs->cdw);
- radeon_emit(cs, rscreen->b.cs_count);
- radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, reloc);
-}
diff --git a/src/gallium/drivers/r600/sb/sb_expr.cpp b/src/gallium/drivers/r600/sb/sb_expr.cpp
index 556a05d..3dd3a48 100644
--- a/src/gallium/drivers/r600/sb/sb_expr.cpp
+++ b/src/gallium/drivers/r600/sb/sb_expr.cpp
@@ -598,9 +598,13 @@ bool expr_handler::fold_assoc(alu_node *n) {
unsigned op = n->bc.op;
bool allow_neg = false, cur_neg = false;
+ bool distribute_neg = false;
switch(op) {
case ALU_OP2_ADD:
+ distribute_neg = true;
+ allow_neg = true;
+ break;
case ALU_OP2_MUL:
case ALU_OP2_MUL_IEEE:
allow_neg = true;
@@ -632,7 +636,7 @@ bool expr_handler::fold_assoc(alu_node *n) {
if (v1->is_const()) {
literal arg = v1->get_const_value();
apply_alu_src_mod(a->bc, 1, arg);
- if (cur_neg)
+ if (cur_neg && distribute_neg)
arg.f = -arg.f;
if (a == n)
@@ -660,7 +664,7 @@ bool expr_handler::fold_assoc(alu_node *n) {
if (v0->is_const()) {
literal arg = v0->get_const_value();
apply_alu_src_mod(a->bc, 0, arg);
- if (cur_neg)
+ if (cur_neg && distribute_neg)
arg.f = -arg.f;
if (last_arg == 0) {
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index ea02827..eed9d83 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -229,7 +229,7 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags,
struct radeon_winsys_cs *cs = rctx->dma.cs;
if (cs->cdw)
- rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence, 0);
+ rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence);
if (fence)
rctx->ws->fence_reference(fence, rctx->last_sdma_fence);
}
@@ -318,7 +318,7 @@ bool r600_common_context_init(struct r600_common_context *rctx,
if (rscreen->info.has_sdma && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) {
rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
r600_flush_dma_ring,
- rctx, NULL);
+ rctx);
rctx->dma.flush = r600_flush_dma_ring;
}
@@ -379,7 +379,6 @@ static const struct debug_named_value common_debug_options[] = {
{ "tex", DBG_TEX, "Print texture info" },
{ "compute", DBG_COMPUTE, "Print compute info" },
{ "vm", DBG_VM, "Print virtual addresses when creating resources" },
- { "trace_cs", DBG_TRACE_CS, "Trace cs and write rlockup_<csid>.c file with faulty cs" },
{ "info", DBG_INFO, "Print driver information" },
/* shaders */
@@ -893,19 +892,6 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
pipe_mutex_init(rscreen->aux_context_lock);
pipe_mutex_init(rscreen->gpu_load_mutex);
- if (((rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 28) ||
- rscreen->info.drm_major == 3) &&
- (rscreen->debug_flags & DBG_TRACE_CS)) {
- rscreen->trace_bo = (struct r600_resource*)pipe_buffer_create(&rscreen->b,
- PIPE_BIND_CUSTOM,
- PIPE_USAGE_STAGING,
- 4096);
- if (rscreen->trace_bo) {
- rscreen->trace_ptr = rscreen->ws->buffer_map(rscreen->trace_bo->buf, NULL,
- PIPE_TRANSFER_UNSYNCHRONIZED);
- }
- }
-
if (rscreen->debug_flags & DBG_INFO) {
printf("pci_id = 0x%x\n", rscreen->info.pci_id);
printf("family = %i (%s)\n", rscreen->info.family,
@@ -951,9 +937,6 @@ void r600_destroy_common_screen(struct r600_common_screen *rscreen)
pipe_mutex_destroy(rscreen->aux_context_lock);
rscreen->aux_context->destroy(rscreen->aux_context);
- if (rscreen->trace_bo)
- pipe_resource_reference((struct pipe_resource**)&rscreen->trace_bo, NULL);
-
rscreen->ws->destroy(rscreen->ws);
FREE(rscreen);
}
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index cf8dcf7..381ad21 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -61,7 +61,7 @@
/* gap - reuse */
#define DBG_COMPUTE (1 << 2)
#define DBG_VM (1 << 3)
-#define DBG_TRACE_CS (1 << 4)
+/* gap - reuse */
/* shader logging */
#define DBG_FS (1 << 5)
#define DBG_VS (1 << 6)
@@ -303,10 +303,6 @@ struct r600_common_screen {
struct pipe_context *aux_context;
pipe_mutex aux_context_lock;
- struct r600_resource *trace_bo;
- uint32_t *trace_ptr;
- unsigned cs_count;
-
/* This must be in the screen, because UE4 uses one context for
* compilation and another one for rendering.
*/
@@ -610,6 +606,8 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
struct r600_atom *fb_state,
unsigned *buffers, unsigned *dirty_cbufs,
const union pipe_color_union *color);
+void r600_texture_disable_dcc(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex);
void r600_init_screen_texture_functions(struct r600_common_screen *rscreen);
void r600_init_context_texture_functions(struct r600_common_context *rctx);
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 115c728..7322f3e 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -201,9 +201,11 @@ static int r600_init_surface(struct r600_common_screen *rscreen,
static int r600_setup_surface(struct pipe_screen *screen,
struct r600_texture *rtex,
- unsigned pitch_in_bytes_override)
+ unsigned pitch_in_bytes_override,
+ unsigned offset)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+ unsigned i;
int r;
r = rscreen->ws->surface_init(rscreen->ws, &rtex->surface);
@@ -225,6 +227,11 @@ static int r600_setup_surface(struct pipe_screen *screen,
rtex->surface.stencil_level[0].offset = rtex->surface.level[0].slice_size;
}
}
+
+ if (offset) {
+ for (i = 0; i < Elements(rtex->surface.level); ++i)
+ rtex->surface.level[i].offset += offset;
+ }
return 0;
}
@@ -290,8 +297,8 @@ static void r600_texture_disable_cmask(struct r600_common_screen *rscreen,
p_atomic_inc(&rscreen->compressed_colortex_counter);
}
-static void r600_texture_disable_dcc(struct r600_common_screen *rscreen,
- struct r600_texture *rtex)
+void r600_texture_disable_dcc(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex)
{
struct r600_common_context *rctx =
(struct r600_common_context *)rscreen->aux_context;
@@ -366,6 +373,8 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
return rscreen->ws->buffer_get_handle(res->buf,
rtex->surface.level[0].pitch_bytes,
+ rtex->surface.level[0].offset,
+ rtex->surface.level[0].slice_size,
whandle);
}
@@ -629,8 +638,14 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 38)
return 0;
- /* Overalign HTILE on Stoney to fix piglit/depthstencil-render-miplevels 585. */
- if (rscreen->family == CHIP_STONEY)
+ /* Overalign HTILE on P2 configs to work around GPU hangs in
+ * piglit/depthstencil-render-miplevels 585.
+ *
+ * This has been confirmed to help Kabini & Stoney, where the hangs
+ * are always reproducible. I think I have seen the test hang
+ * on Carrizo too, though it was very rare there.
+ */
+ if (rscreen->chip_class >= CIK && num_pipes < 4)
num_pipes = 4;
switch (num_pipes) {
@@ -791,6 +806,7 @@ static struct r600_texture *
r600_texture_create_object(struct pipe_screen *screen,
const struct pipe_resource *base,
unsigned pitch_in_bytes_override,
+ unsigned offset,
struct pb_buffer *buf,
struct radeon_surf *surface)
{
@@ -812,7 +828,7 @@ r600_texture_create_object(struct pipe_screen *screen,
rtex->is_depth = util_format_has_depth(util_format_description(rtex->resource.b.b.format));
rtex->surface = *surface;
- if (r600_setup_surface(screen, rtex, pitch_in_bytes_override)) {
+ if (r600_setup_surface(screen, rtex, pitch_in_bytes_override, offset)) {
FREE(rtex);
return NULL;
}
@@ -979,7 +995,7 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
if (r) {
return NULL;
}
- return (struct pipe_resource *)r600_texture_create_object(screen, templ,
+ return (struct pipe_resource *)r600_texture_create_object(screen, templ, 0,
0, NULL, &surface);
}
@@ -990,7 +1006,7 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct pb_buffer *buf = NULL;
- unsigned stride = 0;
+ unsigned stride = 0, offset = 0;
unsigned array_mode;
struct radeon_surf surface;
int r;
@@ -1002,7 +1018,7 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen
templ->depth0 != 1 || templ->last_level != 0)
return NULL;
- buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, &stride);
+ buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, &stride, &offset);
if (!buf)
return NULL;
@@ -1029,8 +1045,8 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen
if (metadata.scanout)
surface.flags |= RADEON_SURF_SCANOUT;
- rtex = r600_texture_create_object(screen, templ,
- stride, buf, &surface);
+ rtex = r600_texture_create_object(screen, templ, stride,
+ offset, buf, &surface);
if (!rtex)
return NULL;
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
index bdee2f8..0a164bb 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -51,24 +51,8 @@ struct radeon_llvm_loop {
};
struct radeon_llvm_context {
-
struct lp_build_tgsi_soa_context soa;
- unsigned chip_class;
- unsigned type;
- unsigned face_gpr;
- unsigned two_side;
- unsigned inputs_count;
- struct r600_shader_io * r600_inputs;
- struct r600_shader_io * r600_outputs;
- struct pipe_stream_output_info *stream_outputs;
- unsigned color_buffer_count;
- unsigned fs_color_all;
- unsigned alpha_to_one;
- unsigned has_txq_cube_array_z_comp;
- unsigned uses_tex_buffers;
- unsigned has_compressed_msaa_texturing;
-
/*=== Front end configuration ===*/
/* Instructions that are not described by any of the TGSI opcodes. */
@@ -90,7 +74,6 @@ struct radeon_llvm_context {
*/
LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS];
LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS][TGSI_NUM_CHANNELS];
- unsigned output_reg_count;
/** This pointer is used to contain the temporary values.
* The amount of temporary used in tgsi can't be bound to a max value and
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index c74397f..fb883cb 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -363,9 +363,6 @@ static void emit_declaration(
ctx->soa.bld_base.base.elem_type, "");
}
}
-
- ctx->output_reg_count = MAX2(ctx->output_reg_count,
- decl->Range.Last + 1);
break;
}
diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c
index b8efc58..233f460 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -92,7 +92,7 @@ struct ruvd_decoder {
/* flush IB to the hardware */
static void flush(struct ruvd_decoder *dec)
{
- dec->ws->cs_flush(dec->cs, RADEON_FLUSH_ASYNC, NULL, 0);
+ dec->ws->cs_flush(dec->cs, RADEON_FLUSH_ASYNC, NULL);
}
/* add a new set register command to the IB */
@@ -1142,7 +1142,7 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
dec->stream_handle = rvid_alloc_stream_handle();
dec->screen = context->screen;
dec->ws = ws;
- dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL, NULL);
+ dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL);
if (!dec->cs) {
RVID_ERR("Can't get command submission context.\n");
goto error;
diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c
index 087d942..2ab74e9 100644
--- a/src/gallium/drivers/radeon/radeon_vce.c
+++ b/src/gallium/drivers/radeon/radeon_vce.c
@@ -56,7 +56,7 @@
*/
static void flush(struct rvce_encoder *enc)
{
- enc->ws->cs_flush(enc->cs, RADEON_FLUSH_ASYNC, NULL, 0);
+ enc->ws->cs_flush(enc->cs, RADEON_FLUSH_ASYNC, NULL);
enc->task_info_idx = 0;
enc->bs_idx = 0;
}
@@ -429,7 +429,7 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
enc->screen = context->screen;
enc->ws = ws;
- enc->cs = ws->cs_create(rctx->ctx, RING_VCE, rvce_cs_flush, enc, NULL);
+ enc->cs = ws->cs_create(rctx->ctx, RING_VCE, rvce_cs_flush, enc);
if (!enc->cs) {
RVID_ERR("Can't get command submission context.\n");
goto error;
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index b8a0659..d35e963 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -515,7 +515,7 @@ struct radeon_winsys {
*/
struct pb_buffer *(*buffer_from_handle)(struct radeon_winsys *ws,
struct winsys_handle *whandle,
- unsigned *stride);
+ unsigned *stride, unsigned *offset);
/**
* Get a winsys buffer from a user pointer. The resulting buffer can't
@@ -546,7 +546,8 @@ struct radeon_winsys {
* \return TRUE on success.
*/
boolean (*buffer_get_handle)(struct pb_buffer *buf,
- unsigned stride,
+ unsigned stride, unsigned offset,
+ unsigned slice_size,
struct winsys_handle *whandle);
/**
@@ -592,14 +593,12 @@ struct radeon_winsys {
* \param ring_type The ring type (GFX, DMA, UVD)
* \param flush Flush callback function associated with the command stream.
* \param user User pointer that will be passed to the flush callback.
- * \param trace_buf Trace buffer when tracing is enabled
*/
struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys_ctx *ctx,
enum ring_type ring_type,
void (*flush)(void *ctx, unsigned flags,
struct pipe_fence_handle **fence),
- void *flush_ctx,
- struct pb_buffer *trace_buf);
+ void *flush_ctx);
/**
* Destroy a command stream.
@@ -672,12 +671,10 @@ struct radeon_winsys {
* \param flags, RADEON_FLUSH_ASYNC or 0.
* \param fence Pointer to a fence. If non-NULL, a fence is inserted
* after the CS and is returned through this parameter.
- * \param cs_trace_id A unique identifier of the cs, used for tracing.
*/
void (*cs_flush)(struct radeon_winsys_cs *cs,
unsigned flags,
- struct pipe_fence_handle **fence,
- uint32_t cs_trace_id);
+ struct pipe_fence_handle **fence);
/**
* Return TRUE if a buffer is referenced by a command stream.
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index f9a6de4..e0dbec5 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -325,8 +325,8 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
}
static void
-si_decompress_color_textures(struct si_context *sctx,
- struct si_textures_info *textures)
+si_decompress_sampler_color_textures(struct si_context *sctx,
+ struct si_textures_info *textures)
{
unsigned i;
unsigned mask = textures->compressed_colortex_mask;
@@ -350,6 +350,33 @@ si_decompress_color_textures(struct si_context *sctx,
}
}
+static void
+si_decompress_image_color_textures(struct si_context *sctx,
+ struct si_images_info *images)
+{
+ unsigned i;
+ unsigned mask = images->compressed_colortex_mask;
+
+ while (mask) {
+ const struct pipe_image_view *view;
+ struct r600_texture *tex;
+
+ i = u_bit_scan(&mask);
+
+ view = &images->views[i];
+ assert(view->resource->target != PIPE_BUFFER);
+
+ tex = (struct r600_texture *)view->resource;
+ if (!tex->cmask.size && !tex->fmask.size && !tex->dcc_offset)
+ continue;
+
+ si_blit_decompress_color(&sctx->b.b, tex,
+ view->u.tex.level, view->u.tex.level,
+ 0, util_max_layer(&tex->resource.b.b, view->u.tex.level),
+ false);
+ }
+}
+
void si_decompress_textures(struct si_context *sctx)
{
unsigned compressed_colortex_counter;
@@ -370,7 +397,10 @@ void si_decompress_textures(struct si_context *sctx)
si_flush_depth_textures(sctx, &sctx->samplers[i]);
}
if (sctx->samplers[i].compressed_colortex_mask) {
- si_decompress_color_textures(sctx, &sctx->samplers[i]);
+ si_decompress_sampler_color_textures(sctx, &sctx->samplers[i]);
+ }
+ if (sctx->images[i].compressed_colortex_mask) {
+ si_decompress_image_color_textures(sctx, &sctx->images[i]);
}
}
}
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index d12b3e6..815b87b 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -64,7 +64,8 @@
#include "util/u_upload_mgr.h"
-/* NULL image and buffer descriptor.
+/* NULL image and buffer descriptor for textures (alpha = 1) and images
+ * (alpha = 0).
*
* For images, all fields must be zero except for the swizzle, which
* supports arbitrary combinations of 0s and 1s. The texture type must be
@@ -74,7 +75,7 @@
*
* This is the only reason why the buffer descriptor must be in words [4:7].
*/
-static uint32_t null_descriptor[8] = {
+static uint32_t null_texture_descriptor[8] = {
0,
0,
0,
@@ -84,10 +85,20 @@ static uint32_t null_descriptor[8] = {
* descriptor */
};
+static uint32_t null_image_descriptor[8] = {
+ 0,
+ 0,
+ 0,
+ S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
+ /* the rest must contain zeros, which is also used by the buffer
+ * descriptor */
+};
+
static void si_init_descriptors(struct si_descriptors *desc,
unsigned shader_userdata_index,
unsigned element_dw_size,
- unsigned num_elements)
+ unsigned num_elements,
+ const uint32_t *null_descriptor)
{
int i;
@@ -100,10 +111,12 @@ static void si_init_descriptors(struct si_descriptors *desc,
desc->shader_userdata_offset = shader_userdata_index * 4;
/* Initialize the array to NULL descriptors if the element size is 8. */
- if (element_dw_size % 8 == 0)
+ if (null_descriptor) {
+ assert(element_dw_size % 8 == 0);
for (i = 0; i < num_elements * element_dw_size / 8; i++)
- memcpy(desc->list + i*8, null_descriptor,
- sizeof(null_descriptor));
+ memcpy(desc->list + i * 8, null_descriptor,
+ 8 * 4);
+ }
}
static void si_release_descriptors(struct si_descriptors *desc)
@@ -210,7 +223,7 @@ static void si_set_sampler_view(struct si_context *sctx,
} else {
/* Disable FMASK and bind sampler state in [12:15]. */
memcpy(views->desc.list + slot*16 + 8,
- null_descriptor, 4*4);
+ null_texture_descriptor, 4*4);
if (views->sampler_states[slot])
memcpy(views->desc.list + slot*16 + 12,
@@ -220,9 +233,9 @@ static void si_set_sampler_view(struct si_context *sctx,
views->desc.enabled_mask |= 1llu << slot;
} else {
pipe_sampler_view_reference(&views->views[slot], NULL);
- memcpy(views->desc.list + slot*16, null_descriptor, 8*4);
+ memcpy(views->desc.list + slot*16, null_texture_descriptor, 8*4);
/* Only clear the lower dwords of FMASK. */
- memcpy(views->desc.list + slot*16 + 8, null_descriptor, 4*4);
+ memcpy(views->desc.list + slot*16 + 8, null_texture_descriptor, 4*4);
views->desc.enabled_mask &= ~(1llu << slot);
}
@@ -301,6 +314,160 @@ si_samplers_update_compressed_colortex_mask(struct si_textures_info *samplers)
}
}
+/* IMAGE VIEWS */
+
+static void
+si_release_image_views(struct si_images_info *images)
+{
+ unsigned i;
+
+ for (i = 0; i < SI_NUM_IMAGES; ++i) {
+ struct pipe_image_view *view = &images->views[i];
+
+ pipe_resource_reference(&view->resource, NULL);
+ }
+
+ si_release_descriptors(&images->desc);
+}
+
+static void
+si_image_views_begin_new_cs(struct si_context *sctx, struct si_images_info *images)
+{
+ uint mask = images->desc.enabled_mask;
+
+ /* Add buffers to the CS. */
+ while (mask) {
+ int i = u_bit_scan(&mask);
+ struct pipe_image_view *view = &images->views[i];
+
+ assert(view->resource);
+
+ si_sampler_view_add_buffer(sctx, view->resource);
+ }
+
+ if (images->desc.buffer) {
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
+ images->desc.buffer,
+ RADEON_USAGE_READ,
+ RADEON_PRIO_DESCRIPTORS);
+ }
+}
+
+static void
+si_disable_shader_image(struct si_images_info *images, unsigned slot)
+{
+ if (images->desc.enabled_mask & (1llu << slot)) {
+ pipe_resource_reference(&images->views[slot].resource, NULL);
+ images->compressed_colortex_mask &= ~(1 << slot);
+
+ memcpy(images->desc.list + slot*8, null_image_descriptor, 8*4);
+ images->desc.enabled_mask &= ~(1llu << slot);
+ images->desc.list_dirty = true;
+ }
+}
+
+static void
+si_set_shader_images(struct pipe_context *pipe, unsigned shader,
+ unsigned start_slot, unsigned count,
+ struct pipe_image_view *views)
+{
+ struct si_context *ctx = (struct si_context *)pipe;
+ struct si_screen *screen = ctx->screen;
+ struct si_images_info *images = &ctx->images[shader];
+ unsigned i, slot;
+
+ assert(shader < SI_NUM_SHADERS);
+
+ if (!count)
+ return;
+
+ assert(start_slot + count <= SI_NUM_IMAGES);
+
+ for (i = 0, slot = start_slot; i < count; ++i, ++slot) {
+ struct r600_resource *res;
+
+ if (!views || !views[i].resource) {
+ si_disable_shader_image(images, slot);
+ continue;
+ }
+
+ res = (struct r600_resource *)views[i].resource;
+ util_copy_image_view(&images->views[slot], &views[i]);
+
+ si_sampler_view_add_buffer(ctx, &res->b.b);
+
+ if (res->b.b.target == PIPE_BUFFER) {
+ si_make_buffer_descriptor(screen, res,
+ views[i].format,
+ views[i].u.buf.first_element,
+ views[i].u.buf.last_element,
+ images->desc.list + slot * 8);
+ images->compressed_colortex_mask &= ~(1 << slot);
+ } else {
+ static const unsigned char swizzle[4] = { 0, 1, 2, 3 };
+ struct r600_texture *tex = (struct r600_texture *)res;
+ unsigned level;
+ unsigned width, height, depth;
+
+ assert(!tex->is_depth);
+ assert(tex->fmask.size == 0);
+
+ if (tex->dcc_offset &&
+ views[i].access & PIPE_IMAGE_ACCESS_WRITE)
+ r600_texture_disable_dcc(&screen->b, tex);
+
+ if (is_compressed_colortex(tex)) {
+ images->compressed_colortex_mask |= 1 << slot;
+ } else {
+ images->compressed_colortex_mask &= ~(1 << slot);
+ }
+
+ /* Always force the base level to the selected level.
+ *
+ * This is required for 3D textures, where otherwise
+ * selecting a single slice for non-layered bindings
+ * fails. It doesn't hurt the other targets.
+ */
+ level = views[i].u.tex.level;
+ width = u_minify(res->b.b.width0, level);
+ height = u_minify(res->b.b.height0, level);
+ depth = u_minify(res->b.b.depth0, level);
+
+ si_make_texture_descriptor(screen, tex, false, res->b.b.target,
+ views[i].format, swizzle,
+ level, 0, 0,
+ views[i].u.tex.first_layer, views[i].u.tex.last_layer,
+ width, height, depth,
+ images->desc.list + slot * 8,
+ NULL);
+ }
+
+ images->desc.enabled_mask |= 1llu << slot;
+ images->desc.list_dirty = true;
+ }
+}
+
+static void
+si_images_update_compressed_colortex_mask(struct si_images_info *images)
+{
+ uint64_t mask = images->desc.enabled_mask;
+
+ while (mask) {
+ int i = u_bit_scan64(&mask);
+ struct pipe_resource *res = images->views[i].resource;
+
+ if (res && res->target != PIPE_BUFFER) {
+ struct r600_texture *rtex = (struct r600_texture *)res;
+
+ if (is_compressed_colortex(rtex)) {
+ images->compressed_colortex_mask |= 1 << i;
+ } else {
+ images->compressed_colortex_mask &= ~(1 << i);
+ }
+ }
+ }
+}
+
/* SAMPLER STATES */
static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
@@ -351,7 +518,7 @@ static void si_init_buffer_resources(struct si_buffer_resources *buffers,
buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
si_init_descriptors(&buffers->desc, shader_userdata_index, 4,
- num_buffers);
+ num_buffers, NULL);
}
static void si_release_buffer_resources(struct si_buffer_resources *buffers)
@@ -804,6 +971,7 @@ void si_update_compressed_colortex_masks(struct si_context *sctx)
{
for (int i = 0; i < SI_NUM_SHADERS; ++i) {
si_samplers_update_compressed_colortex_mask(&sctx->samplers[i]);
+ si_images_update_compressed_colortex_mask(&sctx->images[i]);
}
}
@@ -925,6 +1093,28 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
}
}
}
+
+ /* Shader images */
+ for (shader = 0; shader < SI_NUM_SHADERS; ++shader) {
+ struct si_images_info *images = &sctx->images[shader];
+ unsigned mask = images->desc.enabled_mask;
+
+ while (mask) {
+ unsigned i = u_bit_scan(&mask);
+
+ if (images->views[i].resource == buf) {
+ si_desc_reset_buffer_offset(
+ ctx, images->desc.list + i * 8 + 4,
+ old_va, buf);
+ images->desc.list_dirty = true;
+
+ radeon_add_to_buffer_list(
+ &sctx->b, &sctx->b.gfx, rbuffer,
+ RADEON_USAGE_READWRITE,
+ RADEON_PRIO_SAMPLER_BUFFER);
+ }
+ }
+ }
}
/* SHADER USER DATA */
@@ -1055,6 +1245,7 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom)
si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, base, false);
si_emit_shader_pointer(sctx, &sctx->samplers[i].views.desc, base, false);
+ si_emit_shader_pointer(sctx, &sctx->images[i].desc, base, false);
}
si_emit_shader_pointer(sctx, &sctx->vertex_buffers, sh_base[PIPE_SHADER_VERTEX], false);
}
@@ -1074,14 +1265,20 @@ void si_init_all_descriptors(struct si_context *sctx)
RADEON_USAGE_READWRITE, RADEON_PRIO_RINGS_STREAMOUT);
si_init_descriptors(&sctx->samplers[i].views.desc,
- SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS);
+ SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS,
+ null_texture_descriptor);
+
+ si_init_descriptors(&sctx->images[i].desc,
+ SI_SGPR_IMAGES, 8, SI_NUM_IMAGES,
+ null_image_descriptor);
}
si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
- 4, SI_NUM_VERTEX_BUFFERS);
+ 4, SI_NUM_VERTEX_BUFFERS, NULL);
/* Set pipe_context functions. */
sctx->b.b.bind_sampler_states = si_bind_sampler_states;
+ sctx->b.b.set_shader_images = si_set_shader_images;
sctx->b.b.set_constant_buffer = si_set_constant_buffer;
sctx->b.b.set_sampler_views = si_set_sampler_views;
sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
@@ -1105,7 +1302,8 @@ bool si_upload_shader_descriptors(struct si_context *sctx)
for (i = 0; i < SI_NUM_SHADERS; i++) {
if (!si_upload_descriptors(sctx, &sctx->const_buffers[i].desc) ||
!si_upload_descriptors(sctx, &sctx->rw_buffers[i].desc) ||
- !si_upload_descriptors(sctx, &sctx->samplers[i].views.desc))
+ !si_upload_descriptors(sctx, &sctx->samplers[i].views.desc) ||
+ !si_upload_descriptors(sctx, &sctx->images[i].desc))
return false;
}
return si_upload_vertex_buffer_descriptors(sctx);
@@ -1119,6 +1317,7 @@ void si_release_all_descriptors(struct si_context *sctx)
si_release_buffer_resources(&sctx->const_buffers[i]);
si_release_buffer_resources(&sctx->rw_buffers[i]);
si_release_sampler_views(&sctx->samplers[i].views);
+ si_release_image_views(&sctx->images[i]);
}
si_release_descriptors(&sctx->vertex_buffers);
}
@@ -1131,6 +1330,7 @@ void si_all_descriptors_begin_new_cs(struct si_context *sctx)
si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]);
si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers[i]);
si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views);
+ si_image_views_begin_new_cs(sctx, &sctx->images[i]);
}
si_vertex_buffers_begin_new_cs(sctx);
si_shader_userdata_begin_new_cs(sctx);
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index b5a4034..8c900a4 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -118,8 +118,7 @@ void si_context_gfx_flush(void *context, unsigned flags,
}
/* Flush the CS. */
- ws->cs_flush(cs, flags, &ctx->last_gfx_fence,
- ctx->screen->b.cs_count++);
+ ws->cs_flush(cs, flags, &ctx->last_gfx_fence);
if (fence)
ws->fence_reference(fence, ctx->last_gfx_fence);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 8b50a49..dd1103e 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -140,9 +140,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
sctx->b.b.create_video_buffer = vl_video_buffer_create;
}
- sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX, si_context_gfx_flush,
- sctx, sscreen->b.trace_bo ?
- sscreen->b.trace_bo->buf : NULL);
+ sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX,
+ si_context_gfx_flush, sctx);
sctx->b.gfx.flush = si_context_gfx_flush;
/* Border colors. */
@@ -539,8 +538,9 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
- case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
+ return HAVE_LLVM >= 0x0309 ? SI_NUM_IMAGES : 0;
}
return 0;
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 0fef5f7..6d0d687 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -141,6 +141,12 @@ struct si_textures_info {
uint32_t compressed_colortex_mask;
};
+struct si_images_info { /* shader image bindings; si_context holds one per shader stage */
+ struct si_descriptors desc; /* descriptor list, set up with 8 dwords per element and SI_NUM_IMAGES elements */
+ struct pipe_image_view views[SI_NUM_IMAGES]; /* bound image views, indexed by slot */
+ uint32_t compressed_colortex_mask; /* bit i is set when the texture in slot i satisfies is_compressed_colortex() */
+};
+
struct si_framebuffer {
struct r600_atom atom;
struct pipe_framebuffer_state state;
@@ -251,6 +257,7 @@ struct si_context {
struct si_buffer_resources const_buffers[SI_NUM_SHADERS];
struct si_buffer_resources rw_buffers[SI_NUM_SHADERS];
struct si_textures_info samplers[SI_NUM_SHADERS];
+ struct si_images_info images[SI_NUM_SHADERS];
/* other shader resources */
struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on CIK */
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 8c1151a..9eb531f 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -40,6 +40,7 @@
#include "util/u_memory.h"
#include "util/u_pstipple.h"
#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_build.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_dump.h"
@@ -99,6 +100,7 @@ struct si_shader_context
LLVMValueRef sampler_views[SI_NUM_SAMPLERS];
LLVMValueRef sampler_states[SI_NUM_SAMPLERS];
LLVMValueRef fmasks[SI_NUM_USER_SAMPLERS];
+ LLVMValueRef images[SI_NUM_IMAGES];
LLVMValueRef so_buffers[4];
LLVMValueRef esgs_ring;
LLVMValueRef gsvs_ring[4];
@@ -530,6 +532,37 @@ static LLVMValueRef get_indirect_index(struct si_shader_context *ctx,
}
/**
+ * Like get_indirect_index, but restricts the return value to a (possibly
+ * undefined) value inside [0..num).
+ */
+static LLVMValueRef get_bounded_indirect_index(struct si_shader_context *ctx,
+					       const struct tgsi_ind_register *ind,
+					       int rel_index, unsigned num)
+{
+	LLVMBuilderRef builder = ctx->radeon_bld.gallivm.builder;
+	LLVMValueRef index = get_indirect_index(ctx, ind, rel_index);
+	LLVMValueRef last = LLVMConstInt(ctx->i32, num - 1, 0);
+	LLVMValueRef in_range;
+
+	/* For a power-of-two count, masking with (num - 1) keeps the index
+	 * inside [0..num).
+	 */
+	if (util_is_power_of_two(num))
+		return LLVMBuildAnd(builder, index, last, "");
+
+	/* Otherwise clamp: pick the index if it is <= num - 1 (unsigned),
+	 * else num - 1.
+	 *
+	 * In theory, this pattern should result in code that is as good as
+	 * the bit-wise AND above. In practice, LLVM generates worse code (at
+	 * the time of writing), because its value tracking is not strong
+	 * enough.
+	 */
+	in_range = LLVMBuildICmp(builder, LLVMIntULE, index, last, "");
+	return LLVMBuildSelect(builder, in_range, index, last, "");
+}
+
+
+/**
* Calculate a dword address given an input or output register and a stride.
*/
static LLVMValueRef get_dw_address(struct si_shader_context *ctx,
@@ -2656,10 +2689,90 @@ static void si_llvm_return_fs_outputs(struct lp_build_tgsi_context *bld_base)
ctx->return_value = ret;
}
+/**
+ * Read the size of a buffer, in number of elements, out of its v8i32
+ * resource descriptor and return it as an i32.
+ *
+ * The size is taken from dword 6 of \p descriptor. On VI and later it is
+ * divided by the stride stored in bits [29:16] of dword 5.
+ */
+static LLVMValueRef get_buffer_size(
+	struct lp_build_tgsi_context *bld_base,
+	LLVMValueRef descriptor)
+{
+	struct si_shader_context *ctx = si_shader_context(bld_base);
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	LLVMBuilderRef builder = gallivm->builder;
+	LLVMValueRef num_records;
+	LLVMValueRef stride;
+
+	num_records = LLVMBuildExtractElement(builder, descriptor,
+					      lp_build_const_int32(gallivm, 6), "");
+
+	if (ctx->screen->b.chip_class < VI)
+		return num_records;
+
+	/* On VI, the descriptor contains the size in bytes,
+	 * but TXQ must return the size in elements.
+	 * The stride is always non-zero for resources using TXQ.
+	 */
+	stride = LLVMBuildExtractElement(builder, descriptor,
+					 lp_build_const_int32(gallivm, 5), "");
+	stride = LLVMBuildLShr(builder, stride,
+			       lp_build_const_int32(gallivm, 16), "");
+	stride = LLVMBuildAnd(builder, stride,
+			      lp_build_const_int32(gallivm, 0x3FFF), "");
+
+	return LLVMBuildUDiv(builder, num_records, stride, "");
+}
+
+/**
+ * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
+ * intrinsic names).
+ *
+ * \param type     an i32 type or a vector-of-i32 type
+ * \param buf      destination buffer; the result is NUL-terminated
+ * \param bufsize  size of \p buf in bytes, at least 8
+ */
+static void build_int_type_name(
+	LLVMTypeRef type,
+	char *buf, unsigned bufsize)
+{
+	/* A two-digit vector size such as "v16i32" needs 7 bytes including
+	 * the terminator, so 6 is not enough to rule out a truncated name.
+	 */
+	assert(bufsize >= 8);
+
+	if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
+		snprintf(buf, bufsize, "v%ui32",
+			 LLVMGetVectorSize(type));
+	else
+		snprintf(buf, bufsize, "i32"); /* bounded, unlike strcpy */
+}
+
static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data);
+/* Emit a call to empty inline assembly that is flagged as having side
+ * effects, so that optimizations (at least of memory accesses) are not
+ * performed across the current point in the program.
+ */
+static void emit_optimization_barrier(struct si_shader_context *ctx)
+{
+	LLVMTypeRef void_fn = LLVMFunctionType(ctx->voidt, NULL, 0, false);
+	LLVMValueRef empty_asm = LLVMConstInlineAsm(void_fn, "", "", true, false);
+
+	LLVMBuildCall(ctx->radeon_bld.gallivm.builder, empty_asm, NULL, 0, "");
+}
+
+static void membar_emit(
+		const struct lp_build_tgsi_action *action,
+		struct lp_build_tgsi_context *bld_base,
+		struct lp_build_emit_data *emit_data)
+{
+	/* memoryBarrier only makes guarantees about atomics and coherent
+	 * image accesses (which bypass TC L1), so no special cache handling
+	 * is emitted here.
+	 *
+	 * LLVM must still be kept from re-ordering loads across the
+	 * barrier, hence the optimization barrier.
+	 */
+	emit_optimization_barrier(si_shader_context(bld_base));
+}
+
static bool tgsi_is_array_sampler(unsigned target)
{
return target == TGSI_TEXTURE_1D_ARRAY ||
@@ -2671,6 +2784,459 @@ static bool tgsi_is_array_sampler(unsigned target)
target == TGSI_TEXTURE_2D_ARRAY_MSAA;
}
+/* Return true for the image targets that image_append_args and
+ * resq_fetch_args flag with the "da" bit.
+ */
+static bool tgsi_is_array_image(unsigned target)
+{
+	switch (target) {
+	case TGSI_TEXTURE_3D:
+	case TGSI_TEXTURE_CUBE:
+	case TGSI_TEXTURE_1D_ARRAY:
+	case TGSI_TEXTURE_2D_ARRAY:
+	case TGSI_TEXTURE_CUBE_ARRAY:
+	case TGSI_TEXTURE_2D_ARRAY_MSAA:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/**
+ * Return a copy of the 256-bit resource descriptor \p rsrc with the DCC
+ * enable bit forced off. On CIK and older the descriptor is returned as is.
+ *
+ * At least on Tonga, image stores to images whose DCC is enabled and
+ * non-trivial can eventually lock up the GPU. An application can trigger
+ * this by binding an image as read-only and then running a shader that
+ * writes to it. The OpenGL spec permits almost arbitrarily bad behavior in
+ * that case (including program termination), but being nicer is cheap:
+ * with DCC disabled in the shader the results are still undefined, yet the
+ * lockup is avoided.
+ */
+static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
+				  LLVMValueRef rsrc)
+{
+	LLVMBuilderRef builder;
+	LLVMValueRef dword_idx, clear_mask, dword;
+
+	if (ctx->screen->b.chip_class <= CIK)
+		return rsrc;
+
+	builder = ctx->radeon_bld.gallivm.builder;
+	dword_idx = LLVMConstInt(ctx->i32, 6, 0);
+	clear_mask = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0);
+
+	/* AND dword 6 with the mask and write it back into the vector. */
+	dword = LLVMBuildExtractElement(builder, rsrc, dword_idx, "");
+	dword = LLVMBuildAnd(builder, dword, clear_mask, "");
+	return LLVMBuildInsertElement(builder, rsrc, dword, dword_idx, "");
+}
+
+/**
+ * Fetch the resource descriptor of \p image into \p rsrc.
+ *
+ * A directly addressed image uses the descriptor already stored in
+ * ctx->images. An indirectly addressed one is loaded from the
+ * SI_PARAM_IMAGES array with a bounded index, and \p dcc_off is applied
+ * to the loaded descriptor only in that case.
+ */
+static void
+image_fetch_rsrc(
+	struct lp_build_tgsi_context *bld_base,
+	const struct tgsi_full_src_register *image,
+	bool dcc_off,
+	LLVMValueRef *rsrc)
+{
+	struct si_shader_context *ctx = si_shader_context(bld_base);
+	LLVMValueRef index, list, desc;
+
+	assert(image->Register.File == TGSI_FILE_IMAGE);
+
+	if (!image->Register.Indirect) {
+		/* Fast path: use preloaded resources */
+		*rsrc = ctx->images[image->Register.Index];
+		return;
+	}
+
+	/* Indexing and manual load.
+	 *
+	 * From the GL_ARB_shader_image_load_store extension spec:
+	 *
+	 *    If a shader performs an image load, store, or atomic
+	 *    operation using an image variable declared as an array,
+	 *    and if the index used to select an individual element is
+	 *    negative or greater than or equal to the size of the
+	 *    array, the results of the operation are undefined but may
+	 *    not lead to termination.
+	 */
+	index = get_bounded_indirect_index(ctx, &image->Indirect,
+					   image->Register.Index,
+					   SI_NUM_IMAGES);
+
+	list = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_IMAGES);
+	desc = build_indexed_load_const(ctx, list, index);
+	*rsrc = dcc_off ? force_dcc_off(ctx, desc) : desc;
+}
+
+/**
+ * Fetch the coordinates of an image instruction from source operand \p src.
+ *
+ * Each channel is bitcast to the unsigned element type. A single
+ * coordinate is returned as a scalar; otherwise the coordinates are
+ * gathered into a vector, with three coordinates padded to four by an
+ * undef element.
+ */
+static LLVMValueRef image_fetch_coords(
+		struct lp_build_tgsi_context *bld_base,
+		const struct tgsi_full_instruction *inst,
+		unsigned src)
+{
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	LLVMBuilderRef builder = gallivm->builder;
+	LLVMTypeRef uint_type = bld_base->uint_bld.elem_type;
+	LLVMValueRef coords[4];
+	int sample;
+	unsigned count = tgsi_util_get_texture_coord_dim(inst->Memory.Texture,
+							  &sample);
+	unsigned c;
+
+	for (c = 0; c < count; ++c) {
+		LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, src, c);
+
+		coords[c] = LLVMBuildBitCast(builder, value, uint_type, "");
+	}
+
+	switch (count) {
+	case 1:
+		return coords[0];
+	case 3:
+		/* LLVM has difficulties lowering 3-element vectors. */
+		coords[3] = bld_base->uint_bld.undef;
+		count = 4;
+		break;
+	default:
+		break;
+	}
+
+	return lp_build_gather_values(gallivm, coords, count);
+}
+
+/**
+ * Append the trailing mode bits of the image intrinsics to the argument
+ * list: r128, da, glc (omitted when \p atomic is set) and slc.
+ */
+static void image_append_args(
+		struct si_shader_context *ctx,
+		struct lp_build_emit_data * emit_data,
+		unsigned target,
+		bool atomic)
+{
+	const struct tgsi_full_instruction *inst = emit_data->inst;
+	LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
+	LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
+	LLVMValueRef da = tgsi_is_array_image(target) ? i1true : i1false;
+	unsigned n = emit_data->arg_count;
+
+	emit_data->args[n++] = i1false; /* r128 */
+	emit_data->args[n++] = da; /* da */
+	if (!atomic) {
+		/* glc is set for coherent or volatile accesses. */
+		if (inst->Memory.Qualifier &
+		    (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE))
+			emit_data->args[n++] = i1true; /* glc */
+		else
+			emit_data->args[n++] = i1false; /* glc */
+	}
+	emit_data->args[n++] = i1false; /* slc */
+
+	emit_data->arg_count = n;
+}
+
+/**
+ * Append the resource and addressing arguments of the buffer intrinsics:
+ * the v4i32 resource, vindex, a zero voffset, glc (omitted when \p atomic
+ * is set) and slc.
+ *
+ * \param rsrc the 256 bit resource; element 1 of its v2i128 view is used
+ * \param index index into the buffer
+ */
+static void buffer_append_args(
+		struct si_shader_context *ctx,
+		struct lp_build_emit_data *emit_data,
+		LLVMValueRef rsrc,
+		LLVMValueRef index,
+		bool atomic)
+{
+	struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
+	struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
+	const struct tgsi_full_instruction *inst = emit_data->inst;
+	LLVMBuilderRef builder = gallivm->builder;
+	LLVMTypeRef v2i128 = LLVMVectorType(ctx->i128, 2);
+	LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
+	LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
+	LLVMValueRef buf_rsrc;
+	unsigned n = emit_data->arg_count;
+
+	/* View the descriptor as two i128 halves and keep element 1. */
+	buf_rsrc = LLVMBuildBitCast(builder, rsrc, v2i128, "");
+	buf_rsrc = LLVMBuildExtractElement(builder, buf_rsrc,
+					   bld_base->uint_bld.one, "");
+	buf_rsrc = LLVMBuildBitCast(builder, buf_rsrc, ctx->v4i32, "");
+
+	emit_data->args[n++] = buf_rsrc;
+	emit_data->args[n++] = index; /* vindex */
+	emit_data->args[n++] = bld_base->uint_bld.zero; /* voffset */
+	if (!atomic) {
+		if (inst->Memory.Qualifier &
+		    (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE))
+			emit_data->args[n++] = i1true; /* glc */
+		else
+			emit_data->args[n++] = i1false; /* glc */
+	}
+	emit_data->args[n++] = i1false; /* slc */
+
+	emit_data->arg_count = n;
+}
+
+/* Gather the arguments of a TGSI LOAD: the resource comes from source 0
+ * and the coordinates from source 1. The result type is a 4-element
+ * vector of the base element type.
+ */
+static void load_fetch_args(
+		struct lp_build_tgsi_context * bld_base,
+		struct lp_build_emit_data * emit_data)
+{
+	struct si_shader_context *ctx = si_shader_context(bld_base);
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	const struct tgsi_full_instruction * inst = emit_data->inst;
+	unsigned target = inst->Memory.Texture;
+	LLVMValueRef rsrc;
+	LLVMValueRef coords;
+
+	emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
+
+	image_fetch_rsrc(bld_base, &inst->Src[0], false, &rsrc);
+	coords = image_fetch_coords(bld_base, inst, 1);
+
+	if (target == TGSI_TEXTURE_BUFFER) {
+		buffer_append_args(ctx, emit_data, rsrc, coords, false);
+		return;
+	}
+
+	emit_data->args[0] = coords;
+	emit_data->args[1] = rsrc;
+	emit_data->args[2] = lp_build_const_int32(gallivm, 15); /* dmask */
+	emit_data->arg_count = 3;
+
+	image_append_args(ctx, emit_data, target, false);
+}
+
+/* Emit the intrinsic call for a TGSI LOAD. Buffers use the fixed buffer
+ * load intrinsic; other targets use the image load intrinsic suffixed with
+ * the type of the coordinate argument (args[0]).
+ */
+static void load_emit(
+		const struct lp_build_tgsi_action *action,
+		struct lp_build_tgsi_context *bld_base,
+		struct lp_build_emit_data *emit_data)
+{
+	struct si_shader_context *ctx = si_shader_context(bld_base);
+	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+	const struct tgsi_full_instruction * inst = emit_data->inst;
+	char image_intrinsic[32];
+	char coords_type[8];
+	const char *name;
+
+	/* Volatile loads are preceded by an optimization barrier. */
+	if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
+		emit_optimization_barrier(ctx);
+
+	if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
+		name = "llvm.amdgcn.buffer.load.format.v4f32";
+	} else {
+		build_int_type_name(LLVMTypeOf(emit_data->args[0]),
+				    coords_type, sizeof(coords_type));
+		snprintf(image_intrinsic, sizeof(image_intrinsic),
+			 "llvm.amdgcn.image.load.%s", coords_type);
+		name = image_intrinsic;
+	}
+
+	emit_data->output[emit_data->chan] =
+		lp_build_intrinsic(builder, name, emit_data->dst_type,
+				   emit_data->args, emit_data->arg_count,
+				   LLVMReadOnlyAttribute | LLVMNoUnwindAttribute);
+}
+
+/* Gather the arguments of a TGSI STORE: the image is the destination
+ * register, the coordinates come from source 0 and the four data channels
+ * from source 1. Descriptors for non-buffer targets are fetched with the
+ * dcc_off flag set.
+ */
+static void store_fetch_args(
+		struct lp_build_tgsi_context * bld_base,
+		struct lp_build_emit_data * emit_data)
+{
+	struct si_shader_context *ctx = si_shader_context(bld_base);
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	const struct tgsi_full_instruction * inst = emit_data->inst;
+	unsigned target = inst->Memory.Texture;
+	bool is_buffer = target == TGSI_TEXTURE_BUFFER;
+	struct tgsi_full_src_register image;
+	LLVMValueRef texel[4];
+	LLVMValueRef data, coords, rsrc;
+	unsigned c;
+
+	emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context);
+
+	image = tgsi_full_src_register_from_dst(&inst->Dst[0]);
+	coords = image_fetch_coords(bld_base, inst, 0);
+
+	for (c = 0; c < 4; ++c)
+		texel[c] = lp_build_emit_fetch(bld_base, inst, 1, c);
+	data = lp_build_gather_values(gallivm, texel, 4);
+
+	image_fetch_rsrc(bld_base, &image, !is_buffer, &rsrc);
+
+	emit_data->args[0] = data;
+	if (is_buffer) {
+		emit_data->arg_count = 1;
+
+		buffer_append_args(ctx, emit_data, rsrc, coords, false);
+	} else {
+		emit_data->args[1] = coords;
+		emit_data->args[2] = rsrc;
+		emit_data->args[3] = lp_build_const_int32(gallivm, 15); /* dmask */
+		emit_data->arg_count = 4;
+
+		image_append_args(ctx, emit_data, target, false);
+	}
+}
+
+/* Emit the intrinsic call for a TGSI STORE. Buffers use the fixed buffer
+ * store intrinsic; other targets use the image store intrinsic suffixed
+ * with the type of the coordinate argument (args[1]).
+ */
+static void store_emit(
+		const struct lp_build_tgsi_action *action,
+		struct lp_build_tgsi_context *bld_base,
+		struct lp_build_emit_data *emit_data)
+{
+	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+	const struct tgsi_full_instruction * inst = emit_data->inst;
+	char image_intrinsic[32];
+	char coords_type[8];
+	const char *name;
+
+	if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
+		name = "llvm.amdgcn.buffer.store.format.v4f32";
+	} else {
+		build_int_type_name(LLVMTypeOf(emit_data->args[1]),
+				    coords_type, sizeof(coords_type));
+		snprintf(image_intrinsic, sizeof(image_intrinsic),
+			 "llvm.amdgcn.image.store.%s", coords_type);
+		name = image_intrinsic;
+	}
+
+	emit_data->output[emit_data->chan] =
+		lp_build_intrinsic(builder, name, emit_data->dst_type,
+				   emit_data->args, emit_data->arg_count,
+				   LLVMNoUnwindAttribute);
+}
+
+/* Gather the arguments of a TGSI image/buffer atomic. The data operands
+ * are appended first (two for ATOMCAS, one otherwise), followed by the
+ * buffer or image addressing arguments. Descriptors for non-buffer targets
+ * are fetched with the dcc_off flag set.
+ */
+static void atomic_fetch_args(
+		struct lp_build_tgsi_context * bld_base,
+		struct lp_build_emit_data * emit_data)
+{
+	struct si_shader_context *ctx = si_shader_context(bld_base);
+	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+	const struct tgsi_full_instruction * inst = emit_data->inst;
+	LLVMTypeRef uint_type = bld_base->uint_bld.elem_type;
+	unsigned target = inst->Memory.Texture;
+	bool is_buffer = target == TGSI_TEXTURE_BUFFER;
+	LLVMValueRef rsrc, coords, data1, raw;
+
+	emit_data->dst_type = bld_base->base.elem_type;
+
+	image_fetch_rsrc(bld_base, &inst->Src[0], !is_buffer, &rsrc);
+	coords = image_fetch_coords(bld_base, inst, 1);
+
+	raw = lp_build_emit_fetch(bld_base, inst, 2, 0);
+	data1 = LLVMBuildBitCast(builder, raw, uint_type, "");
+
+	if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
+		LLVMValueRef data2;
+
+		raw = lp_build_emit_fetch(bld_base, inst, 3, 0);
+		data2 = LLVMBuildBitCast(builder, raw, uint_type, "");
+
+		/* llvm.amdgcn.image/buffer.atomic.cmpswap reflect the
+		 * hardware order of arguments, which is reversed relative
+		 * to TGSI (and GLSL), so data2 goes in ahead of data1.
+		 */
+		emit_data->args[emit_data->arg_count++] = data2;
+	}
+	emit_data->args[emit_data->arg_count++] = data1;
+
+	if (is_buffer) {
+		buffer_append_args(ctx, emit_data, rsrc, coords, true);
+	} else {
+		emit_data->args[emit_data->arg_count++] = coords;
+		emit_data->args[emit_data->arg_count++] = rsrc;
+
+		image_append_args(ctx, emit_data, target, true);
+	}
+}
+
+/**
+ * Emit the intrinsic call for a TGSI image/buffer atomic opcode.
+ *
+ * The intrinsic name is built from action->intr_name. For non-buffer
+ * targets it is also suffixed with the type of the coordinate argument.
+ * The i32 result is bitcast to the base element type and stored in
+ * emit_data->output[emit_data->chan].
+ */
+static void atomic_emit(
+		const struct lp_build_tgsi_action *action,
+		struct lp_build_tgsi_context *bld_base,
+		struct lp_build_emit_data *emit_data)
+{
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	LLVMBuilderRef builder = gallivm->builder;
+	const struct tgsi_full_instruction * inst = emit_data->inst;
+	unsigned target = inst->Memory.Texture;
+	char intrinsic_name[40];
+	LLVMValueRef tmp;
+
+	if (target == TGSI_TEXTURE_BUFFER) {
+		snprintf(intrinsic_name, sizeof(intrinsic_name),
+			 "llvm.amdgcn.buffer.atomic.%s", action->intr_name);
+	} else {
+		char coords_type[8];
+		unsigned coords_arg;
+
+		/* atomic_fetch_args puts the data operands first: two for
+		 * ATOMCAS, one for every other opcode. The coordinates come
+		 * right after them, so their index depends on the opcode.
+		 * Reading args[1] unconditionally would name the cmpswap
+		 * intrinsic after the i32 data operand, not the coordinates.
+		 */
+		if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
+			coords_arg = 2;
+		else
+			coords_arg = 1;
+
+		build_int_type_name(LLVMTypeOf(emit_data->args[coords_arg]),
+				    coords_type, sizeof(coords_type));
+		snprintf(intrinsic_name, sizeof(intrinsic_name),
+			 "llvm.amdgcn.image.atomic.%s.%s",
+			 action->intr_name, coords_type);
+	}
+
+	tmp = lp_build_intrinsic(
+		builder, intrinsic_name, bld_base->uint_bld.elem_type,
+		emit_data->args, emit_data->arg_count,
+		LLVMNoUnwindAttribute);
+	emit_data->output[emit_data->chan] =
+		LLVMBuildBitCast(builder, tmp, bld_base->base.elem_type, "");
+}
+
+/* Gather the arguments of a TGSI RESQ. For buffers only the descriptor is
+ * needed; other targets get the full ten-entry argument list used by the
+ * resinfo intrinsic.
+ */
+static void resq_fetch_args(
+		struct lp_build_tgsi_context * bld_base,
+		struct lp_build_emit_data * emit_data)
+{
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	const struct tgsi_full_instruction *inst = emit_data->inst;
+	const struct tgsi_full_src_register *image = &inst->Src[0];
+	unsigned target = inst->Memory.Texture;
+	LLVMValueRef zero = bld_base->uint_bld.zero;
+
+	emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
+
+	if (target == TGSI_TEXTURE_BUFFER) {
+		image_fetch_rsrc(bld_base, image, false, &emit_data->args[0]);
+		emit_data->arg_count = 1;
+		return;
+	}
+
+	emit_data->args[0] = zero; /* mip level */
+	image_fetch_rsrc(bld_base, image, false, &emit_data->args[1]);
+	emit_data->args[2] = lp_build_const_int32(gallivm, 15); /* dmask */
+	emit_data->args[3] = zero; /* unorm */
+	emit_data->args[4] = zero; /* r128 */
+	if (tgsi_is_array_image(target))
+		emit_data->args[5] = bld_base->uint_bld.one; /* da */
+	else
+		emit_data->args[5] = zero; /* da */
+	emit_data->args[6] = zero; /* glc */
+	emit_data->args[7] = zero; /* slc */
+	emit_data->args[8] = zero; /* tfe */
+	emit_data->args[9] = zero; /* lwe */
+	emit_data->arg_count = 10;
+}
+
+/* Emit the code for a TGSI RESQ. Buffers read the size out of the
+ * descriptor; other targets call the resinfo intrinsic, and cube arrays
+ * additionally have component 2 of the result divided by 6.
+ */
+static void resq_emit(
+		const struct lp_build_tgsi_action *action,
+		struct lp_build_tgsi_context *bld_base,
+		struct lp_build_emit_data *emit_data)
+{
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	LLVMBuilderRef builder = gallivm->builder;
+	unsigned target = emit_data->inst->Memory.Texture;
+	LLVMValueRef result;
+
+	if (target == TGSI_TEXTURE_BUFFER) {
+		emit_data->output[emit_data->chan] =
+			get_buffer_size(bld_base, emit_data->args[0]);
+		return;
+	}
+
+	result = lp_build_intrinsic(
+		builder, "llvm.SI.getresinfo.i32", emit_data->dst_type,
+		emit_data->args, emit_data->arg_count,
+		LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
+
+	if (target == TGSI_TEXTURE_CUBE_ARRAY) {
+		/* Divide the number of layers by 6 to get the number of cubes. */
+		LLVMValueRef idx2 = lp_build_const_int32(gallivm, 2);
+		LLVMValueRef six = lp_build_const_int32(gallivm, 6);
+		LLVMValueRef layers;
+
+		layers = LLVMBuildExtractElement(builder, result, idx2, "");
+		layers = LLVMBuildBitCast(builder, layers,
+					  bld_base->uint_bld.elem_type, "");
+		layers = LLVMBuildSDiv(builder, layers, six, "");
+		layers = LLVMBuildBitCast(builder, layers,
+					  bld_base->base.elem_type, "");
+		result = LLVMBuildInsertElement(builder, result, layers, idx2, "");
+	}
+
+	emit_data->output[emit_data->chan] = result;
+}
+
static void set_tex_fetch_args(struct si_shader_context *ctx,
struct lp_build_emit_data *emit_data,
unsigned opcode, unsigned target,
@@ -2836,26 +3402,7 @@ static void tex_fetch_args(
if (target == TGSI_TEXTURE_BUFFER) {
/* Read the size from the buffer descriptor directly. */
LLVMValueRef res = LLVMBuildBitCast(builder, res_ptr, ctx->v8i32, "");
- LLVMValueRef size = LLVMBuildExtractElement(builder, res,
- lp_build_const_int32(gallivm, 6), "");
-
- if (ctx->screen->b.chip_class >= VI) {
- /* On VI, the descriptor contains the size in bytes,
- * but TXQ must return the size in elements.
- * The stride is always non-zero for resources using TXQ.
- */
- LLVMValueRef stride =
- LLVMBuildExtractElement(builder, res,
- lp_build_const_int32(gallivm, 5), "");
- stride = LLVMBuildLShr(builder, stride,
- lp_build_const_int32(gallivm, 16), "");
- stride = LLVMBuildAnd(builder, stride,
- lp_build_const_int32(gallivm, 0x3FFF), "");
-
- size = LLVMBuildUDiv(builder, size, stride, "");
- }
-
- emit_data->args[0] = size;
+ emit_data->args[0] = get_buffer_size(bld_base, res);
return;
}
@@ -3236,14 +3783,9 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
return;
}
- if (LLVMGetTypeKind(LLVMTypeOf(emit_data->args[0])) == LLVMVectorTypeKind)
- sprintf(type, ".v%ui32",
- LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0])));
- else
- strcpy(type, ".i32");
-
/* Add the type and suffixes .c, .o if needed. */
- sprintf(intr_name, "%s%s%s%s%s",
+ build_int_type_name(LLVMTypeOf(emit_data->args[0]), type, sizeof(type));
+ sprintf(intr_name, "%s%s%s%s.%s",
name, is_shadow ? ".c" : "", infix,
has_offset ? ".o" : "", type);
@@ -3865,8 +4407,8 @@ static void create_function(struct si_shader_context *ctx)
params[SI_PARAM_RW_BUFFERS] = const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
params[SI_PARAM_CONST_BUFFERS] = const_array(ctx->v16i8, SI_NUM_CONST_BUFFERS);
params[SI_PARAM_SAMPLERS] = const_array(ctx->v8i32, SI_NUM_SAMPLERS);
- params[SI_PARAM_UNUSED] = LLVMPointerType(ctx->i32, CONST_ADDR_SPACE);
- last_array_pointer = SI_PARAM_UNUSED;
+ params[SI_PARAM_IMAGES] = const_array(ctx->v8i32, SI_NUM_IMAGES);
+ last_array_pointer = SI_PARAM_IMAGES;
switch (ctx->type) {
case TGSI_PROCESSOR_VERTEX:
@@ -4153,6 +4695,34 @@ static void preload_samplers(struct si_shader_context *ctx)
}
}
+/* Load the descriptor of every image slot up to the highest declared one
+ * from the SI_PARAM_IMAGES array into ctx->images. DCC is forced off for
+ * slots that are written and are not buffers.
+ */
+static void preload_images(struct si_shader_context *ctx)
+{
+	struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
+	struct tgsi_shader_info *info = &ctx->shader->selector->info;
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	unsigned count = bld_base->info->file_max[TGSI_FILE_IMAGE] + 1;
+	LLVMValueRef list;
+	unsigned slot;
+
+	if (!count)
+		return;
+
+	list = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_IMAGES);
+
+	for (slot = 0; slot < count; ++slot) {
+		bool written = info->images_writemask & (1 << slot);
+		bool is_buffer = info->images_buffers & (1 << slot);
+		LLVMValueRef desc;
+
+		/* Rely on LLVM to shrink the load for buffer resources. */
+		desc = build_indexed_load_const(ctx, list,
+						lp_build_const_int32(gallivm, slot));
+
+		if (written && !is_buffer)
+			desc = force_dcc_off(ctx, desc);
+
+		ctx->images[slot] = desc;
+	}
+}
+
static void preload_streamout_buffers(struct si_shader_context *ctx)
{
struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
@@ -4792,6 +5362,7 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
LLVMTargetMachineRef tm)
{
struct lp_build_tgsi_context *bld_base;
+ struct lp_build_tgsi_action tmpl = {};
memset(ctx, 0, sizeof(*ctx));
radeon_llvm_context_init(&ctx->radeon_bld, "amdgcn--");
@@ -4839,6 +5410,38 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
bld_base->op_actions[TGSI_OPCODE_LODQ] = tex_action;
bld_base->op_actions[TGSI_OPCODE_TXQS].emit = si_llvm_emit_txqs;
+ bld_base->op_actions[TGSI_OPCODE_LOAD].fetch_args = load_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
+ bld_base->op_actions[TGSI_OPCODE_STORE].fetch_args = store_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_STORE].emit = store_emit;
+ bld_base->op_actions[TGSI_OPCODE_RESQ].fetch_args = resq_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;
+
+ tmpl.fetch_args = atomic_fetch_args;
+ tmpl.emit = atomic_emit;
+ bld_base->op_actions[TGSI_OPCODE_ATOMUADD] = tmpl;
+ bld_base->op_actions[TGSI_OPCODE_ATOMUADD].intr_name = "add";
+ bld_base->op_actions[TGSI_OPCODE_ATOMXCHG] = tmpl;
+ bld_base->op_actions[TGSI_OPCODE_ATOMXCHG].intr_name = "swap";
+ bld_base->op_actions[TGSI_OPCODE_ATOMCAS] = tmpl;
+ bld_base->op_actions[TGSI_OPCODE_ATOMCAS].intr_name = "cmpswap";
+ bld_base->op_actions[TGSI_OPCODE_ATOMAND] = tmpl;
+ bld_base->op_actions[TGSI_OPCODE_ATOMAND].intr_name = "and";
+ bld_base->op_actions[TGSI_OPCODE_ATOMOR] = tmpl;
+ bld_base->op_actions[TGSI_OPCODE_ATOMOR].intr_name = "or";
+ bld_base->op_actions[TGSI_OPCODE_ATOMXOR] = tmpl;
+ bld_base->op_actions[TGSI_OPCODE_ATOMXOR].intr_name = "xor";
+ bld_base->op_actions[TGSI_OPCODE_ATOMUMIN] = tmpl;
+ bld_base->op_actions[TGSI_OPCODE_ATOMUMIN].intr_name = "umin";
+ bld_base->op_actions[TGSI_OPCODE_ATOMUMAX] = tmpl;
+ bld_base->op_actions[TGSI_OPCODE_ATOMUMAX].intr_name = "umax";
+ bld_base->op_actions[TGSI_OPCODE_ATOMIMIN] = tmpl;
+ bld_base->op_actions[TGSI_OPCODE_ATOMIMIN].intr_name = "smin";
+ bld_base->op_actions[TGSI_OPCODE_ATOMIMAX] = tmpl;
+ bld_base->op_actions[TGSI_OPCODE_ATOMIMAX].intr_name = "smax";
+
+ bld_base->op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
+
bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy;
@@ -4926,6 +5529,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
create_function(&ctx);
preload_constants(&ctx);
preload_samplers(&ctx);
+ preload_images(&ctx);
preload_streamout_buffers(&ctx);
preload_ring_buffers(&ctx);
@@ -5383,7 +5987,7 @@ static bool si_compile_tcs_epilog(struct si_screen *sscreen,
last_array_pointer = SI_PARAM_RW_BUFFERS;
params[SI_PARAM_CONST_BUFFERS] = ctx.i64;
params[SI_PARAM_SAMPLERS] = ctx.i64;
- params[SI_PARAM_UNUSED] = ctx.i64;
+ params[SI_PARAM_IMAGES] = ctx.i64;
params[SI_PARAM_TCS_OUT_OFFSETS] = ctx.i32;
params[SI_PARAM_TCS_OUT_LAYOUT] = ctx.i32;
params[SI_PARAM_TCS_IN_LAYOUT] = ctx.i32;
@@ -5633,7 +6237,7 @@ static bool si_compile_ps_epilog(struct si_screen *sscreen,
params[SI_PARAM_RW_BUFFERS] = ctx.i64;
params[SI_PARAM_CONST_BUFFERS] = ctx.i64;
params[SI_PARAM_SAMPLERS] = ctx.i64;
- params[SI_PARAM_UNUSED] = ctx.i64;
+ params[SI_PARAM_IMAGES] = ctx.i64;
params[SI_PARAM_ALPHA_REF] = ctx.f32;
last_array_pointer = -1;
last_sgpr = SI_PARAM_ALPHA_REF;
@@ -5897,12 +6501,15 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
struct si_shader *mainp = shader->selector->main_shader_part;
int r;
- /* LS and ES are always compiled on demand. */
+ /* LS, ES, VS are compiled on demand if the main part hasn't been
+ * compiled for that stage.
+ */
if (!mainp ||
(shader->selector->type == PIPE_SHADER_VERTEX &&
- (shader->key.vs.as_es || shader->key.vs.as_ls)) ||
+ (shader->key.vs.as_es != mainp->key.vs.as_es ||
+ shader->key.vs.as_ls != mainp->key.vs.as_ls)) ||
(shader->selector->type == PIPE_SHADER_TESS_EVAL &&
- shader->key.tes.as_es)) {
+ shader->key.tes.as_es != mainp->key.tes.as_es)) {
/* Monolithic shader (compiled as a whole, has many variants,
* may take a long time to compile).
*/
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index de23e64..8059edf 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -80,7 +80,7 @@ struct radeon_shader_reloc;
#define SI_SGPR_RW_BUFFERS 0 /* rings (& stream-out, VS only) */
#define SI_SGPR_CONST_BUFFERS 2
#define SI_SGPR_SAMPLERS 4 /* images & sampler states interleaved */
-/* TODO: gap */
+#define SI_SGPR_IMAGES 6
#define SI_SGPR_VERTEX_BUFFERS 8 /* VS only */
#define SI_SGPR_BASE_VERTEX 10 /* VS only */
#define SI_SGPR_START_INSTANCE 11 /* VS only */
@@ -104,7 +104,7 @@ struct radeon_shader_reloc;
#define SI_PARAM_RW_BUFFERS 0
#define SI_PARAM_CONST_BUFFERS 1
#define SI_PARAM_SAMPLERS 2
-#define SI_PARAM_UNUSED 3 /* TODO: use */
+#define SI_PARAM_IMAGES 3
/* VS only parameters */
#define SI_PARAM_VERTEX_BUFFERS 4
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index f823af1..1245f56 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2797,7 +2797,7 @@ static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
* Build the sampler view descriptor for a buffer texture.
* @param state 256-bit descriptor; only the high 128 bits are filled in
*/
-static void
+void
si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf,
enum pipe_format format,
unsigned first_element, unsigned last_element,
@@ -2838,9 +2838,10 @@ si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf,
/**
* Build the sampler view descriptor for a texture.
*/
-static void
+void
si_make_texture_descriptor(struct si_screen *screen,
struct r600_texture *tex,
+ bool sampler,
enum pipe_texture_target target,
enum pipe_format pipe_format,
const unsigned char state_swizzle[4],
@@ -2855,7 +2856,7 @@ si_make_texture_descriptor(struct si_screen *screen,
const struct util_format_description *desc;
unsigned char swizzle[4];
int first_non_void;
- unsigned num_format, data_format;
+ unsigned num_format, data_format, type;
uint32_t pitch;
uint64_t va;
@@ -2973,12 +2974,30 @@ si_make_texture_descriptor(struct si_screen *screen,
data_format = 0;
}
- if (res->target == PIPE_TEXTURE_1D_ARRAY) {
+ if (!sampler &&
+ (res->target == PIPE_TEXTURE_CUBE ||
+ res->target == PIPE_TEXTURE_CUBE_ARRAY ||
+ res->target == PIPE_TEXTURE_3D)) {
+ /* For the purpose of shader images, treat cube maps and 3D
+ * textures as 2D arrays. For 3D textures, the address
+ * calculations for mipmaps are different, so we rely on the
+ * caller to effectively disable mipmaps.
+ */
+ type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
+
+ assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0));
+ } else {
+ type = si_tex_dim(res->target, target, res->nr_samples);
+ }
+
+ if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
height = 1;
depth = res->array_size;
- } else if (res->target == PIPE_TEXTURE_2D_ARRAY) {
- depth = res->array_size;
- } else if (res->target == PIPE_TEXTURE_CUBE_ARRAY)
+ } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
+ type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
+ if (sampler || res->target != PIPE_TEXTURE_3D)
+ depth = res->array_size;
+ } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
depth = res->array_size / 6;
pitch = surflevel[base_level].nblk_x * util_format_get_blockwidth(pipe_format);
@@ -3001,7 +3020,7 @@ si_make_texture_descriptor(struct si_screen *screen,
last_level) |
S_008F1C_TILING_INDEX(si_tile_mode_index(tex, base_level, false)) |
S_008F1C_POW2_PAD(res->last_level > 0) |
- S_008F1C_TYPE(si_tex_dim(res->target, target, res->nr_samples)));
+ S_008F1C_TYPE(type));
state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1));
state[5] = (S_008F24_BASE_ARRAY(first_layer) |
S_008F24_LAST_ARRAY(last_layer));
@@ -3155,7 +3174,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
state->target == PIPE_TEXTURE_CUBE)
last_layer = state->u.tex.first_layer;
- si_make_texture_descriptor(sctx->screen, tmp, state->target,
+ si_make_texture_descriptor(sctx->screen, tmp, true, state->target,
state->format, state_swizzle,
base_level, first_level, last_level,
state->u.tex.first_layer, last_layer,
@@ -3503,6 +3522,52 @@ static void si_texture_barrier(struct pipe_context *ctx)
SI_CONTEXT_FLUSH_AND_INV_CB;
}
+static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
+{
+ struct si_context *sctx = (struct si_context *)ctx;
+
+ /* Subsequent commands must wait for all shader invocations to
+ * complete. */
+ sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
+
+ if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
+ sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
+ SI_CONTEXT_INV_VMEM_L1;
+
+ if (flags & (PIPE_BARRIER_VERTEX_BUFFER |
+ PIPE_BARRIER_SHADER_BUFFER |
+ PIPE_BARRIER_TEXTURE |
+ PIPE_BARRIER_IMAGE |
+ PIPE_BARRIER_STREAMOUT_BUFFER)) {
+ /* As far as I can tell, L1 contents are written back to L2
+ * automatically at end of shader, but the contents of other
+ * L1 caches might still be stale. */
+ sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1;
+ }
+
+ if (flags & PIPE_BARRIER_INDEX_BUFFER) {
+ sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1;
+
+ /* Indices are read through TC L2 since VI. */
+ if (sctx->screen->b.chip_class <= CIK)
+ sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+ }
+
+ if (flags & PIPE_BARRIER_FRAMEBUFFER)
+ sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+
+ if (flags & (PIPE_BARRIER_MAPPED_BUFFER |
+ PIPE_BARRIER_FRAMEBUFFER |
+ PIPE_BARRIER_INDIRECT_BUFFER)) {
+ /* Not sure if INV_GLOBAL_L2 is the best thing here.
+ *
+ * We need to make sure that TC L1 & L2 are written back to
+ * memory, because neither CPU accesses nor CB fetches consider
+ * TC, but there's no need to invalidate any TC cache lines. */
+ sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+ }
+}
+
static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
{
struct pipe_blend_state blend;
@@ -3583,6 +3648,7 @@ void si_init_state_functions(struct si_context *sctx)
sctx->b.b.set_index_buffer = si_set_index_buffer;
sctx->b.b.texture_barrier = si_texture_barrier;
+ sctx->b.b.memory_barrier = si_memory_barrier;
sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
sctx->b.b.set_min_samples = si_set_min_samples;
sctx->b.b.set_tess_state = si_set_tess_state;
@@ -3637,7 +3703,8 @@ static void si_query_opaque_metadata(struct r600_common_screen *rscreen,
/* TILE_MODE_INDEX is ambiguous without a PCI ID. */
md->metadata[1] = (ATI_VENDOR_ID << 16) | rscreen->info.pci_id;
- si_make_texture_descriptor(sscreen, rtex, res->target, res->format,
+ si_make_texture_descriptor(sscreen, rtex, true,
+ res->target, res->format,
swizzle, 0, 0, res->last_level, 0,
is_array ? res->array_size - 1 : 0,
res->width0, res->height0, res->depth0,
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 60c34f1..c4d6b9d 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -158,6 +158,8 @@ struct si_shader_data {
#define SI_DRIVER_STATE_CONST_BUF SI_NUM_USER_CONST_BUFFERS
#define SI_NUM_CONST_BUFFERS (SI_DRIVER_STATE_CONST_BUF + 1)
+#define SI_NUM_IMAGES 16
+
/* Read-write buffer slots.
*
* Ring buffers: 0..1
@@ -272,6 +274,23 @@ unsigned cik_tile_split(unsigned tile_split);
unsigned si_array_mode(unsigned mode);
uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex);
unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil);
+void
+si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf,
+ enum pipe_format format,
+ unsigned first_element, unsigned last_element,
+ uint32_t *state);
+void
+si_make_texture_descriptor(struct si_screen *screen,
+ struct r600_texture *tex,
+ bool sampler,
+ enum pipe_texture_target target,
+ enum pipe_format pipe_format,
+ const unsigned char state_swizzle[4],
+ unsigned base_level, unsigned first_level, unsigned last_level,
+ unsigned first_layer, unsigned last_layer,
+ unsigned width, unsigned height, unsigned depth,
+ uint32_t *state,
+ uint32_t *fmask_state);
struct pipe_sampler_view *
si_create_sampler_view_custom(struct pipe_context *ctx,
struct pipe_resource *texture,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 5fe1f79..0248958 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -794,9 +794,15 @@ static void si_shader_ps(struct si_shader *shader)
* - the shader uses at least 2 VMEM instructions, or
* - the code size is at least 50 2-dword instructions or 100 1-dword
* instructions.
+ *
+ * Shaders with side effects that must execute independently of the
+ * depth test require LATE_Z.
*/
- if (info->num_memory_instructions >= 2 ||
- shader->binary.code_size > 100*4)
+ if (info->writes_memory &&
+ !info->properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL])
+ shader->z_order = V_02880C_LATE_Z;
+ else if (info->num_memory_instructions >= 2 ||
+ shader->binary.code_size > 100*4)
shader->z_order = V_02880C_EARLY_Z_THEN_RE_Z;
else
shader->z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
@@ -1042,6 +1048,31 @@ static int si_shader_select(struct pipe_context *ctx,
return si_shader_select_with_key(ctx, state, &key);
}
+static void si_parse_next_shader_property(const struct tgsi_shader_info *info,
+ union si_shader_key *key)
+{
+ unsigned next_shader = info->properties[TGSI_PROPERTY_NEXT_SHADER];
+
+ switch (info->processor) {
+ case TGSI_PROCESSOR_VERTEX:
+ switch (next_shader) {
+ case TGSI_PROCESSOR_GEOMETRY:
+ key->vs.as_es = 1;
+ break;
+ case TGSI_PROCESSOR_TESS_CTRL:
+ case TGSI_PROCESSOR_TESS_EVAL:
+ key->vs.as_ls = 1;
+ break;
+ }
+ break;
+
+ case TGSI_PROCESSOR_TESS_EVAL:
+ if (next_shader == TGSI_PROCESSOR_GEOMETRY)
+ key->tes.as_es = 1;
+ break;
+ }
+}
+
static void *si_create_shader_selector(struct pipe_context *ctx,
const struct pipe_shader_state *state)
{
@@ -1157,6 +1188,10 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
if (sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL])
sel->db_shader_control |= S_02880C_DEPTH_BEFORE_SHADER(1);
+ if (sel->info.writes_memory)
+ sel->db_shader_control |= S_02880C_EXEC_ON_HIER_FAIL(1) |
+ S_02880C_EXEC_ON_NOOP(1);
+
/* Compile the main shader part for use with a prolog and/or epilog. */
if (sel->type != PIPE_SHADER_GEOMETRY &&
!sscreen->use_monolithic_shaders) {
@@ -1167,6 +1202,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
goto error;
shader->selector = sel;
+ si_parse_next_shader_property(&sel->info, &shader->key);
tgsi_binary = si_get_tgsi_binary(sel);
@@ -1202,6 +1238,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
union si_shader_key key;
memset(&key, 0, sizeof(key));
+ si_parse_next_shader_property(&sel->info, &key);
/* Set reasonable defaults, so that the shader key doesn't
* cause any code to be eliminated.
diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c
index da42814..896dcdf 100644
--- a/src/gallium/drivers/svga/svga_context.c
+++ b/src/gallium/drivers/svga/svga_context.c
@@ -247,6 +247,7 @@ struct pipe_context *svga_context_create(struct pipe_screen *screen,
sizeof(svga->state.hw_draw.default_constbuf_size));
memset(svga->state.hw_draw.enabled_constbufs, 0,
sizeof(svga->state.hw_draw.enabled_constbufs));
+ svga->state.hw_draw.ib = NULL;
/* Create a no-operation blend state which we will bind whenever the
* requested blend state is impossible (e.g. due to having an integer
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index 1976f98..ead47c0 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -55,16 +55,21 @@
#define SVGA_QUERY_COMMAND_BUFFER_SIZE (PIPE_QUERY_DRIVER_SPECIFIC + 7)
#define SVGA_QUERY_FLUSH_TIME (PIPE_QUERY_DRIVER_SPECIFIC + 8)
#define SVGA_QUERY_SURFACE_WRITE_FLUSHES (PIPE_QUERY_DRIVER_SPECIFIC + 9)
+#define SVGA_QUERY_NUM_READBACKS (PIPE_QUERY_DRIVER_SPECIFIC + 10)
+#define SVGA_QUERY_NUM_RESOURCE_UPDATES (PIPE_QUERY_DRIVER_SPECIFIC + 11)
+#define SVGA_QUERY_NUM_BUFFER_UPLOADS (PIPE_QUERY_DRIVER_SPECIFIC + 12)
+#define SVGA_QUERY_NUM_CONST_BUF_UPDATES (PIPE_QUERY_DRIVER_SPECIFIC + 13)
+#define SVGA_QUERY_NUM_CONST_UPDATES (PIPE_QUERY_DRIVER_SPECIFIC + 14)
/* running total counters */
-#define SVGA_QUERY_MEMORY_USED (PIPE_QUERY_DRIVER_SPECIFIC + 10)
-#define SVGA_QUERY_NUM_SHADERS (PIPE_QUERY_DRIVER_SPECIFIC + 11)
-#define SVGA_QUERY_NUM_RESOURCES (PIPE_QUERY_DRIVER_SPECIFIC + 12)
-#define SVGA_QUERY_NUM_STATE_OBJECTS (PIPE_QUERY_DRIVER_SPECIFIC + 13)
-#define SVGA_QUERY_NUM_SURFACE_VIEWS (PIPE_QUERY_DRIVER_SPECIFIC + 14)
-#define SVGA_QUERY_NUM_GENERATE_MIPMAP (PIPE_QUERY_DRIVER_SPECIFIC + 15)
+#define SVGA_QUERY_MEMORY_USED (PIPE_QUERY_DRIVER_SPECIFIC + 15)
+#define SVGA_QUERY_NUM_SHADERS (PIPE_QUERY_DRIVER_SPECIFIC + 16)
+#define SVGA_QUERY_NUM_RESOURCES (PIPE_QUERY_DRIVER_SPECIFIC + 17)
+#define SVGA_QUERY_NUM_STATE_OBJECTS (PIPE_QUERY_DRIVER_SPECIFIC + 18)
+#define SVGA_QUERY_NUM_SURFACE_VIEWS (PIPE_QUERY_DRIVER_SPECIFIC + 19)
+#define SVGA_QUERY_NUM_GENERATE_MIPMAP (PIPE_QUERY_DRIVER_SPECIFIC + 20)
/*SVGA_QUERY_MAX has to be last because it is size of an array*/
-#define SVGA_QUERY_MAX (PIPE_QUERY_DRIVER_SPECIFIC + 16)
+#define SVGA_QUERY_MAX (PIPE_QUERY_DRIVER_SPECIFIC + 21)
/**
* Maximum supported number of constant buffers per shader
@@ -499,20 +504,25 @@ struct svga_context
/** performance / info queries for HUD */
struct {
- uint64_t num_draw_calls; /**< SVGA_QUERY_DRAW_CALLS */
- uint64_t num_fallbacks; /**< SVGA_QUERY_NUM_FALLBACKS */
- uint64_t num_flushes; /**< SVGA_QUERY_NUM_FLUSHES */
- uint64_t num_validations; /**< SVGA_QUERY_NUM_VALIDATIONS */
- uint64_t map_buffer_time; /**< SVGA_QUERY_MAP_BUFFER_TIME */
- uint64_t num_resources_mapped; /**< SVGA_QUERY_NUM_RESOURCES_MAPPED */
- uint64_t command_buffer_size; /**< SVGA_QUERY_COMMAND_BUFFER_SIZE */
- uint64_t flush_time; /**< SVGA_QUERY_FLUSH_TIME */
- uint64_t surface_write_flushes; /**< SVGA_QUERY_SURFACE_WRITE_FLUSHES */
- uint64_t num_shaders; /**< SVGA_QUERY_NUM_SHADERS */
- uint64_t num_state_objects; /**< SVGA_QUERY_NUM_STATE_OBJECTS */
- uint64_t num_surface_views; /**< SVGA_QUERY_NUM_SURFACE_VIEWS */
- uint64_t num_bytes_uploaded; /**< SVGA_QUERY_NUM_BYTES_UPLOADED */
- uint64_t num_generate_mipmap; /**< SVGA_QUERY_NUM_GENERATE_MIPMAP */
+ uint64_t num_draw_calls; /**< SVGA_QUERY_DRAW_CALLS */
+ uint64_t num_fallbacks; /**< SVGA_QUERY_NUM_FALLBACKS */
+ uint64_t num_flushes; /**< SVGA_QUERY_NUM_FLUSHES */
+ uint64_t num_validations; /**< SVGA_QUERY_NUM_VALIDATIONS */
+ uint64_t map_buffer_time; /**< SVGA_QUERY_MAP_BUFFER_TIME */
+ uint64_t num_resources_mapped; /**< SVGA_QUERY_NUM_RESOURCES_MAPPED */
+ uint64_t command_buffer_size; /**< SVGA_QUERY_COMMAND_BUFFER_SIZE */
+ uint64_t flush_time; /**< SVGA_QUERY_FLUSH_TIME */
+ uint64_t surface_write_flushes; /**< SVGA_QUERY_SURFACE_WRITE_FLUSHES */
+ uint64_t num_readbacks; /**< SVGA_QUERY_NUM_READBACKS */
+ uint64_t num_resource_updates; /**< SVGA_QUERY_NUM_RESOURCE_UPDATES */
+ uint64_t num_buffer_uploads; /**< SVGA_QUERY_NUM_BUFFER_UPLOADS */
+ uint64_t num_const_buf_updates; /**< SVGA_QUERY_NUM_CONST_BUF_UPDATES */
+ uint64_t num_const_updates; /**< SVGA_QUERY_NUM_CONST_UPDATES */
+ uint64_t num_shaders; /**< SVGA_QUERY_NUM_SHADERS */
+ uint64_t num_state_objects; /**< SVGA_QUERY_NUM_STATE_OBJECTS */
+ uint64_t num_surface_views; /**< SVGA_QUERY_NUM_SURFACE_VIEWS */
+ uint64_t num_bytes_uploaded; /**< SVGA_QUERY_NUM_BYTES_UPLOADED */
+ uint64_t num_generate_mipmap; /**< SVGA_QUERY_NUM_GENERATE_MIPMAP */
} hud;
/** The currently bound stream output targets */
diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c
index fe6cf71..0b9ea88 100644
--- a/src/gallium/drivers/svga/svga_draw.c
+++ b/src/gallium/drivers/svga/svga_draw.c
@@ -458,6 +458,14 @@ draw_vgpu10(struct svga_hwtnl *hwtnl,
ret = svga_rebind_shaders(svga);
if (ret != PIPE_OK)
return ret;
+
+ /* Rebind stream output targets */
+ ret = svga_rebind_stream_output_targets(svga);
+ if (ret != PIPE_OK)
+ return ret;
+
+ /* Force rebinding the index buffer when needed */
+ svga->state.hw_draw.ib = NULL;
}
ret = validate_sampler_resources(svga);
diff --git a/src/gallium/drivers/svga/svga_pipe_misc.c b/src/gallium/drivers/svga/svga_pipe_misc.c
index af9356d..a26e577 100644
--- a/src/gallium/drivers/svga/svga_pipe_misc.c
+++ b/src/gallium/drivers/svga/svga_pipe_misc.c
@@ -254,10 +254,13 @@ svga_set_debug_callback(struct pipe_context *pipe,
{
struct svga_context *svga = svga_context(pipe);
- if (cb)
+ if (cb) {
svga->debug.callback = *cb;
- else
+ svga->swc->debug_callback = &svga->debug.callback;
+ } else {
memset(&svga->debug.callback, 0, sizeof(svga->debug.callback));
+ svga->swc->debug_callback = NULL;
+ }
}
diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c
index 845f4ef..88f41ea 100644
--- a/src/gallium/drivers/svga/svga_pipe_query.c
+++ b/src/gallium/drivers/svga/svga_pipe_query.c
@@ -72,11 +72,14 @@ struct svga_query {
/** cast wrapper */
static inline struct svga_query *
-svga_query( struct pipe_query *q )
+svga_query(struct pipe_query *q)
{
return (struct svga_query *)q;
}
+/**
+ * VGPU9
+ */
static boolean
svga_get_query_result(struct pipe_context *pipe,
@@ -736,6 +739,11 @@ svga_create_query(struct pipe_context *pipe,
case SVGA_QUERY_NUM_STATE_OBJECTS:
case SVGA_QUERY_NUM_SURFACE_VIEWS:
case SVGA_QUERY_NUM_GENERATE_MIPMAP:
+ case SVGA_QUERY_NUM_READBACKS:
+ case SVGA_QUERY_NUM_RESOURCE_UPDATES:
+ case SVGA_QUERY_NUM_BUFFER_UPLOADS:
+ case SVGA_QUERY_NUM_CONST_BUF_UPDATES:
+ case SVGA_QUERY_NUM_CONST_UPDATES:
break;
default:
assert(!"unexpected query type in svga_create_query()");
@@ -808,6 +816,11 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
case SVGA_QUERY_NUM_STATE_OBJECTS:
case SVGA_QUERY_NUM_SURFACE_VIEWS:
case SVGA_QUERY_NUM_GENERATE_MIPMAP:
+ case SVGA_QUERY_NUM_READBACKS:
+ case SVGA_QUERY_NUM_RESOURCE_UPDATES:
+ case SVGA_QUERY_NUM_BUFFER_UPLOADS:
+ case SVGA_QUERY_NUM_CONST_BUF_UPDATES:
+ case SVGA_QUERY_NUM_CONST_UPDATES:
/* nothing */
break;
default:
@@ -899,6 +912,21 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
case SVGA_QUERY_SURFACE_WRITE_FLUSHES:
sq->begin_count = svga->hud.surface_write_flushes;
break;
+ case SVGA_QUERY_NUM_READBACKS:
+ sq->begin_count = svga->hud.num_readbacks;
+ break;
+ case SVGA_QUERY_NUM_RESOURCE_UPDATES:
+ sq->begin_count = svga->hud.num_resource_updates;
+ break;
+ case SVGA_QUERY_NUM_BUFFER_UPLOADS:
+ sq->begin_count = svga->hud.num_buffer_uploads;
+ break;
+ case SVGA_QUERY_NUM_CONST_BUF_UPDATES:
+ sq->begin_count = svga->hud.num_const_buf_updates;
+ break;
+ case SVGA_QUERY_NUM_CONST_UPDATES:
+ sq->begin_count = svga->hud.num_const_updates;
+ break;
case SVGA_QUERY_MEMORY_USED:
case SVGA_QUERY_NUM_SHADERS:
case SVGA_QUERY_NUM_RESOURCES:
@@ -1002,6 +1030,21 @@ svga_end_query(struct pipe_context *pipe, struct pipe_query *q)
case SVGA_QUERY_SURFACE_WRITE_FLUSHES:
sq->end_count = svga->hud.surface_write_flushes;
break;
+ case SVGA_QUERY_NUM_READBACKS:
+ sq->end_count = svga->hud.num_readbacks;
+ break;
+ case SVGA_QUERY_NUM_RESOURCE_UPDATES:
+ sq->end_count = svga->hud.num_resource_updates;
+ break;
+ case SVGA_QUERY_NUM_BUFFER_UPLOADS:
+ sq->end_count = svga->hud.num_buffer_uploads;
+ break;
+ case SVGA_QUERY_NUM_CONST_BUF_UPDATES:
+ sq->end_count = svga->hud.num_const_buf_updates;
+ break;
+ case SVGA_QUERY_NUM_CONST_UPDATES:
+ sq->end_count = svga->hud.num_const_updates;
+ break;
case SVGA_QUERY_MEMORY_USED:
case SVGA_QUERY_NUM_SHADERS:
case SVGA_QUERY_NUM_RESOURCES:
@@ -1103,6 +1146,11 @@ svga_get_query_result(struct pipe_context *pipe,
case SVGA_QUERY_COMMAND_BUFFER_SIZE:
case SVGA_QUERY_FLUSH_TIME:
case SVGA_QUERY_SURFACE_WRITE_FLUSHES:
+ case SVGA_QUERY_NUM_READBACKS:
+ case SVGA_QUERY_NUM_RESOURCE_UPDATES:
+ case SVGA_QUERY_NUM_BUFFER_UPLOADS:
+ case SVGA_QUERY_NUM_CONST_BUF_UPDATES:
+ case SVGA_QUERY_NUM_CONST_UPDATES:
vresult->u64 = sq->end_count - sq->begin_count;
break;
/* These are running total counters */
diff --git a/src/gallium/drivers/svga/svga_pipe_streamout.c b/src/gallium/drivers/svga/svga_pipe_streamout.c
index 3f443c4..1318b55 100644
--- a/src/gallium/drivers/svga/svga_pipe_streamout.c
+++ b/src/gallium/drivers/svga/svga_pipe_streamout.c
@@ -311,6 +311,25 @@ svga_set_stream_output_targets(struct pipe_context *pipe,
svga->num_so_targets = num_targets;
}
+/**
+ * Rebind stream output target surfaces
+ */
+enum pipe_error
+svga_rebind_stream_output_targets(struct svga_context *svga)
+{
+ struct svga_winsys_context *swc = svga->swc;
+ enum pipe_error ret;
+ unsigned i;
+
+ for (i = 0; i < svga->num_so_targets; i++) {
+ ret = swc->resource_rebind(swc, svga->so_surfaces[i], NULL, SVGA_RELOC_WRITE);
+ if (ret != PIPE_OK)
+ return ret;
+ }
+
+ return PIPE_OK;
+}
+
void
svga_init_stream_output_functions(struct svga_context *svga)
{
diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c
index a8ffcc7..9ecb975 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer.c
@@ -109,6 +109,8 @@ svga_buffer_transfer_map(struct pipe_context *pipe,
assert(ret == PIPE_OK);
}
+ svga->hud.num_readbacks++;
+
svga_context_finish(svga);
sbuf->dirty = FALSE;
diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
index 7f7ceab..1121b78 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
@@ -311,6 +311,8 @@ svga_buffer_upload_gb_command(struct svga_context *svga,
swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH;
sbuf->dma.flags.discard = FALSE;
+ svga->hud.num_resource_updates++;
+
return PIPE_OK;
}
@@ -385,6 +387,8 @@ svga_buffer_upload_command(struct svga_context *svga,
swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH;
sbuf->dma.flags.discard = FALSE;
+ svga->hud.num_buffer_uploads++;
+
return PIPE_OK;
}
@@ -433,6 +437,7 @@ svga_buffer_upload_flush(struct svga_context *svga,
assert(box->x + box->w <= sbuf->b.b.width0);
svga->hud.num_bytes_uploaded += box->w;
+ svga->hud.num_buffer_uploads++;
}
}
else {
@@ -460,6 +465,7 @@ svga_buffer_upload_flush(struct svga_context *svga,
assert(box->x + box->w <= sbuf->b.b.width0);
svga->hud.num_bytes_uploaded += box->w;
+ svga->hud.num_buffer_uploads++;
}
}
diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c
index 1edb41d..db73080 100644
--- a/src/gallium/drivers/svga/svga_resource_texture.c
+++ b/src/gallium/drivers/svga/svga_resource_texture.c
@@ -448,6 +448,8 @@ svga_texture_transfer_map(struct pipe_context *pipe,
ret = readback_image_vgpu9(svga, surf, st->slice, transfer->level);
}
+ svga->hud.num_readbacks++;
+
assert(ret == PIPE_OK);
(void) ret;
@@ -681,6 +683,8 @@ svga_texture_transfer_unmap(struct pipe_context *pipe,
ret = update_image_vgpu9(svga, surf, &box, st->slice, transfer->level);
}
+ svga->hud.num_resource_updates++;
+
assert(ret == PIPE_OK);
(void) ret;
}
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index bcc5120..c0873c0 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -837,6 +837,16 @@ svga_get_driver_query_info(struct pipe_screen *screen,
PIPE_DRIVER_QUERY_TYPE_MICROSECONDS),
QUERY("surface-write-flushes", SVGA_QUERY_SURFACE_WRITE_FLUSHES,
PIPE_DRIVER_QUERY_TYPE_UINT64),
+ QUERY("num-readbacks", SVGA_QUERY_NUM_READBACKS,
+ PIPE_DRIVER_QUERY_TYPE_UINT64),
+ QUERY("num-resource-updates", SVGA_QUERY_NUM_RESOURCE_UPDATES,
+ PIPE_DRIVER_QUERY_TYPE_UINT64),
+ QUERY("num-buffer-uploads", SVGA_QUERY_NUM_BUFFER_UPLOADS,
+ PIPE_DRIVER_QUERY_TYPE_UINT64),
+ QUERY("num-const-buf-updates", SVGA_QUERY_NUM_CONST_BUF_UPDATES,
+ PIPE_DRIVER_QUERY_TYPE_UINT64),
+ QUERY("num-const-updates", SVGA_QUERY_NUM_CONST_UPDATES,
+ PIPE_DRIVER_QUERY_TYPE_UINT64),
/* running total counters */
QUERY("memory-used", SVGA_QUERY_MEMORY_USED,
diff --git a/src/gallium/drivers/svga/svga_shader.c b/src/gallium/drivers/svga/svga_shader.c
index 5c99e16..78eb3f6 100644
--- a/src/gallium/drivers/svga/svga_shader.c
+++ b/src/gallium/drivers/svga/svga_shader.c
@@ -180,18 +180,18 @@ svga_init_shader_key_common(const struct svga_context *svga, unsigned shader,
assert(view->texture);
assert(view->texture->target < (1 << 4)); /* texture_target:4 */
- key->tex[i].texture_target = view->texture->target;
-
/* 1D/2D array textures with one slice are treated as non-arrays
* by the SVGA3D device. Convert the texture type here so that
* we emit the right TEX/SAMPLE instruction in the shader.
*/
- if (view->texture->array_size == 1) {
- if (view->texture->target == PIPE_TEXTURE_1D_ARRAY) {
- key->tex[i].texture_target = PIPE_TEXTURE_1D;
+ if (view->texture->target == PIPE_TEXTURE_1D_ARRAY ||
+ view->texture->target == PIPE_TEXTURE_2D_ARRAY) {
+ if (view->texture->array_size == 1) {
+ key->tex[i].is_array = 0;
}
- else if (view->texture->target == PIPE_TEXTURE_2D_ARRAY) {
- key->tex[i].texture_target = PIPE_TEXTURE_2D;
+ else {
+ assert(view->texture->array_size > 1);
+ key->tex[i].is_array = 1;
}
}
@@ -207,8 +207,6 @@ svga_init_shader_key_common(const struct svga_context *svga, unsigned shader,
key->tex[i].swizzle_g = view->swizzle_g;
key->tex[i].swizzle_b = view->swizzle_b;
key->tex[i].swizzle_a = view->swizzle_a;
-
- key->tex[i].return_type = svga_get_texture_datatype(view->format);
}
}
key->num_textures = svga->curr.num_sampler_views[shader];
diff --git a/src/gallium/drivers/svga/svga_shader.h b/src/gallium/drivers/svga/svga_shader.h
index f49fdb4..3f91574 100644
--- a/src/gallium/drivers/svga/svga_shader.h
+++ b/src/gallium/drivers/svga/svga_shader.h
@@ -98,14 +98,13 @@ struct svga_compile_key
unsigned compare_func:3;
unsigned unnormalized:1;
unsigned width_height_idx:5; /**< texture unit */
- unsigned texture_target:4; /**< PIPE_TEXTURE_x */
+ unsigned is_array:1;
unsigned texture_msaa:1; /**< A multisample texture? */
unsigned sprite_texgen:1;
unsigned swizzle_r:3;
unsigned swizzle_g:3;
unsigned swizzle_b:3;
unsigned swizzle_a:3;
- unsigned return_type:3; /**< TGSI_RETURN_TYPE_x */
} tex[PIPE_MAX_SAMPLERS];
/* Note: svga_compile_keys_equal() depends on the variable-size
* tex[] array being at the end of this structure.
diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c
index 8ab1693..5ae0382 100644
--- a/src/gallium/drivers/svga/svga_state_constants.c
+++ b/src/gallium/drivers/svga/svga_state_constants.c
@@ -301,6 +301,8 @@ emit_const(struct svga_context *svga, unsigned shader, unsigned i,
return ret;
memcpy(svga->state.hw_draw.cb[shader][i], value, 4 * sizeof(float));
+
+ svga->hud.num_const_updates++;
}
return ret;
@@ -420,6 +422,9 @@ emit_const_range(struct svga_context *svga,
(j - i) * 4 * sizeof(float));
i = j + 1;
+
+ svga->hud.num_const_updates++;
+
} else {
++i;
}
@@ -549,6 +554,7 @@ emit_constbuf_vgpu10(struct svga_context *svga, unsigned shader)
void *src_map = NULL, *dst_map;
unsigned offset;
const struct svga_shader_variant *variant;
+ unsigned alloc_buf_size;
assert(shader == PIPE_SHADER_VERTEX ||
shader == PIPE_SHADER_GEOMETRY ||
@@ -613,7 +619,16 @@ emit_constbuf_vgpu10(struct svga_context *svga, unsigned shader)
*/
new_buf_size = align(new_buf_size, 16);
- u_upload_alloc(svga->const0_upload, 0, new_buf_size,
+ /* Constant buffer size in the upload buffer must be in multiples of 256.
+ * In order to maximize the chance of merging the upload buffer chunks
+ * when svga_buffer_add_range() is called,
+ * the allocated buffer size needs to be in multiples of 256 as well.
+ * Otherwise, since there is a gap between each dirty range of the upload buffer,
+ * each dirty range will end up in its own UPDATE_GB_IMAGE command.
+ */
+ alloc_buf_size = align(new_buf_size, CONST0_UPLOAD_ALIGNMENT);
+
+ u_upload_alloc(svga->const0_upload, 0, alloc_buf_size,
CONST0_UPLOAD_ALIGNMENT, &offset,
&dst_buffer, &dst_map);
if (!dst_map) {
@@ -664,6 +679,8 @@ emit_constbuf_vgpu10(struct svga_context *svga, unsigned shader)
pipe_resource_reference(&dst_buffer, NULL);
+ svga->hud.num_const_buf_updates++;
+
return ret;
}
@@ -732,6 +749,8 @@ emit_consts_vgpu10(struct svga_context *svga, unsigned shader)
size);
if (ret != PIPE_OK)
return ret;
+
+ svga->hud.num_const_buf_updates++;
}
svga->state.hw_draw.enabled_constbufs[shader] = enabled_constbufs;
diff --git a/src/gallium/drivers/svga/svga_streamout.h b/src/gallium/drivers/svga/svga_streamout.h
index da0c445..1daa1ad 100644
--- a/src/gallium/drivers/svga/svga_streamout.h
+++ b/src/gallium/drivers/svga/svga_streamout.h
@@ -47,4 +47,7 @@ void
svga_delete_stream_output(struct svga_context *svga,
struct svga_stream_output *streamout);
+enum pipe_error
+svga_rebind_stream_output_targets(struct svga_context *svga);
+
#endif /* SVGA_STREAMOUT_H */
diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
index ca4009b..204b814 100644
--- a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
+++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
@@ -517,15 +517,15 @@ vs30_output(struct svga_shader_emitter *emit,
static ubyte
svga_tgsi_sampler_type(const struct svga_shader_emitter *emit, int idx)
{
- switch (emit->key.tex[idx].texture_target) {
- case PIPE_TEXTURE_1D:
+ switch (emit->sampler_target[idx]) {
+ case TGSI_TEXTURE_1D:
return SVGA3DSAMP_2D;
- case PIPE_TEXTURE_2D:
- case PIPE_TEXTURE_RECT:
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
return SVGA3DSAMP_2D;
- case PIPE_TEXTURE_3D:
+ case TGSI_TEXTURE_3D:
return SVGA3DSAMP_VOLUME;
- case PIPE_TEXTURE_CUBE:
+ case TGSI_TEXTURE_CUBE:
return SVGA3DSAMP_CUBE;
}
@@ -585,6 +585,14 @@ svga_translate_decl_sm30( struct svga_shader_emitter *emit,
ok = ps30_output( emit, decl->Semantic, idx );
break;
+ case TGSI_FILE_SAMPLER_VIEW:
+ {
+ unsigned unit = decl->Range.First;
+ assert(decl->Range.First == decl->Range.Last);
+ emit->sampler_target[unit] = decl->SamplerView.Resource;
+ }
+ break;
+
default:
/* don't need to declare other vars */
ok = TRUE;
diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h
index 83f0c8b..7a593ba 100644
--- a/src/gallium/drivers/svga/svga_tgsi_emit.h
+++ b/src/gallium/drivers/svga/svga_tgsi_emit.h
@@ -136,6 +136,8 @@ struct svga_shader_emitter
int current_arl;
unsigned pstipple_sampler_unit;
+
+ uint8_t sampler_target[PIPE_MAX_SAMPLERS];
};
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index 489e68f..3188c41 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -3849,7 +3849,7 @@ svga_shader_emit_instructions(struct svga_shader_emitter *emit,
if (new_tokens) {
/* Setup texture state for stipple */
- emit->key.tex[unit].texture_target = PIPE_TEXTURE_2D;
+ emit->sampler_target[unit] = TGSI_TEXTURE_2D;
emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
index 0c5afeb..0d56282 100644
--- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@@ -134,6 +134,8 @@ struct svga_shader_emitter_v10
/* Samplers */
unsigned num_samplers;
+ ubyte sampler_target[PIPE_MAX_SAMPLERS]; /**< TGSI_TEXTURE_x */
+ ubyte sampler_return_type[PIPE_MAX_SAMPLERS]; /**< TGSI_RETURN_TYPE_x */
/* Address regs (really implemented with temps) */
unsigned num_address_regs;
@@ -2312,9 +2314,13 @@ emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
return TRUE;
case TGSI_FILE_SAMPLER_VIEW:
- /* Not used at this time, but maybe in the future.
- * See emit_resource_declarations().
- */
+ {
+ unsigned unit = decl->Range.First;
+ assert(decl->Range.First == decl->Range.Last);
+ emit->sampler_target[unit] = decl->SamplerView.Resource;
+ /* Note: we can ignore YZW return types for now */
+ emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX;
+ }
return TRUE;
default:
@@ -2854,7 +2860,7 @@ emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
/* Texture buffer sizes */
for (i = 0; i < emit->num_samplers; i++) {
- if (emit->key.tex[i].texture_target == PIPE_BUFFER) {
+ if (emit->sampler_target[i] == TGSI_TEXTURE_BUFFER) {
emit->texture_buffer_size_index[i] = total_consts++;
}
}
@@ -2918,30 +2924,44 @@ emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
/**
- * Translate PIPE_TEXTURE_x to VGAPU10_RESOURCE_DIMENSION_x.
+ * Translate TGSI_TEXTURE_x to VGAPU10_RESOURCE_DIMENSION_x.
*/
static unsigned
-pipe_texture_to_resource_dimension(unsigned target, bool msaa)
+tgsi_texture_to_resource_dimension(unsigned target, boolean is_array)
{
switch (target) {
- case PIPE_BUFFER:
+ case TGSI_TEXTURE_BUFFER:
return VGPU10_RESOURCE_DIMENSION_BUFFER;
- case PIPE_TEXTURE_1D:
+ case TGSI_TEXTURE_1D:
return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
- case PIPE_TEXTURE_2D:
- case PIPE_TEXTURE_RECT:
- return msaa ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS
- : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
- case PIPE_TEXTURE_3D:
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
+ case TGSI_TEXTURE_3D:
return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
- case PIPE_TEXTURE_CUBE:
+ case TGSI_TEXTURE_CUBE:
+ return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
+ case TGSI_TEXTURE_SHADOW1D:
+ return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
+ case TGSI_TEXTURE_1D_ARRAY:
+ case TGSI_TEXTURE_SHADOW1D_ARRAY:
+ return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
+ : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
+ case TGSI_TEXTURE_2D_ARRAY:
+ case TGSI_TEXTURE_SHADOW2D_ARRAY:
+ return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY
+ : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
+ case TGSI_TEXTURE_SHADOWCUBE:
return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
- case PIPE_TEXTURE_1D_ARRAY:
- return VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY;
- case PIPE_TEXTURE_2D_ARRAY:
- return msaa ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
- : VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY;
- case PIPE_TEXTURE_CUBE_ARRAY:
+ case TGSI_TEXTURE_2D_MSAA:
+ return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
+ case TGSI_TEXTURE_2D_ARRAY_MSAA:
+ return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
+ : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
+ case TGSI_TEXTURE_CUBE_ARRAY:
return VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY;
default:
assert(!"Unexpected resource type");
@@ -2993,8 +3013,8 @@ emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
opcode0.value = 0;
opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
opcode0.resourceDimension =
- pipe_texture_to_resource_dimension(emit->key.tex[i].texture_target,
- emit->key.tex[i].texture_msaa);
+ tgsi_texture_to_resource_dimension(emit->sampler_target[i],
+ emit->key.tex[i].is_array);
operand0.value = 0;
operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
@@ -3008,10 +3028,10 @@ emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
- assert(emit->key.tex[i].return_type <= TGSI_RETURN_TYPE_FLOAT);
- rt = emit->key.tex[i].return_type + 1;
+ assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT);
+ rt = emit->sampler_return_type[i] + 1;
#else
- switch (emit->key.tex[i].return_type) {
+ switch (emit->sampler_return_type[i]) {
case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break;
case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break;
case TGSI_RETURN_TYPE_SINT: rt = VGPU10_RETURN_TYPE_SINT; break;
@@ -5024,7 +5044,7 @@ end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
unsigned writemask_0 = 0, writemask_1 = 0;
- boolean int_tex = is_integer_type(emit->key.tex[swz->unit].return_type);
+ boolean int_tex = is_integer_type(emit->sampler_return_type[swz->unit]);
/* Swizzle w/out zero/one terms */
struct tgsi_full_src_register src_swizzled =
@@ -5131,7 +5151,7 @@ is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
boolean valid = TRUE;
if (tgsi_is_shadow_target(target) &&
- is_integer_type(emit->key.tex[unit].return_type)) {
+ is_integer_type(emit->sampler_return_type[unit])) {
debug_printf("Invalid SAMPLE_C with an integer texture!\n");
valid = FALSE;
}
@@ -5528,7 +5548,7 @@ emit_txq(struct svga_shader_emitter_v10 *emit,
{
const uint unit = inst->Src[1].Register.Index;
- if (emit->key.tex[unit].texture_target == PIPE_BUFFER) {
+ if (emit->sampler_target[unit] == TGSI_TEXTURE_BUFFER) {
/* RESINFO does not support querying texture buffers, so we instead
* store texture buffer sizes in shader constants, then copy them to
* implement TXQ instead of emitting RESINFO.
@@ -6617,7 +6637,7 @@ transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
emit->fs.pstipple_sampler_unit = unit;
/* Setup texture state for stipple */
- emit->key.tex[unit].texture_target = PIPE_TEXTURE_2D;
+ emit->sampler_target[unit] = TGSI_TEXTURE_2D;
emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h
index 0ad6b5e..7da2c4e 100644
--- a/src/gallium/drivers/svga/svga_winsys.h
+++ b/src/gallium/drivers/svga/svga_winsys.h
@@ -48,6 +48,7 @@ struct svga_winsys_screen;
struct svga_winsys_buffer;
struct pipe_screen;
struct pipe_context;
+struct pipe_debug_callback;
struct pipe_fence_handle;
struct pipe_resource;
struct svga_region;
@@ -286,6 +287,9 @@ struct svga_winsys_context
struct svga_winsys_surface *surface,
struct svga_winsys_gb_shader *shader,
unsigned flags);
+
+ /** To report perf/conformance/etc issues to the state tracker */
+ struct pipe_debug_callback *debug_callback;
};
diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp
index c8cb145..78b8fdf 100644
--- a/src/gallium/drivers/swr/swr_context.cpp
+++ b/src/gallium/drivers/swr/swr_context.cpp
@@ -129,7 +129,7 @@ swr_transfer_map(struct pipe_context *pipe,
swr_fence_submit(swr_context(pipe), screen->flush_fence);
swr_fence_finish(pipe->screen, screen->flush_fence, 0);
- swr_resource_unused(pipe, spr);
+ swr_resource_unused(resource);
}
}
}
@@ -206,8 +206,8 @@ swr_resource_copy(struct pipe_context *pipe,
swr_store_dirty_resource(pipe, dst, SWR_TILE_RESOLVED);
swr_fence_finish(pipe->screen, screen->flush_fence, 0);
- swr_resource_unused(pipe, swr_resource(src));
- swr_resource_unused(pipe, swr_resource(dst));
+ swr_resource_unused(src);
+ swr_resource_unused(dst);
if ((dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER)
|| (dst->target != PIPE_BUFFER && src->target != PIPE_BUFFER)) {
@@ -293,6 +293,7 @@ static void
swr_destroy(struct pipe_context *pipe)
{
struct swr_context *ctx = swr_context(pipe);
+ struct swr_screen *screen = swr_screen(pipe->screen);
if (ctx->blitter)
util_blitter_destroy(ctx->blitter);
@@ -306,6 +307,9 @@ swr_destroy(struct pipe_context *pipe)
swr_destroy_scratch_buffers(ctx);
+ assert(screen);
+ screen->pipe = NULL;
+
FREE(ctx);
}
@@ -324,9 +328,10 @@ swr_render_condition(struct pipe_context *pipe,
}
struct pipe_context *
-swr_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
+swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags)
{
struct swr_context *ctx = CALLOC_STRUCT(swr_context);
+ struct swr_screen *screen = swr_screen(p_screen);
ctx->blendJIT =
new std::unordered_map<BLEND_COMPILE_STATE, PFN_BLEND_JIT_FUNC>;
@@ -347,7 +352,8 @@ swr_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
if (ctx->swrContext == NULL)
goto fail;
- ctx->pipe.screen = screen;
+ screen->pipe = &ctx->pipe;
+ ctx->pipe.screen = p_screen;
ctx->pipe.destroy = swr_destroy;
ctx->pipe.priv = priv;
ctx->pipe.create_surface = swr_create_surface;
diff --git a/src/gallium/drivers/swr/swr_resource.h b/src/gallium/drivers/swr/swr_resource.h
index 2fdc768..59cf028 100644
--- a/src/gallium/drivers/swr/swr_resource.h
+++ b/src/gallium/drivers/swr/swr_resource.h
@@ -54,9 +54,6 @@ struct swr_resource {
unsigned mip_offsets[PIPE_MAX_TEXTURE_LEVELS];
enum swr_resource_status status;
-
- /* pipe_context to which resource is currently bound. */
- struct pipe_context *bound_to_context;
};
@@ -120,24 +117,21 @@ swr_resource_status & operator|=(enum swr_resource_status & a,
}
static INLINE void
-swr_resource_read(struct pipe_context *pipe, struct swr_resource *resource)
+swr_resource_read(struct pipe_resource *resource)
{
- resource->status |= SWR_RESOURCE_READ;
- resource->bound_to_context = pipe;
+ swr_resource(resource)->status |= SWR_RESOURCE_READ;
}
static INLINE void
-swr_resource_write(struct pipe_context *pipe, struct swr_resource *resource)
+swr_resource_write(struct pipe_resource *resource)
{
- resource->status |= SWR_RESOURCE_WRITE;
- resource->bound_to_context = pipe;
+ swr_resource(resource)->status |= SWR_RESOURCE_WRITE;
}
static INLINE void
-swr_resource_unused(struct pipe_context *pipe, struct swr_resource *resource)
+swr_resource_unused(struct pipe_resource *resource)
{
- resource->status = SWR_RESOURCE_UNUSED;
- resource->bound_to_context = nullptr;
+ swr_resource(resource)->status = SWR_RESOURCE_UNUSED;
}
#endif
diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp
index e46df47..f9e52be 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -620,7 +620,7 @@ swr_resource_destroy(struct pipe_screen *p_screen, struct pipe_resource *pt)
{
struct swr_screen *screen = swr_screen(p_screen);
struct swr_resource *spr = swr_resource(pt);
- struct pipe_context *pipe = spr->bound_to_context;
+ struct pipe_context *pipe = screen->pipe;
/* Only wait on fence if the resource is being used */
if (pipe && spr->status) {
@@ -630,7 +630,7 @@ swr_resource_destroy(struct pipe_screen *p_screen, struct pipe_resource *pt)
swr_fence_submit(swr_context(pipe), screen->flush_fence);
swr_fence_finish(p_screen, screen->flush_fence, 0);
- swr_resource_unused(pipe, spr);
+ swr_resource_unused(pt);
}
/*
@@ -661,11 +661,11 @@ swr_flush_frontbuffer(struct pipe_screen *p_screen,
struct swr_screen *screen = swr_screen(p_screen);
struct sw_winsys *winsys = screen->winsys;
struct swr_resource *spr = swr_resource(resource);
- struct pipe_context *pipe = spr->bound_to_context;
+ struct pipe_context *pipe = screen->pipe;
if (pipe) {
swr_fence_finish(p_screen, screen->flush_fence, 0);
- swr_resource_unused(pipe, spr);
+ swr_resource_unused(resource);
SwrEndFrame(swr_context(pipe)->swrContext);
}
diff --git a/src/gallium/drivers/swr/swr_screen.h b/src/gallium/drivers/swr/swr_screen.h
index a96dc44..0c82a2e 100644
--- a/src/gallium/drivers/swr/swr_screen.h
+++ b/src/gallium/drivers/swr/swr_screen.h
@@ -32,6 +32,7 @@ struct sw_winsys;
struct swr_screen {
struct pipe_screen base;
+ struct pipe_context *pipe;
struct pipe_fence_handle *flush_fence;
diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
index 47ee3cb..e7bf361 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -646,24 +646,24 @@ swr_update_resource_status(struct pipe_context *pipe,
if (fb->nr_cbufs)
for (uint32_t i = 0; i < fb->nr_cbufs; ++i)
if (fb->cbufs[i])
- swr_resource_write(pipe, swr_resource(fb->cbufs[i]->texture));
+ swr_resource_write(fb->cbufs[i]->texture);
/* depth/stencil target */
if (fb->zsbuf)
- swr_resource_write(pipe, swr_resource(fb->zsbuf->texture));
+ swr_resource_write(fb->zsbuf->texture);
/* VBO vertex buffers */
for (uint32_t i = 0; i < ctx->num_vertex_buffers; i++) {
struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
if (!vb->user_buffer)
- swr_resource_read(pipe, swr_resource(vb->buffer));
+ swr_resource_read(vb->buffer);
}
/* VBO index buffer */
if (p_draw_info && p_draw_info->indexed) {
struct pipe_index_buffer *ib = &ctx->index_buffer;
if (!ib->user_buffer)
- swr_resource_read(pipe, swr_resource(ib->buffer));
+ swr_resource_read(ib->buffer);
}
/* texture sampler views */
@@ -671,7 +671,7 @@ swr_update_resource_status(struct pipe_context *pipe,
struct pipe_sampler_view *view =
ctx->sampler_views[PIPE_SHADER_FRAGMENT][i];
if (view)
- swr_resource_read(pipe, swr_resource(view->texture));
+ swr_resource_read(view->texture);
}
}
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
index a13e309..49a314c 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
@@ -62,7 +62,7 @@ vc4_nir_get_dst_color(nir_builder *b, int sample)
load->num_components = 1;
load->const_index[0] = VC4_NIR_TLB_COLOR_READ_INPUT + sample;
load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
- nir_ssa_dest_init(&load->instr, &load->dest, 1, NULL);
+ nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
nir_builder_instr_insert(b, &load->instr);
return &load->dest.ssa;
}
@@ -627,7 +627,7 @@ vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
nir_intrinsic_instr_create(b->shader,
nir_intrinsic_load_sample_mask_in);
load->num_components = 1;
- nir_ssa_dest_init(&load->instr, &load->dest, 1, NULL);
+ nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
nir_builder_instr_insert(b, &load->instr);
nir_ssa_def *bitmask = &load->dest.ssa;
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
index d47e3bf..d08ad58 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
@@ -183,7 +183,7 @@ vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b,
* with an offset value of 0.
*/
assert(nir_src_as_const_value(intr->src[0]) &&
- nir_src_as_const_value(intr->src[0])->u[0] == 0);
+ nir_src_as_const_value(intr->src[0])->u32[0] == 0);
/* Generate dword loads for the VPM values (Since these intrinsics may
* be reordered, the actual reads will be generated at the top of the
@@ -197,7 +197,7 @@ vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b,
intr_comp->num_components = 1;
intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
intr_comp->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
- nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL);
+ nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, 32, NULL);
nir_builder_instr_insert(b, &intr_comp->instr);
vpm_reads[i] = &intr_comp->dest.ssa;
@@ -256,7 +256,7 @@ vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b,
* with an offset value of 0.
*/
assert(nir_src_as_const_value(intr->src[0]) &&
- nir_src_as_const_value(intr->src[0])->u[0] == 0);
+ nir_src_as_const_value(intr->src[0])->u32[0] == 0);
/* Generate scalar loads equivalent to the original VEC4. */
nir_ssa_def *dests[4];
@@ -267,7 +267,7 @@ vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b,
intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
intr_comp->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
- nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL);
+ nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, 32, NULL);
nir_builder_instr_insert(b, &intr_comp->instr);
dests[i] = &intr_comp->dest.ssa;
@@ -339,7 +339,7 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
* with an offset value of 0.
*/
assert(nir_src_as_const_value(intr->src[1]) &&
- nir_src_as_const_value(intr->src[1])->u[0] == 0);
+ nir_src_as_const_value(intr->src[1])->u32[0] == 0);
b->cursor = nir_before_instr(&intr->instr);
@@ -378,7 +378,7 @@ vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b,
nir_intrinsic_instr *intr_comp =
nir_intrinsic_instr_create(c->s, intr->intrinsic);
intr_comp->num_components = 1;
- nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL);
+ nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, 32, NULL);
/* Convert the uniform (not user_clip_plane) offset to bytes.
* If it happens to be a constant, constant-folding will clean
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c b/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c
index f6ba5b8..a2d89ef 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c
@@ -123,7 +123,7 @@ vc4_nir_lower_txf_ms_instr(struct vc4_compile *c, nir_builder *b,
txf->src[0].src_type = nir_tex_src_coord;
txf->src[0].src = nir_src_for_ssa(nir_vec2(b, addr, nir_imm_int(b, 0)));
- nir_ssa_dest_init(&txf->instr, &txf->dest, 4, NULL);
+ nir_ssa_dest_init(&txf->instr, &txf->dest, 4, 32, NULL);
nir_builder_instr_insert(b, &txf->instr);
nir_ssa_def_rewrite_uses(&txf_ms->dest.ssa,
nir_src_for_ssa(&txf->dest.ssa));
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index f5826d8..71a1ebbb 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -118,7 +118,7 @@ nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b,
intr->const_index[0] = (VC4_NIR_STATE_UNIFORM_OFFSET + contents) * 4;
intr->num_components = 1;
intr->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
- nir_ssa_dest_init(&intr->instr, &intr->dest, 1, NULL);
+ nir_ssa_dest_init(&intr->instr, &intr->dest, 1, 32, NULL);
nir_builder_instr_insert(b, &intr->instr);
return &intr->dest.ssa;
}
@@ -885,7 +885,9 @@ ntq_emit_comparison(struct vc4_compile *c, struct qreg *dest,
struct qreg src0 = ntq_get_alu_src(c, compare_instr, 0);
struct qreg src1 = ntq_get_alu_src(c, compare_instr, 1);
- if (nir_op_infos[compare_instr->op].input_types[0] == nir_type_float)
+ unsigned unsized_type =
+ nir_alu_type_get_base_type(nir_op_infos[compare_instr->op].input_types[0]);
+ if (unsized_type == nir_type_float)
qir_SF(c, qir_FSUB(c, src0, src1));
else
qir_SF(c, qir_SUB(c, src0, src1));
@@ -1519,7 +1521,7 @@ ntq_emit_load_const(struct vc4_compile *c, nir_load_const_instr *instr)
{
struct qreg *qregs = ntq_init_ssa_def(c, &instr->def);
for (int i = 0; i < instr->def.num_components; i++)
- qregs[i] = qir_uniform_ui(c, instr->value.u[i]);
+ qregs[i] = qir_uniform_ui(c, instr->value.u32[i]);
_mesa_hash_table_insert(c->def_ht, &instr->def, qregs);
}
@@ -1553,7 +1555,7 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
assert(instr->num_components == 1);
const_offset = nir_src_as_const_value(instr->src[0]);
if (const_offset) {
- offset = instr->const_index[0] + const_offset->u[0];
+ offset = instr->const_index[0] + const_offset->u32[0];
assert(offset % 4 == 0);
/* We need dwords */
offset = offset / 4;
@@ -1584,7 +1586,7 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
const_offset = nir_src_as_const_value(instr->src[0]);
assert(const_offset && "vc4 doesn't support indirect inputs");
if (instr->const_index[0] >= VC4_NIR_TLB_COLOR_READ_INPUT) {
- assert(const_offset->u[0] == 0);
+ assert(const_offset->u32[0] == 0);
/* Reads of the per-sample color need to be done in
* order.
*/
@@ -1598,7 +1600,7 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
}
*dest = c->color_reads[sample_index];
} else {
- offset = instr->const_index[0] + const_offset->u[0];
+ offset = instr->const_index[0] + const_offset->u32[0];
*dest = c->inputs[offset];
}
break;
@@ -1606,7 +1608,7 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
case nir_intrinsic_store_output:
const_offset = nir_src_as_const_value(instr->src[1]);
assert(const_offset && "vc4 doesn't support indirect outputs");
- offset = instr->const_index[0] + const_offset->u[0];
+ offset = instr->const_index[0] + const_offset->u32[0];
/* MSAA color outputs are the only case where we have an
* output that's not lowered to being a store of a single 32
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index bdd76ab..8257b4a 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -360,6 +360,14 @@ enum pipe_flush_flags
#define PIPE_BARRIER_MAPPED_BUFFER (1 << 0)
#define PIPE_BARRIER_SHADER_BUFFER (1 << 1)
#define PIPE_BARRIER_QUERY_BUFFER (1 << 2)
+#define PIPE_BARRIER_VERTEX_BUFFER (1 << 3)
+#define PIPE_BARRIER_INDEX_BUFFER (1 << 4)
+#define PIPE_BARRIER_CONSTANT_BUFFER (1 << 5)
+#define PIPE_BARRIER_INDIRECT_BUFFER (1 << 6)
+#define PIPE_BARRIER_TEXTURE (1 << 7)
+#define PIPE_BARRIER_IMAGE (1 << 8)
+#define PIPE_BARRIER_FRAMEBUFFER (1 << 9)
+#define PIPE_BARRIER_STREAMOUT_BUFFER (1 << 10)
/**
* Resource binding flags -- state tracker must specify in advance all
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 7a34841..5cc18a2 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -117,6 +117,12 @@ enum tgsi_file_type {
#define TGSI_CYLINDRICAL_WRAP_Z (1 << 2)
#define TGSI_CYLINDRICAL_WRAP_W (1 << 3)
+#define TGSI_MEMORY_TYPE_GLOBAL 0 /* OpenCL global */
+#define TGSI_MEMORY_TYPE_SHARED 1 /* OpenCL local / GLSL shared */
+#define TGSI_MEMORY_TYPE_PRIVATE 2 /* OpenCL private */
+#define TGSI_MEMORY_TYPE_INPUT 3 /* OpenCL kernel input params */
+#define TGSI_MEMORY_TYPE_COUNT 4
+
struct tgsi_declaration
{
unsigned Type : 4; /**< TGSI_TOKEN_TYPE_DECLARATION */
@@ -130,8 +136,8 @@ struct tgsi_declaration
unsigned Local : 1; /**< optimize as subroutine local variable? */
unsigned Array : 1; /**< extra array info? */
unsigned Atomic : 1; /**< atomic only? for TGSI_FILE_BUFFER */
- unsigned Shared : 1; /**< shared storage for TGSI_FILE_MEMORY */
- unsigned Padding : 4;
+ unsigned MemType : 2; /**< TGSI_MEMORY_TYPE_x for TGSI_FILE_MEMORY */
+ unsigned Padding : 3;
};
struct tgsi_declaration_range
@@ -231,15 +237,6 @@ struct tgsi_declaration_array {
unsigned Padding : 22;
};
-/*
- * Special resources that don't need to be declared. They map to the
- * GLOBAL/LOCAL/PRIVATE/INPUT compute memory spaces.
- */
-#define TGSI_RESOURCE_GLOBAL 0x7fff
-#define TGSI_RESOURCE_LOCAL 0x7ffe
-#define TGSI_RESOURCE_PRIVATE 0x7ffd
-#define TGSI_RESOURCE_INPUT 0x7ffc
-
#define TGSI_IMM_FLOAT32 0
#define TGSI_IMM_UINT32 1
#define TGSI_IMM_INT32 2
@@ -278,7 +275,8 @@ union tgsi_immediate_data
#define TGSI_PROPERTY_NUM_CLIPDIST_ENABLED 15
#define TGSI_PROPERTY_NUM_CULLDIST_ENABLED 16
#define TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL 17
-#define TGSI_PROPERTY_COUNT 18
+#define TGSI_PROPERTY_NEXT_SHADER 18
+#define TGSI_PROPERTY_COUNT 19
struct tgsi_property {
unsigned Type : 4; /**< TGSI_TOKEN_TYPE_PROPERTY */
diff --git a/src/gallium/include/state_tracker/drm_driver.h b/src/gallium/include/state_tracker/drm_driver.h
index 959a762..fefab11 100644
--- a/src/gallium/include/state_tracker/drm_driver.h
+++ b/src/gallium/include/state_tracker/drm_driver.h
@@ -26,6 +26,11 @@ struct winsys_handle
*/
unsigned type;
/**
+ * Input for texture_get_handle, allows to export the offset
+ * of a specific layer of an array texture.
+ */
+ unsigned layer;
+ /**
* Input to texture_from_handle.
* Output for texture_get_handle.
*/
@@ -35,6 +40,11 @@ struct winsys_handle
* Output for texture_get_handle.
*/
unsigned stride;
+ /**
+ * Input to texture_from_handle.
+ * Output for texture_get_handle.
+ */
+ unsigned offset;
};
diff --git a/src/gallium/state_trackers/dri/dri2.c b/src/gallium/state_trackers/dri/dri2.c
index 7f7fbc4..fb0a180 100644
--- a/src/gallium/state_trackers/dri/dri2.c
+++ b/src/gallium/state_trackers/dri/dri2.c
@@ -534,6 +534,7 @@ dri2_allocate_textures(struct dri_context *ctx,
templ.bind = bind;
whandle.handle = buf->name;
whandle.stride = buf->pitch;
+ whandle.offset = 0;
if (screen->can_share_buffer)
whandle.type = DRM_API_HANDLE_TYPE_SHARED;
else
@@ -756,6 +757,7 @@ dri2_create_image_from_winsys(__DRIscreen *_screen,
templ.array_size = 1;
whandle->stride = pitch * util_format_get_blocksize(pf);
+ whandle->offset = 0;
img->texture = screen->base.screen->resource_from_handle(screen->base.screen,
&templ, whandle, PIPE_HANDLE_USAGE_READ_WRITE);
diff --git a/src/gallium/state_trackers/omx/vid_dec.c b/src/gallium/state_trackers/omx/vid_dec.c
index 5584348..108a460 100644
--- a/src/gallium/state_trackers/omx/vid_dec.c
+++ b/src/gallium/state_trackers/omx/vid_dec.c
@@ -140,7 +140,7 @@ static OMX_ERRORTYPE vid_dec_Constructor(OMX_COMPONENTTYPE *comp, OMX_STRING nam
r = omx_base_filter_Constructor(comp, name);
if (r)
- return r;
+ return r;
priv->profile = PIPE_VIDEO_PROFILE_UNKNOWN;
@@ -268,7 +268,7 @@ static OMX_ERRORTYPE vid_dec_SetParameter(OMX_HANDLETYPE handle, OMX_INDEXTYPE i
r = checkHeader(param, sizeof(OMX_PARAM_COMPONENTROLETYPE));
if (r)
return r;
-
+
if (!strcmp((char *)role->cRole, OMX_VID_DEC_MPEG2_ROLE)) {
priv->profile = PIPE_VIDEO_PROFILE_MPEG2_MAIN;
} else if (!strcmp((char *)role->cRole, OMX_VID_DEC_AVC_ROLE)) {
@@ -321,7 +321,7 @@ static OMX_ERRORTYPE vid_dec_GetParameter(OMX_HANDLETYPE handle, OMX_INDEXTYPE i
strcpy((char *)role->cRole, OMX_VID_DEC_MPEG2_ROLE);
else if (priv->profile == PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH)
strcpy((char *)role->cRole, OMX_VID_DEC_AVC_ROLE);
-
+
break;
}
@@ -419,6 +419,7 @@ static OMX_ERRORTYPE vid_dec_DecodeBuffer(omx_base_PortType *port, OMX_BUFFERHEA
priv->in_buffers[i] = buf;
priv->sizes[i] = buf->nFilledLen;
priv->inputs[i] = buf->pBuffer;
+ priv->timestamps[i] = buf->nTimeStamp;
while (priv->num_in_buffers > (!!(buf->nFlags & OMX_BUFFERFLAG_EOS) ? 0 : 1)) {
bool eos = !!(priv->in_buffers[0]->nFlags & OMX_BUFFERFLAG_EOS);
@@ -469,12 +470,13 @@ static OMX_ERRORTYPE vid_dec_DecodeBuffer(omx_base_PortType *port, OMX_BUFFERHEA
priv->in_buffers[0] = priv->in_buffers[1];
priv->sizes[0] = priv->sizes[1] - delta;
priv->inputs[0] = priv->inputs[1] + delta;
+ priv->timestamps[0] = priv->timestamps[1];
}
if (r)
return r;
}
-
+
return OMX_ErrorNone;
}
@@ -513,7 +515,7 @@ static void vid_dec_FillOutput(vid_dec_PrivateType *priv, struct pipe_video_buff
box.width = def->nFrameWidth / 2;
box.height = def->nFrameHeight / 2;
-
+
src = priv->pipe->transfer_map(priv->pipe, views[1]->texture, 0,
PIPE_TRANSFER_READ, &box, &transfer);
util_copy_rect(dst, views[1]->texture->format, def->nStride, 0, 0,
@@ -526,9 +528,13 @@ static void vid_dec_FrameDecoded(OMX_COMPONENTTYPE *comp, OMX_BUFFERHEADERTYPE*
{
vid_dec_PrivateType *priv = comp->pComponentPrivate;
bool eos = !!(input->nFlags & OMX_BUFFERFLAG_EOS);
+ OMX_TICKS timestamp;
- if (!input->pInputPortPrivate)
- input->pInputPortPrivate = priv->Flush(priv);
+ if (!input->pInputPortPrivate) {
+ input->pInputPortPrivate = priv->Flush(priv, &timestamp);
+ if (timestamp != OMX_VID_DEC_TIMESTAMP_INVALID)
+ input->nTimeStamp = timestamp;
+ }
if (input->pInputPortPrivate) {
if (output->pInputPortPrivate) {
@@ -539,6 +545,7 @@ static void vid_dec_FrameDecoded(OMX_COMPONENTTYPE *comp, OMX_BUFFERHEADERTYPE*
vid_dec_FillOutput(priv, input->pInputPortPrivate, output);
}
output->nFilledLen = output->nAllocLen;
+ output->nTimeStamp = input->nTimeStamp;
}
if (eos && input->pInputPortPrivate)
diff --git a/src/gallium/state_trackers/omx/vid_dec.h b/src/gallium/state_trackers/omx/vid_dec.h
index 3b39826..649d745 100644
--- a/src/gallium/state_trackers/omx/vid_dec.h
+++ b/src/gallium/state_trackers/omx/vid_dec.h
@@ -59,6 +59,8 @@
#define OMX_VID_DEC_AVC_NAME "OMX.mesa.video_decoder.avc"
#define OMX_VID_DEC_AVC_ROLE "video_decoder.avc"
+#define OMX_VID_DEC_TIMESTAMP_INVALID ((OMX_TICKS) -1)
+
struct vl_vlc;
DERIVEDCLASS(vid_dec_PrivateType, omx_base_filter_PrivateType)
@@ -69,7 +71,7 @@ DERIVEDCLASS(vid_dec_PrivateType, omx_base_filter_PrivateType)
struct pipe_video_codec *codec; \
void (*Decode)(vid_dec_PrivateType *priv, struct vl_vlc *vlc, unsigned min_bits_left); \
void (*EndFrame)(vid_dec_PrivateType *priv); \
- struct pipe_video_buffer *(*Flush)(vid_dec_PrivateType *priv); \
+ struct pipe_video_buffer *(*Flush)(vid_dec_PrivateType *priv, OMX_TICKS *timestamp); \
struct pipe_video_buffer *target, *shadow; \
union { \
struct { \
@@ -100,6 +102,9 @@ DERIVEDCLASS(vid_dec_PrivateType, omx_base_filter_PrivateType)
OMX_BUFFERHEADERTYPE *in_buffers[2]; \
const void *inputs[2]; \
unsigned sizes[2]; \
+ OMX_TICKS timestamps[2]; \
+ OMX_TICKS timestamp; \
+ bool first_buf_in_frame; \
bool frame_finished; \
bool frame_started; \
unsigned bytes_left; \
diff --git a/src/gallium/state_trackers/omx/vid_dec_h264.c b/src/gallium/state_trackers/omx/vid_dec_h264.c
index b453682..9aab6d1 100644
--- a/src/gallium/state_trackers/omx/vid_dec_h264.c
+++ b/src/gallium/state_trackers/omx/vid_dec_h264.c
@@ -45,6 +45,7 @@
struct dpb_list {
struct list_head list;
struct pipe_video_buffer *buffer;
+ OMX_TICKS timestamp;
unsigned poc;
};
@@ -82,7 +83,7 @@ static const uint8_t Default_8x8_Inter[64] = {
static void vid_dec_h264_Decode(vid_dec_PrivateType *priv, struct vl_vlc *vlc, unsigned min_bits_left);
static void vid_dec_h264_EndFrame(vid_dec_PrivateType *priv);
-static struct pipe_video_buffer *vid_dec_h264_Flush(vid_dec_PrivateType *priv);
+static struct pipe_video_buffer *vid_dec_h264_Flush(vid_dec_PrivateType *priv, OMX_TICKS *timestamp);
void vid_dec_h264_Init(vid_dec_PrivateType *priv)
{
@@ -91,9 +92,10 @@ void vid_dec_h264_Init(vid_dec_PrivateType *priv)
priv->Decode = vid_dec_h264_Decode;
priv->EndFrame = vid_dec_h264_EndFrame;
priv->Flush = vid_dec_h264_Flush;
-
+
LIST_INITHEAD(&priv->codec_data.h264.dpb_list);
priv->picture.h264.field_order_cnt[0] = priv->picture.h264.field_order_cnt[1] = INT_MAX;
+ priv->first_buf_in_frame = true;
}
static void vid_dec_h264_BeginFrame(vid_dec_PrivateType *priv)
@@ -104,6 +106,9 @@ static void vid_dec_h264_BeginFrame(vid_dec_PrivateType *priv)
return;
vid_dec_NeedTarget(priv);
+ if (priv->first_buf_in_frame)
+ priv->timestamp = priv->timestamps[0];
+ priv->first_buf_in_frame = false;
priv->picture.h264.num_ref_frames = priv->picture.h264.pps->sps->max_num_ref_frames;
@@ -127,7 +132,8 @@ static void vid_dec_h264_BeginFrame(vid_dec_PrivateType *priv)
priv->frame_started = true;
}
-static struct pipe_video_buffer *vid_dec_h264_Flush(vid_dec_PrivateType *priv)
+static struct pipe_video_buffer *vid_dec_h264_Flush(vid_dec_PrivateType *priv,
+ OMX_TICKS *timestamp)
{
struct dpb_list *entry, *result = NULL;
struct pipe_video_buffer *buf;
@@ -146,6 +152,8 @@ static struct pipe_video_buffer *vid_dec_h264_Flush(vid_dec_PrivateType *priv)
return NULL;
buf = result->buffer;
+ if (timestamp)
+ *timestamp = result->timestamp;
--priv->codec_data.h264.dpb_num;
LIST_DEL(&result->list);
@@ -159,6 +167,7 @@ static void vid_dec_h264_EndFrame(vid_dec_PrivateType *priv)
struct dpb_list *entry;
struct pipe_video_buffer *tmp;
bool top_field_first;
+ OMX_TICKS timestamp;
if (!priv->frame_started)
return;
@@ -181,7 +190,9 @@ static void vid_dec_h264_EndFrame(vid_dec_PrivateType *priv)
if (!entry)
return;
+ priv->first_buf_in_frame = true;
entry->buffer = priv->target;
+ entry->timestamp = priv->timestamp;
entry->poc = MIN2(priv->picture.h264.field_order_cnt[0], priv->picture.h264.field_order_cnt[1]);
LIST_ADDTAIL(&entry->list, &priv->codec_data.h264.dpb_list);
++priv->codec_data.h264.dpb_num;
@@ -192,7 +203,8 @@ static void vid_dec_h264_EndFrame(vid_dec_PrivateType *priv)
return;
tmp = priv->in_buffers[0]->pInputPortPrivate;
- priv->in_buffers[0]->pInputPortPrivate = vid_dec_h264_Flush(priv);
+ priv->in_buffers[0]->pInputPortPrivate = vid_dec_h264_Flush(priv, &timestamp);
+ priv->in_buffers[0]->nTimeStamp = timestamp;
priv->target = tmp;
priv->frame_finished = priv->in_buffers[0]->pInputPortPrivate != NULL;
}
@@ -829,7 +841,7 @@ static void slice_header(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp,
priv->picture.h264.field_order_cnt[0] = expectedPicOrderCnt + priv->codec_data.h264.delta_pic_order_cnt[0];
priv->picture.h264.field_order_cnt[1] = priv->picture.h264.field_order_cnt[0] +
sps->offset_for_top_to_bottom_field + priv->codec_data.h264.delta_pic_order_cnt[1];
-
+
} else if (!priv->picture.h264.bottom_field_flag)
priv->picture.h264.field_order_cnt[0] = expectedPicOrderCnt + priv->codec_data.h264.delta_pic_order_cnt[0];
else
@@ -859,7 +871,7 @@ static void slice_header(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp,
if (!priv->picture.h264.field_pic_flag) {
priv->picture.h264.field_order_cnt[0] = tempPicOrderCnt;
priv->picture.h264.field_order_cnt[1] = tempPicOrderCnt;
-
+
} else if (!priv->picture.h264.bottom_field_flag)
priv->picture.h264.field_order_cnt[0] = tempPicOrderCnt;
else
@@ -876,7 +888,7 @@ static void slice_header(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp,
priv->picture.h264.num_ref_idx_l0_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
priv->picture.h264.num_ref_idx_l1_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
-
+
if (slice_type == PIPE_H264_SLICE_TYPE_P ||
slice_type == PIPE_H264_SLICE_TYPE_SP ||
slice_type == PIPE_H264_SLICE_TYPE_B) {
diff --git a/src/gallium/state_trackers/omx/vid_dec_mpeg12.c b/src/gallium/state_trackers/omx/vid_dec_mpeg12.c
index bef83ec..7b2df8f4 100644
--- a/src/gallium/state_trackers/omx/vid_dec_mpeg12.c
+++ b/src/gallium/state_trackers/omx/vid_dec_mpeg12.c
@@ -61,7 +61,7 @@ static uint8_t default_non_intra_matrix[64] = {
static void vid_dec_mpeg12_Decode(vid_dec_PrivateType *priv, struct vl_vlc *vlc, unsigned min_bits_left);
static void vid_dec_mpeg12_EndFrame(vid_dec_PrivateType *priv);
-static struct pipe_video_buffer *vid_dec_mpeg12_Flush(vid_dec_PrivateType *priv);
+static struct pipe_video_buffer *vid_dec_mpeg12_Flush(vid_dec_PrivateType *priv, OMX_TICKS *timestamp);
void vid_dec_mpeg12_Init(vid_dec_PrivateType *priv)
{
@@ -131,10 +131,12 @@ static void vid_dec_mpeg12_EndFrame(vid_dec_PrivateType *priv)
priv->in_buffers[0]->pInputPortPrivate = done;
}
-static struct pipe_video_buffer *vid_dec_mpeg12_Flush(vid_dec_PrivateType *priv)
+static struct pipe_video_buffer *vid_dec_mpeg12_Flush(vid_dec_PrivateType *priv, OMX_TICKS *timestamp)
{
struct pipe_video_buffer *result = priv->picture.mpeg12.ref[1];
priv->picture.mpeg12.ref[1] = NULL;
+ if (timestamp)
+ *timestamp = OMX_VID_DEC_TIMESTAMP_INVALID;
return result;
}
diff --git a/src/gallium/state_trackers/omx/vid_enc.c b/src/gallium/state_trackers/omx/vid_enc.c
index df22a97..4505fe1 100644
--- a/src/gallium/state_trackers/omx/vid_enc.c
+++ b/src/gallium/state_trackers/omx/vid_enc.c
@@ -179,7 +179,7 @@ static OMX_ERRORTYPE vid_enc_Constructor(OMX_COMPONENTTYPE *comp, OMX_STRING nam
if (!screen->get_video_param(screen, PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH,
PIPE_VIDEO_ENTRYPOINT_ENCODE, PIPE_VIDEO_CAP_SUPPORTED))
return OMX_ErrorBadParameter;
-
+
priv->stacked_frames_num = screen->get_video_param(screen,
PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH,
PIPE_VIDEO_ENTRYPOINT_ENCODE,
@@ -242,7 +242,7 @@ static OMX_ERRORTYPE vid_enc_Constructor(OMX_COMPONENTTYPE *comp, OMX_STRING nam
port->Port_AllocateBuffer = vid_enc_AllocateOutBuffer;
port->Port_FreeBuffer = vid_enc_FreeOutBuffer;
-
+
priv->bitrate.eControlRate = OMX_Video_ControlRateDisable;
priv->bitrate.nTargetBitrate = 0;
@@ -253,7 +253,7 @@ static OMX_ERRORTYPE vid_enc_Constructor(OMX_COMPONENTTYPE *comp, OMX_STRING nam
priv->profile_level.eProfile = OMX_VIDEO_AVCProfileBaseline;
priv->profile_level.eLevel = OMX_VIDEO_AVCLevel42;
- priv->force_pic_type.IntraRefreshVOP = OMX_FALSE;
+ priv->force_pic_type.IntraRefreshVOP = OMX_FALSE;
priv->frame_num = 0;
priv->pic_order_cnt = 0;
priv->restricted_b_frames = debug_get_bool_option("OMX_USE_RESTRICTED_B_FRAMES", FALSE);
@@ -380,7 +380,7 @@ static OMX_ERRORTYPE vid_enc_SetParameter(OMX_HANDLETYPE handle, OMX_INDEXTYPE i
port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_OUTPUTPORT_INDEX];
port->sPortParam.nBufferSize = framesize * 512 / (16*16);
-
+
priv->frame_rate = def->format.video.xFramerate;
priv->callbacks->EventHandler(comp, priv->callbackData, OMX_EventPortSettingsChanged,
@@ -532,10 +532,10 @@ static OMX_ERRORTYPE vid_enc_SetConfig(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx,
vid_enc_PrivateType *priv = comp->pComponentPrivate;
OMX_ERRORTYPE r;
int i;
-
+
if (!config)
return OMX_ErrorBadParameter;
-
+
switch(idx) {
case OMX_IndexConfigVideoIntraVOPRefresh: {
OMX_CONFIG_INTRAREFRESHVOPTYPE *type = config;
@@ -543,9 +543,9 @@ static OMX_ERRORTYPE vid_enc_SetConfig(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx,
r = checkHeader(config, sizeof(OMX_CONFIG_INTRAREFRESHVOPTYPE));
if (r)
return r;
-
+
priv->force_pic_type = *type;
-
+
break;
}
case OMX_IndexConfigCommonScale: {
@@ -568,11 +568,11 @@ static OMX_ERRORTYPE vid_enc_SetConfig(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx,
priv->scale = *scale;
if (priv->scale.xWidth != 0xffffffff && priv->scale.xHeight != 0xffffffff) {
struct pipe_video_buffer templat = {};
-
+
templat.buffer_format = PIPE_FORMAT_NV12;
templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
- templat.width = priv->scale.xWidth;
- templat.height = priv->scale.xHeight;
+ templat.width = priv->scale.xWidth;
+ templat.height = priv->scale.xHeight;
templat.interlaced = false;
for (i = 0; i < OMX_VID_ENC_NUM_SCALING_BUFFERS; ++i) {
priv->scale_buffer[i] = priv->s_pipe->create_video_buffer(priv->s_pipe, &templat);
@@ -615,7 +615,7 @@ static OMX_ERRORTYPE vid_enc_GetConfig(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx,
default:
return omx_base_component_GetConfig(handle, idx, config);
}
-
+
return OMX_ErrorNone;
}
@@ -1010,10 +1010,10 @@ static void enc_ControlPicture(omx_base_PortType *port, struct pipe_h264_enc_pic
switch (priv->bitrate.eControlRate) {
case OMX_Video_ControlRateVariable:
rate_ctrl->rate_ctrl_method = PIPE_H264_ENC_RATE_CONTROL_METHOD_VARIABLE;
- break;
+ break;
case OMX_Video_ControlRateConstant:
rate_ctrl->rate_ctrl_method = PIPE_H264_ENC_RATE_CONTROL_METHOD_CONSTANT;
- break;
+ break;
case OMX_Video_ControlRateVariableSkipFrames:
rate_ctrl->rate_ctrl_method = PIPE_H264_ENC_RATE_CONTROL_METHOD_VARIABLE_SKIP;
break;
@@ -1023,8 +1023,8 @@ static void enc_ControlPicture(omx_base_PortType *port, struct pipe_h264_enc_pic
default:
rate_ctrl->rate_ctrl_method = PIPE_H264_ENC_RATE_CONTROL_METHOD_DISABLE;
break;
- }
-
+ }
+
rate_ctrl->frame_rate_den = OMX_VID_ENC_CONTROL_FRAME_RATE_DEN_DEFAULT;
rate_ctrl->frame_rate_num = ((priv->frame_rate) >> 16) * rate_ctrl->frame_rate_den;
@@ -1035,7 +1035,7 @@ static void enc_ControlPicture(omx_base_PortType *port, struct pipe_h264_enc_pic
rate_ctrl->target_bitrate = priv->bitrate.nTargetBitrate;
else
rate_ctrl->target_bitrate = OMX_VID_ENC_BITRATE_MAX;
- rate_ctrl->peak_bitrate = rate_ctrl->target_bitrate;
+ rate_ctrl->peak_bitrate = rate_ctrl->target_bitrate;
if (rate_ctrl->target_bitrate < OMX_VID_ENC_BITRATE_MEDIAN)
rate_ctrl->vbv_buffer_size = MIN2((rate_ctrl->target_bitrate * 2.75), OMX_VID_ENC_BITRATE_MEDIAN);
else
@@ -1051,7 +1051,7 @@ static void enc_ControlPicture(omx_base_PortType *port, struct pipe_h264_enc_pic
rate_ctrl->peak_bits_picture_integer = rate_ctrl->target_bits_picture;
rate_ctrl->peak_bits_picture_fraction = 0;
}
-
+
picture->quant_i_frames = priv->quant.nQpI;
picture->quant_p_frames = priv->quant.nQpP;
picture->quant_b_frames = priv->quant.nQpB;
@@ -1069,7 +1069,7 @@ static void enc_HandleTask(omx_base_PortType *port, struct encode_task *task,
unsigned size = priv->ports[OMX_BASE_FILTER_OUTPUTPORT_INDEX]->sPortParam.nBufferSize;
struct pipe_video_buffer *vbuf = task->buf;
struct pipe_h264_enc_picture_desc picture = {};
-
+
/* -------------- scale input image --------- */
enc_ScaleInput(port, &vbuf, &size);
priv->s_pipe->flush(priv->s_pipe, NULL, 0);
@@ -1160,7 +1160,7 @@ static OMX_ERRORTYPE vid_enc_EncodeFrame(omx_base_PortType *port, OMX_BUFFERHEAD
priv->force_pic_type.IntraRefreshVOP) {
enc_ClearBframes(port, inp);
picture_type = PIPE_H264_ENC_PICTURE_TYPE_IDR;
- priv->force_pic_type.IntraRefreshVOP = OMX_FALSE;
+ priv->force_pic_type.IntraRefreshVOP = OMX_FALSE;
priv->frame_num = 0;
} else if (priv->codec->profile == PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE ||
!(priv->pic_order_cnt % OMX_VID_ENC_P_PERIOD_DEFAULT) ||
@@ -1169,7 +1169,7 @@ static OMX_ERRORTYPE vid_enc_EncodeFrame(omx_base_PortType *port, OMX_BUFFERHEAD
} else {
picture_type = PIPE_H264_ENC_PICTURE_TYPE_B;
}
-
+
task->pic_order_cnt = priv->pic_order_cnt++;
if (picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
@@ -1245,7 +1245,7 @@ static void vid_enc_BufferEncoded(OMX_COMPONENTTYPE *comp, OMX_BUFFERHEADERTYPE*
output->pBuffer = priv->t_pipe->transfer_map(priv->t_pipe, outp->bitstream, 0,
PIPE_TRANSFER_READ_WRITE,
&box, &outp->transfer);
-
+
/* ------------- get size of result ----------------- */
priv->codec->get_feedback(priv->codec, task->feedback, &size);
diff --git a/src/gallium/tests/graw/quad-tex.c b/src/gallium/tests/graw/quad-tex.c
index 5f90166..8a9d1b8 100644
--- a/src/gallium/tests/graw/quad-tex.c
+++ b/src/gallium/tests/graw/quad-tex.c
@@ -92,6 +92,7 @@ static void set_fragment_shader( void )
"DCL OUT[0], COLOR\n"
"DCL TEMP[0]\n"
"DCL SAMP[0]\n"
+ "DCL SVIEW[0], 2D, FLOAT\n"
" 0: TXP TEMP[0], IN[0], SAMP[0], 2D\n"
" 1: MOV OUT[0], TEMP[0]\n"
" 2: END\n";
diff --git a/src/gallium/tests/graw/tex-srgb.c b/src/gallium/tests/graw/tex-srgb.c
index af989d7..3b43bcb 100644
--- a/src/gallium/tests/graw/tex-srgb.c
+++ b/src/gallium/tests/graw/tex-srgb.c
@@ -108,6 +108,7 @@ static void set_fragment_shader( void )
"DCL OUT[0], COLOR\n"
"DCL TEMP[0]\n"
"DCL SAMP[0]\n"
+ "DCL SVIEW[0], 2D, FLOAT\n"
" 0: TXP TEMP[0], IN[0], SAMP[0], 2D\n"
" 1: MOV OUT[0], TEMP[0]\n"
" 2: END\n";
diff --git a/src/gallium/tests/graw/tex-swizzle.c b/src/gallium/tests/graw/tex-swizzle.c
index e45b848..8b472c9 100644
--- a/src/gallium/tests/graw/tex-swizzle.c
+++ b/src/gallium/tests/graw/tex-swizzle.c
@@ -89,6 +89,7 @@ static void set_fragment_shader(void)
"DCL IN[0], GENERIC[0], PERSPECTIVE\n"
"DCL OUT[0], COLOR\n"
"DCL SAMP[0]\n"
+ "DCL SVIEW[0], 2D, FLOAT\n"
" 0: TXP OUT[0], IN[0], SAMP[0], 2D\n"
" 2: END\n";
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index b670f26..c79bed4 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -519,7 +519,8 @@ amdgpu_bo_create(struct radeon_winsys *rws,
static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
struct winsys_handle *whandle,
- unsigned *stride)
+ unsigned *stride,
+ unsigned *offset)
{
struct amdgpu_winsys *ws = amdgpu_winsys(rws);
struct amdgpu_winsys_bo *bo;
@@ -587,6 +588,8 @@ static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
if (stride)
*stride = whandle->stride;
+ if (offset)
+ *offset = whandle->offset;
if (bo->initial_domain & RADEON_DOMAIN_VRAM)
ws->allocated_vram += align(bo->base.size, ws->gart_page_size);
@@ -609,7 +612,8 @@ error:
}
static boolean amdgpu_bo_get_handle(struct pb_buffer *buffer,
- unsigned stride,
+ unsigned stride, unsigned offset,
+ unsigned slice_size,
struct winsys_handle *whandle)
{
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buffer);
@@ -637,6 +641,8 @@ static boolean amdgpu_bo_get_handle(struct pb_buffer *buffer,
return FALSE;
whandle->stride = stride;
+ whandle->offset = offset;
+ whandle->offset += slice_size * whandle->layer;
bo->is_shared = true;
return TRUE;
}
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 83da740..a9fc55f 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -335,8 +335,7 @@ amdgpu_cs_create(struct radeon_winsys_ctx *rwctx,
enum ring_type ring_type,
void (*flush)(void *ctx, unsigned flags,
struct pipe_fence_handle **fence),
- void *flush_ctx,
- struct pb_buffer *trace_buf)
+ void *flush_ctx)
{
struct amdgpu_ctx *ctx = (struct amdgpu_ctx*)rwctx;
struct amdgpu_cs *cs;
@@ -609,8 +608,7 @@ DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", FALSE)
static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
unsigned flags,
- struct pipe_fence_handle **fence,
- uint32_t cs_trace_id)
+ struct pipe_fence_handle **fence)
{
struct amdgpu_cs *cs = amdgpu_cs(rcs);
struct amdgpu_winsys *ws = cs->ctx->ws;
diff --git a/src/gallium/winsys/radeon/drm/Makefile.am b/src/gallium/winsys/radeon/drm/Makefile.am
index 0320aca..b413b0b 100644
--- a/src/gallium/winsys/radeon/drm/Makefile.am
+++ b/src/gallium/winsys/radeon/drm/Makefile.am
@@ -8,5 +8,3 @@ AM_CFLAGS = \
noinst_LTLIBRARIES = libradeonwinsys.la
libradeonwinsys_la_SOURCES = $(C_SOURCES)
-
-EXTRA_DIST = $(TOOLS_HDR)
diff --git a/src/gallium/winsys/radeon/drm/Makefile.sources b/src/gallium/winsys/radeon/drm/Makefile.sources
index a00c84d..2762c91 100644
--- a/src/gallium/winsys/radeon/drm/Makefile.sources
+++ b/src/gallium/winsys/radeon/drm/Makefile.sources
@@ -2,12 +2,8 @@ C_SOURCES := \
radeon_drm_bo.c \
radeon_drm_bo.h \
radeon_drm_cs.c \
- radeon_drm_cs_dump.c \
radeon_drm_cs.h \
radeon_drm_public.h \
radeon_drm_surface.c \
radeon_drm_winsys.c \
radeon_drm_winsys.h
-
-TOOLS_HDR := \
- radeon_ctx.h
diff --git a/src/gallium/winsys/radeon/drm/radeon_ctx.h b/src/gallium/winsys/radeon/drm/radeon_ctx.h
deleted file mode 100644
index 5618b3a..0000000
--- a/src/gallium/winsys/radeon/drm/radeon_ctx.h
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Copyright 2011 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jérôme Glisse
- */
-#ifndef RADEON_CTX_H
-#define RADEON_CTX_H
-
-#define _FILE_OFFSET_BITS 64
-#include <sys/mman.h>
-
-#include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include "xf86drm.h"
-#include "radeon_drm.h"
-
-struct ctx {
- int fd;
-};
-
-struct bo {
- uint32_t handle;
- uint32_t alignment;
- uint64_t size;
- uint64_t va;
- void *ptr;
-};
-
-static void ctx_init(struct ctx *ctx)
-{
- ctx->fd = drmOpen("radeon", NULL);
- if (ctx->fd < 0) {
- fprintf(stderr, "failed to open radeon drm device file\n");
- exit(-1);
- }
-}
-
-static void bo_wait(struct ctx *ctx, struct bo *bo)
-{
- struct drm_radeon_gem_wait_idle args;
- void *ptr;
- int r;
-
- /* Zero out args to make valgrind happy */
- memset(&args, 0, sizeof(args));
- args.handle = bo->handle;
- do {
- r = drmCommandWrite(ctx->fd, DRM_RADEON_GEM_WAIT_IDLE, &args, sizeof(args));
- } while (r == -EBUSY);
-}
-
-
-static void ctx_cs(struct ctx *ctx, uint32_t *cs, uint32_t cs_flags[2], unsigned ndw,
- struct bo **bo, uint32_t *bo_relocs, unsigned nbo)
-{
- struct drm_radeon_cs args;
- struct drm_radeon_cs_chunk chunks[3];
- uint64_t chunk_array[3];
- unsigned i;
- int r;
-
- /* update handle */
- for (i = 0; i < nbo; i++) {
- bo_relocs[i*4+0] = bo[i]->handle;
- }
-
- args.num_chunks = 2;
- if (cs_flags[0] || cs_flags[1]) {
- /* enable RADEON_CHUNK_ID_FLAGS */
- args.num_chunks = 3;
- }
- args.chunks = (uint64_t)(uintptr_t)chunk_array;
- chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
- chunks[0].length_dw = ndw;
- chunks[0].chunk_data = (uintptr_t)cs;
- chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
- chunks[1].length_dw = nbo * 4;
- chunks[1].chunk_data = (uintptr_t)bo_relocs;
- chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
- chunks[2].length_dw = 2;
- chunks[2].chunk_data = (uintptr_t)cs_flags;
- chunk_array[0] = (uintptr_t)&chunks[0];
- chunk_array[1] = (uintptr_t)&chunks[1];
- chunk_array[2] = (uintptr_t)&chunks[2];
-
- fprintf(stderr, "emiting cs %ddw with %d bo\n", ndw, nbo);
- r = drmCommandWriteRead(ctx->fd, DRM_RADEON_CS, &args, sizeof(args));
- if (r) {
- fprintf(stderr, "cs submission failed with %d\n", r);
- return;
- }
-}
-
-static void bo_map(struct ctx *ctx, struct bo *bo)
-{
- struct drm_radeon_gem_mmap args;
- void *ptr;
- int r;
-
- /* Zero out args to make valgrind happy */
- memset(&args, 0, sizeof(args));
- args.handle = bo->handle;
- args.offset = 0;
- args.size = (uint64_t)bo->size;
- r = drmCommandWriteRead(ctx->fd, DRM_RADEON_GEM_MMAP, &args, sizeof(args));
- if (r) {
- fprintf(stderr, "error mapping %p 0x%08X (error = %d)\n", bo, bo->handle, r);
- exit(-1);
- }
- ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, ctx->fd, args.addr_ptr);
- if (ptr == MAP_FAILED) {
- fprintf(stderr, "%s failed to map bo\n", __func__);
- exit(-1);
- }
- bo->ptr = ptr;
-}
-
-static void bo_va(struct ctx *ctx, struct bo *bo)
-{
- struct drm_radeon_gem_va args;
- int r;
-
- args.handle = bo->handle;
- args.vm_id = 0;
- args.operation = RADEON_VA_MAP;
- args.flags = RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_WRITEABLE | RADEON_VM_PAGE_SNOOPED;
- args.offset = bo->va;
- r = drmCommandWriteRead(ctx->fd, DRM_RADEON_GEM_VA, &args, sizeof(args));
- if (r && args.operation == RADEON_VA_RESULT_ERROR) {
- fprintf(stderr, "radeon: Failed to allocate virtual address for buffer:\n");
- fprintf(stderr, "radeon: size : %d bytes\n", bo->size);
- fprintf(stderr, "radeon: alignment : %d bytes\n", bo->alignment);
- fprintf(stderr, "radeon: va : 0x%016llx\n", (unsigned long long)bo->va);
- exit(-1);
- }
-}
-
-static struct bo *bo_new(struct ctx *ctx, unsigned ndw, uint32_t *data, uint64_t va, uint32_t alignment)
-{
- struct drm_radeon_gem_create args;
- struct bo *bo;
- int r;
-
- bo = calloc(1, sizeof(*bo));
- if (bo == NULL) {
- fprintf(stderr, "failed to malloc bo struct\n");
- exit(-1);
- }
- bo->size = ndw * 4ULL;
- bo->va = va;
- bo->alignment = alignment;
-
- args.size = bo->size;
- args.alignment = bo->alignment;
- args.initial_domain = RADEON_GEM_DOMAIN_GTT;
- args.flags = 0;
- args.handle = 0;
-
- r = drmCommandWriteRead(ctx->fd, DRM_RADEON_GEM_CREATE, &args, sizeof(args));
- bo->handle = args.handle;
- if (r) {
- fprintf(stderr, "Failed to allocate :\n");
- fprintf(stderr, " size : %d bytes\n", bo->size);
- fprintf(stderr, " alignment : %d bytes\n", bo->alignment);
- free(bo);
- exit(-1);
- }
-
- if (data) {
- bo_map(ctx, bo);
- memcpy(bo->ptr, data, bo->size);
- }
-
- if (va) {
- bo_va(ctx, bo);
- }
-
- return bo;
-}
-
-
-#endif
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 978df52..08856df 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -851,7 +851,8 @@ static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
struct winsys_handle *whandle,
- unsigned *stride)
+ unsigned *stride,
+ unsigned *offset)
{
struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
struct radeon_bo *bo;
@@ -941,6 +942,8 @@ done:
if (stride)
*stride = whandle->stride;
+ if (offset)
+ *offset = whandle->offset;
if (ws->info.has_virtual_memory && !bo->va) {
struct drm_radeon_gem_va va;
@@ -991,7 +994,8 @@ fail:
}
static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer,
- unsigned stride,
+ unsigned stride, unsigned offset,
+ unsigned slice_size,
struct winsys_handle *whandle)
{
struct drm_gem_flink flink;
@@ -1025,6 +1029,9 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer,
}
whandle->stride = stride;
+ whandle->offset = offset;
+ whandle->offset += slice_size * whandle->layer;
+
return TRUE;
}
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 155a130..b50e19c 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -168,8 +168,7 @@ radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
enum ring_type ring_type,
void (*flush)(void *ctx, unsigned flags,
struct pipe_fence_handle **fence),
- void *flush_ctx,
- struct pb_buffer *trace_buf)
+ void *flush_ctx)
{
struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)ctx;
struct radeon_drm_cs *cs;
@@ -183,7 +182,6 @@ radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
cs->ws = ws;
cs->flush_cs = flush;
cs->flush_data = flush_ctx;
- cs->trace_buf = (struct radeon_bo*)trace_buf;
if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
FREE(cs);
@@ -439,10 +437,6 @@ void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs
}
}
- if (cs->trace_buf) {
- radeon_dump_cs_on_lockup(cs, csc);
- }
-
for (i = 0; i < csc->crelocs; i++)
p_atomic_dec(&csc->relocs_bo[i].bo->num_active_ioctls);
@@ -467,8 +461,7 @@ DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
unsigned flags,
- struct pipe_fence_handle **fence,
- uint32_t cs_trace_id)
+ struct pipe_fence_handle **fence)
{
struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
struct radeon_cs_context *tmp;
@@ -520,8 +513,6 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
cs->csc = cs->cst;
cs->cst = tmp;
- cs->cst->cs_trace_id = cs_trace_id;
-
/* If the CS is not empty or overflowed, emit it in a separate thread. */
if (cs->base.cdw && cs->base.cdw <= cs->base.max_dw && !debug_get_option_noop()) {
unsigned i, crelocs;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
index 81f66f5..4ffa91a 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
@@ -43,8 +43,6 @@ struct radeon_cs_context {
uint64_t chunk_array[3];
uint32_t flags[2];
- uint32_t cs_trace_id;
-
/* Buffers. */
unsigned nrelocs;
unsigned crelocs;
@@ -80,7 +78,6 @@ struct radeon_drm_cs {
void *flush_data;
pipe_semaphore flush_completed;
- struct radeon_bo *trace_buf;
};
int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo);
@@ -126,6 +123,4 @@ void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs);
void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws);
void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc);
-void radeon_dump_cs_on_lockup(struct radeon_drm_cs *cs, struct radeon_cs_context *csc);
-
#endif
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs_dump.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs_dump.c
deleted file mode 100644
index 9958595..0000000
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs_dump.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright © 2013 Jérôme Glisse
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
- * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- */
-/*
- * Authors:
- * Jérôme Glisse <jglisse@redhat.com>
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <inttypes.h>
-#include <xf86drm.h>
-#include "radeon_drm_cs.h"
-#include "radeon_drm_bo.h"
-
-#define RADEON_CS_DUMP_AFTER_MS_TIMEOUT 500
-
-void radeon_dump_cs_on_lockup(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
-{
- struct drm_radeon_gem_busy args;
- FILE *dump;
- unsigned i, lockup;
- uint32_t *ptr;
- char fname[32];
-
- /* only dump the first cs to cause a lockup */
- if (!csc->crelocs) {
- /* can not determine if there was a lockup if no bo were use by
- * the cs and most likely in such case no lockup occurs
- */
- return;
- }
-
- memset(&args, 0, sizeof(args));
- args.handle = csc->relocs_bo[0].bo->handle;
- for (i = 0; i < RADEON_CS_DUMP_AFTER_MS_TIMEOUT; i++) {
- usleep(1);
- lockup = drmCommandWriteRead(csc->fd, DRM_RADEON_GEM_BUSY, &args, sizeof(args));
- if (!lockup) {
- break;
- }
- }
- if (!lockup || i < RADEON_CS_DUMP_AFTER_MS_TIMEOUT) {
- return;
- }
-
- ptr = radeon_bo_do_map(cs->trace_buf);
- fprintf(stderr, "timeout on cs lockup likely happen at cs 0x%08x dw 0x%08x\n", ptr[1], ptr[0]);
-
- if (csc->cs_trace_id != ptr[1]) {
- return;
- }
-
- /* ok we are most likely facing a lockup write the standalone replay file */
- snprintf(fname, sizeof(fname), "rlockup_0x%08x.c", csc->cs_trace_id);
- dump = fopen(fname, "w");
- if (dump == NULL) {
- return;
- }
- fprintf(dump, "/* To build this file you will need to copy radeon_ctx.h\n");
- fprintf(dump, " * in same directory. You can find radeon_ctx.h in mesa tree :\n");
- fprintf(dump, " * mesa/src/gallium/winsys/radeon/drm/radeon_ctx.h\n");
- fprintf(dump, " * Build with :\n");
- fprintf(dump, " * gcc -O0 -g `pkg-config --cflags --libs libdrm` %s -o rlockup_0x%08x \n", fname, csc->cs_trace_id);
- fprintf(dump, " */\n");
- fprintf(dump, " /* timeout on cs lockup likely happen at cs 0x%08x dw 0x%08x*/\n", ptr[1], ptr[0]);
- fprintf(dump, "#include <stdio.h>\n");
- fprintf(dump, "#include <stdint.h>\n");
- fprintf(dump, "#include \"radeon_ctx.h\"\n");
- fprintf(dump, "\n");
- fprintf(dump, "#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))\n");
- fprintf(dump, "\n");
-
- for (i = 0; i < csc->crelocs; i++) {
- unsigned j, ndw = (csc->relocs_bo[i].bo->base.size + 3) >> 2;
-
- ptr = radeon_bo_do_map(csc->relocs_bo[i].bo);
- if (ptr) {
- fprintf(dump, "static uint32_t bo_%04d_data[%d] = {\n ", i, ndw);
- for (j = 0; j < ndw; j++) {
- if (j && !(j % 8)) {
- uint32_t offset = (j - 8) << 2;
- fprintf(dump, " /* [0x%08x] va[0x%016"PRIx64"] */\n ", offset, offset + csc->relocs_bo[i].bo->va);
- }
- fprintf(dump, " 0x%08x,", ptr[j]);
- }
- fprintf(dump, "};\n\n");
- }
- }
-
- fprintf(dump, "static uint32_t bo_relocs[%d] = {\n", csc->crelocs * 4);
- for (i = 0; i < csc->crelocs; i++) {
- fprintf(dump, " 0x%08x, 0x%08x, 0x%08x, 0x%08x,\n",
- 0, csc->relocs[i].read_domains, csc->relocs[i].write_domain, csc->relocs[i].flags);
- }
- fprintf(dump, "};\n\n");
-
- fprintf(dump, "/* cs %d dw */\n", csc->chunks[0].length_dw);
- fprintf(dump, "static uint32_t cs[] = {\n");
- ptr = csc->buf;
- for (i = 0; i < csc->chunks[0].length_dw; i++) {
- fprintf(dump, " 0x%08x,\n", ptr[i]);
- }
- fprintf(dump, "};\n\n");
-
- fprintf(dump, "static uint32_t cs_flags[2] = {\n");
- fprintf(dump, " 0x%08x,\n", csc->flags[0]);
- fprintf(dump, " 0x%08x,\n", csc->flags[1]);
- fprintf(dump, "};\n\n");
-
- fprintf(dump, "int main(int argc, char *argv[])\n");
- fprintf(dump, "{\n");
- fprintf(dump, " struct bo *bo[%d];\n", csc->crelocs);
- fprintf(dump, " struct ctx ctx;\n");
- fprintf(dump, "\n");
- fprintf(dump, " ctx_init(&ctx);\n");
- fprintf(dump, "\n");
-
- for (i = 0; i < csc->crelocs; i++) {
- unsigned ndw = (csc->relocs_bo[i].bo->base.size + 3) >> 2;
- uint32_t *ptr;
-
- ptr = radeon_bo_do_map(csc->relocs_bo[i].bo);
- if (ptr) {
- fprintf(dump, " bo[%d] = bo_new(&ctx, %d, bo_%04d_data, 0x%016"PRIx64", 0x%08x);\n",
- i, ndw, i, csc->relocs_bo[i].bo->va, csc->relocs_bo[i].bo->base.alignment);
- } else {
- fprintf(dump, " bo[%d] = bo_new(&ctx, %d, NULL, 0x%016"PRIx64", 0x%08x);\n",
- i, ndw, csc->relocs_bo[i].bo->va, csc->relocs_bo[i].bo->base.alignment);
- }
- }
- fprintf(dump, "\n");
- fprintf(dump, " ctx_cs(&ctx, cs, cs_flags, ARRAY_SIZE(cs), bo, bo_relocs, %d);\n", csc->crelocs);
- fprintf(dump, "\n");
- fprintf(dump, " fprintf(stderr, \"waiting for cs execution to end ....\\n\");\n");
- fprintf(dump, " bo_wait(&ctx, bo[0]);\n");
- fprintf(dump, "}\n");
- fclose(dump);
-}
diff --git a/src/gallium/winsys/svga/drm/vmw_screen_dri.c b/src/gallium/winsys/svga/drm/vmw_screen_dri.c
index 01bb0e2..baa22a9 100644
--- a/src/gallium/winsys/svga/drm/vmw_screen_dri.c
+++ b/src/gallium/winsys/svga/drm/vmw_screen_dri.c
@@ -357,6 +357,7 @@ vmw_drm_surface_get_handle(struct svga_winsys_screen *sws,
vsrf = vmw_svga_winsys_surface(surface);
whandle->handle = vsrf->sid;
whandle->stride = stride;
+ whandle->offset = 0;
switch (whandle->type) {
case DRM_API_HANDLE_TYPE_SHARED:
diff --git a/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c b/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c
index 1e85971..9aaee88 100644
--- a/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c
+++ b/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c
@@ -309,17 +309,20 @@ kms_sw_displaytarget_get_handle(struct sw_winsys *winsys,
case DRM_API_HANDLE_TYPE_KMS:
whandle->handle = kms_sw_dt->handle;
whandle->stride = kms_sw_dt->stride;
+ whandle->offset = 0;
return TRUE;
case DRM_API_HANDLE_TYPE_FD:
if (!drmPrimeHandleToFD(kms_sw->fd, kms_sw_dt->handle,
DRM_CLOEXEC, (int*)&whandle->handle)) {
whandle->stride = kms_sw_dt->stride;
+ whandle->offset = 0;
return TRUE;
}
/* fallthrough */
default:
whandle->handle = 0;
whandle->stride = 0;
+ whandle->offset = 0;
return FALSE;
}
}
diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c
index e23b697..218499a 100644
--- a/src/intel/vulkan/anv_meta_blit.c
+++ b/src/intel/vulkan/anv_meta_blit.c
@@ -100,7 +100,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
tex->texture = nir_deref_var_create(tex, sampler);
tex->sampler = nir_deref_var_create(tex, sampler);
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex");
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
nir_builder_instr_insert(&b, &tex->instr);
nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c
index 4a0bed1..87c3358 100644
--- a/src/intel/vulkan/anv_meta_blit2d.c
+++ b/src/intel/vulkan/anv_meta_blit2d.c
@@ -455,7 +455,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
tex->texture = nir_deref_var_create(tex, sampler);
tex->sampler = NULL;
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex");
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
nir_builder_instr_insert(&b, &tex->instr);
nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
diff --git a/src/intel/vulkan/anv_meta_resolve.c b/src/intel/vulkan/anv_meta_resolve.c
index f50af52..3e7c7d3 100644
--- a/src/intel/vulkan/anv_meta_resolve.c
+++ b/src/intel/vulkan/anv_meta_resolve.c
@@ -164,7 +164,7 @@ build_nir_fs(uint32_t num_samples)
tex->dest_type = nir_type_float;
tex->is_array = false;
tex->coord_components = 3;
- nir_ssa_dest_init(&tex->instr, &tex->dest, /*num_components*/ 4, "tex");
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
nir_builder_instr_insert(&b, &tex->instr);
accum = nir_fadd(&b, accum, &tex->dest.ssa);
diff --git a/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c b/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c
index 46bc5d2..234855c 100644
--- a/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c
+++ b/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c
@@ -85,7 +85,7 @@ apply_dynamic_offsets_block(nir_block *block, void *void_state)
offset_load->src[0] = nir_src_for_ssa(nir_imul(b, res_intrin->src[0].ssa,
nir_imm_int(b, 8)));
- nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 2, NULL);
+ nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 2, 32, NULL);
nir_builder_instr_insert(b, &offset_load->instr);
nir_src *offset_src = nir_get_io_offset_src(intrin);
@@ -107,7 +107,8 @@ apply_dynamic_offsets_block(nir_block *block, void *void_state)
/* It's a load, we need a phi node */
nir_phi_instr *phi = nir_phi_instr_create(b->shader);
nir_ssa_dest_init(&phi->instr, &phi->dest,
- intrin->num_components, NULL);
+ intrin->num_components,
+ intrin->dest.ssa.bit_size, NULL);
nir_phi_src *src1 = ralloc(phi, nir_phi_src);
struct exec_node *tnode = exec_list_get_tail(&if_stmt->then_list);
@@ -117,7 +118,7 @@ apply_dynamic_offsets_block(nir_block *block, void *void_state)
b->cursor = nir_after_cf_list(&if_stmt->else_list);
nir_ssa_def *zero = nir_build_imm(b, intrin->num_components,
- (nir_const_value) { .u = { 0, 0, 0, 0 } });
+ (nir_const_value) { .u32 = { 0, 0, 0, 0 } });
nir_phi_src *src2 = ralloc(phi, nir_phi_src);
struct exec_node *enode = exec_list_get_tail(&if_stmt->else_list);
diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
index eeb9b97..ef81afa 100644
--- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
+++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
@@ -119,7 +119,7 @@ lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
nir_ssa_def *block_index;
if (const_block_idx) {
- block_index = nir_imm_int(b, surface_index + const_block_idx->u[0]);
+ block_index = nir_imm_int(b, surface_index + const_block_idx->u32[0]);
} else {
block_index = nir_iadd(b, nir_imm_int(b, surface_index),
nir_ssa_for_src(b, intrin->src[0], 1));
diff --git a/src/mesa/drivers/common/meta_blit.c b/src/mesa/drivers/common/meta_blit.c
index 0066f7f..6761238 100644
--- a/src/mesa/drivers/common/meta_blit.c
+++ b/src/mesa/drivers/common/meta_blit.c
@@ -597,6 +597,7 @@ blitframebuffer_texture(struct gl_context *ctx,
GLenum filter, GLint flipX, GLint flipY,
GLboolean glsl_version, GLboolean do_depth)
{
+ struct save_state *save = &ctx->Meta->Save[ctx->Meta->SaveStackDepth - 1];
int att_index = do_depth ? BUFFER_DEPTH : readFb->_ColorReadBufferIndex;
const struct gl_renderbuffer_attachment *readAtt =
&readFb->Attachment[att_index];
@@ -709,7 +710,7 @@ blitframebuffer_texture(struct gl_context *ctx,
fb_tex_blit.samp_obj = _mesa_meta_setup_sampler(ctx, texObj, target, filter,
srcLevel);
- /* Always do our blits with no net sRGB decode or encode.
+ /* For desktop GL, we do our blits with no net sRGB decode or encode.
*
* However, if both the src and dst can be srgb decode/encoded, enable them
* so that we do any blending (from scaling or from MSAA resolves) in the
@@ -723,18 +724,42 @@ blitframebuffer_texture(struct gl_context *ctx,
* scissor test."
*
* The GL 4.4 specification disagrees and says that the sRGB part of the
- * fragment pipeline applies, but this was found to break applications.
+ * fragment pipeline applies, but this was found to break applications
+ * (such as Left 4 Dead 2).
+ *
+ * However, for ES 3.0, we follow the specification and perform sRGB
+ * decoding and encoding. The specification has always been clear in
+ * the ES world, and hasn't changed over time.
*/
if (ctx->Extensions.EXT_texture_sRGB_decode) {
- if (_mesa_get_format_color_encoding(rb->Format) == GL_SRGB &&
- drawFb->Visual.sRGBCapable) {
+ bool src_srgb = _mesa_get_format_color_encoding(rb->Format) == GL_SRGB;
+ if (save->API == API_OPENGLES2 && ctx->Version >= 30) {
+ /* From the ES 3.0.4 specification, page 198:
+ * "When values are taken from the read buffer, if the value of
+ * FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING for the framebuffer
+ * attachment corresponding to the read buffer is SRGB (see section
+ * 6.1.13), the red, green, and blue components are converted from
+ * the non-linear sRGB color space according to equation 3.24.
+ *
+ * When values are written to the draw buffers, blit operations
+ * bypass the fragment pipeline. The only fragment operations which
+ * affect a blit are the pixel ownership test, the scissor test,
+ * and sRGB conversion (see section 4.1.8)."
+ */
_mesa_set_sampler_srgb_decode(ctx, fb_tex_blit.samp_obj,
- GL_DECODE_EXT);
- _mesa_set_framebuffer_srgb(ctx, GL_TRUE);
+ src_srgb ? GL_DECODE_EXT
+ : GL_SKIP_DECODE_EXT);
+ _mesa_set_framebuffer_srgb(ctx, drawFb->Visual.sRGBCapable);
} else {
- _mesa_set_sampler_srgb_decode(ctx, fb_tex_blit.samp_obj,
- GL_SKIP_DECODE_EXT);
- /* set_framebuffer_srgb was set by _mesa_meta_begin(). */
+ if (src_srgb && drawFb->Visual.sRGBCapable) {
+ _mesa_set_sampler_srgb_decode(ctx, fb_tex_blit.samp_obj,
+ GL_DECODE_EXT);
+ _mesa_set_framebuffer_srgb(ctx, GL_TRUE);
+ } else {
+ _mesa_set_sampler_srgb_decode(ctx, fb_tex_blit.samp_obj,
+ GL_SKIP_DECODE_EXT);
+ /* set_framebuffer_srgb was set by _mesa_meta_begin(). */
+ }
}
}
diff --git a/src/mesa/drivers/common/meta_copy_image.c b/src/mesa/drivers/common/meta_copy_image.c
index 18b9681..9402a46 100644
--- a/src/mesa/drivers/common/meta_copy_image.c
+++ b/src/mesa/drivers/common/meta_copy_image.c
@@ -269,6 +269,9 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx,
if (status != GL_FRAMEBUFFER_COMPLETE)
goto meta_end;
+ /* Explicitly disable sRGB encoding */
+ ctx->DrawBuffer->Visual.sRGBCapable = false;
+
/* Since we've bound a new draw framebuffer, we need to update its
* derived state -- _Xmin, etc -- for BlitFramebuffer's clipping to
* be correct.
diff --git a/src/mesa/drivers/common/meta_tex_subimage.c b/src/mesa/drivers/common/meta_tex_subimage.c
index dfd3327..62c3fce 100644
--- a/src/mesa/drivers/common/meta_tex_subimage.c
+++ b/src/mesa/drivers/common/meta_tex_subimage.c
@@ -263,6 +263,9 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims,
if (status != GL_FRAMEBUFFER_COMPLETE)
goto fail;
+ /* Explicitly disable sRGB encoding */
+ ctx->DrawBuffer->Visual.sRGBCapable = false;
+
_mesa_update_state(ctx);
if (_mesa_meta_BlitFramebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer,
@@ -420,6 +423,9 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
if (status != GL_FRAMEBUFFER_COMPLETE)
goto fail;
+ /* Explicitly disable sRGB encoding */
+ ctx->DrawBuffer->Visual.sRGBCapable = false;
+
_mesa_update_state(ctx);
if (_mesa_meta_BlitFramebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer,
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp
index 4497eab..38a3236 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp
@@ -115,12 +115,11 @@ brw_blorp_surface_info::set(struct brw_context *brw,
this->brw_surfaceformat = BRW_SURFACEFORMAT_R16_UNORM;
break;
default: {
- mesa_format linear_format = _mesa_get_srgb_format_linear(format);
if (is_render_target) {
- assert(brw->format_supported_as_render_target[linear_format]);
- this->brw_surfaceformat = brw->render_target_format[linear_format];
+ assert(brw->format_supported_as_render_target[format]);
+ this->brw_surfaceformat = brw->render_target_format[format];
} else {
- this->brw_surfaceformat = brw_format_for_mesa_format(linear_format);
+ this->brw_surfaceformat = brw_format_for_mesa_format(format);
}
break;
}
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
index a04a1df..f04e196 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h
@@ -46,7 +46,8 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
float src_x1, float src_y1,
float dst_x0, float dst_y0,
float dst_x1, float dst_y1,
- GLenum filter, bool mirror_x, bool mirror_y);
+ GLenum filter, bool mirror_x, bool mirror_y,
+ bool decode_srgb, bool encode_srgb);
#ifdef __cplusplus
} /* end extern "C" */
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 05fff91..5fd25f1 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -21,6 +21,7 @@
* IN THE SOFTWARE.
*/
+#include "main/context.h"
#include "main/teximage.h"
#include "main/fbobject.h"
@@ -63,7 +64,8 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
float src_x1, float src_y1,
float dst_x0, float dst_y0,
float dst_x1, float dst_y1,
- GLenum filter, bool mirror_x, bool mirror_y)
+ GLenum filter, bool mirror_x, bool mirror_y,
+ bool decode_srgb, bool encode_srgb)
{
/* Get ready to blit. This includes depth resolving the src and dst
* buffers if necessary. Note: it's not necessary to do a color resolve on
@@ -89,6 +91,12 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
dst_level, dst_layer, dst_x0, dst_y0, dst_x1, dst_y1,
mirror_x, mirror_y);
+ if (!decode_srgb && _mesa_get_format_color_encoding(src_format) == GL_SRGB)
+ src_format = _mesa_get_srgb_format_linear(src_format);
+
+ if (!encode_srgb && _mesa_get_format_color_encoding(dst_format) == GL_SRGB)
+ dst_format = _mesa_get_srgb_format_linear(dst_format);
+
brw_blorp_blit_params params(brw,
src_mt, src_level, src_layer, src_format,
dst_mt, dst_level, dst_layer, dst_format,
@@ -114,6 +122,8 @@ do_blorp_blit(struct brw_context *brw, GLbitfield buffer_bit,
struct intel_mipmap_tree *src_mt = find_miptree(buffer_bit, src_irb);
struct intel_mipmap_tree *dst_mt = find_miptree(buffer_bit, dst_irb);
+ const bool es3 = _mesa_is_gles3(&brw->ctx);
+
/* Do the blit */
brw_blorp_blit_miptrees(brw,
src_mt, src_irb->mt_level, src_irb->mt_layer,
@@ -122,7 +132,8 @@ do_blorp_blit(struct brw_context *brw, GLbitfield buffer_bit,
dst_format,
srcX0, srcY0, srcX1, srcY1,
dstX0, dstY0, dstX1, dstY1,
- filter, mirror_x, mirror_y);
+ filter, mirror_x, mirror_y,
+ es3, es3);
dst_irb->need_downsample = true;
}
@@ -289,7 +300,8 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
dst_image->TexFormat,
srcX0, srcY0, srcX1, srcY1,
dstX0, dstY0, dstX1, dstY1,
- GL_NEAREST, false, mirror_y);
+ GL_NEAREST, false, mirror_y,
+ false, false);
/* If we're copying to a packed depth stencil texture and the source
* framebuffer has separate stencil, we need to also copy the stencil data
@@ -314,7 +326,8 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
dst_mt->format,
srcX0, srcY0, srcX1, srcY1,
dstX0, dstY0, dstX1, dstY1,
- GL_NEAREST, false, mirror_y);
+ GL_NEAREST, false, mirror_y,
+ false, false);
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c b/src/mesa/drivers/dri/i965/brw_compiler.c
index a95f51b..b32252f 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.c
+++ b/src/mesa/drivers/dri/i965/brw_compiler.c
@@ -108,6 +108,26 @@ static const struct nir_shader_compiler_options vector_nir_options = {
*/
.fdot_replicates = true,
+ /* Prior to Gen6, there are no three source operations for SIMD4x2. */
+ .lower_flrp = true,
+
+ .lower_pack_snorm_2x16 = true,
+ .lower_pack_unorm_2x16 = true,
+ .lower_unpack_snorm_2x16 = true,
+ .lower_unpack_unorm_2x16 = true,
+ .lower_extract_byte = true,
+ .lower_extract_word = true,
+};
+
+static const struct nir_shader_compiler_options vector_nir_options_gen6 = {
+ COMMON_OPTIONS,
+
+ /* In the vec4 backend, our dpN instruction replicates its result to all the
+ * components of a vec4. We would like NIR to give us replicated fdot
+ * instructions because it can optimize better for us.
+ */
+ .fdot_replicates = true,
+
.lower_pack_snorm_2x16 = true,
.lower_pack_unorm_2x16 = true,
.lower_unpack_snorm_2x16 = true,
@@ -160,8 +180,12 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
if (devinfo->gen < 7)
compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true;
- compiler->glsl_compiler_options[i].NirOptions =
- is_scalar ? &scalar_nir_options : &vector_nir_options;
+ if (is_scalar) {
+ compiler->glsl_compiler_options[i].NirOptions = &scalar_nir_options;
+ } else {
+ compiler->glsl_compiler_options[i].NirOptions =
+ devinfo->gen < 6 ? &vector_nir_options : &vector_nir_options_gen6;
+ }
compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true;
}
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 0b99356..5b14252 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -1294,7 +1294,7 @@ pop_if_stack(struct brw_codegen *p)
static void
push_loop_stack(struct brw_codegen *p, brw_inst *inst)
{
- if (p->loop_stack_array_size < p->loop_stack_depth) {
+ if (p->loop_stack_array_size <= (p->loop_stack_depth + 1)) {
p->loop_stack_array_size *= 2;
p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
p->loop_stack_array_size);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 874053c..33c4adc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2307,17 +2307,6 @@ fs_visitor::opt_algebraic()
progress = true;
}
break;
- case SHADER_OPCODE_RCP: {
- fs_inst *prev = (fs_inst *)inst->prev;
- if (prev->opcode == SHADER_OPCODE_SQRT) {
- if (inst->src[0].equals(prev->dst)) {
- inst->opcode = SHADER_OPCODE_RSQ;
- inst->src[0] = prev->src[0];
- progress = true;
- }
- }
- break;
- }
case SHADER_OPCODE_BROADCAST:
if (is_uniform(inst->src[0])) {
inst->opcode = BRW_OPCODE_MOV;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 2616e65..ffab0a8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -654,21 +654,6 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
}
break;
- case SHADER_OPCODE_RCP:
- /* The hardware doesn't do math on immediate values
- * (because why are you doing that, seriously?), but
- * the correct answer is to just constant fold it
- * anyway.
- */
- assert(i == 0);
- if (inst->src[0].f != 0.0f) {
- inst->opcode = BRW_OPCODE_MOV;
- inst->src[0] = val;
- inst->src[0].f = 1.0f / inst->src[0].f;
- progress = true;
- }
- break;
-
case SHADER_OPCODE_UNTYPED_ATOMIC:
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 29ef609..aa4c745 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -518,10 +518,10 @@ fs_visitor::optimize_extract_to_float(nir_alu_instr *instr,
enum opcode extract_op;
if (src0->op == nir_op_extract_u16 || src0->op == nir_op_extract_i16) {
- assert(element->u[0] <= 1);
+ assert(element->u32[0] <= 1);
extract_op = SHADER_OPCODE_EXTRACT_WORD;
} else {
- assert(element->u[0] <= 3);
+ assert(element->u32[0] <= 3);
extract_op = SHADER_OPCODE_EXTRACT_BYTE;
}
@@ -530,7 +530,7 @@ fs_visitor::optimize_extract_to_float(nir_alu_instr *instr,
op0 = offset(op0, bld, src0->src[0].swizzle[0]);
set_saturate(instr->dest.saturate,
- bld.emit(extract_op, result, op0, brw_imm_ud(element->u[0])));
+ bld.emit(extract_op, result, op0, brw_imm_ud(element->u32[0])));
return true;
}
@@ -549,11 +549,11 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
return false;
nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src);
- if (!value1 || fabsf(value1->f[0]) != 1.0f)
+ if (!value1 || fabsf(value1->f32[0]) != 1.0f)
return false;
nir_const_value *value2 = nir_src_as_const_value(instr->src[2].src);
- if (!value2 || fabsf(value2->f[0]) != 1.0f)
+ if (!value2 || fabsf(value2->f32[0]) != 1.0f)
return false;
fs_reg tmp = vgrf(glsl_type::int_type);
@@ -573,7 +573,7 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
* surely be TRIANGLES
*/
- if (value1->f[0] == -1.0f) {
+ if (value1->f32[0] == -1.0f) {
g0.negate = true;
}
@@ -601,7 +601,7 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
* surely be TRIANGLES
*/
- if (value1->f[0] == -1.0f) {
+ if (value1->f32[0] == -1.0f) {
g1_6.negate = true;
}
@@ -1180,7 +1180,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
case nir_op_extract_i8: {
nir_const_value *byte = nir_src_as_const_value(instr->src[1].src);
bld.emit(SHADER_OPCODE_EXTRACT_BYTE,
- result, op[0], brw_imm_ud(byte->u[0]));
+ result, op[0], brw_imm_ud(byte->u32[0]));
break;
}
@@ -1188,7 +1188,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
case nir_op_extract_i16: {
nir_const_value *word = nir_src_as_const_value(instr->src[1].src);
bld.emit(SHADER_OPCODE_EXTRACT_WORD,
- result, op[0], brw_imm_ud(word->u[0]));
+ result, op[0], brw_imm_ud(word->u32[0]));
break;
}
@@ -1215,7 +1215,7 @@ fs_visitor::nir_emit_load_const(const fs_builder &bld,
fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_D, instr->def.num_components);
for (unsigned i = 0; i < instr->def.num_components; i++)
- bld.MOV(offset(reg, bld, i), brw_imm_d(instr->value.i[i]));
+ bld.MOV(offset(reg, bld, i), brw_imm_d(instr->value.i32[i]));
nir_ssa_values[instr->def.index] = reg;
}
@@ -1769,9 +1769,9 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
const bool is_point_size = (base_offset == 0);
if (offset_const != NULL && vertex_const != NULL &&
- 4 * (base_offset + offset_const->u[0]) < push_reg_count) {
- int imm_offset = (base_offset + offset_const->u[0]) * 4 +
- vertex_const->u[0] * push_reg_count;
+ 4 * (base_offset + offset_const->u32[0]) < push_reg_count) {
+ int imm_offset = (base_offset + offset_const->u32[0]) * 4 +
+ vertex_const->u32[0] * push_reg_count;
/* This input was pushed into registers. */
if (is_point_size) {
/* gl_PointSize comes in .w */
@@ -1793,7 +1793,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
if (vertex_const) {
/* The vertex index is constant; just select the proper URB handle. */
icp_handle =
- retype(brw_vec8_grf(first_icp_handle + vertex_const->i[0], 0),
+ retype(brw_vec8_grf(first_icp_handle + vertex_const->i32[0], 0),
BRW_REGISTER_TYPE_UD);
} else {
/* The vertex index is non-constant. We need to use indirect
@@ -1837,7 +1837,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
if (offset_const) {
/* Constant indexing - use global offset. */
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, icp_handle);
- inst->offset = base_offset + offset_const->u[0];
+ inst->offset = base_offset + offset_const->u32[0];
inst->base_mrf = -1;
inst->mlen = 1;
inst->regs_written = num_components;
@@ -1875,7 +1875,7 @@ fs_visitor::get_indirect_offset(nir_intrinsic_instr *instr)
* add_const_offset_to_base() will fold other constant offsets
* into instr->const_index[0].
*/
- assert(const_value->u[0] == 0);
+ assert(const_value->u32[0] == 0);
return fs_reg();
}
@@ -2193,7 +2193,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
if (const_sample) {
- unsigned msg_data = const_sample->i[0] << 4;
+ unsigned msg_data = const_sample->i32[0] << 4;
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
@@ -2260,8 +2260,8 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
if (const_offset) {
- unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf;
- unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf;
+ unsigned off_x = MIN2((int)(const_offset->f32[0] * 16), 7) & 0xf;
+ unsigned off_y = MIN2((int)(const_offset->f32[1] * 16), 7) & 0xf;
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
@@ -2420,7 +2420,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
fs_reg offset_reg;
nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
if (const_offset) {
- offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0]);
+ offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]);
} else {
offset_reg = vgrf(glsl_type::uint_type);
bld.ADD(offset_reg,
@@ -2464,7 +2464,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
if (const_offset) {
- offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0] +
+ offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0] +
4 * first_component);
} else {
offset_reg = vgrf(glsl_type::uint_type);
@@ -2695,8 +2695,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
if (const_offset) {
/* Offsets are in bytes but they should always be multiples of 4 */
- assert(const_offset->u[0] % 4 == 0);
- src.reg_offset = const_offset->u[0] / 4;
+ assert(const_offset->u32[0] % 4 == 0);
+ src.reg_offset = const_offset->u32[0] / 4;
for (unsigned j = 0; j < instr->num_components; j++) {
bld.MOV(offset(dest, bld, j), offset(src, bld, j));
@@ -2729,7 +2729,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
if (const_index) {
const unsigned index = stage_prog_data->binding_table.ubo_start +
- const_index->u[0];
+ const_index->u32[0];
surf_index = brw_imm_ud(index);
brw_mark_surface_used(prog_data, index);
} else {
@@ -2762,12 +2762,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
fs_reg packed_consts = vgrf(glsl_type::float_type);
packed_consts.type = dest.type;
- struct brw_reg const_offset_reg = brw_imm_ud(const_offset->u[0] & ~15);
+ struct brw_reg const_offset_reg = brw_imm_ud(const_offset->u32[0] & ~15);
bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts,
surf_index, const_offset_reg);
for (unsigned i = 0; i < instr->num_components; i++) {
- packed_consts.set_smear(const_offset->u[0] % 16 / 4 + i);
+ packed_consts.set_smear(const_offset->u32[0] % 16 / 4 + i);
/* The std140 packing rules don't allow vectors to cross 16-byte
* boundaries, and a reg is 32 bytes.
@@ -2790,7 +2790,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
fs_reg surf_index;
if (const_uniform_block) {
unsigned index = stage_prog_data->binding_table.ssbo_start +
- const_uniform_block->u[0];
+ const_uniform_block->u32[0];
surf_index = brw_imm_ud(index);
brw_mark_surface_used(prog_data, index);
} else {
@@ -2809,7 +2809,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
fs_reg offset_reg;
nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
if (const_offset) {
- offset_reg = brw_imm_ud(const_offset->u[0]);
+ offset_reg = brw_imm_ud(const_offset->u32[0]);
} else {
offset_reg = get_nir_src(instr->src[1]);
}
@@ -2837,7 +2837,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
assert(const_offset && "Indirect input loads not allowed");
- src = offset(src, bld, const_offset->u[0]);
+ src = offset(src, bld, const_offset->u32[0]);
for (unsigned j = 0; j < instr->num_components; j++) {
bld.MOV(offset(dest, bld, j), offset(src, bld, j));
@@ -2854,7 +2854,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
nir_src_as_const_value(instr->src[1]);
if (const_uniform_block) {
unsigned index = stage_prog_data->binding_table.ssbo_start +
- const_uniform_block->u[0];
+ const_uniform_block->u32[0];
surf_index = brw_imm_ud(index);
brw_mark_surface_used(prog_data, index);
} else {
@@ -2885,7 +2885,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
fs_reg offset_reg;
nir_const_value *const_offset = nir_src_as_const_value(instr->src[2]);
if (const_offset) {
- offset_reg = brw_imm_ud(const_offset->u[0] + 4 * first_component);
+ offset_reg = brw_imm_ud(const_offset->u32[0] + 4 * first_component);
} else {
offset_reg = vgrf(glsl_type::uint_type);
bld.ADD(offset_reg,
@@ -2913,7 +2913,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
assert(const_offset && "Indirect output stores not allowed");
- new_dest = offset(new_dest, bld, const_offset->u[0]);
+ new_dest = offset(new_dest, bld, const_offset->u32[0]);
for (unsigned j = 0; j < instr->num_components; j++) {
bld.MOV(offset(new_dest, bld, j), offset(src, bld, j));
@@ -2954,7 +2954,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
case nir_intrinsic_get_buffer_size: {
nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
- unsigned ssbo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
+ unsigned ssbo_index = const_uniform_block ? const_uniform_block->u32[0] : 0;
int reg_width = dispatch_width / 8;
/* Set LOD = 0 */
@@ -3005,7 +3005,7 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
if (const_surface) {
unsigned surf_index = stage_prog_data->binding_table.ssbo_start +
- const_surface->u[0];
+ const_surface->u32[0];
surface = brw_imm_ud(surf_index);
brw_mark_surface_used(prog_data, surf_index);
} else {
@@ -3134,7 +3134,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
nir_const_value *const_offset =
nir_src_as_const_value(instr->src[i].src);
if (const_offset) {
- tex_offset = brw_imm_ud(brw_texture_offset(const_offset->i, 3));
+ tex_offset = brw_imm_ud(brw_texture_offset(const_offset->i32, 3));
} else {
tex_offset = retype(src, BRW_REGISTER_TYPE_D);
}
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 941920a..ab6000b 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -77,7 +77,7 @@ add_const_offset_to_base_block(nir_block *block, void *closure)
nir_const_value *const_offset = nir_src_as_const_value(*offset);
if (const_offset) {
- intrin->const_index[0] += const_offset->u[0];
+ intrin->const_index[0] += const_offset->u32[0];
b->cursor = nir_before_instr(&intrin->instr);
nir_instr_rewrite_src(&intrin->instr, offset,
nir_src_for_ssa(nir_imm_int(b, 0)));
@@ -175,7 +175,7 @@ remap_patch_urb_offsets(nir_block *block, void *closure)
if (vertex) {
nir_const_value *const_vertex = nir_src_as_const_value(*vertex);
if (const_vertex) {
- intrin->const_index[0] += const_vertex->u[0] *
+ intrin->const_index[0] += const_vertex->u32[0] *
state->vue_map->num_per_vertex_slots;
} else {
state->b.cursor = nir_before_instr(&intrin->instr);
@@ -623,12 +623,24 @@ brw_type_for_nir_type(nir_alu_type type)
{
switch (type) {
case nir_type_uint:
+ case nir_type_uint32:
return BRW_REGISTER_TYPE_UD;
case nir_type_bool:
case nir_type_int:
+ case nir_type_bool32:
+ case nir_type_int32:
return BRW_REGISTER_TYPE_D;
case nir_type_float:
+ case nir_type_float32:
return BRW_REGISTER_TYPE_F;
+ case nir_type_float64:
+ return BRW_REGISTER_TYPE_DF;
+ case nir_type_int64:
+ case nir_type_uint64:
+ /* TODO we should only see these in moves, so for now it's ok, but when
+ * we add actual 64-bit integer support we should fix this.
+ */
+ return BRW_REGISTER_TYPE_DF;
default:
unreachable("unknown type");
}
@@ -644,12 +656,18 @@ brw_glsl_base_type_for_nir_type(nir_alu_type type)
{
switch (type) {
case nir_type_float:
+ case nir_type_float32:
return GLSL_TYPE_FLOAT;
+ case nir_type_float64:
+ return GLSL_TYPE_DOUBLE;
+
case nir_type_int:
+ case nir_type_int32:
return GLSL_TYPE_INT;
case nir_type_uint:
+ case nir_type_uint32:
return GLSL_TYPE_UINT;
default:
diff --git a/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c b/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c
index 56e15ef..22eeb1a 100644
--- a/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c
+++ b/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c
@@ -165,7 +165,7 @@ analyze_boolean_resolves_block(nir_block *block, void *void_state)
}
default:
- if (nir_op_infos[alu->op].output_type == nir_type_bool) {
+ if (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) == nir_type_bool) {
/* This instructions will turn into a CMP when we actually emit
* them so the result will have to be resolved before it can be
* used.
@@ -225,7 +225,7 @@ analyze_boolean_resolves_block(nir_block *block, void *void_state)
* have to worry about resolving them.
*/
instr->pass_flags &= ~BRW_NIR_BOOLEAN_MASK;
- if (load->value.u[0] == NIR_TRUE || load->value.u[0] == NIR_FALSE) {
+ if (load->value.u32[0] == NIR_TRUE || load->value.u32[0] == NIR_FALSE) {
instr->pass_flags |= BRW_NIR_BOOLEAN_NO_RESOLVE;
} else {
instr->pass_flags |= BRW_NIR_NON_BOOLEAN;
diff --git a/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c b/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
index 5ff2cba..6e8b1f9 100644
--- a/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
+++ b/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
@@ -168,7 +168,9 @@ brw_nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
if (add->op != nir_op_fadd)
continue;
- /* TODO: Maybe bail if this expression is considered "precise"? */
+ assert(add->dest.dest.is_ssa);
+ if (add->exact)
+ continue;
assert(add->src[0].src.is_ssa && add->src[1].src.is_ssa);
@@ -201,6 +203,8 @@ brw_nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
if (mul == NULL)
continue;
+ unsigned bit_size = add->dest.dest.ssa.bit_size;
+
nir_ssa_def *mul_src[2];
mul_src[0] = mul->src[0].src.ssa;
mul_src[1] = mul->src[1].src.ssa;
@@ -220,7 +224,7 @@ brw_nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
nir_op_fabs);
abs->src[0].src = nir_src_for_ssa(mul_src[i]);
nir_ssa_dest_init(&abs->instr, &abs->dest.dest,
- mul_src[i]->num_components, NULL);
+ mul_src[i]->num_components, bit_size, NULL);
abs->dest.write_mask = (1 << mul_src[i]->num_components) - 1;
nir_instr_insert_before(&add->instr, &abs->instr);
mul_src[i] = &abs->dest.dest.ssa;
@@ -232,7 +236,7 @@ brw_nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
nir_op_fneg);
neg->src[0].src = nir_src_for_ssa(mul_src[0]);
nir_ssa_dest_init(&neg->instr, &neg->dest.dest,
- mul_src[0]->num_components, NULL);
+ mul_src[0]->num_components, bit_size, NULL);
neg->dest.write_mask = (1 << mul_src[0]->num_components) - 1;
nir_instr_insert_before(&add->instr, &neg->instr);
mul_src[0] = &neg->dest.dest.ssa;
@@ -253,6 +257,7 @@ brw_nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
nir_ssa_dest_init(&ffma->instr, &ffma->dest.dest,
add->dest.dest.ssa.num_components,
+ bit_size,
add->dest.dest.ssa.name);
nir_ssa_def_rewrite_uses(&add->dest.dest.ssa,
nir_src_for_ssa(&ffma->dest.dest.ssa));
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 6b85eac..783af78 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -34,6 +34,7 @@
#define BRW_STATE_H
#include "brw_context.h"
+#include "brw_defines.h"
#ifdef __cplusplus
extern "C" {
@@ -406,6 +407,59 @@ void gen7_reset_hw_bt_pool_offsets(struct brw_context *brw);
void
gen7_restore_default_l3_config(struct brw_context *brw);
+static inline bool
+is_drawing_points(const struct brw_context *brw)
+{
+ /* Determine if the primitives *reaching the SF* are points */
+ /* _NEW_POLYGON */
+ if (brw->ctx.Polygon.FrontMode == GL_POINT ||
+ brw->ctx.Polygon.BackMode == GL_POINT) {
+ return true;
+ }
+
+ if (brw->geometry_program) {
+ /* BRW_NEW_GEOMETRY_PROGRAM */
+ return brw->geometry_program->OutputType == GL_POINTS;
+ } else if (brw->tes.prog_data) {
+ /* BRW_NEW_TES_PROG_DATA */
+ return brw->tes.prog_data->output_topology ==
+ BRW_TESS_OUTPUT_TOPOLOGY_POINT;
+ } else {
+ /* BRW_NEW_PRIMITIVE */
+ return brw->primitive == _3DPRIM_POINTLIST;
+ }
+}
+
+static inline bool
+is_drawing_lines(const struct brw_context *brw)
+{
+ /* Determine if the primitives *reaching the SF* are lines */
+ /* _NEW_POLYGON */
+ if (brw->ctx.Polygon.FrontMode == GL_LINE ||
+ brw->ctx.Polygon.BackMode == GL_LINE) {
+ return true;
+ }
+
+ if (brw->geometry_program) {
+ /* BRW_NEW_GEOMETRY_PROGRAM */
+ return brw->geometry_program->OutputType == GL_LINE_STRIP;
+ } else if (brw->tes.prog_data) {
+ /* BRW_NEW_TES_PROG_DATA */
+ return brw->tes.prog_data->output_topology ==
+ BRW_TESS_OUTPUT_TOPOLOGY_LINE;
+ } else {
+ /* BRW_NEW_PRIMITIVE */
+ switch (brw->primitive) {
+ case _3DPRIM_LINELIST:
+ case _3DPRIM_LINESTRIP:
+ case _3DPRIM_LINELOOP:
+ return true;
+ }
+ }
+ return false;
+}
+
+
#ifdef __cplusplus
}
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c
index 4666788..b7b0a86 100644
--- a/src/mesa/drivers/dri/i965/brw_state_dump.c
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@@ -423,11 +423,12 @@ static void gen7_dump_sampler_state(struct brw_context *brw,
GET_BITS(samp[1], 15, 8)
);
batch_out(brw, name, offset, i+2, "Border Color\n"); /* FINISHME: gen8+ */
- batch_out(brw, name, offset, i+3, "Max aniso: RATIO %d:1, TC[XYZ] Address Control: %s|%s|%s\n",
+ batch_out(brw, name, offset, i+3, "Max aniso: RATIO %d:1, TC[XYZ] Address Control: %s|%s|%s, %snormalized coords\n",
(GET_FIELD(samp[3], BRW_SAMPLER_MAX_ANISOTROPY) + 1) * 2,
sampler_addr_mode[GET_FIELD(samp[3], BRW_SAMPLER_TCX_WRAP_MODE)],
sampler_addr_mode[GET_FIELD(samp[3], BRW_SAMPLER_TCY_WRAP_MODE)],
- sampler_addr_mode[GET_FIELD(samp[3], BRW_SAMPLER_TCZ_WRAP_MODE)]
+ sampler_addr_mode[GET_FIELD(samp[3], BRW_SAMPLER_TCZ_WRAP_MODE)],
+ (samp[3] & GEN7_SAMPLER_NON_NORMALIZED_COORDINATES) ? "non-" : ""
);
samp += 4;
diff --git a/src/mesa/drivers/dri/i965/brw_util.h b/src/mesa/drivers/dri/i965/brw_util.h
index 1f27e98..3e9a6ee 100644
--- a/src/mesa/drivers/dri/i965/brw_util.h
+++ b/src/mesa/drivers/dri/i965/brw_util.h
@@ -34,6 +34,7 @@
#define BRW_UTIL_H
#include "brw_context.h"
+#include "main/framebuffer.h"
extern GLuint brw_translate_blend_factor( GLenum factor );
extern GLuint brw_translate_blend_equation( GLenum mode );
@@ -49,13 +50,13 @@ brw_get_line_width(struct brw_context *brw)
* implementation-dependent maximum non-antialiased line width."
*/
float line_width =
- CLAMP(!brw->ctx.Multisample._Enabled && !brw->ctx.Line.SmoothFlag
+ CLAMP(!_mesa_is_multisample_enabled(&brw->ctx) && !brw->ctx.Line.SmoothFlag
? roundf(brw->ctx.Line.Width) : brw->ctx.Line.Width,
0.0f, brw->ctx.Const.MaxLineWidth);
uint32_t line_width_u3_7 = U_FIXED(line_width, 7);
/* Line width of 0 is not allowed when MSAA enabled */
- if (brw->ctx.Multisample._Enabled) {
+ if (_mesa_is_multisample_enabled(&brw->ctx)) {
if (line_width_u3_7 == 0)
line_width_u3_7 = 1;
} else if (brw->ctx.Line.SmoothFlag && line_width < 1.5f) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 65e57ba..0025343 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -693,17 +693,6 @@ vec4_visitor::opt_algebraic()
break;
}
break;
- case SHADER_OPCODE_RCP: {
- vec4_instruction *prev = (vec4_instruction *)inst->prev;
- if (prev->opcode == SHADER_OPCODE_SQRT) {
- if (inst->src[0].equals(src_reg(prev->dst))) {
- inst->opcode = SHADER_OPCODE_RSQ;
- inst->src[0] = prev->src[0];
- progress = true;
- }
- }
- break;
- }
case SHADER_OPCODE_BROADCAST:
if (is_uniform(inst->src[0]) ||
inst->src[1].is_zero()) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
index d9c048e..e915aee 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
@@ -70,8 +70,8 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
/* Make up a type...we have no way of knowing... */
const glsl_type *const type = glsl_type::ivec(instr->num_components);
- src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u[0] +
- instr->const_index[0] + offset->u[0],
+ src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u32[0] +
+ instr->const_index[0] + offset->u32[0],
type);
/* gl_PointSize is passed in the .w component of the VUE header */
if (instr->const_index[0] == VARYING_SLOT_PSIZ)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 4686f20..7c06f92 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -343,7 +343,7 @@ vec4_visitor::get_indirect_offset(nir_intrinsic_instr *instr)
* add_const_offset_to_base() will fold other constant offsets
* into instr->const_index[0].
*/
- assert(const_value->u[0] == 0);
+ assert(const_value->u32[0] == 0);
return src_reg();
}
@@ -369,13 +369,13 @@ vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr)
continue;
for (unsigned j = i; j < instr->def.num_components; j++) {
- if (instr->value.u[i] == instr->value.u[j]) {
+ if (instr->value.u32[i] == instr->value.u32[j]) {
writemask |= 1 << j;
}
}
reg.writemask = writemask;
- emit(MOV(reg, brw_imm_d(instr->value.i[i])));
+ emit(MOV(reg, brw_imm_d(instr->value.i32[i])));
remaining &= ~writemask;
}
@@ -400,7 +400,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
/* We set EmitNoIndirectInput for VS */
assert(const_offset);
- src = src_reg(ATTR, instr->const_index[0] + const_offset->u[0],
+ src = src_reg(ATTR, instr->const_index[0] + const_offset->u32[0],
glsl_type::uvec4_type);
dest = get_nir_dest(instr->dest, src.type);
@@ -414,7 +414,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
assert(const_offset);
- int varying = instr->const_index[0] + const_offset->u[0];
+ int varying = instr->const_index[0] + const_offset->u32[0];
src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F,
instr->num_components);
@@ -425,7 +425,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
case nir_intrinsic_get_buffer_size: {
nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
- unsigned ssbo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
+ unsigned ssbo_index = const_uniform_block ? const_uniform_block->u32[0] : 0;
const unsigned index =
prog_data->base.binding_table.ssbo_start + ssbo_index;
@@ -458,7 +458,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
nir_src_as_const_value(instr->src[1]);
if (const_uniform_block) {
unsigned index = prog_data->base.binding_table.ssbo_start +
- const_uniform_block->u[0];
+ const_uniform_block->u32[0];
surf_index = brw_imm_ud(index);
brw_mark_surface_used(&prog_data->base, index);
} else {
@@ -476,7 +476,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
src_reg offset_reg;
nir_const_value *const_offset = nir_src_as_const_value(instr->src[2]);
if (const_offset) {
- offset_reg = brw_imm_ud(const_offset->u[0]);
+ offset_reg = brw_imm_ud(const_offset->u32[0]);
} else {
offset_reg = get_nir_src(instr->src[2], 1);
}
@@ -596,7 +596,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
src_reg surf_index;
if (const_uniform_block) {
unsigned index = prog_data->base.binding_table.ssbo_start +
- const_uniform_block->u[0];
+ const_uniform_block->u32[0];
surf_index = brw_imm_ud(index);
brw_mark_surface_used(&prog_data->base, index);
@@ -617,7 +617,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
src_reg offset_reg;
nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
if (const_offset) {
- offset_reg = brw_imm_ud(const_offset->u[0]);
+ offset_reg = brw_imm_ud(const_offset->u32[0]);
} else {
offset_reg = get_nir_src(instr->src[1], 1);
}
@@ -697,8 +697,8 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
if (const_offset) {
/* Offsets are in bytes but they should always be multiples of 16 */
- assert(const_offset->u[0] % 16 == 0);
- src.reg_offset = const_offset->u[0] / 16;
+ assert(const_offset->u32[0] % 16 == 0);
+ src.reg_offset = const_offset->u32[0] / 16;
emit(MOV(dest, src));
} else {
@@ -760,7 +760,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
* as an immediate.
*/
const unsigned index = prog_data->base.binding_table.ubo_start +
- const_block_index->u[0];
+ const_block_index->u32[0];
surf_index = brw_imm_ud(index);
brw_mark_surface_used(&prog_data->base, index);
} else {
@@ -785,7 +785,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
src_reg offset;
nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
if (const_offset) {
- offset = brw_imm_ud(const_offset->u[0] & ~15);
+ offset = brw_imm_ud(const_offset->u32[0] & ~15);
} else {
offset = get_nir_src(instr->src[1], nir_type_int, 1);
}
@@ -800,10 +800,10 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
packed_consts.swizzle = brw_swizzle_for_size(instr->num_components);
if (const_offset) {
- packed_consts.swizzle += BRW_SWIZZLE4(const_offset->u[0] % 16 / 4,
- const_offset->u[0] % 16 / 4,
- const_offset->u[0] % 16 / 4,
- const_offset->u[0] % 16 / 4);
+ packed_consts.swizzle += BRW_SWIZZLE4(const_offset->u32[0] % 16 / 4,
+ const_offset->u32[0] % 16 / 4,
+ const_offset->u32[0] % 16 / 4,
+ const_offset->u32[0] % 16 / 4);
}
emit(MOV(dest, packed_consts));
@@ -845,7 +845,7 @@ vec4_visitor::nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr)
nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
if (const_surface) {
unsigned surf_index = prog_data->base.binding_table.ssbo_start +
- const_surface->u[0];
+ const_surface->u32[0];
surface = brw_imm_ud(surf_index);
brw_mark_surface_used(&prog_data->base, surf_index);
} else {
@@ -1042,12 +1042,12 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
* operand. If we can determine that one of the args is in the low
* 16 bits, though, we can just emit a single MUL.
*/
- if (value0 && value0->u[0] < (1 << 16)) {
+ if (value0 && value0->u32[0] < (1 << 16)) {
if (devinfo->gen < 7)
emit(MUL(dst, op[0], op[1]));
else
emit(MUL(dst, op[1], op[0]));
- } else if (value1 && value1->u[0] < (1 << 16)) {
+ } else if (value1 && value1->u32[0] < (1 << 16)) {
if (devinfo->gen < 7)
emit(MUL(dst, op[1], op[0]));
else
@@ -1793,7 +1793,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
nir_const_value *const_offset =
nir_src_as_const_value(instr->src[i].src);
if (const_offset) {
- constant_offset = brw_texture_offset(const_offset->i, 3);
+ constant_offset = brw_texture_offset(const_offset->i32, 3);
} else {
offset_value =
get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 2);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
index f344eaa..0ce48b8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
@@ -353,7 +353,7 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
nir_const_value *vertex_const = nir_src_as_const_value(instr->src[0]);
src_reg vertex_index =
- vertex_const ? src_reg(brw_imm_ud(vertex_const->u[0]))
+ vertex_const ? src_reg(brw_imm_ud(vertex_const->u32[0]))
: get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1);
dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
@@ -400,6 +400,7 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
}
} else if (imm_offset == 1 && indirect_offset.file == BAD_FILE) {
dst.type = BRW_REGISTER_TYPE_F;
+ unsigned swiz = BRW_SWIZZLE_WZYX;
/* This is a read of gl_TessLevelOuter[], which lives in the
* high 4 DWords of the Patch URB header, in reverse order.
@@ -412,6 +413,8 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
dst.writemask = WRITEMASK_XYZ;
break;
case GL_ISOLINES:
+ /* Isolines are not reversed; swizzle .zw -> .xy */
+ swiz = BRW_SWIZZLE_ZWZW;
dst.writemask = WRITEMASK_XY;
return;
default:
@@ -420,7 +423,7 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
dst_reg tmp(this, glsl_type::vec4_type);
emit_output_urb_read(tmp, 1, src_reg());
- emit(MOV(dst, swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX)));
+ emit(MOV(dst, swizzle(src_reg(tmp), swiz)));
} else {
emit_output_urb_read(dst, imm_offset, indirect_offset);
}
@@ -473,8 +476,15 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
* Patch URB Header at DWords 4-7. However, it's reversed, so
* instead of .xyzw we have .wzyx.
*/
- swiz = BRW_SWIZZLE_WZYX;
- mask = writemask_for_backwards_vector(mask);
+ if (key->tes_primitive_mode == GL_ISOLINES) {
+ /* Isolines .xy should be stored in .zw, in order. */
+ swiz = BRW_SWIZZLE4(0, 0, 0, 1);
+ mask <<= 2;
+ } else {
+ /* Other domains are reversed; store .wzyx instead of .xyzw. */
+ swiz = BRW_SWIZZLE_WZYX;
+ mask = writemask_for_backwards_vector(mask);
+ }
}
emit_urb_write(swizzle(value, swiz), mask,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp
index e3c23f1..7ba494f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp
@@ -149,9 +149,15 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
src_reg(brw_vec8_grf(1, 0))));
break;
case nir_intrinsic_load_tess_level_outer:
- emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
- swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
- BRW_SWIZZLE_WZYX)));
+ if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) {
+ emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
+ swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
+ BRW_SWIZZLE_ZWZW)));
+ } else {
+ emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
+ swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
+ BRW_SWIZZLE_WZYX)));
+ }
break;
case nir_intrinsic_load_tess_level_inner:
if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c
index cee139b..f5a7d4d 100644
--- a/src/mesa/drivers/dri/i965/gen6_cc.c
+++ b/src/mesa/drivers/dri/i965/gen6_cc.c
@@ -198,14 +198,14 @@ gen6_upload_blend_state(struct brw_context *brw)
if(!is_buffer_zero_integer_format) {
/* _NEW_MULTISAMPLE */
blend[b].blend1.alpha_to_coverage =
- ctx->Multisample._Enabled && ctx->Multisample.SampleAlphaToCoverage;
+ _mesa_is_multisample_enabled(ctx) && ctx->Multisample.SampleAlphaToCoverage;
/* From SandyBridge PRM, volume 2 Part 1, section 8.2.3, BLEND_STATE:
* DWord 1, Bit 30 (AlphaToOne Enable):
* "If Dual Source Blending is enabled, this bit must be disabled"
*/
WARN_ONCE(ctx->Color.Blend[b]._UsesDualSrc &&
- ctx->Multisample._Enabled &&
+ _mesa_is_multisample_enabled(ctx) &&
ctx->Multisample.SampleAlphaToOne,
"HW workaround: disabling alpha to one with dual src "
"blending\n");
@@ -213,7 +213,7 @@ gen6_upload_blend_state(struct brw_context *brw)
blend[b].blend1.alpha_to_one = false;
else
blend[b].blend1.alpha_to_one =
- ctx->Multisample._Enabled && ctx->Multisample.SampleAlphaToOne;
+ _mesa_is_multisample_enabled(ctx) && ctx->Multisample.SampleAlphaToOne;
blend[b].blend1.alpha_to_coverage_dither = (brw->gen >= 7);
}
diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c
index 9a29366..004eceb 100644
--- a/src/mesa/drivers/dri/i965/gen6_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c
@@ -174,12 +174,14 @@ upload_clip_state(struct brw_context *brw)
else
enable = GEN6_CLIP_ENABLE;
+ if (!is_drawing_points(brw) && !is_drawing_lines(brw))
+ dw2 |= GEN6_CLIP_XY_TEST;
+
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
OUT_BATCH(dw1);
OUT_BATCH(enable |
GEN6_CLIP_MODE_NORMAL |
- GEN6_CLIP_XY_TEST |
dw2);
OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
@@ -195,7 +197,9 @@ const struct brw_tracked_state gen6_clip_state = {
_NEW_TRANSFORM,
.brw = BRW_NEW_CONTEXT |
BRW_NEW_FS_PROG_DATA |
+ BRW_NEW_GEOMETRY_PROGRAM |
BRW_NEW_META_IN_PROGRESS |
+ BRW_NEW_PRIMITIVE |
BRW_NEW_RASTERIZER_DISCARD,
},
.emit = upload_clip_state,
@@ -209,7 +213,9 @@ const struct brw_tracked_state gen7_clip_state = {
_NEW_TRANSFORM,
.brw = BRW_NEW_CONTEXT |
BRW_NEW_FS_PROG_DATA |
+ BRW_NEW_GEOMETRY_PROGRAM |
BRW_NEW_META_IN_PROGRESS |
+ BRW_NEW_PRIMITIVE |
BRW_NEW_RASTERIZER_DISCARD,
},
.emit = upload_clip_state,
diff --git a/src/mesa/drivers/dri/i965/gen6_multisample_state.c b/src/mesa/drivers/dri/i965/gen6_multisample_state.c
index 8eb620d..fcd313a 100644
--- a/src/mesa/drivers/dri/i965/gen6_multisample_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_multisample_state.c
@@ -171,7 +171,7 @@ gen6_determine_sample_mask(struct brw_context *brw)
/* BRW_NEW_NUM_SAMPLES */
unsigned num_samples = brw->num_samples;
- if (ctx->Multisample._Enabled) {
+ if (_mesa_is_multisample_enabled(ctx)) {
if (ctx->Multisample.SampleCoverage) {
coverage = ctx->Multisample.SampleCoverageValue;
coverage_invert = ctx->Multisample.SampleCoverageInvert;
diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c
index 17b4a7f..a206732 100644
--- a/src/mesa/drivers/dri/i965/gen6_scissor_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c
@@ -58,10 +58,10 @@ gen6_upload_scissor_state(struct brw_context *brw)
for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) {
int bbox[4];
- bbox[0] = 0;
- bbox[1] = fb_width;
- bbox[2] = 0;
- bbox[3] = fb_height;
+ bbox[0] = MAX2(ctx->ViewportArray[i].X, 0);
+ bbox[1] = MIN2(bbox[0] + ctx->ViewportArray[i].Width, fb_width);
+ bbox[2] = MAX2(ctx->ViewportArray[i].Y, 0);
+ bbox[3] = MIN2(bbox[2] + ctx->ViewportArray[i].Height, fb_height);
_mesa_intersect_scissor_bounding_box(ctx, i, bbox);
if (bbox[0] == bbox[1] || bbox[2] == bbox[3]) {
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index 2634e6b..42f9a5c 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -147,26 +147,6 @@ get_attr_override(const struct brw_vue_map *vue_map, int urb_entry_read_offset,
}
-static bool
-is_drawing_points(const struct brw_context *brw)
-{
- /* Determine if the primitives *reaching the SF* are points */
- /* _NEW_POLYGON */
- if (brw->ctx.Polygon.FrontMode == GL_POINT ||
- brw->ctx.Polygon.BackMode == GL_POINT) {
- return true;
- }
-
- if (brw->geometry_program) {
- /* BRW_NEW_GEOMETRY_PROGRAM */
- return brw->geometry_program->OutputType == GL_POINTS;
- } else {
- /* BRW_NEW_PRIMITIVE */
- return brw->primitive == _3DPRIM_POINTLIST;
- }
-}
-
-
/**
* Create the mapping from the FS inputs we produce to the previous pipeline
* stage (GS or VS) outputs they source from.
@@ -216,8 +196,10 @@ calculate_attr_overrides(const struct brw_context *brw,
* This is not required on Haswell, as the hardware ignores this state
* when drawing non-points -- although we do still need to be careful to
* correctly set the attr overrides.
+ *
+ * _NEW_POLYGON
+ * BRW_NEW_PRIMITIVE | BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_TES_PROG_DATA
*/
- /* BRW_NEW_PRIMITIVE | BRW_NEW_GEOMETRY_PROGRAM */
bool drawing_points = is_drawing_points(brw);
/* Initialize all the attr_overrides to 0. In the loop below we'll modify
@@ -369,8 +351,9 @@ upload_sf_state(struct brw_context *brw)
unreachable("not reached");
}
- /* _NEW_SCISSOR */
- if (ctx->Scissor.EnableFlags)
+ /* _NEW_SCISSOR _NEW_POLYGON BRW_NEW_GEOMETRY_PROGRAM BRW_NEW_PRIMITIVE */
+ if (ctx->Scissor.EnableFlags ||
+ is_drawing_points(brw) || is_drawing_lines(brw))
dw3 |= GEN6_SF_SCISSOR_ENABLE;
/* _NEW_POLYGON */
@@ -484,6 +467,7 @@ const struct brw_tracked_state gen6_sf_state = {
BRW_NEW_FS_PROG_DATA |
BRW_NEW_GEOMETRY_PROGRAM |
BRW_NEW_PRIMITIVE |
+ BRW_NEW_TES_PROG_DATA |
BRW_NEW_VUE_MAP_GEOM_OUT,
},
.emit = upload_sf_state,
diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c
index b1f13ac..7c98c73 100644
--- a/src/mesa/drivers/dri/i965/gen7_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c
@@ -188,8 +188,9 @@ upload_sf_state(struct brw_context *brw)
dw2 |= GEN6_SF_CULL_NONE;
}
- /* _NEW_SCISSOR */
- if (ctx->Scissor.EnableFlags)
+ /* _NEW_SCISSOR _NEW_POLYGON BRW_NEW_GEOMETRY_PROGRAM BRW_NEW_PRIMITIVE */
+ if (ctx->Scissor.EnableFlags ||
+ is_drawing_points(brw) || is_drawing_lines(brw))
dw2 |= GEN6_SF_SCISSOR_ENABLE;
/* _NEW_LINE */
@@ -254,7 +255,8 @@ const struct brw_tracked_state gen7_sf_state = {
_NEW_POLYGON |
_NEW_PROGRAM |
_NEW_SCISSOR,
- .brw = BRW_NEW_CONTEXT,
+ .brw = BRW_NEW_CONTEXT |
+ BRW_NEW_PRIMITIVE,
},
.emit = upload_sf_state,
};
diff --git a/src/mesa/drivers/dri/i965/gen8_blend_state.c b/src/mesa/drivers/dri/i965/gen8_blend_state.c
index 786c79a..63186bd 100644
--- a/src/mesa/drivers/dri/i965/gen8_blend_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_blend_state.c
@@ -65,7 +65,7 @@ gen8_upload_blend_state(struct brw_context *brw)
if (rb_zero_type != GL_INT && rb_zero_type != GL_UNSIGNED_INT) {
/* _NEW_MULTISAMPLE */
- if (ctx->Multisample._Enabled) {
+ if (_mesa_is_multisample_enabled(ctx)) {
if (ctx->Multisample.SampleAlphaToCoverage) {
blend[0] |= GEN8_BLEND_ALPHA_TO_COVERAGE_ENABLE;
blend[0] |= GEN8_BLEND_ALPHA_TO_COVERAGE_DITHER_ENABLE;
@@ -183,7 +183,7 @@ gen8_upload_blend_state(struct brw_context *brw)
* "If Dual Source Blending is enabled, this bit must be disabled."
*/
WARN_ONCE(ctx->Color.Blend[i]._UsesDualSrc &&
- ctx->Multisample._Enabled &&
+ _mesa_is_multisample_enabled(ctx) &&
ctx->Multisample.SampleAlphaToOne,
"HW workaround: disabling alpha to one with dual src "
"blending\n");
@@ -226,7 +226,7 @@ gen8_upload_ps_blend(struct brw_context *brw)
dw1 |= GEN8_PS_BLEND_ALPHA_TEST_ENABLE;
/* _NEW_MULTISAMPLE */
- if (ctx->Multisample._Enabled && ctx->Multisample.SampleAlphaToCoverage)
+ if (_mesa_is_multisample_enabled(ctx) && ctx->Multisample.SampleAlphaToCoverage)
dw1 |= GEN8_PS_BLEND_ALPHA_TO_COVERAGE_ENABLE;
/* Used for implementing the following bit of GL_EXT_texture_integer:
diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c
index 93100a0..8aaa1a8 100644
--- a/src/mesa/drivers/dri/i965/gen8_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c
@@ -29,6 +29,7 @@
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"
+#include "main/framebuffer.h"
/**
* Helper function to emit depth related command packets.
@@ -303,7 +304,7 @@ pma_fix_enable(const struct brw_context *brw)
const bool kill_pixel =
brw->wm.prog_data->uses_kill ||
brw->wm.prog_data->uses_omask ||
- (ctx->Multisample._Enabled && ctx->Multisample.SampleAlphaToCoverage) ||
+ (_mesa_is_multisample_enabled(ctx) && ctx->Multisample.SampleAlphaToCoverage) ||
ctx->Color.AlphaEnabled;
/* The big formula in CACHE_MODE_1::NP PMA FIX ENABLE. */
diff --git a/src/mesa/drivers/dri/i965/gen8_sf_state.c b/src/mesa/drivers/dri/i965/gen8_sf_state.c
index 8b6f31f..2ac21f7 100644
--- a/src/mesa/drivers/dri/i965/gen8_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_sf_state.c
@@ -178,7 +178,7 @@ upload_sf(struct brw_context *brw)
dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH;
/* _NEW_POINT | _NEW_MULTISAMPLE */
- if ((ctx->Point.SmoothFlag || ctx->Multisample._Enabled) &&
+ if ((ctx->Point.SmoothFlag || _mesa_is_multisample_enabled(ctx)) &&
!ctx->Point.PointSprite) {
dw3 |= GEN8_SF_SMOOTH_POINT_ENABLE;
}
@@ -249,7 +249,7 @@ upload_raster(struct brw_context *brw)
if (ctx->Point.SmoothFlag)
dw1 |= GEN8_RASTER_SMOOTH_POINT_ENABLE;
- if (ctx->Multisample._Enabled)
+ if (_mesa_is_multisample_enabled(ctx))
dw1 |= GEN8_RASTER_API_MULTISAMPLE_ENABLE;
if (ctx->Polygon.OffsetFill)
diff --git a/src/mesa/drivers/dri/i965/intel_copy_image.c b/src/mesa/drivers/dri/i965/intel_copy_image.c
index 08b7623..ccb82b6 100644
--- a/src/mesa/drivers/dri/i965/intel_copy_image.c
+++ b/src/mesa/drivers/dri/i965/intel_copy_image.c
@@ -140,9 +140,9 @@ copy_image_with_memcpy(struct brw_context *brw,
_mesa_get_format_block_size(src_mt->format, &src_bw, &src_bh);
assert(src_width % src_bw == 0);
- assert(src_height % src_bw == 0);
+ assert(src_height % src_bh == 0);
assert(src_x % src_bw == 0);
- assert(src_y % src_bw == 0);
+ assert(src_y % src_bh == 0);
/* If we are on the same miptree, same level, and same slice, then
* intel_miptree_map won't let us map it twice. We have to do things a
@@ -153,7 +153,7 @@ copy_image_with_memcpy(struct brw_context *brw,
if (same_slice) {
assert(dst_x % src_bw == 0);
- assert(dst_y % src_bw == 0);
+ assert(dst_y % src_bh == 0);
map_x1 = MIN2(src_x, dst_x);
map_y1 = MIN2(src_y, dst_y);
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 6c233d8..9e84abb 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -2172,7 +2172,8 @@ intel_miptree_updownsample(struct brw_context *brw,
src->logical_width0, src->logical_height0,
0, 0,
dst->logical_width0, dst->logical_height0,
- GL_NEAREST, false, false /*mirror x, y*/);
+ GL_NEAREST, false, false /*mirror x, y*/,
+ false, false);
} else if (src->format == MESA_FORMAT_S_UINT8) {
brw_meta_stencil_updownsample(brw, src, dst);
} else {
@@ -2194,7 +2195,8 @@ intel_miptree_updownsample(struct brw_context *brw,
src->logical_width0, src->logical_height0,
0, 0,
dst->logical_width0, dst->logical_height0,
- GL_NEAREST, false, false /*mirror x, y*/);
+ GL_NEAREST, false, false /*mirror x, y*/,
+ false, false /* decode/encode srgb */);
}
}
diff --git a/src/mesa/main/debug_output.c b/src/mesa/main/debug_output.c
index c2b9f05..85f64bd 100644
--- a/src/mesa/main/debug_output.c
+++ b/src/mesa/main/debug_output.c
@@ -779,7 +779,7 @@ _mesa_get_debug_state_int(struct gl_context *ctx, GLenum pname)
break;
case GL_DEBUG_NEXT_LOGGED_MESSAGE_LENGTH:
val = (debug->Log.NumMessages) ?
- debug->Log.Messages[debug->Log.NextMessage].length : 0;
+ debug->Log.Messages[debug->Log.NextMessage].length + 1 : 0;
break;
case GL_DEBUG_GROUP_STACK_DEPTH:
val = debug->CurrentGroup + 1;
@@ -1009,15 +1009,16 @@ _mesa_DebugMessageInsert(GLenum source, GLenum type, GLuint id,
if (!validate_length(ctx, callerstr, length, buf))
return; /* GL_INVALID_VALUE */
+ /* if length not specified, string will be null terminated: */
+ if (length < 0)
+ length = strlen(buf);
+
_mesa_log_msg(ctx, gl_enum_to_debug_source(source),
gl_enum_to_debug_type(type), id,
gl_enum_to_debug_severity(severity),
length, buf);
if (type == GL_DEBUG_TYPE_MARKER && ctx->Driver.EmitStringMarker) {
- /* if length not specified, string will be null terminated: */
- if (length < 0)
- length = strlen(buf);
ctx->Driver.EmitStringMarker(ctx, buf, length);
}
}
@@ -1188,6 +1189,9 @@ _mesa_PushDebugGroup(GLenum source, GLuint id, GLsizei length,
if (!validate_length(ctx, callerstr, length, message))
return; /* GL_INVALID_VALUE */
+ if (length < 0)
+ length = strlen(message);
+
debug = _mesa_lock_debug_state(ctx);
if (!debug)
return;
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index d490918..bb8d4c3 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -3623,6 +3623,23 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx,
_mesa_enum_to_string(attachment));
return;
}
+
+ /* The specs are not clear about how to handle
+ * GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME with the default framebuffer,
+ * but dEQP-GLES3 expects an INVALID_ENUM error. This has also been
+ * discussed in:
+ *
+ * https://cvs.khronos.org/bugzilla/show_bug.cgi?id=12928#c1
+ * and https://bugs.freedesktop.org/show_bug.cgi?id=31947
+ */
+ if (pname == GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME) {
+ _mesa_error(ctx, GL_INVALID_ENUM,
+ "%s(requesting GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME "
+ "when GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE is "
+ "GL_FRAMEBUFFER_DEFAULT is not allowed)", caller);
+ return;
+ }
+
/* the default / window-system FBO */
att = _mesa_get_fb0_attachment(ctx, buffer, attachment);
}
diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c
index d18166d..f69dc6c 100644
--- a/src/mesa/main/framebuffer.c
+++ b/src/mesa/main/framebuffer.c
@@ -983,3 +983,22 @@ _mesa_is_front_buffer_drawing(const struct gl_framebuffer *fb)
return (fb->_NumColorDrawBuffers >= 1 &&
fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT);
}
+
+static inline GLuint
+_mesa_geometric_nonvalidated_samples(const struct gl_framebuffer *buffer)
+{
+ return buffer->_HasAttachments ?
+ buffer->Visual.samples :
+ buffer->DefaultGeometry.NumSamples;
+}
+
+bool _mesa_is_multisample_enabled(const struct gl_context *ctx)
+{
+ /* The sample count may not be validated by the driver, but when it is set,
+ * we know that it is in a valid range and no driver should ever validate a
+ * multisampled framebuffer to non-multisampled and vice versa.
+ */
+ return ctx->Multisample.Enabled &&
+ ctx->DrawBuffer &&
+ _mesa_geometric_nonvalidated_samples(ctx->DrawBuffer) > 1;
+}
diff --git a/src/mesa/main/framebuffer.h b/src/mesa/main/framebuffer.h
index fa434d4..384f749 100644
--- a/src/mesa/main/framebuffer.h
+++ b/src/mesa/main/framebuffer.h
@@ -146,4 +146,7 @@ _mesa_is_front_buffer_reading(const struct gl_framebuffer *fb);
extern bool
_mesa_is_front_buffer_drawing(const struct gl_framebuffer *fb);
+extern bool
+_mesa_is_multisample_enabled(const struct gl_context *ctx);
+
#endif /* FRAMEBUFFER_H */
diff --git a/src/mesa/main/genmipmap.c b/src/mesa/main/genmipmap.c
index 6eacd42..1a6ae9a 100644
--- a/src/mesa/main/genmipmap.c
+++ b/src/mesa/main/genmipmap.c
@@ -79,6 +79,20 @@ bool
_mesa_is_valid_generate_texture_mipmap_internalformat(struct gl_context *ctx,
GLenum internalformat)
{
+ if (_mesa_is_gles3(ctx)) {
+ /* From the ES 3.2 specification's description of GenerateMipmap():
+ * "An INVALID_OPERATION error is generated if the levelbase array was
+ * not specified with an unsized internal format from table 8.3 or a
+ * sized internal format that is both color-renderable and
+ * texture-filterable according to table 8.10."
+ */
+ return internalformat == GL_RGBA || internalformat == GL_RGB ||
+ internalformat == GL_LUMINANCE_ALPHA ||
+ internalformat == GL_LUMINANCE || internalformat == GL_ALPHA ||
+ (_mesa_is_es3_color_renderable(internalformat) &&
+ _mesa_is_es3_texture_filterable(internalformat));
+ }
+
return (!_mesa_is_enum_format_integer(internalformat) &&
!_mesa_is_depthstencil_format(internalformat) &&
!_mesa_is_astc_format(internalformat) &&
diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
index cf64958..96ab393 100644
--- a/src/mesa/main/glformats.c
+++ b/src/mesa/main/glformats.c
@@ -3556,3 +3556,86 @@ _mesa_format_from_format_and_type(GLenum format, GLenum type)
*/
unreachable("Unsupported format");
}
+
+/**
+ * Returns true if \p internal_format is a sized internal format that
+ * is marked "Color Renderable" in Table 8.10 of the ES 3.2 specification.
+ */
+bool
+_mesa_is_es3_color_renderable(GLenum internal_format)
+{
+ switch (internal_format) {
+ case GL_R8:
+ case GL_RG8:
+ case GL_RGB8:
+ case GL_RGB565:
+ case GL_RGBA4:
+ case GL_RGB5_A1:
+ case GL_RGBA8:
+ case GL_RGB10_A2:
+ case GL_RGB10_A2UI:
+ case GL_SRGB8_ALPHA8:
+ case GL_R16F:
+ case GL_RG16F:
+ case GL_RGBA16F:
+ case GL_R32F:
+ case GL_RG32F:
+ case GL_RGBA32F:
+ case GL_R11F_G11F_B10F:
+ case GL_R8I:
+ case GL_R8UI:
+ case GL_R16I:
+ case GL_R16UI:
+ case GL_R32I:
+ case GL_R32UI:
+ case GL_RG8I:
+ case GL_RG8UI:
+ case GL_RG16I:
+ case GL_RG16UI:
+ case GL_RG32I:
+ case GL_RG32UI:
+ case GL_RGBA8I:
+ case GL_RGBA8UI:
+ case GL_RGBA16I:
+ case GL_RGBA16UI:
+ case GL_RGBA32I:
+ case GL_RGBA32UI:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/**
+ * Returns true if \p internal_format is a sized internal format that
+ * is marked "Texture Filterable" in Table 8.10 of the ES 3.2 specification.
+ */
+bool
+_mesa_is_es3_texture_filterable(GLenum internal_format)
+{
+ switch (internal_format) {
+ case GL_R8:
+ case GL_R8_SNORM:
+ case GL_RG8:
+ case GL_RG8_SNORM:
+ case GL_RGB8:
+ case GL_RGB8_SNORM:
+ case GL_RGB565:
+ case GL_RGBA4:
+ case GL_RGB5_A1:
+ case GL_RGBA8:
+ case GL_RGBA8_SNORM:
+ case GL_RGB10_A2:
+ case GL_SRGB8:
+ case GL_SRGB8_ALPHA8:
+ case GL_R16F:
+ case GL_RG16F:
+ case GL_RGB16F:
+ case GL_RGBA16F:
+ case GL_R11F_G11F_B10F:
+ case GL_RGB9_E5:
+ return true;
+ default:
+ return false;
+ }
+}
diff --git a/src/mesa/main/glformats.h b/src/mesa/main/glformats.h
index 00d2767..c73f464 100644
--- a/src/mesa/main/glformats.h
+++ b/src/mesa/main/glformats.h
@@ -28,6 +28,7 @@
#define GLFORMATS_H
+#include <stdbool.h>
#include <GL/gl.h>
@@ -144,6 +145,12 @@ _mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat );
extern uint32_t
_mesa_format_from_format_and_type(GLenum format, GLenum type);
+extern bool
+_mesa_is_es3_color_renderable(GLenum internal_format);
+
+extern bool
+_mesa_is_es3_texture_filterable(GLenum internal_format);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 2e43996..71aae17 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -667,7 +667,6 @@ struct gl_list_attrib
struct gl_multisample_attrib
{
GLboolean Enabled;
- GLboolean _Enabled; /**< true if Enabled and multisample buffer */
GLboolean SampleAlphaToCoverage;
GLboolean SampleAlphaToOne;
GLboolean SampleCoverage;
diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c
index 57f1341..917ae4d 100644
--- a/src/mesa/main/state.c
+++ b/src/mesa/main/state.c
@@ -344,20 +344,6 @@ update_frontbit(struct gl_context *ctx)
/**
- * Update derived multisample state.
- */
-static void
-update_multisample(struct gl_context *ctx)
-{
- ctx->Multisample._Enabled = GL_FALSE;
- if (ctx->Multisample.Enabled &&
- ctx->DrawBuffer &&
- _mesa_geometric_samples(ctx->DrawBuffer) > 0)
- ctx->Multisample._Enabled = GL_TRUE;
-}
-
-
-/**
* Update the ctx->VertexProgram._TwoSideEnabled flag.
*/
static void
@@ -450,9 +436,6 @@ _mesa_update_state_locked( struct gl_context *ctx )
if (new_state & _NEW_PIXEL)
_mesa_update_pixel( ctx, new_state );
- if (new_state & (_NEW_MULTISAMPLE | _NEW_BUFFERS))
- update_multisample( ctx );
-
/* ctx->_NeedEyeCoords is now up to date.
*
* If the truth value of this variable has changed, update for the
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 10d931c..1d9047e 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2356,7 +2356,7 @@ add_uniform_to_shader::visit_field(const glsl_type *type, const char *name,
file = PROGRAM_UNIFORM;
}
- int index = _mesa_lookup_parameter_index(params, -1, name);
+ int index = _mesa_lookup_parameter_index(params, name);
if (index < 0) {
index = _mesa_add_parameter(params, file, name, size, type->gl_type,
NULL, NULL);
diff --git a/src/mesa/program/prog_parameter.c b/src/mesa/program/prog_parameter.c
index 34183d4..02d84f2 100644
--- a/src/mesa/program/prog_parameter.c
+++ b/src/mesa/program/prog_parameter.c
@@ -37,6 +37,99 @@
#include "prog_statevars.h"
+/**
+ * Look for a float vector in the given parameter list. The float vector
+ * may be of length 1, 2, 3 or 4. If swizzleOut is non-null, we'll try
+ * swizzling to find a match.
+ * \param list the parameter list to search
+ * \param v the float vector to search for
+ * \param vSize number of elements in v
+ * \param posOut returns the position of the constant, if found
+ * \param swizzleOut returns a swizzle mask describing location of the
+ * vector elements if found.
+ * \return GL_TRUE if found, GL_FALSE if not found
+ */
+static GLboolean
+lookup_parameter_constant(const struct gl_program_parameter_list *list,
+ const gl_constant_value v[], GLuint vSize,
+ GLint *posOut, GLuint *swizzleOut)
+{
+ GLuint i;
+
+ assert(vSize >= 1);
+ assert(vSize <= 4);
+
+ if (!list) {
+ *posOut = -1;
+ return GL_FALSE;
+ }
+
+ for (i = 0; i < list->NumParameters; i++) {
+ if (list->Parameters[i].Type == PROGRAM_CONSTANT) {
+ if (!swizzleOut) {
+ /* swizzle not allowed */
+ GLuint j, match = 0;
+ for (j = 0; j < vSize; j++) {
+ if (v[j].u == list->ParameterValues[i][j].u)
+ match++;
+ }
+ if (match == vSize) {
+ *posOut = i;
+ return GL_TRUE;
+ }
+ }
+ else {
+ /* try matching w/ swizzle */
+ if (vSize == 1) {
+ /* look for v[0] anywhere within float[4] value */
+ GLuint j;
+ for (j = 0; j < list->Parameters[i].Size; j++) {
+ if (list->ParameterValues[i][j].u == v[0].u) {
+ /* found it */
+ *posOut = i;
+ *swizzleOut = MAKE_SWIZZLE4(j, j, j, j);
+ return GL_TRUE;
+ }
+ }
+ }
+ else if (vSize <= list->Parameters[i].Size) {
+ /* see if we can match this constant (with a swizzle) */
+ GLuint swz[4];
+ GLuint match = 0, j, k;
+ for (j = 0; j < vSize; j++) {
+ if (v[j].u == list->ParameterValues[i][j].u) {
+ swz[j] = j;
+ match++;
+ }
+ else {
+ for (k = 0; k < list->Parameters[i].Size; k++) {
+ if (v[j].u == list->ParameterValues[i][k].u) {
+ swz[j] = k;
+ match++;
+ break;
+ }
+ }
+ }
+ }
+ /* smear last value to remaining positions */
+ for (; j < 4; j++)
+ swz[j] = swz[j-1];
+
+ if (match == vSize) {
+ *posOut = i;
+ *swizzleOut = MAKE_SWIZZLE4(swz[0], swz[1], swz[2], swz[3]);
+ return GL_TRUE;
+ }
+ }
+ }
+ }
+ }
+
+ *posOut = -1;
+ return GL_FALSE;
+}
+
+
struct gl_program_parameter_list *
_mesa_new_parameter_list(void)
{
@@ -54,17 +147,17 @@ _mesa_new_parameter_list_sized(unsigned size)
/* alloc arrays */
p->Parameters = (struct gl_program_parameter *)
- calloc(size, sizeof(struct gl_program_parameter));
+ calloc(size, sizeof(struct gl_program_parameter));
p->ParameterValues = (gl_constant_value (*)[4])
_mesa_align_malloc(size * 4 *sizeof(gl_constant_value), 16);
if ((p->Parameters == NULL) || (p->ParameterValues == NULL)) {
- free(p->Parameters);
- _mesa_align_free(p->ParameterValues);
- free(p);
- p = NULL;
+ free(p->Parameters);
+ _mesa_align_free(p->ParameterValues);
+ free(p);
+ p = NULL;
}
}
@@ -191,7 +284,7 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList,
else {
/* silence valgrind */
for (j = 0; j < 4; j++)
- paramList->ParameterValues[oldNum + i][j].f = 0;
+ paramList->ParameterValues[oldNum + i][j].f = 0;
}
size -= 4;
}
@@ -228,8 +321,7 @@ _mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList,
assert(size <= 4);
if (swizzleOut &&
- _mesa_lookup_parameter_constant(paramList, values,
- size, &pos, swizzleOut)) {
+ lookup_parameter_constant(paramList, values, size, &pos, swizzleOut)) {
return pos;
}
@@ -264,28 +356,6 @@ _mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList,
return pos;
}
-/**
- * Add a new unnamed constant to the parameter list. This will be used
- * when a fragment/vertex program contains something like this:
- * MOV r, { 0, 1, 2, 3 };
- * If swizzleOut is non-null we'll search the parameter list for an
- * existing instance of the constant which matches with a swizzle.
- *
- * \param paramList the parameter list
- * \param values four float values
- * \param swizzleOut returns swizzle mask for accessing the constant
- * \return index/position of the new parameter in the parameter list.
- * \sa _mesa_add_typed_unnamed_constant
- */
-GLint
-_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
- const gl_constant_value values[4], GLuint size,
- GLuint *swizzleOut)
-{
- return _mesa_add_typed_unnamed_constant(paramList, values, size, GL_NONE,
- swizzleOut);
-}
-
/**
* Add a new state reference to the parameter list.
@@ -307,8 +377,8 @@ _mesa_add_state_reference(struct gl_program_parameter_list *paramList,
/* Check if the state reference is already in the list */
for (index = 0; index < (GLint) paramList->NumParameters; index++) {
if (!memcmp(paramList->Parameters[index].StateIndexes,
- stateTokens, STATE_LENGTH * sizeof(gl_state_index))) {
- return index;
+ stateTokens, STATE_LENGTH * sizeof(gl_state_index))) {
+ return index;
}
}
@@ -323,134 +393,3 @@ _mesa_add_state_reference(struct gl_program_parameter_list *paramList,
return index;
}
-
-
-/**
- * Given a program parameter name, find its position in the list of parameters.
- * \param paramList the parameter list to search
- * \param nameLen length of name (in chars).
- * If length is negative, assume that name is null-terminated.
- * \param name the name to search for
- * \return index of parameter in the list.
- */
-GLint
-_mesa_lookup_parameter_index(const struct gl_program_parameter_list *paramList,
- GLsizei nameLen, const char *name)
-{
- GLint i;
-
- if (!paramList)
- return -1;
-
- if (nameLen == -1) {
- /* name is null-terminated */
- for (i = 0; i < (GLint) paramList->NumParameters; i++) {
- if (paramList->Parameters[i].Name &&
- strcmp(paramList->Parameters[i].Name, name) == 0)
- return i;
- }
- }
- else {
- /* name is not null-terminated, use nameLen */
- for (i = 0; i < (GLint) paramList->NumParameters; i++) {
- if (paramList->Parameters[i].Name &&
- strncmp(paramList->Parameters[i].Name, name, nameLen) == 0
- && strlen(paramList->Parameters[i].Name) == (size_t)nameLen)
- return i;
- }
- }
- return -1;
-}
-
-
-/**
- * Look for a float vector in the given parameter list. The float vector
- * may be of length 1, 2, 3 or 4. If swizzleOut is non-null, we'll try
- * swizzling to find a match.
- * \param list the parameter list to search
- * \param v the float vector to search for
- * \param vSize number of element in v
- * \param posOut returns the position of the constant, if found
- * \param swizzleOut returns a swizzle mask describing location of the
- * vector elements if found.
- * \return GL_TRUE if found, GL_FALSE if not found
- */
-GLboolean
-_mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
- const gl_constant_value v[], GLuint vSize,
- GLint *posOut, GLuint *swizzleOut)
-{
- GLuint i;
-
- assert(vSize >= 1);
- assert(vSize <= 4);
-
- if (!list) {
- *posOut = -1;
- return GL_FALSE;
- }
-
- for (i = 0; i < list->NumParameters; i++) {
- if (list->Parameters[i].Type == PROGRAM_CONSTANT) {
- if (!swizzleOut) {
- /* swizzle not allowed */
- GLuint j, match = 0;
- for (j = 0; j < vSize; j++) {
- if (v[j].u == list->ParameterValues[i][j].u)
- match++;
- }
- if (match == vSize) {
- *posOut = i;
- return GL_TRUE;
- }
- }
- else {
- /* try matching w/ swizzle */
- if (vSize == 1) {
- /* look for v[0] anywhere within float[4] value */
- GLuint j;
- for (j = 0; j < list->Parameters[i].Size; j++) {
- if (list->ParameterValues[i][j].u == v[0].u) {
- /* found it */
- *posOut = i;
- *swizzleOut = MAKE_SWIZZLE4(j, j, j, j);
- return GL_TRUE;
- }
- }
- }
- else if (vSize <= list->Parameters[i].Size) {
- /* see if we can match this constant (with a swizzle) */
- GLuint swz[4];
- GLuint match = 0, j, k;
- for (j = 0; j < vSize; j++) {
- if (v[j].u == list->ParameterValues[i][j].u) {
- swz[j] = j;
- match++;
- }
- else {
- for (k = 0; k < list->Parameters[i].Size; k++) {
- if (v[j].u == list->ParameterValues[i][k].u) {
- swz[j] = k;
- match++;
- break;
- }
- }
- }
- }
- /* smear last value to remaining positions */
- for (; j < 4; j++)
- swz[j] = swz[j-1];
-
- if (match == vSize) {
- *posOut = i;
- *swizzleOut = MAKE_SWIZZLE4(swz[0], swz[1], swz[2], swz[3]);
- return GL_TRUE;
- }
- }
- }
- }
- }
-
- *posOut = -1;
- return GL_FALSE;
-}
diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h
index c04d7a2..320f64f 100644
--- a/src/mesa/program/prog_parameter.h
+++ b/src/mesa/program/prog_parameter.h
@@ -34,6 +34,7 @@
#include "main/mtypes.h"
#include "prog_statevars.h"
+#include <string.h>
#ifdef __cplusplus
extern "C" {
@@ -99,12 +100,6 @@ _mesa_new_parameter_list_sized(unsigned size);
extern void
_mesa_free_parameter_list(struct gl_program_parameter_list *paramList);
-static inline GLuint
-_mesa_num_parameters(const struct gl_program_parameter_list *list)
-{
- return list ? list->NumParameters : 0;
-}
-
extern void
_mesa_reserve_parameter_storage(struct gl_program_parameter_list *paramList,
unsigned reserve_slots);
@@ -121,23 +116,36 @@ _mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList,
const gl_constant_value values[4], GLuint size,
GLenum datatype, GLuint *swizzleOut);
-extern GLint
+static inline GLint
_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
const gl_constant_value values[4], GLuint size,
- GLuint *swizzleOut);
+ GLuint *swizzleOut)
+{
+ return _mesa_add_typed_unnamed_constant(paramList, values, size, GL_NONE,
+ swizzleOut);
+}
extern GLint
_mesa_add_state_reference(struct gl_program_parameter_list *paramList,
const gl_state_index stateTokens[STATE_LENGTH]);
-extern GLint
+
+static inline GLint
_mesa_lookup_parameter_index(const struct gl_program_parameter_list *paramList,
- GLsizei nameLen, const char *name);
+ const char *name)
+{
+ if (!paramList)
+ return -1;
-extern GLboolean
-_mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
- const gl_constant_value v[], GLuint vSize,
- GLint *posOut, GLuint *swizzleOut);
+ /* name must be null-terminated */
+ for (GLint i = 0; i < (GLint) paramList->NumParameters; i++) {
+ if (paramList->Parameters[i].Name &&
+ strcmp(paramList->Parameters[i].Name, name) == 0)
+ return i;
+ }
+
+ return -1;
+}
#ifdef __cplusplus
}
diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c
index db53377..03ece67 100644
--- a/src/mesa/program/prog_statevars.c
+++ b/src/mesa/program/prog_statevars.c
@@ -502,7 +502,7 @@ _mesa_fetch_state(struct gl_context *ctx, const gl_state_index state[],
minImplSize = ctx->Const.MinPointSizeAA;
maxImplSize = ctx->Const.MaxPointSize;
}
- else if (ctx->Point.SmoothFlag || ctx->Multisample._Enabled) {
+ else if (ctx->Point.SmoothFlag || _mesa_is_multisample_enabled(ctx)) {
minImplSize = ctx->Const.MinPointSizeAA;
maxImplSize = ctx->Const.MaxPointSizeAA;
}
diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c
index 1f916ab..16b79c9 100644
--- a/src/mesa/program/prog_to_nir.c
+++ b/src/mesa/program/prog_to_nir.c
@@ -142,7 +142,7 @@ ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
load->num_components = 4;
load->variables[0] = nir_deref_var_create(load, c->input_vars[prog_src->Index]);
- nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
+ nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
nir_builder_instr_insert(b, &load->instr);
src.src = nir_src_for_ssa(&load->dest.ssa);
@@ -171,7 +171,7 @@ ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
nir_intrinsic_instr *load =
nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
- nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
+ nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
load->num_components = 4;
load->variables[0] = nir_deref_var_create(load, c->parameters);
@@ -246,7 +246,7 @@ ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
} else {
assert(swizzle != SWIZZLE_NIL);
nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
- nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, NULL);
+ nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, 32, NULL);
mov->dest.write_mask = 0x1;
mov->src[0] = src;
mov->src[0].swizzle[0] = swizzle;
@@ -676,7 +676,7 @@ ptn_tex(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src,
assert(src_number == num_srcs);
- nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL);
+ nir_ssa_dest_init(&instr->instr, &instr->dest, 4, 32, NULL);
nir_builder_instr_insert(b, &instr->instr);
/* Resolve the writemask on the texture op. */
@@ -974,7 +974,7 @@ setup_registers_and_variables(struct ptn_compile *c)
nir_intrinsic_instr_create(shader, nir_intrinsic_load_var);
load_x->num_components = 1;
load_x->variables[0] = nir_deref_var_create(load_x, var);
- nir_ssa_dest_init(&load_x->instr, &load_x->dest, 1, NULL);
+ nir_ssa_dest_init(&load_x->instr, &load_x->dest, 1, 32, NULL);
nir_builder_instr_insert(b, &load_x->instr);
nir_ssa_def *f001 = nir_vec4(b, &load_x->dest.ssa, nir_imm_float(b, 0.0),
diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c
index c20cadf..366163e 100644
--- a/src/mesa/state_tracker/st_atom_rasterizer.c
+++ b/src/mesa/state_tracker/st_atom_rasterizer.c
@@ -31,6 +31,7 @@
*/
#include "main/macros.h"
+#include "main/framebuffer.h"
#include "st_context.h"
#include "st_atom.h"
#include "st_debug.h"
@@ -235,12 +236,12 @@ static void update_raster_state( struct st_context *st )
raster->line_stipple_factor = ctx->Line.StippleFactor - 1;
/* _NEW_MULTISAMPLE */
- raster->multisample = ctx->Multisample._Enabled;
+ raster->multisample = _mesa_is_multisample_enabled(ctx);
/* _NEW_MULTISAMPLE | _NEW_BUFFERS */
raster->force_persample_interp =
!st->force_persample_in_shader &&
- ctx->Multisample._Enabled &&
+ _mesa_is_multisample_enabled(ctx) &&
ctx->Multisample.SampleShading &&
ctx->Multisample.MinSampleShadingValue *
ctx->DrawBuffer->Visual.samples > 1;
diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c
index ff90bd6..709f0cb 100644
--- a/src/mesa/state_tracker/st_atom_shader.c
+++ b/src/mesa/state_tracker/st_atom_shader.c
@@ -74,7 +74,7 @@ update_fp( struct st_context *st )
/* _NEW_MULTISAMPLE | _NEW_BUFFERS */
key.persample_shading =
st->force_persample_in_shader &&
- st->ctx->Multisample._Enabled &&
+ _mesa_is_multisample_enabled(st->ctx) &&
st->ctx->Multisample.SampleShading &&
st->ctx->Multisample.MinSampleShadingValue *
_mesa_geometric_samples(st->ctx->DrawBuffer) > 1;
diff --git a/src/mesa/state_tracker/st_cb_bitmap.h b/src/mesa/state_tracker/st_cb_bitmap.h
index 4d1ae22..323158e 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.h
+++ b/src/mesa/state_tracker/st_cb_bitmap.h
@@ -49,7 +49,7 @@ st_flush_bitmap_cache(struct st_context *st);
extern const struct tgsi_token *
st_get_bitmap_shader(const struct tgsi_token *tokens,
- unsigned sampler_index,
+ unsigned tex_target, unsigned sampler_index,
bool use_texcoord, bool swizzle_xxxx);
#endif /* ST_CB_BITMAP_H */
diff --git a/src/mesa/state_tracker/st_cb_bitmap_shader.c b/src/mesa/state_tracker/st_cb_bitmap_shader.c
index cddea36..7ce078d 100644
--- a/src/mesa/state_tracker/st_cb_bitmap_shader.c
+++ b/src/mesa/state_tracker/st_cb_bitmap_shader.c
@@ -36,6 +36,7 @@ struct tgsi_bitmap_transform {
struct tgsi_transform_context base;
struct tgsi_shader_info info;
unsigned sampler_index;
+ unsigned tex_target;
bool use_texcoord;
bool swizzle_xxxx;
bool first_instruction_emitted;
@@ -52,8 +53,9 @@ transform_instr(struct tgsi_transform_context *tctx,
struct tgsi_full_instruction *current_inst)
{
struct tgsi_bitmap_transform *ctx = tgsi_bitmap_transform(tctx);
- struct tgsi_full_declaration decl;
struct tgsi_full_instruction inst;
+ unsigned tgsi_tex_target = ctx->tex_target == PIPE_TEXTURE_2D
+ ? TGSI_TEXTURE_2D : TGSI_TEXTURE_RECT;
unsigned i, semantic;
int texcoord_index = -1;
@@ -66,9 +68,7 @@ transform_instr(struct tgsi_transform_context *tctx,
/* Add TEMP[0] if it's missing. */
if (ctx->info.file_max[TGSI_FILE_TEMPORARY] == -1) {
- decl = tgsi_default_full_declaration();
- decl.Declaration.File = TGSI_FILE_TEMPORARY;
- tctx->emit_declaration(tctx, &decl);
+ tgsi_transform_temp_decl(tctx, 0);
}
/* Add TEXCOORD[0] if it's missing. */
@@ -83,45 +83,23 @@ transform_instr(struct tgsi_transform_context *tctx,
}
if (texcoord_index == -1) {
- decl = tgsi_default_full_declaration();
- decl.Declaration.File = TGSI_FILE_INPUT;
- decl.Declaration.Semantic = 1;
- decl.Semantic.Name = semantic;
- decl.Declaration.Interpolate = 1;
- decl.Interp.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
- decl.Range.First = decl.Range.Last = ctx->info.num_inputs;
texcoord_index = ctx->info.num_inputs;
- tctx->emit_declaration(tctx, &decl);
+ tgsi_transform_input_decl(tctx, texcoord_index,
+ semantic, 0, TGSI_INTERPOLATE_PERSPECTIVE);
}
/* Declare the sampler. */
- decl = tgsi_default_full_declaration();
- decl.Declaration.File = TGSI_FILE_SAMPLER;
- decl.Range.First = decl.Range.Last = ctx->sampler_index;
- tctx->emit_declaration(tctx, &decl);
+ tgsi_transform_sampler_decl(tctx, ctx->sampler_index);
- /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
- inst = tgsi_default_full_instruction();
- inst.Instruction.Opcode = TGSI_OPCODE_TEX;
- inst.Instruction.Texture = 1;
- inst.Texture.Texture = TGSI_TEXTURE_2D;
-
- inst.Instruction.NumDstRegs = 1;
- inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
- inst.Dst[0].Register.Index = 0;
- inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
-
- inst.Instruction.NumSrcRegs = 2;
- inst.Src[0].Register.File = TGSI_FILE_INPUT;
- inst.Src[0].Register.Index = texcoord_index;
- inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
- inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y;
- inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Z;
- inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_W;
- inst.Src[1].Register.File = TGSI_FILE_SAMPLER;
- inst.Src[1].Register.Index = ctx->sampler_index;
+ /* Declare the sampler view. */
+ tgsi_transform_sampler_view_decl(tctx, ctx->sampler_index,
+ tgsi_tex_target, TGSI_RETURN_TYPE_FLOAT);
- tctx->emit_instruction(tctx, &inst);
+ /* TEX tmp0, fragment.texcoord[0], texture[sampler_index], 2D or RECT; */
+ tgsi_transform_tex_inst(tctx,
+ TGSI_FILE_TEMPORARY, 0,
+ TGSI_FILE_INPUT, texcoord_index,
+ tgsi_tex_target, ctx->sampler_index);
/* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */
inst = tgsi_default_full_instruction();
@@ -150,15 +128,19 @@ transform_instr(struct tgsi_transform_context *tctx,
const struct tgsi_token *
st_get_bitmap_shader(const struct tgsi_token *tokens,
- unsigned sampler_index,
+ unsigned tex_target, unsigned sampler_index,
bool use_texcoord, bool swizzle_xxxx)
{
struct tgsi_bitmap_transform ctx;
struct tgsi_token *newtoks;
int newlen;
+ assert(tex_target == PIPE_TEXTURE_2D ||
+ tex_target == PIPE_TEXTURE_RECT);
+
memset(&ctx, 0, sizeof(ctx));
ctx.base.transform_instruction = transform_instr;
+ ctx.tex_target = tex_target;
ctx.sampler_index = sampler_index;
ctx.use_texcoord = use_texcoord;
ctx.swizzle_xxxx = swizzle_xxxx;
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index 51d4ae5..09f4d8e 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -142,11 +142,21 @@ get_drawpix_z_stencil_program(struct st_context *st,
out_color = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
depth_sampler = ureg_DECL_sampler(ureg, 0);
+ ureg_DECL_sampler_view(ureg, 0, TGSI_TEXTURE_2D,
+ TGSI_RETURN_TYPE_FLOAT,
+ TGSI_RETURN_TYPE_FLOAT,
+ TGSI_RETURN_TYPE_FLOAT,
+ TGSI_RETURN_TYPE_FLOAT);
out_depth = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
}
if (write_stencil) {
stencil_sampler = ureg_DECL_sampler(ureg, 1);
+ ureg_DECL_sampler_view(ureg, 1, TGSI_TEXTURE_2D,
+ TGSI_RETURN_TYPE_UINT,
+ TGSI_RETURN_TYPE_UINT,
+ TGSI_RETURN_TYPE_UINT,
+ TGSI_RETURN_TYPE_UINT);
out_stencil = ureg_DECL_output(ureg, TGSI_SEMANTIC_STENCIL, 0);
}
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.h b/src/mesa/state_tracker/st_cb_drawpixels.h
index f1fb32d..24526d5 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.h
+++ b/src/mesa/state_tracker/st_cb_drawpixels.h
@@ -46,6 +46,6 @@ st_get_drawpix_shader(const struct tgsi_token *tokens, bool use_texcoord,
bool scale_and_bias, unsigned scale_const,
unsigned bias_const, bool pixel_maps,
unsigned drawpix_sampler, unsigned pixelmap_sampler,
- unsigned texcoord_const);
+ unsigned texcoord_const, unsigned tex_target);
#endif /* ST_CB_DRAWPIXELS_H */
diff --git a/src/mesa/state_tracker/st_cb_drawpixels_shader.c b/src/mesa/state_tracker/st_cb_drawpixels_shader.c
index 749b46c..35a9da0 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels_shader.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels_shader.c
@@ -43,6 +43,7 @@ struct tgsi_drawpix_transform {
unsigned drawpix_sampler;
unsigned pixelmap_sampler;
unsigned texcoord_const;
+ unsigned tex_target;
};
static inline struct tgsi_drawpix_transform *
@@ -72,8 +73,8 @@ transform_instr(struct tgsi_transform_context *tctx,
struct tgsi_full_instruction *current_inst)
{
struct tgsi_drawpix_transform *ctx = tgsi_drawpix_transform(tctx);
- struct tgsi_full_declaration decl;
- struct tgsi_full_instruction inst;
+ const unsigned tgsi_tex_target = ctx->tex_target == PIPE_TEXTURE_2D
+ ? TGSI_TEXTURE_2D : TGSI_TEXTURE_RECT;
unsigned i, sem_texcoord = ctx->use_texcoord ? TGSI_SEMANTIC_TEXCOORD :
TGSI_SEMANTIC_GENERIC;
int texcoord_index = -1;
@@ -86,33 +87,21 @@ transform_instr(struct tgsi_transform_context *tctx,
/* Add scale and bias constants. */
if (ctx->scale_and_bias) {
if (ctx->info.const_file_max[0] < (int)ctx->scale_const) {
- decl = tgsi_default_full_declaration();
- decl.Declaration.File = TGSI_FILE_CONSTANT;
- decl.Range.First = decl.Range.Last = ctx->scale_const;
- tctx->emit_declaration(tctx, &decl);
+ tgsi_transform_const_decl(tctx, ctx->scale_const, ctx->scale_const);
}
if (ctx->info.const_file_max[0] < (int)ctx->bias_const) {
- decl = tgsi_default_full_declaration();
- decl.Declaration.File = TGSI_FILE_CONSTANT;
- decl.Range.First = decl.Range.Last = ctx->bias_const;
- tctx->emit_declaration(tctx, &decl);
+ tgsi_transform_const_decl(tctx, ctx->bias_const, ctx->bias_const);
}
}
if (ctx->info.const_file_max[0] < (int)ctx->texcoord_const) {
- decl = tgsi_default_full_declaration();
- decl.Declaration.File = TGSI_FILE_CONSTANT;
- decl.Range.First = decl.Range.Last = ctx->texcoord_const;
- tctx->emit_declaration(tctx, &decl);
+ tgsi_transform_const_decl(tctx, ctx->texcoord_const, ctx->texcoord_const);
}
/* Add a new temp. */
ctx->color_temp = ctx->info.file_max[TGSI_FILE_TEMPORARY] + 1;
- decl = tgsi_default_full_declaration();
- decl.Declaration.File = TGSI_FILE_TEMPORARY;
- decl.Range.First = decl.Range.Last = ctx->color_temp;
- tctx->emit_declaration(tctx, &decl);
+ tgsi_transform_temp_decl(tctx, ctx->color_temp);
/* Add TEXCOORD[texcoord_slot] if it's missing. */
for (i = 0; i < ctx->info.num_inputs; i++) {
@@ -124,75 +113,51 @@ transform_instr(struct tgsi_transform_context *tctx,
}
if (texcoord_index == -1) {
- decl = tgsi_default_full_declaration();
- decl.Declaration.File = TGSI_FILE_INPUT;
- decl.Declaration.Semantic = 1;
- decl.Semantic.Name = sem_texcoord;
- decl.Declaration.Interpolate = 1;
- decl.Interp.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
- decl.Range.First = decl.Range.Last = ctx->info.num_inputs;
texcoord_index = ctx->info.num_inputs;
- tctx->emit_declaration(tctx, &decl);
+ tgsi_transform_input_decl(tctx, texcoord_index, sem_texcoord, 0,
+ TGSI_INTERPOLATE_PERSPECTIVE);
}
/* Declare the drawpix sampler if it's missing. */
if (!(ctx->info.samplers_declared & (1 << ctx->drawpix_sampler))) {
- decl = tgsi_default_full_declaration();
- decl.Declaration.File = TGSI_FILE_SAMPLER;
- decl.Range.First = decl.Range.Last = ctx->drawpix_sampler;
- tctx->emit_declaration(tctx, &decl);
+ tgsi_transform_sampler_decl(tctx, ctx->drawpix_sampler);
+
+ /* emit sampler view declaration */
+ tgsi_transform_sampler_view_decl(tctx, ctx->drawpix_sampler,
+ tgsi_tex_target, TGSI_RETURN_TYPE_FLOAT);
}
/* Declare the pixel map sampler if it's missing. */
if (ctx->pixel_maps &&
!(ctx->info.samplers_declared & (1 << ctx->pixelmap_sampler))) {
- decl = tgsi_default_full_declaration();
- decl.Declaration.File = TGSI_FILE_SAMPLER;
- decl.Range.First = decl.Range.Last = ctx->pixelmap_sampler;
- tctx->emit_declaration(tctx, &decl);
+ tgsi_transform_sampler_decl(tctx, ctx->pixelmap_sampler);
+
+ /* emit sampler view declaration */
+ tgsi_transform_sampler_view_decl(tctx, ctx->pixelmap_sampler,
+ TGSI_TEXTURE_2D, TGSI_RETURN_TYPE_FLOAT);
}
/* Get initial pixel color from the texture.
* TEX temp, fragment.texcoord[0], texture[0], 2D;
*/
- inst = tgsi_default_full_instruction();
- inst.Instruction.Opcode = TGSI_OPCODE_TEX;
- inst.Instruction.Texture = 1;
- inst.Texture.Texture = TGSI_TEXTURE_2D;
-
- inst.Instruction.NumDstRegs = 1;
- inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
- inst.Dst[0].Register.Index = ctx->color_temp;
- inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
-
- inst.Instruction.NumSrcRegs = 2;
- SET_SRC(&inst, 0, TGSI_FILE_INPUT, texcoord_index, X, Y, Z, W);
- inst.Src[1].Register.File = TGSI_FILE_SAMPLER;
- inst.Src[1].Register.Index = ctx->drawpix_sampler;
-
- tctx->emit_instruction(tctx, &inst);
+ tgsi_transform_tex_inst(tctx, TGSI_FILE_TEMPORARY, ctx->color_temp,
+ TGSI_FILE_INPUT, texcoord_index,
+ tgsi_tex_target, ctx->drawpix_sampler);
/* Apply the scale and bias. */
if (ctx->scale_and_bias) {
/* MAD temp, temp, scale, bias; */
- inst = tgsi_default_full_instruction();
- inst.Instruction.Opcode = TGSI_OPCODE_MAD;
-
- inst.Instruction.NumDstRegs = 1;
- inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
- inst.Dst[0].Register.Index = ctx->color_temp;
- inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
-
- inst.Instruction.NumSrcRegs = 3;
- SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->color_temp, X, Y, Z, W);
- SET_SRC(&inst, 1, TGSI_FILE_CONSTANT, ctx->scale_const, X, Y, Z, W);
- SET_SRC(&inst, 2, TGSI_FILE_CONSTANT, ctx->bias_const, X, Y, Z, W);
-
- tctx->emit_instruction(tctx, &inst);
+ tgsi_transform_op3_inst(tctx, TGSI_OPCODE_MAD,
+ TGSI_FILE_TEMPORARY, ctx->color_temp,
+ TGSI_WRITEMASK_XYZW,
+ TGSI_FILE_TEMPORARY, ctx->color_temp,
+ TGSI_FILE_CONSTANT, ctx->scale_const,
+ TGSI_FILE_CONSTANT, ctx->bias_const);
}
if (ctx->pixel_maps) {
/* do four pixel map look-ups with two TEX instructions: */
+ struct tgsi_full_instruction inst;
/* TEX temp.xy, temp.xyyy, texture[1], 2D; */
inst = tgsi_default_full_instruction();
@@ -250,12 +215,15 @@ st_get_drawpix_shader(const struct tgsi_token *tokens, bool use_texcoord,
bool scale_and_bias, unsigned scale_const,
unsigned bias_const, bool pixel_maps,
unsigned drawpix_sampler, unsigned pixelmap_sampler,
- unsigned texcoord_const)
+ unsigned texcoord_const, unsigned tex_target)
{
struct tgsi_drawpix_transform ctx;
struct tgsi_token *newtoks;
int newlen;
+ assert(tex_target == PIPE_TEXTURE_2D ||
+ tex_target == PIPE_TEXTURE_RECT);
+
memset(&ctx, 0, sizeof(ctx));
ctx.base.transform_instruction = transform_instr;
ctx.use_texcoord = use_texcoord;
@@ -266,9 +234,10 @@ st_get_drawpix_shader(const struct tgsi_token *tokens, bool use_texcoord,
ctx.drawpix_sampler = drawpix_sampler;
ctx.pixelmap_sampler = pixelmap_sampler;
ctx.texcoord_const = texcoord_const;
+ ctx.tex_target = tex_target;
tgsi_scan_shader(tokens, &ctx.info);
- newlen = tgsi_num_tokens(tokens) + 30;
+ newlen = tgsi_num_tokens(tokens) + 60;
newtoks = tgsi_alloc_tokens(newlen);
if (!newtoks)
return NULL;
diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c
index 82ab914..ff570e0 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -387,6 +387,7 @@ st_update_renderbuffer_surface(struct st_context *st,
{
struct pipe_context *pipe = st->pipe;
struct pipe_resource *resource = strb->texture;
+ struct st_texture_object *stTexObj = NULL;
unsigned rtt_width = strb->Base.Width;
unsigned rtt_height = strb->Base.Height;
unsigned rtt_depth = strb->Base.Depth;
@@ -398,9 +399,18 @@ st_update_renderbuffer_surface(struct st_context *st,
*/
boolean enable_srgb = (st->ctx->Color.sRGBEnabled &&
_mesa_get_format_color_encoding(strb->Base.Format) == GL_SRGB);
- enum pipe_format format = (enable_srgb) ?
- util_format_srgb(resource->format) :
- util_format_linear(resource->format);
+ enum pipe_format format = resource->format;
+
+ if (strb->is_rtt) {
+ stTexObj = st_texture_object(strb->Base.TexImage->TexObject);
+ if (stTexObj->surface_based)
+ format = stTexObj->surface_format;
+ }
+
+ format = (enable_srgb) ?
+ util_format_srgb(format) :
+ util_format_linear(format);
+
unsigned first_layer, last_layer, level;
if (resource->target == PIPE_TEXTURE_1D_ARRAY) {
@@ -431,8 +441,8 @@ st_update_renderbuffer_surface(struct st_context *st,
/* Adjust for texture views */
if (strb->is_rtt && resource->array_size > 1 &&
- strb->Base.TexImage->TexObject->Immutable) {
- struct gl_texture_object *tex = strb->Base.TexImage->TexObject;
+ stTexObj->base.Immutable) {
+ struct gl_texture_object *tex = &stTexObj->base;
first_layer += tex->MinLayer;
if (!strb->rtt_layered)
last_layer += tex->MinLayer;
@@ -492,8 +502,6 @@ st_render_texture(struct gl_context *ctx,
st_update_renderbuffer_surface(st, strb);
- strb->Base.Format = st_pipe_format_to_mesa_format(pt->format);
-
/* Invalidate buffer state so that the pipe's framebuffer state
* gets updated.
* That's where the new renderbuffer (which we just created) gets
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index bffa4d0..460c179 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -2886,10 +2886,13 @@ st_finalize_texture(struct gl_context *ctx,
/* Need to import images in main memory or held in other textures.
*/
if (stImage && stObj->pt != stImage->pt) {
+ GLuint depth = stObj->depth0;
+ if (stObj->base.Target == GL_TEXTURE_3D)
+ depth = u_minify(depth, level);
if (level == 0 ||
(stImage->base.Width == u_minify(stObj->width0, level) &&
stImage->base.Height == u_minify(stObj->height0, level) &&
- stImage->base.Depth == u_minify(stObj->depth0, level))) {
+ stImage->base.Depth == depth)) {
/* src image fits expected dest mipmap level size */
copy_image_data_to_texture(st, stObj, level, stImage);
}
diff --git a/src/mesa/state_tracker/st_cb_texturebarrier.c b/src/mesa/state_tracker/st_cb_texturebarrier.c
index 2de150b..fecba65 100644
--- a/src/mesa/state_tracker/st_cb_texturebarrier.c
+++ b/src/mesa/state_tracker/st_cb_texturebarrier.c
@@ -63,16 +63,54 @@ st_MemoryBarrier(struct gl_context *ctx, GLbitfield barriers)
struct pipe_context *pipe = st_context(ctx)->pipe;
unsigned flags = 0;
+ if (barriers & GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT)
+ flags |= PIPE_BARRIER_VERTEX_BUFFER;
+ if (barriers & GL_ELEMENT_ARRAY_BARRIER_BIT)
+ flags |= PIPE_BARRIER_INDEX_BUFFER;
+ if (barriers & GL_UNIFORM_BARRIER_BIT)
+ flags |= PIPE_BARRIER_CONSTANT_BUFFER;
+ if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT)
+ flags |= PIPE_BARRIER_TEXTURE;
+ if (barriers & GL_SHADER_IMAGE_ACCESS_BARRIER_BIT)
+ flags |= PIPE_BARRIER_IMAGE;
+ if (barriers & GL_COMMAND_BARRIER_BIT)
+ flags |= PIPE_BARRIER_INDIRECT_BUFFER;
+ if (barriers & GL_PIXEL_BUFFER_BARRIER_BIT) {
+ /* The PBO may be
+ * (1) bound as a texture for PBO uploads, or
+ * (2) accessed by the CPU via transfer ops.
+ * For case (2), we assume automatic flushing by the driver.
+ */
+ flags |= PIPE_BARRIER_TEXTURE;
+ }
+ /* GL_TEXTURE_UPDATE_BARRIER_BIT:
+ * Texture updates translate to:
+ * (1) texture transfers to/from the CPU,
+ * (2) texture as blit destination, or
+ * (3) texture as framebuffer.
+ * In all cases, we assume the driver does the required flushing
+ * automatically.
+ */
+ /* GL_BUFFER_UPDATE_BARRIER_BIT:
+ * Buffer updates translate to
+ * (1) buffer transfers to/from the CPU,
+ * (2) resource copies and clears.
+ * In all cases, we assume the driver does the required flushing
+ * automatically.
+ */
if (barriers & GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT)
flags |= PIPE_BARRIER_MAPPED_BUFFER;
+ if (barriers & GL_QUERY_BUFFER_BARRIER_BIT)
+ flags |= PIPE_BARRIER_QUERY_BUFFER;
+ if (barriers & GL_FRAMEBUFFER_BARRIER_BIT)
+ flags |= PIPE_BARRIER_FRAMEBUFFER;
+ if (barriers & GL_TRANSFORM_FEEDBACK_BARRIER_BIT)
+ flags |= PIPE_BARRIER_STREAMOUT_BUFFER;
if (barriers & GL_ATOMIC_COUNTER_BARRIER_BIT)
flags |= PIPE_BARRIER_SHADER_BUFFER;
if (barriers & GL_SHADER_STORAGE_BARRIER_BIT)
flags |= PIPE_BARRIER_SHADER_BUFFER;
- if (barriers & GL_QUERY_BUFFER_BARRIER_BIT)
- flags |= PIPE_BARRIER_QUERY_BUFFER;
-
if (flags && pipe->memory_barrier)
pipe->memory_barrier(pipe, flags);
}
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index 3666ece..2fdaba0 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -253,6 +253,13 @@ void st_init_limits(struct pipe_screen *screen,
pc->MaxLocalParams = MIN2(pc->MaxParameters, MAX_PROGRAM_LOCAL_PARAMS);
pc->MaxEnvParams = MIN2(pc->MaxParameters, MAX_PROGRAM_ENV_PARAMS);
+ if (screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_INTEGERS)) {
+ pc->LowInt.RangeMin = 31;
+ pc->LowInt.RangeMax = 30;
+ pc->LowInt.Precision = 0;
+ pc->MediumInt = pc->HighInt = pc->LowInt;
+ }
+
options->EmitNoNoise = TRUE;
/* TODO: make these more fine-grained if anyone needs it */
@@ -783,6 +790,7 @@ void st_init_extensions(struct pipe_screen *screen,
extensions->ARB_fragment_shader = GL_TRUE;
extensions->ARB_half_float_vertex = GL_TRUE;
extensions->ARB_internalformat_query = GL_TRUE;
+ extensions->ARB_internalformat_query2 = GL_TRUE;
extensions->ARB_map_buffer_range = GL_TRUE;
extensions->ARB_texture_border_clamp = GL_TRUE; /* XXX temp */
extensions->ARB_texture_cube_map = GL_TRUE;
diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c
index 5392c23..9a280fc 100644
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -1114,12 +1114,12 @@ static const struct format_mapping format_map[] = {
},
{
{ GL_RGB10_A2, 0 },
- { PIPE_FORMAT_B10G10R10A2_UNORM, PIPE_FORMAT_R10G10B10A2_UNORM,
+ { PIPE_FORMAT_R10G10B10A2_UNORM, PIPE_FORMAT_B10G10R10A2_UNORM,
DEFAULT_RGBA_FORMATS }
},
{
{ 4, GL_RGBA, GL_RGBA8, 0 },
- { DEFAULT_RGBA_FORMATS }
+ { PIPE_FORMAT_R8G8B8A8_UNORM, DEFAULT_RGBA_FORMATS }
},
{
{ GL_BGRA, 0 },
@@ -1127,7 +1127,7 @@ static const struct format_mapping format_map[] = {
},
{
{ 3, GL_RGB, GL_RGB8, 0 },
- { DEFAULT_RGB_FORMATS }
+ { PIPE_FORMAT_R8G8B8X8_UNORM, DEFAULT_RGB_FORMATS }
},
{
{ GL_RGB12, GL_RGB16, 0 },
@@ -1309,7 +1309,7 @@ static const struct format_mapping format_map[] = {
},
{
{ GL_SRGB_ALPHA_EXT, GL_SRGB8_ALPHA8_EXT, 0 },
- { DEFAULT_SRGBA_FORMATS }
+ { PIPE_FORMAT_R8G8B8A8_SRGB, DEFAULT_SRGBA_FORMATS }
},
{
{ GL_COMPRESSED_SRGB_EXT, GL_COMPRESSED_SRGB_S3TC_DXT1_EXT, 0 },
@@ -2022,20 +2022,10 @@ static const struct exact_format_mapping rgbx8888_tbl[] =
{ 0, 0, 0 }
};
-static const struct exact_format_mapping rgba1010102_tbl[] =
-{
- { GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV, PIPE_FORMAT_B10G10R10A2_UNORM },
- /* No Mesa formats for these Gallium formats:
- { GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, PIPE_FORMAT_R10G10B10A2_UNORM },
- { GL_ABGR_EXT, GL_UNSIGNED_INT_10_10_10_2, PIPE_FORMAT_R10G10B10A2_UNORM },
- { GL_ABGR_EXT, GL_UNSIGNED_INT, PIPE_FORMAT_R10G10B10A2_UNORM },
- */
- { 0, 0, 0 }
-};
-
/**
- * If there is an exact pipe_format match for {internalFormat, format, type}
- * return that, otherwise return PIPE_FORMAT_NONE so we can do fuzzy matching.
+ * For unsized/base internal formats, we may choose a convenient effective
+ * internal format for {format, type}. If one exists, return that, otherwise
+ * return PIPE_FORMAT_NONE.
*/
static enum pipe_format
find_exact_format(GLint internalFormat, GLenum format, GLenum type)
@@ -2049,17 +2039,12 @@ find_exact_format(GLint internalFormat, GLenum format, GLenum type)
switch (internalFormat) {
case 4:
case GL_RGBA:
- case GL_RGBA8:
tbl = rgba8888_tbl;
break;
case 3:
case GL_RGB:
- case GL_RGB8:
tbl = rgbx8888_tbl;
break;
- case GL_RGB10_A2:
- tbl = rgba1010102_tbl;
- break;
default:
return PIPE_FORMAT_NONE;
}
@@ -2216,7 +2201,15 @@ st_ChooseTextureFormat(struct gl_context *ctx, GLenum target,
enum pipe_format pFormat;
mesa_format mFormat;
unsigned bindings;
- enum pipe_texture_target pTarget = gl_target_to_pipe(target);
+ bool is_renderbuffer = false;
+ enum pipe_texture_target pTarget;
+
+ if (target == GL_RENDERBUFFER) {
+ pTarget = PIPE_TEXTURE_2D;
+ is_renderbuffer = true;
+ } else {
+ pTarget = gl_target_to_pipe(target);
+ }
if (target == GL_TEXTURE_1D || target == GL_TEXTURE_1D_ARRAY) {
/* We don't do compression for these texture targets because of
@@ -2234,7 +2227,7 @@ st_ChooseTextureFormat(struct gl_context *ctx, GLenum target,
bindings = PIPE_BIND_SAMPLER_VIEW;
if (_mesa_is_depth_or_stencil_format(internalFormat))
bindings |= PIPE_BIND_DEPTH_STENCIL;
- else if (internalFormat == 3 || internalFormat == 4 ||
+ else if (is_renderbuffer || internalFormat == 3 || internalFormat == 4 ||
internalFormat == GL_RGB || internalFormat == GL_RGBA ||
internalFormat == GL_RGB8 || internalFormat == GL_RGBA8 ||
internalFormat == GL_BGRA ||
@@ -2267,19 +2260,21 @@ st_ChooseTextureFormat(struct gl_context *ctx, GLenum target,
if (pFormat != PIPE_FORMAT_NONE)
return st_pipe_format_to_mesa_format(pFormat);
- /* try choosing format again, this time without render target bindings */
- pFormat = st_choose_matching_format(st, PIPE_BIND_SAMPLER_VIEW,
- format, type,
- ctx->Unpack.SwapBytes);
- if (pFormat != PIPE_FORMAT_NONE)
- return st_pipe_format_to_mesa_format(pFormat);
+ if (!is_renderbuffer) {
+ /* try choosing format again, this time without render target bindings */
+ pFormat = st_choose_matching_format(st, PIPE_BIND_SAMPLER_VIEW,
+ format, type,
+ ctx->Unpack.SwapBytes);
+ if (pFormat != PIPE_FORMAT_NONE)
+ return st_pipe_format_to_mesa_format(pFormat);
+ }
}
}
pFormat = st_choose_format(st, internalFormat, format, type,
pTarget, 0, bindings, ctx->Mesa_DXTn);
- if (pFormat == PIPE_FORMAT_NONE) {
+ if (pFormat == PIPE_FORMAT_NONE && !is_renderbuffer) {
/* try choosing format again, this time without render target bindings */
pFormat = st_choose_format(st, internalFormat, format, type,
pTarget, 0, PIPE_BIND_SAMPLER_VIEW,
@@ -2357,6 +2352,7 @@ void
st_QueryInternalFormat(struct gl_context *ctx, GLenum target,
GLenum internalFormat, GLenum pname, GLint *params)
{
+ struct st_context *st = st_context(ctx);
/* The API entry-point gives us a temporary params buffer that is non-NULL
* and guaranteed to have at least 16 elements.
*/
@@ -2374,7 +2370,30 @@ st_QueryInternalFormat(struct gl_context *ctx, GLenum target,
params[0] = (GLint) num_samples;
break;
}
-
+ case GL_INTERNALFORMAT_PREFERRED: {
+ params[0] = GL_NONE;
+
+ /* We need to resolve an internal format that is compatible with
+ * the passed internal format, and optimal for the driver. For now,
+ * we just validate that the passed internal format is supported by
+ * the driver, and if so return the same internal format, otherwise
+ * return GL_NONE.
+ */
+ uint usage;
+ if (_mesa_is_depth_or_stencil_format(internalFormat))
+ usage = PIPE_BIND_DEPTH_STENCIL;
+ else
+ usage = PIPE_BIND_RENDER_TARGET;
+ enum pipe_format pformat = st_choose_format(st,
+ internalFormat,
+ GL_NONE,
+ GL_NONE,
+ PIPE_TEXTURE_2D, 1,
+ usage, FALSE);
+ if (pformat)
+ params[0] = internalFormat;
+ break;
+ }
default:
/* For the rest of the pnames, we call back into Mesa's default
* function for drivers that don't implement ARB_internalformat_query2.
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 1841405..06b4bb4 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -6345,7 +6345,7 @@ st_translate_program(
}
if (program->use_shared_memory)
- t->shared_memory = ureg_DECL_shared_memory(ureg);
+ t->shared_memory = ureg_DECL_memory(ureg, TGSI_MEMORY_TYPE_SHARED);
for (i = 0; i < program->shader->NumImages; i++) {
if (program->images_used & (1 << i)) {
@@ -6370,6 +6370,42 @@ st_translate_program(
t->insn[t->labels[i].branch_target]);
}
+ /* Set the next shader stage hint for VS and TES. */
+ switch (procType) {
+ case TGSI_PROCESSOR_VERTEX:
+ case TGSI_PROCESSOR_TESS_EVAL:
+ if (program->shader_program->SeparateShader)
+ break;
+
+ for (i = program->shader->Stage+1; i <= MESA_SHADER_FRAGMENT; i++) {
+ if (program->shader_program->_LinkedShaders[i]) {
+ unsigned next;
+
+ switch (i) {
+ case MESA_SHADER_TESS_CTRL:
+ next = TGSI_PROCESSOR_TESS_CTRL;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ next = TGSI_PROCESSOR_TESS_EVAL;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ next = TGSI_PROCESSOR_GEOMETRY;
+ break;
+ case MESA_SHADER_FRAGMENT:
+ next = TGSI_PROCESSOR_FRAGMENT;
+ break;
+ default:
+ assert(0);
+ continue;
+ }
+
+ ureg_set_next_shader_processor(ureg, next);
+ break;
+ }
+ }
+ break;
+ }
+
out:
if (t) {
free(t->arrays);
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 8772efb..7a686b1 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -241,43 +241,75 @@ src_register( struct st_translate *t,
* Map mesa texture target to TGSI texture target.
*/
unsigned
-st_translate_texture_target( GLuint textarget,
- GLboolean shadow )
+st_translate_texture_target(GLuint textarget, GLboolean shadow)
{
if (shadow) {
- switch( textarget ) {
- case TEXTURE_1D_INDEX: return TGSI_TEXTURE_SHADOW1D;
- case TEXTURE_2D_INDEX: return TGSI_TEXTURE_SHADOW2D;
- case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_SHADOWRECT;
- case TEXTURE_1D_ARRAY_INDEX: return TGSI_TEXTURE_SHADOW1D_ARRAY;
- case TEXTURE_2D_ARRAY_INDEX: return TGSI_TEXTURE_SHADOW2D_ARRAY;
- case TEXTURE_CUBE_INDEX: return TGSI_TEXTURE_SHADOWCUBE;
- case TEXTURE_CUBE_ARRAY_INDEX: return TGSI_TEXTURE_SHADOWCUBE_ARRAY;
- default: break;
+ switch (textarget) {
+ case TEXTURE_1D_INDEX:
+ return TGSI_TEXTURE_SHADOW1D;
+ case TEXTURE_2D_INDEX:
+ return TGSI_TEXTURE_SHADOW2D;
+ case TEXTURE_RECT_INDEX:
+ return TGSI_TEXTURE_SHADOWRECT;
+ case TEXTURE_1D_ARRAY_INDEX:
+ return TGSI_TEXTURE_SHADOW1D_ARRAY;
+ case TEXTURE_2D_ARRAY_INDEX:
+ return TGSI_TEXTURE_SHADOW2D_ARRAY;
+ case TEXTURE_CUBE_INDEX:
+ return TGSI_TEXTURE_SHADOWCUBE;
+ case TEXTURE_CUBE_ARRAY_INDEX:
+ return TGSI_TEXTURE_SHADOWCUBE_ARRAY;
+ default:
+ break;
}
}
- switch( textarget ) {
- case TEXTURE_2D_MULTISAMPLE_INDEX: return TGSI_TEXTURE_2D_MSAA;
- case TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX: return TGSI_TEXTURE_2D_ARRAY_MSAA;
- case TEXTURE_BUFFER_INDEX: return TGSI_TEXTURE_BUFFER;
- case TEXTURE_1D_INDEX: return TGSI_TEXTURE_1D;
- case TEXTURE_2D_INDEX: return TGSI_TEXTURE_2D;
- case TEXTURE_3D_INDEX: return TGSI_TEXTURE_3D;
- case TEXTURE_CUBE_INDEX: return TGSI_TEXTURE_CUBE;
- case TEXTURE_CUBE_ARRAY_INDEX: return TGSI_TEXTURE_CUBE_ARRAY;
- case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_RECT;
- case TEXTURE_1D_ARRAY_INDEX: return TGSI_TEXTURE_1D_ARRAY;
- case TEXTURE_2D_ARRAY_INDEX: return TGSI_TEXTURE_2D_ARRAY;
- case TEXTURE_EXTERNAL_INDEX: return TGSI_TEXTURE_2D;
+ switch (textarget) {
+ case TEXTURE_2D_MULTISAMPLE_INDEX:
+ return TGSI_TEXTURE_2D_MSAA;
+ case TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX:
+ return TGSI_TEXTURE_2D_ARRAY_MSAA;
+ case TEXTURE_BUFFER_INDEX:
+ return TGSI_TEXTURE_BUFFER;
+ case TEXTURE_1D_INDEX:
+ return TGSI_TEXTURE_1D;
+ case TEXTURE_2D_INDEX:
+ return TGSI_TEXTURE_2D;
+ case TEXTURE_3D_INDEX:
+ return TGSI_TEXTURE_3D;
+ case TEXTURE_CUBE_INDEX:
+ return TGSI_TEXTURE_CUBE;
+ case TEXTURE_CUBE_ARRAY_INDEX:
+ return TGSI_TEXTURE_CUBE_ARRAY;
+ case TEXTURE_RECT_INDEX:
+ return TGSI_TEXTURE_RECT;
+ case TEXTURE_1D_ARRAY_INDEX:
+ return TGSI_TEXTURE_1D_ARRAY;
+ case TEXTURE_2D_ARRAY_INDEX:
+ return TGSI_TEXTURE_2D_ARRAY;
+ case TEXTURE_EXTERNAL_INDEX:
+ return TGSI_TEXTURE_2D;
default:
- debug_assert( 0 );
+ debug_assert(!"unexpected texture target index");
return TGSI_TEXTURE_1D;
}
}
/**
+ * Translate a (1 << TEXTURE_x_INDEX) bit into a TGSI_TEXTURE_x enum.
+ */
+static unsigned
+translate_texture_index(GLbitfield texBit, bool shadow)
+{
+ int index = ffs(texBit);
+ assert(index > 0);
+ assert(index - 1 < NUM_TEXTURE_TARGETS);
+ return st_translate_texture_target(index - 1, shadow);
+}
+
+
+/**
* Create a TGSI ureg_dst register from a Mesa dest register.
*/
static struct ureg_dst
@@ -1128,7 +1160,16 @@ st_translate_mesa_program(
/* texture samplers */
for (i = 0; i < ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; i++) {
if (program->SamplersUsed & (1 << i)) {
+ unsigned target =
+ translate_texture_index(program->TexturesUsed[i],
+ !!(program->ShadowSamplers & (1 << i)));
t->samplers[i] = ureg_DECL_sampler( ureg, i );
+ ureg_DECL_sampler_view(ureg, i, target,
+ TGSI_RETURN_TYPE_FLOAT,
+ TGSI_RETURN_TYPE_FLOAT,
+ TGSI_RETURN_TYPE_FLOAT,
+ TGSI_RETURN_TYPE_FLOAT);
+
}
}
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index c9f390a..80dcfd8 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -871,6 +871,7 @@ st_create_fp_variant(struct st_context *st,
variant->bitmap_sampler = ffs(~stfp->Base.Base.SamplersUsed) - 1;
tokens = st_get_bitmap_shader(tgsi.tokens,
+ st->internal_target,
variant->bitmap_sampler,
st->needs_texcoord_semantic,
st->bitmap.tex_format ==
@@ -923,7 +924,7 @@ st_create_fp_variant(struct st_context *st,
bias_const, key->pixelMaps,
variant->drawpix_sampler,
variant->pixelmap_sampler,
- texcoord_const);
+ texcoord_const, st->internal_target);
if (tokens) {
if (tgsi.tokens != stfp->tgsi.tokens)
diff --git a/src/mesa/swrast/s_points.c b/src/mesa/swrast/s_points.c
index d9aae73..3163b04 100644
--- a/src/mesa/swrast/s_points.c
+++ b/src/mesa/swrast/s_points.c
@@ -22,7 +22,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-
+#include "main/framebuffer.h"
#include "main/glheader.h"
#include "main/macros.h"
#include "s_context.h"
@@ -257,7 +257,7 @@ smooth_point(struct gl_context *ctx, const SWvertex *vert)
size = get_size(ctx, vert, GL_TRUE);
/* alpha attenuation / fade factor */
- if (ctx->Multisample._Enabled) {
+ if (_mesa_is_multisample_enabled(ctx)) {
if (vert->pointSize >= ctx->Point.Threshold) {
alphaAtten = 1.0F;
}
diff --git a/src/mesa/swrast/s_texture.c b/src/mesa/swrast/s_texture.c
index 9ccd0e3..d35bea9 100644
--- a/src/mesa/swrast/s_texture.c
+++ b/src/mesa/swrast/s_texture.c
@@ -60,7 +60,7 @@ _swrast_delete_texture_image(struct gl_context *ctx,
}
static unsigned int
-texture_slices(struct gl_texture_image *texImage)
+texture_slices(const struct gl_texture_image *texImage)
{
if (texImage->TexObject->Target == GL_TEXTURE_1D_ARRAY)
return texImage->Height;
@@ -188,6 +188,7 @@ check_map_teximage(const struct gl_texture_image *texImage,
assert(y < texImage->Height || texImage->Height == 0);
assert(x + w <= texImage->Width);
assert(y + h <= texImage->Height);
+ assert(slice < texture_slices(texImage));
}
/**
@@ -240,7 +241,6 @@ _swrast_map_teximage(struct gl_context *ctx,
assert(swImage->Buffer);
assert(swImage->Buffer == swImage->ImageSlices[0]);
- assert(slice < texture_slices(texImage));
map = swImage->ImageSlices[slice];
/* apply x/y offset to map address */