diff options
Diffstat (limited to 'src/compiler/glsl')
451 files changed, 85231 insertions, 0 deletions
diff --git a/src/compiler/glsl/.gitignore b/src/compiler/glsl/.gitignore new file mode 100644 index 0000000..dda423f --- /dev/null +++ b/src/compiler/glsl/.gitignore @@ -0,0 +1,10 @@ +glsl_compiler +glsl_lexer.cpp +glsl_parser.cpp +glsl_parser.h +glsl_parser.output +glsl_test +subtest-cr/ +subtest-lf/ +subtest-cr-lf/ +subtest-lf-cr/ diff --git a/src/compiler/glsl/Android.gen.mk b/src/compiler/glsl/Android.gen.mk new file mode 100644 index 0000000..c5741b4 --- /dev/null +++ b/src/compiler/glsl/Android.gen.mk @@ -0,0 +1,76 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +# included by glsl Android.mk for source generation + +ifeq ($(LOCAL_MODULE_CLASS),) +LOCAL_MODULE_CLASS := STATIC_LIBRARIES +endif + +intermediates := $(call local-generated-sources-dir) + +LOCAL_SRC_FILES := $(LOCAL_SRC_FILES) + +LOCAL_C_INCLUDES += \ + $(intermediates)/glcpp \ + $(MESA_TOP)/src/glsl/glcpp \ + +LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \ + $(LIBGLCPP_GENERATED_FILES) \ + $(LIBGLSL_GENERATED_CXX_FILES)) + +define local-l-or-ll-to-c-or-cpp + @mkdir -p $(dir $@) + @echo "Mesa Lex: $(PRIVATE_MODULE) <= $<" + $(hide) $(LEX) --nounistd -o$@ $< +endef + +define glsl_local-y-to-c-and-h + @mkdir -p $(dir $@) + @echo "Mesa Yacc: $(PRIVATE_MODULE) <= $<" + $(hide) $(YACC) -o $@ -p "glcpp_parser_" $< +endef + +define local-yy-to-cpp-and-h + @mkdir -p $(dir $@) + @echo "Mesa Yacc: $(PRIVATE_MODULE) <= $<" + $(hide) $(YACC) -p "_mesa_glsl_" -o $@ $< + touch $(@:$1=$(YACC_HEADER_SUFFIX)) + echo '#ifndef '$(@F:$1=_h) > $(@:$1=.h) + echo '#define '$(@F:$1=_h) >> $(@:$1=.h) + cat $(@:$1=$(YACC_HEADER_SUFFIX)) >> $(@:$1=.h) + echo '#endif' >> $(@:$1=.h) + rm -f $(@:$1=$(YACC_HEADER_SUFFIX)) +endef + +$(intermediates)/glsl_lexer.cpp: $(LOCAL_PATH)/glsl_lexer.ll + $(call local-l-or-ll-to-c-or-cpp) + +$(intermediates)/glsl_parser.cpp: $(LOCAL_PATH)/glsl_parser.yy + $(call local-yy-to-cpp-and-h,.cpp) + +$(intermediates)/glcpp/glcpp-lex.c: $(LOCAL_PATH)/glcpp/glcpp-lex.l + $(call local-l-or-ll-to-c-or-cpp) + +$(intermediates)/glcpp/glcpp-parse.c: $(LOCAL_PATH)/glcpp/glcpp-parse.y + $(call glsl_local-y-to-c-and-h) diff --git a/src/compiler/glsl/Android.mk b/src/compiler/glsl/Android.mk new file mode 100644 index 0000000..9cbb9a3 --- /dev/null +++ b/src/compiler/glsl/Android.mk @@ -0,0 +1,76 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com> +# Copyright (C) 2010-2011 LunarG Inc. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +# Android.mk for glsl + +LOCAL_PATH := $(call my-dir) + +include $(LOCAL_PATH)/Makefile.sources + +# --------------------------------------- +# Build libmesa_glsl +# --------------------------------------- + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + $(LIBGLCPP_FILES) \ + $(LIBGLSL_FILES) \ + $(NIR_FILES) + +LOCAL_C_INCLUDES := \ + $(MESA_TOP)/src/mapi \ + $(MESA_TOP)/src/mesa \ + $(MESA_TOP)/src/gallium/include \ + $(MESA_TOP)/src/gallium/auxiliary + +LOCAL_STATIC_LIBRARIES := libmesa_compiler + +LOCAL_MODULE := libmesa_glsl + +include $(LOCAL_PATH)/Android.gen.mk +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) + +# --------------------------------------- +# Build glsl_compiler +# --------------------------------------- + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + $(GLSL_COMPILER_CXX_FILES) + +LOCAL_C_INCLUDES := \ + $(MESA_TOP)/src/mapi \ + $(MESA_TOP)/src/mesa \ + $(MESA_TOP)/src/gallium/include \ + $(MESA_TOP)/src/gallium/auxiliary + +LOCAL_STATIC_LIBRARIES := libmesa_glsl libmesa_glsl_utils libmesa_util + +LOCAL_MODULE_TAGS := eng +LOCAL_MODULE := glsl_compiler + +include $(MESA_COMMON_MK) +include $(BUILD_EXECUTABLE) diff --git a/src/compiler/glsl/Makefile.am b/src/compiler/glsl/Makefile.am new file mode 100644 index 0000000..9954b81 --- /dev/null +++ b/src/compiler/glsl/Makefile.am @@ -0,0 +1,228 @@ +# Copyright © 2012 Jon TURNEY +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# 
Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +AM_CPPFLAGS = \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa/ \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/glsl/glcpp \ + -I$(top_srcdir)/src/gtest/include \ + $(DEFINES) +AM_CFLAGS = \ + $(VISIBILITY_CFLAGS) \ + $(MSVC2013_COMPAT_CFLAGS) +AM_CXXFLAGS = \ + $(VISIBILITY_CXXFLAGS) \ + $(MSVC2013_COMPAT_CXXFLAGS) + +EXTRA_DIST = tests glcpp/tests README TODO glcpp/README \ + glsl_lexer.ll \ + glsl_parser.yy \ + glcpp/glcpp-lex.l \ + glcpp/glcpp-parse.y \ + SConscript + +include Makefile.sources + +TESTS = glcpp/tests/glcpp-test \ + glcpp/tests/glcpp-test-cr-lf \ + tests/blob-test \ + tests/general-ir-test \ + tests/optimization-test \ + tests/sampler-types-test \ + tests/uniform-initializer-test + +TESTS_ENVIRONMENT= \ + export PYTHON2=$(PYTHON2); \ + export PYTHON_FLAGS=$(PYTHON_FLAGS); + +noinst_LTLIBRARIES = libglsl.la libglcpp.la +check_PROGRAMS = \ + glcpp/glcpp \ + glsl_test \ + tests/blob-test \ + tests/general-ir-test \ + tests/sampler-types-test \ + tests/uniform-initializer-test + +noinst_PROGRAMS = glsl_compiler + +tests_blob_test_SOURCES = \ + tests/blob_test.c +tests_blob_test_LDADD = \ + $(top_builddir)/src/glsl/libglsl.la + +tests_general_ir_test_SOURCES = \ + standalone_scaffolding.cpp \ + tests/builtin_variable_test.cpp \ + tests/invalidate_locations_test.cpp \ + tests/general_ir_test.cpp \ + tests/varyings_test.cpp 
+tests_general_ir_test_CFLAGS = \ + $(PTHREAD_CFLAGS) +tests_general_ir_test_LDADD = \ + $(top_builddir)/src/gtest/libgtest.la \ + $(top_builddir)/src/glsl/libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(PTHREAD_LIBS) + +tests_uniform_initializer_test_SOURCES = \ + tests/copy_constant_to_storage_tests.cpp \ + tests/set_uniform_initializer_tests.cpp \ + tests/uniform_initializer_utils.cpp \ + tests/uniform_initializer_utils.h +tests_uniform_initializer_test_CFLAGS = \ + $(PTHREAD_CFLAGS) +tests_uniform_initializer_test_LDADD = \ + $(top_builddir)/src/gtest/libgtest.la \ + $(top_builddir)/src/glsl/libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(PTHREAD_LIBS) + +tests_sampler_types_test_SOURCES = \ + tests/sampler_types_test.cpp +tests_sampler_types_test_CFLAGS = \ + $(PTHREAD_CFLAGS) +tests_sampler_types_test_LDADD = \ + $(top_builddir)/src/gtest/libgtest.la \ + $(top_builddir)/src/glsl/libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(PTHREAD_LIBS) + +libglcpp_la_LIBADD = \ + $(top_builddir)/src/util/libmesautil.la +libglcpp_la_SOURCES = \ + glcpp/glcpp-lex.c \ + glcpp/glcpp-parse.c \ + glcpp/glcpp-parse.h \ + $(LIBGLCPP_FILES) + +glcpp_glcpp_SOURCES = \ + glcpp/glcpp.c +glcpp_glcpp_LDADD = \ + libglcpp.la \ + $(top_builddir)/src/libglsl_util.la \ + -lm + +libglsl_la_LIBADD = \ + $(top_builddir)/src/compiler/nir/libnir.la \ + libglcpp.la + +libglsl_la_SOURCES = \ + glsl_lexer.cpp \ + glsl_parser.cpp \ + glsl_parser.h \ + $(LIBGLSL_FILES) + + +glsl_compiler_SOURCES = \ + $(GLSL_COMPILER_CXX_FILES) + +glsl_compiler_LDADD = \ + libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(top_builddir)/src/util/libmesautil.la \ + $(PTHREAD_LIBS) + +glsl_test_SOURCES = \ + standalone_scaffolding.cpp \ + test.cpp \ + test_optpass.cpp \ + test_optpass.h + +glsl_test_LDADD = \ + libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(PTHREAD_LIBS) + +# We write our own rules for yacc and lex below. 
We'd rather use automake, +# but automake makes it especially difficult for a number of reasons: +# +# * < automake-1.12 generates .h files from .yy and .ypp files, but +# >=automake-1.12 generates .hh and .hpp files respectively. There's no +# good way of making a project that uses C++ yacc files compatible with +# both versions of automake. Strong work automake developers. +# +# * Since we're generating code from .l/.y files in a subdirectory (glcpp/) +# we'd like the resulting generated code to also go in glcpp/ for purposes +# of distribution. Automake gives no way to do this. +# +# * Since we're building multiple yacc parsers into one library (and via one +# Makefile) we have to use per-target YFLAGS. Using per-target YFLAGS causes +# automake to name the resulting generated code as <library-name>_filename.c. +# Frankly, that's ugly and we don't want a libglcpp_glcpp_parser.h file. + +# In order to make build output print "LEX" and "YACC", we reproduce the +# automake variables below. 
+ +AM_V_LEX = $(am__v_LEX_$(V)) +am__v_LEX_ = $(am__v_LEX_$(AM_DEFAULT_VERBOSITY)) +am__v_LEX_0 = @echo " LEX " $@; +am__v_LEX_1 = + +AM_V_YACC = $(am__v_YACC_$(V)) +am__v_YACC_ = $(am__v_YACC_$(AM_DEFAULT_VERBOSITY)) +am__v_YACC_0 = @echo " YACC " $@; +am__v_YACC_1 = + +MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D) +YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS) +LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS) + +glsl_parser.cpp glsl_parser.h: glsl_parser.yy + $(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $(srcdir)/glsl_parser.yy + +glsl_lexer.cpp: glsl_lexer.ll + $(LEX_GEN) -o $@ $(srcdir)/glsl_lexer.ll + +glcpp/glcpp-parse.c glcpp/glcpp-parse.h: glcpp/glcpp-parse.y + $(MKDIR_GEN) + $(YACC_GEN) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $(srcdir)/glcpp/glcpp-parse.y + +glcpp/glcpp-lex.c: glcpp/glcpp-lex.l + $(MKDIR_GEN) + $(LEX_GEN) -o $@ $(srcdir)/glcpp/glcpp-lex.l + +# Only the parsers (specifically the header files generated at the same time) +# need to be in BUILT_SOURCES. Though if we list the parser headers YACC is +# called for the .c/.cpp file and the .h files. By listing the .c/.cpp files +# YACC is only executed once for each parser. The rest of the generated code +# will be created at the appropriate times according to standard automake +# dependency rules. 
+BUILT_SOURCES = \ + glsl_parser.cpp \ + glsl_lexer.cpp \ + glcpp/glcpp-parse.c \ + glcpp/glcpp-lex.c +CLEANFILES = \ + glcpp/glcpp-parse.h \ + glsl_parser.h \ + $(BUILT_SOURCES) + +clean-local: + $(RM) -r subtest-cr subtest-cr-lf subtest-lf subtest-lf-cr + +dist-hook: + $(RM) glcpp/tests/*.out + $(RM) glcpp/tests/subtest*/*.out diff --git a/src/compiler/glsl/Makefile.sources b/src/compiler/glsl/Makefile.sources new file mode 100644 index 0000000..08b40c5 --- /dev/null +++ b/src/compiler/glsl/Makefile.sources @@ -0,0 +1,222 @@ +# shared source lists for Makefile, SConscript, and Android.mk + +# libglcpp + +LIBGLCPP_FILES = \ + glcpp/glcpp.h \ + glcpp/pp.c + +LIBGLCPP_GENERATED_FILES = \ + glcpp/glcpp-lex.c \ + glcpp/glcpp-parse.c + +NIR_GENERATED_FILES = \ + nir/nir_builder_opcodes.h \ + nir/nir_constant_expressions.c \ + nir/nir_opcodes.c \ + nir/nir_opcodes.h \ + nir/nir_opt_algebraic.c + +NIR_FILES = \ + nir/nir.c \ + nir/nir.h \ + nir/nir_array.h \ + nir/nir_builder.h \ + nir/nir_clone.c \ + nir/nir_constant_expressions.h \ + nir/nir_control_flow.c \ + nir/nir_control_flow.h \ + nir/nir_control_flow_private.h \ + nir/nir_dominance.c \ + nir/nir_from_ssa.c \ + nir/nir_gs_count_vertices.c \ + nir/nir_intrinsics.c \ + nir/nir_intrinsics.h \ + nir/nir_instr_set.c \ + nir/nir_instr_set.h \ + nir/nir_liveness.c \ + nir/nir_lower_alu_to_scalar.c \ + nir/nir_lower_atomics.c \ + nir/nir_lower_clip.c \ + nir/nir_lower_global_vars_to_local.c \ + nir/nir_lower_gs_intrinsics.c \ + nir/nir_lower_load_const_to_scalar.c \ + nir/nir_lower_locals_to_regs.c \ + nir/nir_lower_idiv.c \ + nir/nir_lower_io.c \ + nir/nir_lower_outputs_to_temporaries.c \ + nir/nir_lower_phis_to_scalar.c \ + nir/nir_lower_samplers.c \ + nir/nir_lower_system_values.c \ + nir/nir_lower_tex.c \ + nir/nir_lower_to_source_mods.c \ + nir/nir_lower_two_sided_color.c \ + nir/nir_lower_vars_to_ssa.c \ + nir/nir_lower_var_copies.c \ + nir/nir_lower_vec_to_movs.c \ + nir/nir_metadata.c \ + 
nir/nir_move_vec_src_uses_to_dest.c \ + nir/nir_normalize_cubemap_coords.c \ + nir/nir_opt_constant_folding.c \ + nir/nir_opt_copy_propagate.c \ + nir/nir_opt_cse.c \ + nir/nir_opt_dce.c \ + nir/nir_opt_dead_cf.c \ + nir/nir_opt_gcm.c \ + nir/nir_opt_global_to_local.c \ + nir/nir_opt_peephole_select.c \ + nir/nir_opt_remove_phis.c \ + nir/nir_opt_undef.c \ + nir/nir_print.c \ + nir/nir_remove_dead_variables.c \ + nir/nir_search.c \ + nir/nir_search.h \ + nir/nir_split_var_copies.c \ + nir/nir_sweep.c \ + nir/nir_to_ssa.c \ + nir/nir_validate.c \ + nir/nir_vla.h \ + nir/nir_worklist.c \ + nir/nir_worklist.h + +# libglsl + +LIBGLSL_FILES = \ + ast.h \ + ast_array_index.cpp \ + ast_expr.cpp \ + ast_function.cpp \ + ast_to_hir.cpp \ + ast_type.cpp \ + blob.c \ + blob.h \ + builtin_functions.cpp \ + builtin_types.cpp \ + builtin_variables.cpp \ + glsl_parser_extras.cpp \ + glsl_parser_extras.h \ + glsl_symbol_table.cpp \ + glsl_symbol_table.h \ + hir_field_selection.cpp \ + ir_basic_block.cpp \ + ir_basic_block.h \ + ir_builder.cpp \ + ir_builder.h \ + ir_clone.cpp \ + ir_constant_expression.cpp \ + ir.cpp \ + ir.h \ + ir_equals.cpp \ + ir_expression_flattening.cpp \ + ir_expression_flattening.h \ + ir_function_can_inline.cpp \ + ir_function_detect_recursion.cpp \ + ir_function_inlining.h \ + ir_function.cpp \ + ir_hierarchical_visitor.cpp \ + ir_hierarchical_visitor.h \ + ir_hv_accept.cpp \ + ir_import_prototypes.cpp \ + ir_optimization.h \ + ir_print_visitor.cpp \ + ir_print_visitor.h \ + ir_reader.cpp \ + ir_reader.h \ + ir_rvalue_visitor.cpp \ + ir_rvalue_visitor.h \ + ir_set_program_inouts.cpp \ + ir_uniform.h \ + ir_validate.cpp \ + ir_variable_refcount.cpp \ + ir_variable_refcount.h \ + ir_visitor.h \ + linker.cpp \ + linker.h \ + link_atomics.cpp \ + link_functions.cpp \ + link_interface_blocks.cpp \ + link_uniforms.cpp \ + link_uniform_initializers.cpp \ + link_uniform_block_active_visitor.cpp \ + link_uniform_block_active_visitor.h \ + link_uniform_blocks.cpp 
\ + link_varyings.cpp \ + link_varyings.h \ + list.h \ + loop_analysis.cpp \ + loop_analysis.h \ + loop_controls.cpp \ + loop_unroll.cpp \ + lower_buffer_access.cpp \ + lower_buffer_access.h \ + lower_clip_distance.cpp \ + lower_const_arrays_to_uniforms.cpp \ + lower_discard.cpp \ + lower_discard_flow.cpp \ + lower_if_to_cond_assign.cpp \ + lower_instructions.cpp \ + lower_jumps.cpp \ + lower_mat_op_to_vec.cpp \ + lower_noise.cpp \ + lower_offset_array.cpp \ + lower_packed_varyings.cpp \ + lower_named_interface_blocks.cpp \ + lower_packing_builtins.cpp \ + lower_subroutine.cpp \ + lower_tess_level.cpp \ + lower_texture_projection.cpp \ + lower_variable_index_to_cond_assign.cpp \ + lower_vec_index_to_cond_assign.cpp \ + lower_vec_index_to_swizzle.cpp \ + lower_vector.cpp \ + lower_vector_derefs.cpp \ + lower_vector_insert.cpp \ + lower_vertex_id.cpp \ + lower_output_reads.cpp \ + lower_shared_reference.cpp \ + lower_ubo_reference.cpp \ + opt_algebraic.cpp \ + opt_array_splitting.cpp \ + opt_conditional_discard.cpp \ + opt_constant_folding.cpp \ + opt_constant_propagation.cpp \ + opt_constant_variable.cpp \ + opt_copy_propagation.cpp \ + opt_copy_propagation_elements.cpp \ + opt_dead_builtin_variables.cpp \ + opt_dead_builtin_varyings.cpp \ + opt_dead_code.cpp \ + opt_dead_code_local.cpp \ + opt_dead_functions.cpp \ + opt_flatten_nested_if_blocks.cpp \ + opt_flip_matrices.cpp \ + opt_function_inlining.cpp \ + opt_if_simplification.cpp \ + opt_minmax.cpp \ + opt_noop_swizzle.cpp \ + opt_rebalance_tree.cpp \ + opt_redundant_jumps.cpp \ + opt_structure_splitting.cpp \ + opt_swizzle_swizzle.cpp \ + opt_tree_grafting.cpp \ + opt_vectorize.cpp \ + program.h \ + s_expression.cpp \ + s_expression.h + +# glsl to nir pass +GLSL_TO_NIR_FILES = \ + nir/glsl_to_nir.cpp \ + nir/glsl_to_nir.h + +# glsl_compiler + +GLSL_COMPILER_CXX_FILES = \ + standalone_scaffolding.cpp \ + standalone_scaffolding.h \ + main.cpp + +# libglsl generated sources +LIBGLSL_GENERATED_CXX_FILES = \ + 
glsl_lexer.cpp \ + glsl_parser.cpp diff --git a/src/compiler/glsl/README b/src/compiler/glsl/README new file mode 100644 index 0000000..bfcf69f --- /dev/null +++ b/src/compiler/glsl/README @@ -0,0 +1,228 @@ +Welcome to Mesa's GLSL compiler. A brief overview of how things flow: + +1) lex and yacc-based preprocessor takes the incoming shader string +and produces a new string containing the preprocessed shader. This +takes care of things like #if, #ifdef, #define, and preprocessor macro +invocations. Note that #version, #extension, and some others are +passed straight through. See glcpp/* + +2) lex and yacc-based parser takes the preprocessed string and +generates the AST (abstract syntax tree). Almost no checking is +performed in this stage. See glsl_lexer.ll and glsl_parser.yy. + +3) The AST is converted to "HIR". This is the intermediate +representation of the compiler. Constructors are generated, function +calls are resolved to particular function signatures, and all the +semantic checking is performed. See ast_*.cpp for the conversion, and +ir.h for the IR structures. + +4) The driver (Mesa, or main.cpp for the standalone binary) performs +optimizations. These include copy propagation, dead code elimination, +constant folding, and others. Generally the driver will call +optimizations in a loop, as each may open up opportunities for other +optimizations to do additional work. See most files called ir_*.cpp + +5) linking is performed. This does checking to ensure that the +outputs of the vertex shader match the inputs of the fragment shader, +and assigns locations to uniforms, attributes, and varyings. See +linker.cpp. + +6) The driver may perform additional optimization at this point, as +for example dead code elimination previously couldn't remove functions +or global variable usage when we didn't know what other code would be +linked in. 
+ +7) The driver performs code generation out of the IR, taking a linked +shader program and producing a compiled program for each stage. See +../mesa/program/ir_to_mesa.cpp for Mesa IR code generation. + +FAQ: + +Q: What is HIR versus IR versus LIR? + +A: The idea behind the naming was that ast_to_hir would produce a +high-level IR ("HIR"), with things like matrix operations, structure +assignments, etc., present. A series of lowering passes would occur +that do things like break matrix multiplication into a series of dot +products/MADs, make structure assignment be a series of assignment of +components, flatten if statements into conditional moves, and such, +producing a low level IR ("LIR"). + +However, it now appears that each driver will have different +requirements from a LIR. A 915-generation chipset wants all functions +inlined, all loops unrolled, all ifs flattened, no variable array +accesses, and matrix multiplication broken down. The Mesa IR backend +for swrast would like matrices and structure assignment broken down, +but it can support function calls and dynamic branching. A 965 vertex +shader IR backend could potentially even handle some matrix operations +without breaking them down, but the 965 fragment shader IR backend +would want to break to have (almost) all operations down channel-wise +and perform optimization on that. As a result, there's no single +low-level IR that will make everyone happy. So that usage has fallen +out of favor, and each driver will perform a series of lowering passes +to take the HIR down to whatever restrictions it wants to impose +before doing codegen. + +Q: How is the IR structured? 
+ +A: The best way to get started seeing it would be to run the +standalone compiler against a shader: + +./glsl_compiler --dump-lir \ + ~/src/piglit/tests/shaders/glsl-orangebook-ch06-bump.frag + +So for example one of the ir_instructions in main() contains: + +(assign (constant bool (1)) (var_ref litColor) (expression vec3 * (var_ref Surf +aceColor) (var_ref __retval) ) ) + +Or more visually: + (assign) + / | \ + (var_ref) (expression *) (constant bool 1) + / / \ +(litColor) (var_ref) (var_ref) + / \ + (SurfaceColor) (__retval) + +which came from: + +litColor = SurfaceColor * max(dot(normDelta, LightDir), 0.0); + +(the max call is not represented in this expression tree, as it was a +function call that got inlined but not brought into this expression +tree) + +Each of those nodes is a subclass of ir_instruction. A particular +ir_instruction instance may only appear once in the whole IR tree with +the exception of ir_variables, which appear once as variable +declarations: + +(declare () vec3 normDelta) + +and multiple times as the targets of variable dereferences: +... +(assign (constant bool (1)) (var_ref __retval) (expression float dot + (var_ref normDelta) (var_ref LightDir) ) ) +... +(assign (constant bool (1)) (var_ref __retval) (expression vec3 - + (var_ref LightDir) (expression vec3 * (constant float (2.000000)) + (expression vec3 * (expression float dot (var_ref normDelta) (var_ref + LightDir) ) (var_ref normDelta) ) ) ) ) +... + +Each node has a type. 
Expressions may involve several different types: +(declare (uniform ) mat4 gl_ModelViewMatrix) +((assign (constant bool (1)) (var_ref constructor_tmp) (expression + vec4 * (var_ref gl_ModelViewMatrix) (var_ref gl_Vertex) ) ) + +An expression tree can be arbitrarily deep, and the compiler tries to +keep them structured like that so that things like algebraic +optimizations ((color * 1.0 == color) and ((mat1 * mat2) * vec == mat1 +* (mat2 * vec))) or recognizing operation patterns for code generation +(vec1 * vec2 + vec3 == mad(vec1, vec2, vec3)) are easier. This comes +at the expense of additional trickery in implementing some +optimizations like CSE where one must navigate an expression tree. + +Q: Why no SSA representation? + +A: Converting an IR tree to SSA form makes dead code elimination, +common subexpression elimination, and many other optimizations much +easier. However, in our primarily vector-based language, there's some +major questions as to how it would work. Do we do SSA on the scalar +or vector level? If we do it at the vector level, we're going to end +up with many different versions of the variable when encountering code +like: + +(assign (constant bool (1)) (swiz x (var_ref __retval) ) (var_ref a) ) +(assign (constant bool (1)) (swiz y (var_ref __retval) ) (var_ref b) ) +(assign (constant bool (1)) (swiz z (var_ref __retval) ) (var_ref c) ) + +If every masked update of a component relies on the previous value of +the variable, then we're probably going to be quite limited in our +dead code elimination wins, and recognizing common expressions may +just not happen. On the other hand, if we operate channel-wise, then +we'll be prone to optimizing the operation on one of the channels at +the expense of making its instruction flow different from the other +channels, and a vector-based GPU would end up with worse code than if +we didn't optimize operations on that channel! 
+ +Once again, it appears that our optimization requirements are driven +significantly by the target architecture. For now, targeting the Mesa +IR backend, SSA does not appear to be that important to producing +excellent code, but we do expect to do some SSA-based optimizations +for the 965 fragment shader backend when that is developed. + +Q: How should I expand instructions that take multiple backend instructions? + +Sometimes you'll have to do the expansion in your code generation -- +see, for example, ir_to_mesa.cpp's handling of ir_unop_sqrt. However, +in many cases you'll want to do a pass over the IR to convert +non-native instructions to a series of native instructions. For +example, for the Mesa backend we have ir_div_to_mul_rcp.cpp because +Mesa IR (and many hardware backends) only have a reciprocal +instruction, not a divide. Implementing non-native instructions this +way gives the chance for constant folding to occur, so (a / 2.0) +becomes (a * 0.5) after codegen instead of (a * (1.0 / 2.0)) + +Q: How should I handle my special hardware instructions with respect to IR? + +Our current theory is that if multiple targets have an instruction for +some operation, then we should probably be able to represent that in +the IR. Generally this is in the form of an ir_{bin,un}op expression +type. For example, we initially implemented fract() using (a - +floor(a)), but both 945 and 965 have instructions to give that result, +and it would also simplify the implementation of mod(), so +ir_unop_fract was added. 
The following areas need updating to add a +new expression type: + +ir.h (new enum) +ir.cpp:operator_strs (used for ir_reader) +ir_constant_expression.cpp (you probably want to be able to constant fold) +ir_validate.cpp (check users have the right types) + +You may also need to update the backends if they will see the new expr type: + +../mesa/program/ir_to_mesa.cpp + +You can then use the new expression from builtins (if all backends +would rather see it), or scan the IR and convert to use your new +expression type (see ir_mod_to_floor, for example). + +Q: How is memory management handled in the compiler? + +The hierarchical memory allocator "talloc" developed for the Samba +project is used, so that things like optimization passes don't have to +worry about their garbage collection so much. It has a few nice +features, including low performance overhead and good debugging +support that's trivially available. + +Generally, each stage of the compile creates a talloc context and +allocates its memory out of that or children of it. At the end of the +stage, the pieces still live are stolen to a new context and the old +one freed, or the whole context is kept for use by the next stage. + +For IR transformations, a temporary context is used, then at the end +of all transformations, reparent_ir reparents all live nodes under the +shader's IR list, and the old context full of dead nodes is freed. +When developing a single IR transformation pass, this means that you +want to allocate instruction nodes out of the temporary context, so if +it becomes dead it doesn't live on as the child of a live node. At +the moment, optimization passes aren't passed that temporary context, +so they find it by calling talloc_parent() on a nearby IR node. The +talloc_parent() call is expensive, so many passes will cache the +result of the first talloc_parent(). Cleaning up all the optimization +passes to take a context argument and not call talloc_parent() is left +as an exercise. 
+ +Q: What is the file naming convention in this directory? + +Initially, there really wasn't one. We have since adopted one: + + - Files that implement code lowering passes should be named lower_* + (e.g., lower_noise.cpp). + - Files that implement optimization passes should be named opt_*. + - Files that implement a class that is used throughout the code should + take the name of that class (e.g., ir_hierarchical_visitor.cpp). + - Files that contain code not fitting in one of the previous + categories should have a sensible name (e.g., glsl_parser.yy). diff --git a/src/compiler/glsl/SConscript b/src/compiler/glsl/SConscript new file mode 100644 index 0000000..ef82a9d --- /dev/null +++ b/src/compiler/glsl/SConscript @@ -0,0 +1,122 @@ +import common + +Import('*') + +from sys import executable as python_cmd + +env = env.Clone() + +env.MSVC2013Compat() + +env.Prepend(CPPPATH = [ + '#include', + '#src', + '#src/mapi', + '#src/mesa', + '#src/gallium/include', + '#src/gallium/auxiliary', + '#src/glsl', + '#src/glsl/glcpp', +]) + +env.Prepend(LIBS = [mesautil]) + +# Make glcpp-parse.h and glsl_parser.h reachable from the include path. 
+env.Append(CPPPATH = [Dir('.').abspath, Dir('glcpp').abspath]) + +glcpp_env = env.Clone() +glcpp_env.Append(YACCFLAGS = [ + '-d', + '-p', 'glcpp_parser_' +]) + +glsl_env = env.Clone() +glsl_env.Append(YACCFLAGS = [ + '--defines=%s' % File('glsl_parser.h').abspath, + '-p', '_mesa_glsl_', +]) + +# without this line scons will expect "glsl_parser.hpp" instead of +# "glsl_parser.h", causing glsl_parser.cpp to be regenerated every time +glsl_env['YACCHXXFILESUFFIX'] = '.h' + +glcpp_lexer = glcpp_env.CFile('glcpp/glcpp-lex.c', 'glcpp/glcpp-lex.l') +glcpp_parser = glcpp_env.CFile('glcpp/glcpp-parse.c', 'glcpp/glcpp-parse.y') +glsl_lexer = glsl_env.CXXFile('glsl_lexer.cpp', 'glsl_lexer.ll') +glsl_parser = glsl_env.CXXFile('glsl_parser.cpp', 'glsl_parser.yy') + +# common generated sources +glsl_sources = [ + glcpp_lexer, + glcpp_parser[0], + glsl_lexer, + glsl_parser[0], +] + +# parse Makefile.sources +source_lists = env.ParseSourceList('Makefile.sources') + +# add non-generated sources +for l in ('LIBGLCPP_FILES', 'LIBGLSL_FILES'): + glsl_sources += source_lists[l] + +if env['msvc']: + env.Prepend(CPPPATH = ['#/src/getopt']) + env.PrependUnique(LIBS = [getopt]) + +# Copy these files to avoid generation object files into src/mesa/program +env.Prepend(CPPPATH = ['#src/mesa/main']) +env.Command('imports.c', '#src/mesa/main/imports.c', Copy('$TARGET', '$SOURCE')) +# Copy these files to avoid generation object files into src/mesa/program +env.Prepend(CPPPATH = ['#src/mesa/program']) +env.Command('prog_hash_table.c', '#src/mesa/program/prog_hash_table.c', Copy('$TARGET', '$SOURCE')) +env.Command('symbol_table.c', '#src/mesa/program/symbol_table.c', Copy('$TARGET', '$SOURCE')) +env.Command('dummy_errors.c', '#src/mesa/program/dummy_errors.c', Copy('$TARGET', '$SOURCE')) + +compiler_objs = env.StaticObject(source_lists['GLSL_COMPILER_CXX_FILES']) + +mesa_objs = env.StaticObject([ + 'imports.c', + 'prog_hash_table.c', + 'symbol_table.c', + 'dummy_errors.c', +]) + +compiler_objs += 
mesa_objs + +glsl = env.ConvenienceLibrary( + target = 'glsl', + source = glsl_sources, +) + +# SCons builtin dependency scanner doesn't detect that glsl_lexer.ll depends on +# glsl_parser.h +env.Depends(glsl, glsl_parser) + +Export('glsl') + +# Skip building these programs as they will cause SCons error "Two environments +# with different actions were specified for the same target" +if env['crosscompile'] or env['embedded']: + Return() + +env = env.Clone() + +if env['platform'] == 'windows': + env.PrependUnique(LIBS = [ + 'user32', + ]) + +env.Prepend(LIBS = [compiler, glsl]) + +glsl_compiler = env.Program( + target = 'glsl_compiler', + source = compiler_objs, +) +env.Alias('glsl_compiler', glsl_compiler) + +glcpp = env.Program( + target = 'glcpp/glcpp', + source = ['glcpp/glcpp.c'] + mesa_objs, +) +env.Alias('glcpp', glcpp) diff --git a/src/compiler/glsl/TODO b/src/compiler/glsl/TODO new file mode 100644 index 0000000..bd077a8 --- /dev/null +++ b/src/compiler/glsl/TODO @@ -0,0 +1,12 @@ +- Detect code paths in non-void functions that don't reach a return statement + +- Improve handling of constants and their initializers. Constant initializers + should never generate any code. This is trivial for scalar constants. It is + also trivial for arrays, matrices, and vectors that are accessed with + constant index values. For others it is more complicated. Perhaps these + cases should be silently converted to uniforms? + +- Track source locations throughout the IR. There are currently several + places where we cannot emit line numbers for errors (and currently emit 0:0) + because we've "lost" the line number information. This is particularly + noticeable at link time. 
diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h new file mode 100644 index 0000000..03df6c0 --- /dev/null +++ b/src/compiler/glsl/ast.h @@ -0,0 +1,1204 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#pragma once +#ifndef AST_H +#define AST_H + +#include "list.h" +#include "glsl_parser_extras.h" + +struct _mesa_glsl_parse_state; + +struct YYLTYPE; + +/** + * \defgroup AST Abstract syntax tree node definitions + * + * An abstract syntax tree is generated by the parser. This is a fairly + * direct representation of the grammar derivation for the source program. + * No semantic checking is done during the generation of the AST. Only + * syntactic checking is done. Semantic checking is performed by a later + * stage that converts the AST to a more generic intermediate representation. 
+ * + *@{ + */ +/** + * Base class of all abstract syntax tree nodes + */ +class ast_node { +public: + DECLARE_RALLOC_CXX_OPERATORS(ast_node); + + /** + * Print an AST node in something approximating the original GLSL code + */ + virtual void print(void) const; + + /** + * Convert the AST node to the high-level intermediate representation + */ + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + virtual bool has_sequence_subexpression() const; + + /** + * Retrieve the source location of an AST node + * + * This function is primarily used to get the source position of an AST node + * into a form that can be passed to \c _mesa_glsl_error. + * + * \sa _mesa_glsl_error, ast_node::set_location + */ + struct YYLTYPE get_location(void) const + { + struct YYLTYPE locp; + + locp.source = this->location.source; + locp.first_line = this->location.first_line; + locp.first_column = this->location.first_column; + locp.last_line = this->location.last_line; + locp.last_column = this->location.last_column; + + return locp; + } + + /** + * Set the source location of an AST node from a parser location + * + * \sa ast_node::get_location + */ + void set_location(const struct YYLTYPE &locp) + { + this->location.source = locp.source; + this->location.first_line = locp.first_line; + this->location.first_column = locp.first_column; + this->location.last_line = locp.last_line; + this->location.last_column = locp.last_column; + } + + /** + * Set the source location range of an AST node using two location nodes: the source and start position come from \c begin, the end position from \c end + * + * \sa ast_node::set_location + */ + void set_location_range(const struct YYLTYPE &begin, const struct YYLTYPE &end) + { + this->location.source = begin.source; + this->location.first_line = begin.first_line; + this->location.last_line = end.last_line; + this->location.first_column = begin.first_column; + this->location.last_column = end.last_column; + } + + /** + * Source location of the AST node. 
+ */ + struct { + unsigned source; /**< GLSL source number. */ + unsigned first_line; /**< First line number within the source string. */ + unsigned first_column; /**< First column in the first line. */ + unsigned last_line; /**< Last line number within the source string. */ + unsigned last_column; /**< Last column in the last line. */ + } location; + + exec_node link; /**< List node through which this AST node is placed on an exec_list. */ + +protected: + /** + * The only constructor is protected so that only derived class objects can + * be created. + */ + ast_node(void); +}; + + +/** + * Operators for AST expression nodes. + */ +enum ast_operators { + ast_assign, + ast_plus, /**< Unary + operator. */ + ast_neg, + ast_add, + ast_sub, + ast_mul, + ast_div, + ast_mod, + ast_lshift, + ast_rshift, + ast_less, + ast_greater, + ast_lequal, + ast_gequal, + ast_equal, + ast_nequal, + ast_bit_and, + ast_bit_xor, + ast_bit_or, + ast_bit_not, + ast_logic_and, + ast_logic_xor, + ast_logic_or, + ast_logic_not, + + ast_mul_assign, + ast_div_assign, + ast_mod_assign, + ast_add_assign, + ast_sub_assign, + ast_ls_assign, + ast_rs_assign, + ast_and_assign, + ast_xor_assign, + ast_or_assign, + + ast_conditional, + + ast_pre_inc, + ast_pre_dec, + ast_post_inc, + ast_post_dec, + ast_field_selection, + ast_array_index, + ast_unsized_array_dim, + + ast_function_call, + + ast_identifier, + ast_int_constant, + ast_uint_constant, + ast_float_constant, + ast_bool_constant, + ast_double_constant, + + ast_sequence, + ast_aggregate +}; + +/** + * Representation of any sort of expression. 
+ */ +class ast_expression : public ast_node { +public: + ast_expression(int oper, ast_expression *, + ast_expression *, ast_expression *); + + ast_expression(const char *identifier) : + oper(ast_identifier) + { + subexpressions[0] = NULL; + subexpressions[1] = NULL; + subexpressions[2] = NULL; + primary_expression.identifier = identifier; + this->non_lvalue_description = NULL; + } + + static const char *operator_string(enum ast_operators op); + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + virtual void hir_no_rvalue(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + virtual bool has_sequence_subexpression() const; + + ir_rvalue *do_hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state, + bool needs_rvalue); + + virtual void print(void) const; + + enum ast_operators oper; + + ast_expression *subexpressions[3]; + + union { + const char *identifier; + int int_constant; + float float_constant; + unsigned uint_constant; + int bool_constant; + double double_constant; + } primary_expression; + + + /** + * List of expressions for an \c ast_sequence or parameters for an + * \c ast_function_call + */ + exec_list expressions; + + /** + * For things that can't be l-values, this describes what it is. + * + * This text is used by the code that generates IR for assignments to + * detect and emit useful messages for assignments to some things that + * can't be l-values. For example, pre- or post-increment expressions. + * + * \note + * This pointer may be \c NULL. 
+ */ + const char *non_lvalue_description; +}; + +class ast_expression_bin : public ast_expression { +public: + ast_expression_bin(int oper, ast_expression *, ast_expression *); + + virtual void print(void) const; +}; + +/** + * Subclass of expressions for function calls + */ +class ast_function_expression : public ast_expression { +public: + ast_function_expression(ast_expression *callee) + : ast_expression(ast_function_call, callee, + NULL, NULL), + cons(false) + { + /* empty */ + } + + ast_function_expression(class ast_type_specifier *type) + : ast_expression(ast_function_call, (ast_expression *) type, + NULL, NULL), + cons(true) + { + /* empty */ + } + + bool is_constructor() const + { + return cons; + } + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + virtual void hir_no_rvalue(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + virtual bool has_sequence_subexpression() const; + +private: + /** + * Is this function call actually a constructor? + */ + bool cons; + ir_rvalue * + handle_method(exec_list *instructions, + struct _mesa_glsl_parse_state *state); +}; + +class ast_subroutine_list : public ast_node +{ +public: + virtual void print(void) const; + exec_list declarations; +}; + +class ast_array_specifier : public ast_node { +public: + ast_array_specifier(const struct YYLTYPE &locp, ast_expression *dim) + { + set_location(locp); + array_dimensions.push_tail(&dim->link); + } + + void add_dimension(ast_expression *dim) + { + array_dimensions.push_tail(&dim->link); + } + + bool is_single_dimension() const + { + return this->array_dimensions.tail_pred->prev != NULL && + this->array_dimensions.tail_pred->prev->is_head_sentinel(); + } + + virtual void print(void) const; + + /* This list contains objects of type ast_node containing the + * array dimensions in outermost-to-innermost order. 
+ */ + exec_list array_dimensions; +}; + +class ast_layout_expression : public ast_node { +public: + ast_layout_expression(const struct YYLTYPE &locp, ast_expression *expr) + { + set_location(locp); + layout_const_expressions.push_tail(&expr->link); + } + + bool process_qualifier_constant(struct _mesa_glsl_parse_state *state, + const char *qual_indentifier, + unsigned *value, bool can_be_zero); + + void merge_qualifier(ast_layout_expression *l_expr) + { + layout_const_expressions.append_list(&l_expr->layout_const_expressions); + } + + exec_list layout_const_expressions; +}; + +/** + * C-style aggregate initialization class + * + * Represents C-style initializers of vectors, matrices, arrays, and + * structures. E.g., vec3 pos = {1.0, 0.0, -1.0} is equivalent to + * vec3 pos = vec3(1.0, 0.0, -1.0). + * + * Specified in GLSL 4.20 and GL_ARB_shading_language_420pack. + * + * \sa _mesa_ast_set_aggregate_type + */ +class ast_aggregate_initializer : public ast_expression { +public: + ast_aggregate_initializer() + : ast_expression(ast_aggregate, NULL, NULL, NULL), + constructor_type(NULL) + { + /* empty */ + } + + /** + * glsl_type of the aggregate, which is inferred from the LHS of whatever + * the aggregate is being used to initialize. This can't be inferred at + * parse time (since the parser deals with ast_type_specifiers, not + * glsl_types), so the parser leaves it NULL. However, the ast-to-hir + * conversion code makes sure to fill it in with the appropriate type + * before hir() is called. 
+ */ + const glsl_type *constructor_type; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + virtual void hir_no_rvalue(exec_list *instructions, + struct _mesa_glsl_parse_state *state); +}; + +/** + * Number of possible operators for an ast_expression + * + * This is done as a define instead of as an additional value in the enum so + * that the compiler won't generate spurious messages like "warning: + * enumeration value ‘ast_num_operators’ not handled in switch" NOTE(review): ast_aggregate is declared after ast_sequence in enum ast_operators, so this count does not include it; confirm that is intended. + */ +#define AST_NUM_OPERATORS (ast_sequence + 1) + + +class ast_compound_statement : public ast_node { +public: + ast_compound_statement(int new_scope, ast_node *statements); + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + int new_scope; + exec_list statements; +}; + +class ast_declaration : public ast_node { +public: + ast_declaration(const char *identifier, + ast_array_specifier *array_specifier, + ast_expression *initializer); + virtual void print(void) const; + + const char *identifier; + + ast_array_specifier *array_specifier; + + ast_expression *initializer; +}; + + +enum { + ast_precision_none = 0, /**< Absence of precision qualifier. 
*/ + ast_precision_high, + ast_precision_medium, + ast_precision_low +}; + +struct ast_type_qualifier { + DECLARE_RALLOC_CXX_OPERATORS(ast_type_qualifier); + + union { + struct { + unsigned invariant:1; + unsigned precise:1; + unsigned constant:1; + unsigned attribute:1; + unsigned varying:1; + unsigned in:1; + unsigned out:1; + unsigned centroid:1; + unsigned sample:1; + unsigned patch:1; + unsigned uniform:1; + unsigned buffer:1; + unsigned shared_storage:1; + unsigned smooth:1; + unsigned flat:1; + unsigned noperspective:1; + + /** \name Layout qualifiers for GL_ARB_fragment_coord_conventions */ + /*@{*/ + unsigned origin_upper_left:1; + unsigned pixel_center_integer:1; + /*@}*/ + + /** + * Flag set if GL_ARB_explicit_attrib_location "location" layout + * qualifier is used. + */ + unsigned explicit_location:1; + /** + * Flag set if GL_ARB_explicit_attrib_location "index" layout + * qualifier is used. + */ + unsigned explicit_index:1; + + /** + * Flag set if GL_ARB_shading_language_420pack "binding" layout + * qualifier is used. + */ + unsigned explicit_binding:1; + + /** + * Flag set if GL_ARB_shader_atomic_counters "offset" layout + * qualifier is used. + */ + unsigned explicit_offset:1; + + /** \name Layout qualifiers for GL_AMD_conservative_depth */ + /** \{ */ + unsigned depth_any:1; + unsigned depth_greater:1; + unsigned depth_less:1; + unsigned depth_unchanged:1; + /** \} */ + + /** \name Layout qualifiers for GL_ARB_uniform_buffer_object */ + /** \{ */ + unsigned std140:1; + unsigned std430:1; + unsigned shared:1; + unsigned packed:1; + unsigned column_major:1; + unsigned row_major:1; + /** \} */ + + /** \name Layout qualifiers for GLSL 1.50 geometry shaders */ + /** \{ */ + unsigned prim_type:1; + unsigned max_vertices:1; + /** \} */ + + /** + * local_size_{x,y,z} flags for compute shaders. Bit 0 represents + * local_size_x, and so on. + */ + unsigned local_size:3; + + /** \name Layout and memory qualifiers for ARB_shader_image_load_store. 
*/ + /** \{ */ + unsigned early_fragment_tests:1; + unsigned explicit_image_format:1; + unsigned coherent:1; + unsigned _volatile:1; + unsigned restrict_flag:1; + unsigned read_only:1; /**< "readonly" qualifier. */ + unsigned write_only:1; /**< "writeonly" qualifier. */ + /** \} */ + + /** \name Layout qualifiers for GL_ARB_gpu_shader5 */ + /** \{ */ + unsigned invocations:1; + unsigned stream:1; /**< Has stream value assigned */ + unsigned explicit_stream:1; /**< stream value assigned explicitly by shader code */ + /** \} */ + + /** \name Layout qualifiers for GL_ARB_tessellation_shader */ + /** \{ */ + /* tess eval input layout */ + /* gs prim_type reused for primitive mode */ + unsigned vertex_spacing:1; + unsigned ordering:1; + unsigned point_mode:1; + /* tess control output layout */ + unsigned vertices:1; + /** \} */ + + /** \name Qualifiers for GL_ARB_shader_subroutine */ + /** \{ */ + unsigned subroutine:1; /**< Is this marked 'subroutine' */ + unsigned subroutine_def:1; /**< Is this marked 'subroutine' with a list of types */ + /** \} */ + } + /** \brief Set of flags, accessed by name. */ + q; + + /** \brief Set of flags, accessed as a bitmask. */ + uint64_t i; + } flags; + + /** Precision of the type (highp/mediump/lowp). */ + unsigned precision:2; + + /** Geometry shader invocations for GL_ARB_gpu_shader5. */ + ast_layout_expression *invocations; + + /** + * Location specified via GL_ARB_explicit_attrib_location layout + * + * \note + * This field is only valid if \c explicit_location is set. + */ + ast_expression *location; + /** + * Index specified via GL_ARB_explicit_attrib_location layout + * + * \note + * This field is only valid if \c explicit_index is set. + */ + ast_expression *index; + + /** Maximum output vertices in GLSL 1.50 geometry shaders. */ + ast_layout_expression *max_vertices; + + /** Stream in GLSL 1.50 geometry shaders. 
*/ + ast_expression *stream; + + /** + * Input or output primitive type in GLSL 1.50 geometry shaders + * and tessellation shaders. + */ + GLenum prim_type; + + /** + * Binding specified via GL_ARB_shading_language_420pack's "binding" keyword. + * + * \note + * This field is only valid if \c explicit_binding is set. + */ + ast_expression *binding; + + /** + * Offset specified via GL_ARB_shader_atomic_counters' "offset" + * keyword. + * + * \note + * This field is only valid if \c explicit_offset is set. + */ + ast_expression *offset; + + /** + * Local size specified via GL_ARB_compute_shader's "local_size_{x,y,z}" + * layout qualifier. Element i of this array is only valid if + * flags.q.local_size & (1 << i) is set. + */ + ast_layout_expression *local_size[3]; + + /** Tessellation evaluation shader: vertex spacing (equal, fractional even/odd) */ + GLenum vertex_spacing; + + /** Tessellation evaluation shader: vertex ordering (CW or CCW) */ + GLenum ordering; + + /** Tessellation evaluation shader: point mode */ + bool point_mode; + + /** Tessellation control shader: number of output vertices */ + ast_layout_expression *vertices; + + /** + * Image format specified with an ARB_shader_image_load_store + * layout qualifier. + * + * \note + * This field is only valid if \c explicit_image_format is set. + */ + GLenum image_format; + + /** + * Base type of the data read from or written to this image. Only + * the following enumerants are allowed: GLSL_TYPE_UINT, + * GLSL_TYPE_INT, GLSL_TYPE_FLOAT. + * + * \note + * This field is only valid if \c explicit_image_format is set. + */ + glsl_base_type image_base_type; + + /** Flag to know if this represents a default value for a qualifier */ + bool is_default_qualifier; + + /** + * Return true if and only if an interpolation qualifier is present. + */ + bool has_interpolation() const; + + /** + * Return whether a layout qualifier is present. 
+ */ + bool has_layout() const; + + /** + * Return whether a storage qualifier is present. + */ + bool has_storage() const; + + /** + * Return whether an auxiliary storage qualifier is present. + */ + bool has_auxiliary_storage() const; + + /** + * \brief Return string representation of interpolation qualifier. + * + * If an interpolation qualifier is present, then return that qualifier's + * string representation. Otherwise, return null. For example, if the + * noperspective bit is set, then this returns "noperspective". + * + * If multiple interpolation qualifiers are somehow present, then the + * returned string is undefined but not null. + */ + const char *interpolation_string() const; + + bool merge_qualifier(YYLTYPE *loc, + _mesa_glsl_parse_state *state, + const ast_type_qualifier &q, + bool is_single_layout_merge); + + bool merge_out_qualifier(YYLTYPE *loc, + _mesa_glsl_parse_state *state, + const ast_type_qualifier &q, + ast_node* &node, bool create_node); + + bool merge_in_qualifier(YYLTYPE *loc, + _mesa_glsl_parse_state *state, + const ast_type_qualifier &q, + ast_node* &node, bool create_node); + + ast_subroutine_list *subroutine_list; +}; + +class ast_declarator_list; + +class ast_struct_specifier : public ast_node { +public: + ast_struct_specifier(const char *identifier, + ast_declarator_list *declarator_list); + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + const char *name; + ast_type_qualifier *layout; + /* List of ast_declarator_list * */ + exec_list declarations; + bool is_declaration; +}; + + + +class ast_type_specifier : public ast_node { +public: + /** Construct a type specifier from a type name */ + ast_type_specifier(const char *name) + : type_name(name), structure(NULL), array_specifier(NULL), + default_precision(ast_precision_none) + { + /* empty */ + } + + /** Construct a type specifier from a structure definition */ + ast_type_specifier(ast_struct_specifier 
*s) + : type_name(s->name), structure(s), array_specifier(NULL), + default_precision(ast_precision_none) + { + /* empty */ + } + + const struct glsl_type *glsl_type(const char **name, + struct _mesa_glsl_parse_state *state) + const; + + virtual void print(void) const; + + ir_rvalue *hir(exec_list *, struct _mesa_glsl_parse_state *); + + const char *type_name; + ast_struct_specifier *structure; + + ast_array_specifier *array_specifier; + + /** For precision statements, this is the given precision; otherwise none. */ + unsigned default_precision:2; +}; + + +class ast_fully_specified_type : public ast_node { +public: + virtual void print(void) const; + bool has_qualifiers(_mesa_glsl_parse_state *state) const; + + ast_fully_specified_type() : qualifier(), specifier(NULL) + { + } + + const struct glsl_type *glsl_type(const char **name, + struct _mesa_glsl_parse_state *state) + const; + + ast_type_qualifier qualifier; + ast_type_specifier *specifier; +}; + + +class ast_declarator_list : public ast_node { +public: + ast_declarator_list(ast_fully_specified_type *); + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + ast_fully_specified_type *type; + /** List of 'ast_declaration *' */ + exec_list declarations; + + /** + * Flags for redeclarations. In these cases, no type is specified, so + * `type` is allowed to be NULL. In all other cases, this would be an error. 
+ */ + int invariant; /**< `invariant` redeclaration */ + int precise; /**< `precise` redeclaration */ +}; + + +class ast_parameter_declarator : public ast_node { +public: + ast_parameter_declarator() : + type(NULL), + identifier(NULL), + array_specifier(NULL), + formal_parameter(false), + is_void(false) + { + /* empty */ + } + + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + ast_fully_specified_type *type; + const char *identifier; + ast_array_specifier *array_specifier; + + static void parameters_to_hir(exec_list *ast_parameters, + bool formal, exec_list *ir_parameters, + struct _mesa_glsl_parse_state *state); + +private: + /** Is this parameter declaration part of a formal parameter list? */ + bool formal_parameter; + + /** + * Is this parameter 'void' type? + * + * This field is set by \c ::hir. + */ + bool is_void; +}; + + +class ast_function : public ast_node { +public: + ast_function(void); + + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + ast_fully_specified_type *return_type; + const char *identifier; + + exec_list parameters; + +private: + /** + * Is this prototype part of the function definition? + * + * Used by ast_function_definition::hir to process the parameters, etc. + * of the function. + * + * \sa ::hir + */ + bool is_definition; + + /** + * Function signature corresponding to this function prototype instance + * + * Used by ast_function_definition::hir to process the parameters, etc. + * of the function. 
+ * + * \sa ::hir + */ + class ir_function_signature *signature; + + friend class ast_function_definition; +}; + + +class ast_expression_statement : public ast_node { +public: + ast_expression_statement(ast_expression *); + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + ast_expression *expression; +}; + + +class ast_case_label : public ast_node { +public: + ast_case_label(ast_expression *test_value); + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + /** + * A test value of NULL means 'default'. + */ + ast_expression *test_value; +}; + + +class ast_case_label_list : public ast_node { +public: + ast_case_label_list(void); + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + /** + * A list of case labels. + */ + exec_list labels; +}; + + +class ast_case_statement : public ast_node { +public: + ast_case_statement(ast_case_label_list *labels); + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + ast_case_label_list *labels; + + /** + * A list of statements. + */ + exec_list stmts; +}; + + +class ast_case_statement_list : public ast_node { +public: + ast_case_statement_list(void); + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + /** + * A list of cases. 
+ */ + exec_list cases; +}; + + +class ast_switch_body : public ast_node { +public: + ast_switch_body(ast_case_statement_list *stmts); + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + ast_case_statement_list *stmts; +}; + + +class ast_selection_statement : public ast_node { +public: + ast_selection_statement(ast_expression *condition, + ast_node *then_statement, + ast_node *else_statement); + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + ast_expression *condition; + ast_node *then_statement; + ast_node *else_statement; +}; + + +class ast_switch_statement : public ast_node { +public: + ast_switch_statement(ast_expression *test_expression, + ast_node *body); + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + ast_expression *test_expression; + ast_node *body; + +protected: + void test_to_hir(exec_list *, struct _mesa_glsl_parse_state *); +}; + +class ast_iteration_statement : public ast_node { +public: + ast_iteration_statement(int mode, ast_node *init, ast_node *condition, + ast_expression *rest_expression, ast_node *body); + + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *, struct _mesa_glsl_parse_state *); + + enum ast_iteration_modes { + ast_for, + ast_while, + ast_do_while + } mode; + + + ast_node *init_statement; + ast_node *condition; + ast_expression *rest_expression; + + ast_node *body; + + /** + * Generate IR from the condition of a loop + * + * This is factored out of ::hir because some loops have the condition + * test at the top (for and while), and others have it at the end (do-while). 
+ */ + void condition_to_hir(exec_list *, struct _mesa_glsl_parse_state *); +}; + + +class ast_jump_statement : public ast_node { +public: + ast_jump_statement(int mode, ast_expression *return_value); + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + enum ast_jump_modes { + ast_continue, + ast_break, + ast_return, + ast_discard + } mode; + + ast_expression *opt_return_value; +}; + + +class ast_function_definition : public ast_node { +public: + ast_function_definition() : prototype(NULL), body(NULL) + { + } + + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + ast_function *prototype; + ast_compound_statement *body; +}; + +class ast_interface_block : public ast_node { +public: + ast_interface_block(ast_type_qualifier layout, + const char *instance_name, + ast_array_specifier *array_specifier) + : layout(layout), block_name(NULL), instance_name(instance_name), + array_specifier(array_specifier) + { + } + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + ast_type_qualifier layout; + const char *block_name; + + /** + * Declared name of the block instance, if specified. + * + * If the block does not have an instance name, this field will be + * \c NULL. + */ + const char *instance_name; + + /** List of ast_declarator_list * */ + exec_list declarations; + + /** + * Declared array size of the block instance + * + * If the block is not declared as an array or if the block instance array + * is unsized, this field will be \c NULL. + */ + ast_array_specifier *array_specifier; +}; + + +/** + * AST node representing a declaration of the output layout for tessellation + * control shaders. 
+ */ +class ast_tcs_output_layout : public ast_node +{ +public: + ast_tcs_output_layout(const struct YYLTYPE &locp) + { + set_location(locp); + } + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); +}; + + +/** + * AST node representing a declaration of the input layout for geometry + * shaders. + */ +class ast_gs_input_layout : public ast_node +{ +public: + ast_gs_input_layout(const struct YYLTYPE &locp, GLenum prim_type) + : prim_type(prim_type) + { + set_location(locp); + } + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + +private: + const GLenum prim_type; +}; + + +/** + * AST node representing a declaration of the input layout for compute + * shaders. + */ +class ast_cs_input_layout : public ast_node +{ +public: + ast_cs_input_layout(const struct YYLTYPE &locp, + ast_layout_expression *const *local_size) + { + for (int i = 0; i < 3; i++) { + this->local_size[i] = local_size[i]; + } + set_location(locp); + } + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + +private: + ast_layout_expression *local_size[3]; +}; + +/*@}*/ + +extern void +_mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state); + +extern ir_rvalue * +_mesa_ast_field_selection_to_hir(const ast_expression *expr, + exec_list *instructions, + struct _mesa_glsl_parse_state *state); + +extern ir_rvalue * +_mesa_ast_array_index_to_hir(void *mem_ctx, + struct _mesa_glsl_parse_state *state, + ir_rvalue *array, ir_rvalue *idx, + YYLTYPE &loc, YYLTYPE &idx_loc); + +extern void +_mesa_ast_set_aggregate_type(const glsl_type *type, + ast_expression *expr); + +void +emit_function(_mesa_glsl_parse_state *state, ir_function *f); + +extern void +check_builtin_array_max_size(const char *name, unsigned size, + YYLTYPE loc, struct _mesa_glsl_parse_state *state); + +extern void _mesa_ast_process_interface_block(YYLTYPE *locp, + _mesa_glsl_parse_state *state, + 
ast_interface_block *const block, + const struct ast_type_qualifier &q); + +#endif /* AST_H */ diff --git a/src/compiler/glsl/ast_array_index.cpp b/src/compiler/glsl/ast_array_index.cpp new file mode 100644 index 0000000..f5baeb9 --- /dev/null +++ b/src/compiler/glsl/ast_array_index.cpp @@ -0,0 +1,333 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "ast.h" +#include "compiler/glsl_types.h" +#include "ir.h" + +void +ast_array_specifier::print(void) const +{ + foreach_list_typed (ast_node, array_dimension, link, &this->array_dimensions) { + printf("[ "); + if (((ast_expression*)array_dimension)->oper != ast_unsized_array_dim) /* unsized dimensions print as empty brackets */ + array_dimension->print(); + printf("] "); + } +} + +/** + * If \c ir is a reference to an array for which we are tracking the max array + * element accessed, track that the given element has been accessed. 
+ * Otherwise do nothing. + * + * This function also checks whether the array is a built-in array whose + * maximum size is too small to accommodate the given index, and if so uses + * loc and state to report the error. + */ +static void +update_max_array_access(ir_rvalue *ir, int idx, YYLTYPE *loc, + struct _mesa_glsl_parse_state *state) +{ + if (ir_dereference_variable *deref_var = ir->as_dereference_variable()) { + ir_variable *var = deref_var->var; + if (idx > (int)var->data.max_array_access) { + var->data.max_array_access = idx; + + /* Check whether this access will, as a side effect, implicitly cause + * the size of a built-in array to be too large. + */ + check_builtin_array_max_size(var->name, idx+1, *loc, state); + } + } else if (ir_dereference_record *deref_record = + ir->as_dereference_record()) { + /* There are three possibilities we need to consider: + * + * - Accessing an element of an array that is a member of a named + * interface block (e.g. ifc.foo[i]) + * + * - Accessing an element of an array that is a member of a named + * interface block array (e.g. ifc[j].foo[i]). + * + * - Accessing an element of an array that is a member of a named + * interface block array of arrays (e.g. ifc[j][k].foo[i]). 
+ */ + ir_dereference_variable *deref_var = + deref_record->record->as_dereference_variable(); + if (deref_var == NULL) { + ir_dereference_array *deref_array = + deref_record->record->as_dereference_array(); + ir_dereference_array *deref_array_prev = NULL; + while (deref_array != NULL) { + deref_array_prev = deref_array; + deref_array = deref_array->array->as_dereference_array(); + } + if (deref_array_prev != NULL) + deref_var = deref_array_prev->array->as_dereference_variable(); + } + + if (deref_var != NULL) { + if (deref_var->var->is_interface_instance()) { + unsigned field_index = + deref_record->record->type->field_index(deref_record->field); + assert(field_index < deref_var->var->get_interface_type()->length); + + unsigned *const max_ifc_array_access = + deref_var->var->get_max_ifc_array_access(); + + assert(max_ifc_array_access != NULL); + + if (idx > (int)max_ifc_array_access[field_index]) { + max_ifc_array_access[field_index] = idx; + + /* Check whether this access will, as a side effect, implicitly + * cause the size of a built-in array to be too large. + */ + check_builtin_array_max_size(deref_record->field, idx+1, *loc, + state); + } + } + } + } +} + + +static int +get_implicit_array_size(struct _mesa_glsl_parse_state *state, + ir_rvalue *array) +{ + ir_variable *var = array->variable_referenced(); + + /* Inputs in control shader are implicitly sized + * to the maximum patch size. + */ + if (state->stage == MESA_SHADER_TESS_CTRL && + var->data.mode == ir_var_shader_in) { + return state->Const.MaxPatchVertices; + } + + /* Non-patch inputs in evaluation shader are implicitly sized + * to the maximum patch size. 
+ */ + if (state->stage == MESA_SHADER_TESS_EVAL && + var->data.mode == ir_var_shader_in && + !var->data.patch) { + return state->Const.MaxPatchVertices; + } + + return 0; +} + + +ir_rvalue * +_mesa_ast_array_index_to_hir(void *mem_ctx, + struct _mesa_glsl_parse_state *state, + ir_rvalue *array, ir_rvalue *idx, + YYLTYPE &loc, YYLTYPE &idx_loc) +{ + if (!array->type->is_error() + && !array->type->is_array() + && !array->type->is_matrix() + && !array->type->is_vector()) { + _mesa_glsl_error(& idx_loc, state, + "cannot dereference non-array / non-matrix / " + "non-vector"); + } + + if (!idx->type->is_error()) { + if (!idx->type->is_integer()) { + _mesa_glsl_error(& idx_loc, state, "array index must be integer type"); + } else if (!idx->type->is_scalar()) { + _mesa_glsl_error(& idx_loc, state, "array index must be scalar"); + } + } + + /* If the array index is a constant expression and the array has a + * declared size, ensure that the access is in-bounds. If the array + * index is not a constant expression, ensure that the array has a + * declared size. + */ + ir_constant *const const_index = idx->constant_expression_value(); + if (const_index != NULL && idx->type->is_integer()) { + const int idx = const_index->value.i[0]; + const char *type_name = "error"; + unsigned bound = 0; + + /* From page 24 (page 30 of the PDF) of the GLSL 1.50 spec: + * + * "It is illegal to declare an array with a size, and then + * later (in the same shader) index the same array with an + * integral constant expression greater than or equal to the + * declared size. It is also illegal to index an array with a + * negative constant expression." 
+ */ + if (array->type->is_matrix()) { + if (array->type->row_type()->vector_elements <= idx) { + type_name = "matrix"; + bound = array->type->row_type()->vector_elements; + } + } else if (array->type->is_vector()) { + if (array->type->vector_elements <= idx) { + type_name = "vector"; + bound = array->type->vector_elements; + } + } else { + /* glsl_type::array_size() returns -1 for non-array types. This means + * that we don't need to verify that the type is an array before + * doing the bounds checking. + */ + if ((array->type->array_size() > 0) + && (array->type->array_size() <= idx)) { + type_name = "array"; + bound = array->type->array_size(); + } + } + + if (bound > 0) { + _mesa_glsl_error(& loc, state, "%s index must be < %u", + type_name, bound); + } else if (idx < 0) { + _mesa_glsl_error(& loc, state, "%s index must be >= 0", + type_name); + } + + if (array->type->is_array()) + update_max_array_access(array, idx, &loc, state); + } else if (const_index == NULL && array->type->is_array()) { + if (array->type->is_unsized_array()) { + int implicit_size = get_implicit_array_size(state, array); + if (implicit_size) { + ir_variable *v = array->whole_variable_referenced(); + if (v != NULL) + v->data.max_array_access = implicit_size - 1; + } + else if (state->stage == MESA_SHADER_TESS_CTRL && + array->variable_referenced()->data.mode == ir_var_shader_out && + !array->variable_referenced()->data.patch) { + /* Tessellation control shader output non-patch arrays are + * initially unsized. Despite that, they are allowed to be + * indexed with a non-constant expression (typically + * "gl_InvocationID"). The array size will be determined + * by the linker. 
+ */ + } + else if (array->variable_referenced()->data.mode != + ir_var_shader_storage) { + _mesa_glsl_error(&loc, state, "unsized array index must be constant"); + } + } else if (array->type->without_array()->is_interface() + && (array->variable_referenced()->data.mode == ir_var_uniform || + array->variable_referenced()->data.mode == ir_var_shader_storage) + && !state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) { + /* Page 50 in section 4.3.9 of the OpenGL ES 3.10 spec says: + * + * "All indices used to index a uniform or shader storage block + * array must be constant integral expressions." + */ + _mesa_glsl_error(&loc, state, "%s block array index must be constant", + array->variable_referenced()->data.mode + == ir_var_uniform ? "uniform" : "shader storage"); + } else { + /* whole_variable_referenced can return NULL if the array is a + * member of a structure. In this case it is safe to not update + * the max_array_access field because it is never used for fields + * of structures. + */ + ir_variable *v = array->whole_variable_referenced(); + if (v != NULL) + v->data.max_array_access = array->type->array_size() - 1; + } + + /* From page 23 (29 of the PDF) of the GLSL 1.30 spec: + * + * "Samplers aggregated into arrays within a shader (using square + * brackets [ ]) can only be indexed with integral constant + * expressions [...]." + * + * This restriction was added in GLSL 1.30. Shaders using earlier + * version of the language should not be rejected by the compiler + * front-end for using this construct. This allows useful things such + * as using a loop counter as the index to an array of samplers. If the + * loop is unrolled, the code should compile correctly. Instead, emit a + * warning. + * + * In GLSL 4.00 / ARB_gpu_shader5, this requirement is relaxed again to allow + * indexing with dynamically uniform expressions. 
Note that these are not + * required to be uniforms or expressions based on them, but merely that the + * values must not diverge between shader invocations run together. If the + * values *do* diverge, then the behavior of the operation requiring a + * dynamically uniform expression is undefined. + */ + if (array->type->without_array()->is_sampler()) { + if (!state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) { + if (state->is_version(130, 300)) + _mesa_glsl_error(&loc, state, + "sampler arrays indexed with non-constant " + "expressions are forbidden in GLSL %s " + "and later", + state->es_shader ? "ES 3.00" : "1.30"); + else if (state->es_shader) + _mesa_glsl_warning(&loc, state, + "sampler arrays indexed with non-constant " + "expressions will be forbidden in GLSL " + "3.00 and later"); + else + _mesa_glsl_warning(&loc, state, + "sampler arrays indexed with non-constant " + "expressions will be forbidden in GLSL " + "1.30 and later"); + } + } + + /* From page 27 of the GLSL ES 3.1 specification: + * + * "When aggregated into arrays within a shader, images can only be + * indexed with a constant integral expression." + * + * On the other hand the desktop GL specification extension allows + * non-constant indexing of image arrays, but behavior is left undefined + * in cases where the indexing expression is not dynamically uniform. + */ + if (state->es_shader && array->type->without_array()->is_image()) { + _mesa_glsl_error(&loc, state, + "image arrays indexed with non-constant " + "expressions are forbidden in GLSL ES."); + } + } + + /* After performing all of the error checking, generate the IR for the + * expression. 
+ */ + if (array->type->is_array() + || array->type->is_matrix() + || array->type->is_vector()) { + return new(mem_ctx) ir_dereference_array(array, idx); + } else if (array->type->is_error()) { + return array; + } else { + ir_rvalue *result = new(mem_ctx) ir_dereference_array(array, idx); + result->type = glsl_type::error_type; + + return result; + } +} diff --git a/src/compiler/glsl/ast_expr.cpp b/src/compiler/glsl/ast_expr.cpp new file mode 100644 index 0000000..e624d11 --- /dev/null +++ b/src/compiler/glsl/ast_expr.cpp @@ -0,0 +1,95 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#include <assert.h> +#include "ast.h" + +const char * +ast_expression::operator_string(enum ast_operators op) +{ + static const char *const operators[] = { + "=", + "+", + "-", + "+", + "-", + "*", + "/", + "%", + "<<", + ">>", + "<", + ">", + "<=", + ">=", + "==", + "!=", + "&", + "^", + "|", + "~", + "&&", + "^^", + "||", + "!", + + "*=", + "/=", + "%=", + "+=", + "-=", + "<<=", + ">>=", + "&=", + "^=", + "|=", + + "?:", + + "++", + "--", + "++", + "--", + ".", + }; + + assert((unsigned int)op < sizeof(operators) / sizeof(operators[0])); + + return operators[op]; +} + + +ast_expression_bin::ast_expression_bin(int oper, ast_expression *ex0, + ast_expression *ex1) : + ast_expression(oper, ex0, ex1, NULL) +{ + assert((oper >= ast_plus) && (oper <= ast_logic_not)); +} + + +void +ast_expression_bin::print(void) const +{ + subexpressions[0]->print(); + printf("%s ", operator_string(oper)); + subexpressions[1]->print(); +} diff --git a/src/compiler/glsl/ast_function.cpp b/src/compiler/glsl/ast_function.cpp new file mode 100644 index 0000000..0eb456a --- /dev/null +++ b/src/compiler/glsl/ast_function.cpp @@ -0,0 +1,2098 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "glsl_symbol_table.h" +#include "ast.h" +#include "compiler/glsl_types.h" +#include "ir.h" +#include "main/core.h" /* for MIN2 */ +#include "main/shaderobj.h" + +static ir_rvalue * +convert_component(ir_rvalue *src, const glsl_type *desired_type); + +bool +apply_implicit_conversion(const glsl_type *to, ir_rvalue * &from, + struct _mesa_glsl_parse_state *state); + +static unsigned +process_parameters(exec_list *instructions, exec_list *actual_parameters, + exec_list *parameters, + struct _mesa_glsl_parse_state *state) +{ + unsigned count = 0; + + foreach_list_typed(ast_node, ast, link, parameters) { + ir_rvalue *result = ast->hir(instructions, state); + + ir_constant *const constant = result->constant_expression_value(); + if (constant != NULL) + result = constant; + + actual_parameters->push_tail(result); + count++; + } + + return count; +} + + +/** + * Generate a source prototype for a function signature + * + * \param return_type Return type of the function. May be \c NULL. + * \param name Name of the function. + * \param parameters List of \c ir_instruction nodes representing the + * parameter list for the function. This may be either a + * formal (\c ir_variable) or actual (\c ir_rvalue) + * parameter list. Only the type is used. + * + * \return + * A ralloced string representing the prototype of the function. 
+ */ +char * +prototype_string(const glsl_type *return_type, const char *name, + exec_list *parameters) +{ + char *str = NULL; + + if (return_type != NULL) + str = ralloc_asprintf(NULL, "%s ", return_type->name); + + ralloc_asprintf_append(&str, "%s(", name); + + const char *comma = ""; + foreach_in_list(const ir_variable, param, parameters) { + ralloc_asprintf_append(&str, "%s%s", comma, param->type->name); + comma = ", "; + } + + ralloc_strcat(&str, ")"); + return str; +} + +static bool +verify_image_parameter(YYLTYPE *loc, _mesa_glsl_parse_state *state, + const ir_variable *formal, const ir_variable *actual) +{ + /** + * From the ARB_shader_image_load_store specification: + * + * "The values of image variables qualified with coherent, + * volatile, restrict, readonly, or writeonly may not be passed + * to functions whose formal parameters lack such + * qualifiers. [...] It is legal to have additional qualifiers + * on a formal parameter, but not to have fewer." + */ + if (actual->data.image_coherent && !formal->data.image_coherent) { + _mesa_glsl_error(loc, state, + "function call parameter `%s' drops " + "`coherent' qualifier", formal->name); + return false; + } + + if (actual->data.image_volatile && !formal->data.image_volatile) { + _mesa_glsl_error(loc, state, + "function call parameter `%s' drops " + "`volatile' qualifier", formal->name); + return false; + } + + if (actual->data.image_restrict && !formal->data.image_restrict) { + _mesa_glsl_error(loc, state, + "function call parameter `%s' drops " + "`restrict' qualifier", formal->name); + return false; + } + + if (actual->data.image_read_only && !formal->data.image_read_only) { + _mesa_glsl_error(loc, state, + "function call parameter `%s' drops " + "`readonly' qualifier", formal->name); + return false; + } + + if (actual->data.image_write_only && !formal->data.image_write_only) { + _mesa_glsl_error(loc, state, + "function call parameter `%s' drops " + "`writeonly' qualifier", formal->name); + return false; 
+ } + + return true; +} + +static bool +verify_first_atomic_parameter(YYLTYPE *loc, _mesa_glsl_parse_state *state, + ir_variable *var) +{ + if (!var || + (!var->is_in_shader_storage_block() && + var->data.mode != ir_var_shader_shared)) { + _mesa_glsl_error(loc, state, "First argument to atomic function " + "must be a buffer or shared variable"); + return false; + } + return true; +} + +static bool +is_atomic_function(const char *func_name) +{ + return !strcmp(func_name, "atomicAdd") || + !strcmp(func_name, "atomicMin") || + !strcmp(func_name, "atomicMax") || + !strcmp(func_name, "atomicAnd") || + !strcmp(func_name, "atomicOr") || + !strcmp(func_name, "atomicXor") || + !strcmp(func_name, "atomicExchange") || + !strcmp(func_name, "atomicCompSwap"); +} + +/** + * Verify that 'out' and 'inout' actual parameters are lvalues. Also, verify + * that 'const_in' formal parameters (an extension in our IR) correspond to + * ir_constant actual parameters. + */ +static bool +verify_parameter_modes(_mesa_glsl_parse_state *state, + ir_function_signature *sig, + exec_list &actual_ir_parameters, + exec_list &actual_ast_parameters) +{ + exec_node *actual_ir_node = actual_ir_parameters.head; + exec_node *actual_ast_node = actual_ast_parameters.head; + + foreach_in_list(const ir_variable, formal, &sig->parameters) { + /* The lists must be the same length. */ + assert(!actual_ir_node->is_tail_sentinel()); + assert(!actual_ast_node->is_tail_sentinel()); + + const ir_rvalue *const actual = (ir_rvalue *) actual_ir_node; + const ast_expression *const actual_ast = + exec_node_data(ast_expression, actual_ast_node, link); + + /* FIXME: 'loc' is incorrect (as of 2011-01-21). It is always + * FIXME: 0:0(0). + */ + YYLTYPE loc = actual_ast->get_location(); + + /* Verify that 'const_in' parameters are ir_constants. 
*/ + if (formal->data.mode == ir_var_const_in && + actual->ir_type != ir_type_constant) { + _mesa_glsl_error(&loc, state, + "parameter `in %s' must be a constant expression", + formal->name); + return false; + } + + /* Verify that shader_in parameters are shader inputs */ + if (formal->data.must_be_shader_input) { + ir_variable *var = actual->variable_referenced(); + if (var && var->data.mode != ir_var_shader_in) { + _mesa_glsl_error(&loc, state, + "parameter `%s` must be a shader input", + formal->name); + return false; + } + + if (actual->ir_type == ir_type_swizzle) { + _mesa_glsl_error(&loc, state, + "parameter `%s` must not be swizzled", + formal->name); + return false; + } + } + + /* Verify that 'out' and 'inout' actual parameters are lvalues. */ + if (formal->data.mode == ir_var_function_out + || formal->data.mode == ir_var_function_inout) { + const char *mode = NULL; + switch (formal->data.mode) { + case ir_var_function_out: mode = "out"; break; + case ir_var_function_inout: mode = "inout"; break; + default: assert(false); break; + } + + /* This AST-based check catches errors like f(i++). The IR-based + * is_lvalue() is insufficient because the actual parameter at the + * IR-level is just a temporary value, which is an l-value. 
+ */ + if (actual_ast->non_lvalue_description != NULL) { + _mesa_glsl_error(&loc, state, + "function parameter '%s %s' references a %s", + mode, formal->name, + actual_ast->non_lvalue_description); + return false; + } + + ir_variable *var = actual->variable_referenced(); + if (var) + var->data.assigned = true; + + if (var && var->data.read_only) { + _mesa_glsl_error(&loc, state, + "function parameter '%s %s' references the " + "read-only variable '%s'", + mode, formal->name, + actual->variable_referenced()->name); + return false; + } else if (!actual->is_lvalue()) { + _mesa_glsl_error(&loc, state, + "function parameter '%s %s' is not an lvalue", + mode, formal->name); + return false; + } + } + + if (formal->type->is_image() && + actual->variable_referenced()) { + if (!verify_image_parameter(&loc, state, formal, + actual->variable_referenced())) + return false; + } + + actual_ir_node = actual_ir_node->next; + actual_ast_node = actual_ast_node->next; + } + + /* The first parameter of atomic functions must be a buffer variable */ + const char *func_name = sig->function_name(); + bool is_atomic = is_atomic_function(func_name); + if (is_atomic) { + const ir_rvalue *const actual = (ir_rvalue *) actual_ir_parameters.head; + + const ast_expression *const actual_ast = + exec_node_data(ast_expression, actual_ast_parameters.head, link); + YYLTYPE loc = actual_ast->get_location(); + + if (!verify_first_atomic_parameter(&loc, state, + actual->variable_referenced())) { + return false; + } + } + + return true; +} + +static void +fix_parameter(void *mem_ctx, ir_rvalue *actual, const glsl_type *formal_type, + exec_list *before_instructions, exec_list *after_instructions, + bool parameter_is_inout) +{ + ir_expression *const expr = actual->as_expression(); + + /* If the types match exactly and the parameter is not a vector-extract, + * nothing needs to be done to fix the parameter. 
+ */ + if (formal_type == actual->type + && (expr == NULL || expr->operation != ir_binop_vector_extract)) + return; + + /* To convert an out parameter, we need to create a temporary variable to + * hold the value before conversion, and then perform the conversion after + * the function call returns. + * + * This has the effect of transforming code like this: + * + * void f(out int x); + * float value; + * f(value); + * + * Into IR that's equivalent to this: + * + * void f(out int x); + * float value; + * int out_parameter_conversion; + * f(out_parameter_conversion); + * value = float(out_parameter_conversion); + * + * If the parameter is an ir_expression of ir_binop_vector_extract, + * additional conversion is needed in the post-call re-write. + */ + ir_variable *tmp = + new(mem_ctx) ir_variable(formal_type, "inout_tmp", ir_var_temporary); + + before_instructions->push_tail(tmp); + + /* If the parameter is an inout parameter, copy the value of the actual + * parameter to the new temporary. Note that no type conversion is allowed + * here because inout parameters must match types exactly. + */ + if (parameter_is_inout) { + /* Inout parameters should never require conversion, since that would + * require an implicit conversion to exist both to and from the formal + * parameter type, and there are no bidirectional implicit conversions. + */ + assert (actual->type == formal_type); + + ir_dereference_variable *const deref_tmp_1 = + new(mem_ctx) ir_dereference_variable(tmp); + ir_assignment *const assignment = + new(mem_ctx) ir_assignment(deref_tmp_1, actual); + before_instructions->push_tail(assignment); + } + + /* Replace the parameter in the call with a dereference of the new + * temporary. + */ + ir_dereference_variable *const deref_tmp_2 = + new(mem_ctx) ir_dereference_variable(tmp); + actual->replace_with(deref_tmp_2); + + + /* Copy the temporary variable to the actual parameter with optional + * type conversion applied. 
+ */ + ir_rvalue *rhs = new(mem_ctx) ir_dereference_variable(tmp); + if (actual->type != formal_type) + rhs = convert_component(rhs, actual->type); + + ir_rvalue *lhs = actual; + if (expr != NULL && expr->operation == ir_binop_vector_extract) { + lhs = new(mem_ctx) ir_dereference_array(expr->operands[0]->clone(mem_ctx, NULL), + expr->operands[1]->clone(mem_ctx, NULL)); + } + + ir_assignment *const assignment_2 = new(mem_ctx) ir_assignment(lhs, rhs); + after_instructions->push_tail(assignment_2); +} + +/** + * Generate a function call. + * + * For non-void functions, this returns a dereference of the temporary variable + * which stores the return value for the call. For void functions, this returns + * NULL. + */ +static ir_rvalue * +generate_call(exec_list *instructions, ir_function_signature *sig, + exec_list *actual_parameters, + ir_variable *sub_var, + ir_rvalue *array_idx, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + exec_list post_call_conversions; + + /* Perform implicit conversion of arguments. For out parameters, we need + * to place them in a temporary variable and do the conversion after the + * call takes place. Since we haven't emitted the call yet, we'll place + * the post-call conversions in a temporary exec_list, and emit them later. 
+ */ + foreach_two_lists(formal_node, &sig->parameters, + actual_node, actual_parameters) { + ir_rvalue *actual = (ir_rvalue *) actual_node; + ir_variable *formal = (ir_variable *) formal_node; + + if (formal->type->is_numeric() || formal->type->is_boolean()) { + switch (formal->data.mode) { + case ir_var_const_in: + case ir_var_function_in: { + ir_rvalue *converted + = convert_component(actual, formal->type); + actual->replace_with(converted); + break; + } + case ir_var_function_out: + case ir_var_function_inout: + fix_parameter(ctx, actual, formal->type, + instructions, &post_call_conversions, + formal->data.mode == ir_var_function_inout); + break; + default: + assert (!"Illegal formal parameter mode"); + break; + } + } + } + + /* Section 4.3.2 (Const) of the GLSL 1.10.59 spec says: + * + * "Initializers for const declarations must be formed from literal + * values, other const variables (not including function call + * paramaters), or expressions of these. + * + * Constructors may be used in such expressions, but function calls may + * not." + * + * Section 4.3.3 (Constant Expressions) of the GLSL 1.20.8 spec says: + * + * "A constant expression is one of + * + * ... + * + * - a built-in function call whose arguments are all constant + * expressions, with the exception of the texture lookup + * functions, the noise functions, and ftransform. The built-in + * functions dFdx, dFdy, and fwidth must return 0 when evaluated + * inside an initializer with an argument that is a constant + * expression." + * + * Section 5.10 (Constant Expressions) of the GLSL ES 1.00.17 spec says: + * + * "A constant expression is one of + * + * ... + * + * - a built-in function call whose arguments are all constant + * expressions, with the exception of the texture lookup + * functions." + * + * Section 4.3.3 (Constant Expressions) of the GLSL ES 3.00.4 spec says: + * + * "A constant expression is one of + * + * ... 
+ * + * - a built-in function call whose arguments are all constant + * expressions, with the exception of the texture lookup + * functions. The built-in functions dFdx, dFdy, and fwidth must + * return 0 when evaluated inside an initializer with an argument + * that is a constant expression." + * + * If the function call is a constant expression, don't generate any + * instructions; just generate an ir_constant. + */ + if (state->is_version(120, 100)) { + ir_constant *value = sig->constant_expression_value(actual_parameters, NULL); + if (value != NULL) { + return value; + } + } + + ir_dereference_variable *deref = NULL; + if (!sig->return_type->is_void()) { + /* Create a new temporary to hold the return value. */ + char *const name = ir_variable::temporaries_allocate_names + ? ralloc_asprintf(ctx, "%s_retval", sig->function_name()) + : NULL; + + ir_variable *var; + + var = new(ctx) ir_variable(sig->return_type, name, ir_var_temporary); + instructions->push_tail(var); + + ralloc_free(name); + + deref = new(ctx) ir_dereference_variable(var); + } + + ir_call *call = new(ctx) ir_call(sig, deref, actual_parameters, sub_var, array_idx); + instructions->push_tail(call); + + /* Also emit any necessary out-parameter conversions. */ + instructions->append_list(&post_call_conversions); + + return deref ? deref->clone(ctx, NULL) : NULL; +} + +/** + * Given a function name and parameter list, find the matching signature. + */ +static ir_function_signature * +match_function_by_name(const char *name, + exec_list *actual_parameters, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + ir_function *f = state->symbols->get_function(name); + ir_function_signature *local_sig = NULL; + ir_function_signature *sig = NULL; + + /* Is the function hidden by a record type constructor? */ + if (state->symbols->get_type(name)) + goto done; /* no match */ + + /* Is the function hidden by a variable (impossible in 1.10)? 
*/ + if (!state->symbols->separate_function_namespace + && state->symbols->get_variable(name)) + goto done; /* no match */ + + if (f != NULL) { + /* In desktop GL, the presence of a user-defined signature hides any + * built-in signatures, so we must ignore them. In contrast, in ES2 + * user-defined signatures add new overloads, so we must consider them. + */ + bool allow_builtins = state->es_shader || !f->has_user_signature(); + + /* Look for a match in the local shader. If exact, we're done. */ + bool is_exact = false; + sig = local_sig = f->matching_signature(state, actual_parameters, + allow_builtins, &is_exact); + if (is_exact) + goto done; + + if (!allow_builtins) + goto done; + } + + /* Local shader has no exact candidates; check the built-ins. */ + _mesa_glsl_initialize_builtin_functions(); + sig = _mesa_glsl_find_builtin_function(state, name, actual_parameters); + +done: + if (sig != NULL) { + /* If the match is from a linked built-in shader, import the prototype. */ + if (sig != local_sig) { + if (f == NULL) { + f = new(ctx) ir_function(name); + state->symbols->add_global_function(f); + emit_function(state, f); + } + f->add_signature(sig->clone_prototype(f, NULL)); + } + } + return sig; +} + +static ir_function_signature * +match_subroutine_by_name(const char *name, + exec_list *actual_parameters, + struct _mesa_glsl_parse_state *state, + ir_variable **var_r) +{ + void *ctx = state; + ir_function_signature *sig = NULL; + ir_function *f, *found = NULL; + const char *new_name; + ir_variable *var; + bool is_exact = false; + + new_name = ralloc_asprintf(ctx, "%s_%s", _mesa_shader_stage_to_subroutine_prefix(state->stage), name); + var = state->symbols->get_variable(new_name); + if (!var) + return NULL; + + for (int i = 0; i < state->num_subroutine_types; i++) { + f = state->subroutine_types[i]; + if (strcmp(f->name, var->type->without_array()->name)) + continue; + found = f; + break; + } + + if (!found) + return NULL; + *var_r = var; + sig = 
found->matching_signature(state, actual_parameters, + false, &is_exact); + return sig; +} + +static ir_rvalue * +generate_array_index(void *mem_ctx, exec_list *instructions, + struct _mesa_glsl_parse_state *state, YYLTYPE loc, + const ast_expression *array, ast_expression *idx, + const char **function_name, exec_list *actual_parameters) +{ + if (array->oper == ast_array_index) { + /* This handles arrays of arrays */ + ir_rvalue *outer_array = generate_array_index(mem_ctx, instructions, + state, loc, + array->subexpressions[0], + array->subexpressions[1], + function_name, actual_parameters); + ir_rvalue *outer_array_idx = idx->hir(instructions, state); + + YYLTYPE index_loc = idx->get_location(); + return _mesa_ast_array_index_to_hir(mem_ctx, state, outer_array, + outer_array_idx, loc, + index_loc); + } else { + ir_variable *sub_var = NULL; + *function_name = array->primary_expression.identifier; + + match_subroutine_by_name(*function_name, actual_parameters, + state, &sub_var); + + ir_rvalue *outer_array_idx = idx->hir(instructions, state); + return new(mem_ctx) ir_dereference_array(sub_var, outer_array_idx); + } +} + +static void +print_function_prototypes(_mesa_glsl_parse_state *state, YYLTYPE *loc, + ir_function *f) +{ + if (f == NULL) + return; + + foreach_in_list(ir_function_signature, sig, &f->signatures) { + if (sig->is_builtin() && !sig->is_builtin_available(state)) + continue; + + char *str = prototype_string(sig->return_type, f->name, &sig->parameters); + _mesa_glsl_error(loc, state, " %s", str); + ralloc_free(str); + } +} + +/** + * Raise a "no matching function" error, listing all possible overloads the + * compiler considered so developers can figure out what went wrong. 
+ */ +static void +no_matching_function_error(const char *name, + YYLTYPE *loc, + exec_list *actual_parameters, + _mesa_glsl_parse_state *state) +{ + gl_shader *sh = _mesa_glsl_get_builtin_function_shader(); + + if (state->symbols->get_function(name) == NULL + && (!state->uses_builtin_functions + || sh->symbols->get_function(name) == NULL)) { + _mesa_glsl_error(loc, state, "no function with name '%s'", name); + } else { + char *str = prototype_string(NULL, name, actual_parameters); + _mesa_glsl_error(loc, state, + "no matching function for call to `%s'; candidates are:", + str); + ralloc_free(str); + + print_function_prototypes(state, loc, state->symbols->get_function(name)); + + if (state->uses_builtin_functions) { + print_function_prototypes(state, loc, sh->symbols->get_function(name)); + } + } +} + +/** + * Perform automatic type conversion of constructor parameters + * + * This implements the rules in the "Conversion and Scalar Constructors" + * section (GLSL 1.10 section 5.4.1), not the "Implicit Conversions" rules. 
+ */ +static ir_rvalue * +convert_component(ir_rvalue *src, const glsl_type *desired_type) +{ + void *ctx = ralloc_parent(src); + const unsigned a = desired_type->base_type; + const unsigned b = src->type->base_type; + ir_expression *result = NULL; + + if (src->type->is_error()) + return src; + + assert(a <= GLSL_TYPE_BOOL); + assert(b <= GLSL_TYPE_BOOL); + + if (a == b) + return src; + + switch (a) { + case GLSL_TYPE_UINT: + switch (b) { + case GLSL_TYPE_INT: + result = new(ctx) ir_expression(ir_unop_i2u, src); + break; + case GLSL_TYPE_FLOAT: + result = new(ctx) ir_expression(ir_unop_f2u, src); + break; + case GLSL_TYPE_BOOL: + result = new(ctx) ir_expression(ir_unop_i2u, + new(ctx) ir_expression(ir_unop_b2i, src)); + break; + case GLSL_TYPE_DOUBLE: + result = new(ctx) ir_expression(ir_unop_d2u, src); + break; + } + break; + case GLSL_TYPE_INT: + switch (b) { + case GLSL_TYPE_UINT: + result = new(ctx) ir_expression(ir_unop_u2i, src); + break; + case GLSL_TYPE_FLOAT: + result = new(ctx) ir_expression(ir_unop_f2i, src); + break; + case GLSL_TYPE_BOOL: + result = new(ctx) ir_expression(ir_unop_b2i, src); + break; + case GLSL_TYPE_DOUBLE: + result = new(ctx) ir_expression(ir_unop_d2i, src); + break; + } + break; + case GLSL_TYPE_FLOAT: + switch (b) { + case GLSL_TYPE_UINT: + result = new(ctx) ir_expression(ir_unop_u2f, desired_type, src, NULL); + break; + case GLSL_TYPE_INT: + result = new(ctx) ir_expression(ir_unop_i2f, desired_type, src, NULL); + break; + case GLSL_TYPE_BOOL: + result = new(ctx) ir_expression(ir_unop_b2f, desired_type, src, NULL); + break; + case GLSL_TYPE_DOUBLE: + result = new(ctx) ir_expression(ir_unop_d2f, desired_type, src, NULL); + break; + } + break; + case GLSL_TYPE_BOOL: + switch (b) { + case GLSL_TYPE_UINT: + result = new(ctx) ir_expression(ir_unop_i2b, + new(ctx) ir_expression(ir_unop_u2i, src)); + break; + case GLSL_TYPE_INT: + result = new(ctx) ir_expression(ir_unop_i2b, desired_type, src, NULL); + break; + case GLSL_TYPE_FLOAT: + 
result = new(ctx) ir_expression(ir_unop_f2b, desired_type, src, NULL); + break; + case GLSL_TYPE_DOUBLE: + result = new(ctx) ir_expression(ir_unop_d2b, desired_type, src, NULL); + break; + } + break; + case GLSL_TYPE_DOUBLE: + switch (b) { + case GLSL_TYPE_INT: + result = new(ctx) ir_expression(ir_unop_i2d, src); + break; + case GLSL_TYPE_UINT: + result = new(ctx) ir_expression(ir_unop_u2d, src); + break; + case GLSL_TYPE_BOOL: + result = new(ctx) ir_expression(ir_unop_f2d, + new(ctx) ir_expression(ir_unop_b2f, src)); + break; + case GLSL_TYPE_FLOAT: + result = new(ctx) ir_expression(ir_unop_f2d, desired_type, src, NULL); + break; + } + } + + assert(result != NULL); + assert(result->type == desired_type); + + /* Try constant folding; it may fold in the conversion we just added. */ + ir_constant *const constant = result->constant_expression_value(); + return (constant != NULL) ? (ir_rvalue *) constant : (ir_rvalue *) result; +} + +/** + * Dereference a specific component from a scalar, vector, or matrix + */ +static ir_rvalue * +dereference_component(ir_rvalue *src, unsigned component) +{ + void *ctx = ralloc_parent(src); + assert(component < src->type->components()); + + /* If the source is a constant, just create a new constant instead of a + * dereference of the existing constant. + */ + ir_constant *constant = src->as_constant(); + if (constant) + return new(ctx) ir_constant(constant, component); + + if (src->type->is_scalar()) { + return src; + } else if (src->type->is_vector()) { + return new(ctx) ir_swizzle(src, component, 0, 0, 0, 1); + } else { + assert(src->type->is_matrix()); + + /* Dereference a row of the matrix, then call this function again to get + * a specific element from that row. 
+ */ + const int c = component / src->type->column_type()->vector_elements; + const int r = component % src->type->column_type()->vector_elements; + ir_constant *const col_index = new(ctx) ir_constant(c); + ir_dereference *const col = new(ctx) ir_dereference_array(src, col_index); + + col->type = src->type->column_type(); + + return dereference_component(col, r); + } + + assert(!"Should not get here."); + return NULL; +} + + +static ir_rvalue * +process_vec_mat_constructor(exec_list *instructions, + const glsl_type *constructor_type, + YYLTYPE *loc, exec_list *parameters, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + + /* The ARB_shading_language_420pack spec says: + * + * "If an initializer is a list of initializers enclosed in curly braces, + * the variable being declared must be a vector, a matrix, an array, or a + * structure. + * + * int i = { 1 }; // illegal, i is not an aggregate" + */ + if (constructor_type->vector_elements <= 1) { + _mesa_glsl_error(loc, state, "aggregates can only initialize vectors, " + "matrices, arrays, and structs"); + return ir_rvalue::error_value(ctx); + } + + exec_list actual_parameters; + const unsigned parameter_count = + process_parameters(instructions, &actual_parameters, parameters, state); + + if (parameter_count == 0 + || (constructor_type->is_vector() && + constructor_type->vector_elements != parameter_count) + || (constructor_type->is_matrix() && + constructor_type->matrix_columns != parameter_count)) { + _mesa_glsl_error(loc, state, "%s constructor must have %u parameters", + constructor_type->is_vector() ? "vector" : "matrix", + constructor_type->vector_elements); + return ir_rvalue::error_value(ctx); + } + + bool all_parameters_are_constant = true; + + /* Type cast each parameter and, if possible, fold constants. */ + foreach_in_list_safe(ir_rvalue, ir, &actual_parameters) { + ir_rvalue *result = ir; + + /* Apply implicit conversions (not the scalar constructor rules!). See + * the spec quote above. 
*/ + if (constructor_type->base_type != result->type->base_type) { + const glsl_type *desired_type = + glsl_type::get_instance(constructor_type->base_type, + ir->type->vector_elements, + ir->type->matrix_columns); + if (result->type->can_implicitly_convert_to(desired_type, state)) { + /* Even though convert_component() implements the constructor + * conversion rules (not the implicit conversion rules), its safe + * to use it here because we already checked that the implicit + * conversion is legal. + */ + result = convert_component(ir, desired_type); + } + } + + if (constructor_type->is_matrix()) { + if (result->type != constructor_type->column_type()) { + _mesa_glsl_error(loc, state, "type error in matrix constructor: " + "expected: %s, found %s", + constructor_type->column_type()->name, + result->type->name); + return ir_rvalue::error_value(ctx); + } + } else if (result->type != constructor_type->get_scalar_type()) { + _mesa_glsl_error(loc, state, "type error in vector constructor: " + "expected: %s, found %s", + constructor_type->get_scalar_type()->name, + result->type->name); + return ir_rvalue::error_value(ctx); + } + + /* Attempt to convert the parameter to a constant valued expression. + * After doing so, track whether or not all the parameters to the + * constructor are trivially constant valued expressions. 
+ */ + ir_rvalue *const constant = result->constant_expression_value(); + + if (constant != NULL) + result = constant; + else + all_parameters_are_constant = false; + + ir->replace_with(result); + } + + if (all_parameters_are_constant) + return new(ctx) ir_constant(constructor_type, &actual_parameters); + + ir_variable *var = new(ctx) ir_variable(constructor_type, "vec_mat_ctor", + ir_var_temporary); + instructions->push_tail(var); + + int i = 0; + + foreach_in_list(ir_rvalue, rhs, &actual_parameters) { + ir_instruction *assignment = NULL; + + if (var->type->is_matrix()) { + ir_rvalue *lhs = new(ctx) ir_dereference_array(var, + new(ctx) ir_constant(i)); + assignment = new(ctx) ir_assignment(lhs, rhs, NULL); + } else { + /* use writemask rather than index for vector */ + assert(var->type->is_vector()); + assert(i < 4); + ir_dereference *lhs = new(ctx) ir_dereference_variable(var); + assignment = new(ctx) ir_assignment(lhs, rhs, NULL, (unsigned)(1 << i)); + } + + instructions->push_tail(assignment); + + i++; + } + + return new(ctx) ir_dereference_variable(var); +} + + +static ir_rvalue * +process_array_constructor(exec_list *instructions, + const glsl_type *constructor_type, + YYLTYPE *loc, exec_list *parameters, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + /* Array constructors come in two forms: sized and unsized. Sized array + * constructors look like 'vec4[2](a, b)', where 'a' and 'b' are vec4 + * variables. In this case the number of parameters must exactly match the + * specified size of the array. + * + * Unsized array constructors look like 'vec4[](a, b)', where 'a' and 'b' + * are vec4 variables. In this case the size of the array being constructed + * is determined by the number of parameters. + * + * From page 52 (page 58 of the PDF) of the GLSL 1.50 spec: + * + * "There must be exactly the same number of arguments as the size of + * the array being constructed. 
If no size is present in the + * constructor, then the array is explicitly sized to the number of + * arguments provided. The arguments are assigned in order, starting at + * element 0, to the elements of the constructed array. Each argument + * must be the same type as the element type of the array, or be a type + * that can be converted to the element type of the array according to + * Section 4.1.10 "Implicit Conversions."" + */ + exec_list actual_parameters; + const unsigned parameter_count = + process_parameters(instructions, &actual_parameters, parameters, state); + bool is_unsized_array = constructor_type->is_unsized_array(); + + if ((parameter_count == 0) || + (!is_unsized_array && (constructor_type->length != parameter_count))) { + const unsigned min_param = is_unsized_array + ? 1 : constructor_type->length; + + _mesa_glsl_error(loc, state, "array constructor must have %s %u " + "parameter%s", + is_unsized_array ? "at least" : "exactly", + min_param, (min_param <= 1) ? "" : "s"); + return ir_rvalue::error_value(ctx); + } + + if (is_unsized_array) { + constructor_type = + glsl_type::get_array_instance(constructor_type->fields.array, + parameter_count); + assert(constructor_type != NULL); + assert(constructor_type->length == parameter_count); + } + + bool all_parameters_are_constant = true; + const glsl_type *element_type = constructor_type->fields.array; + + /* Type cast each parameter and, if possible, fold constants. */ + foreach_in_list_safe(ir_rvalue, ir, &actual_parameters) { + ir_rvalue *result = ir; + + const glsl_base_type element_base_type = + constructor_type->fields.array->base_type; + + /* Apply implicit conversions (not the scalar constructor rules!). See + * the spec quote above. 
*/ + if (element_base_type != result->type->base_type) { + const glsl_type *desired_type = + glsl_type::get_instance(element_base_type, + ir->type->vector_elements, + ir->type->matrix_columns); + + if (result->type->can_implicitly_convert_to(desired_type, state)) { + /* Even though convert_component() implements the constructor + * conversion rules (not the implicit conversion rules), its safe + * to use it here because we already checked that the implicit + * conversion is legal. + */ + result = convert_component(ir, desired_type); + } + } + + if (constructor_type->fields.array->is_unsized_array()) { + /* As the inner parameters of the constructor are created without + * knowledge of each other we need to check to make sure unsized + * parameters of unsized constructors all end up with the same size. + * + * e.g we make sure to fail for a constructor like this: + * vec4[][] a = vec4[][](vec4[](vec4(0.0), vec4(1.0)), + * vec4[](vec4(0.0), vec4(1.0), vec4(1.0)), + * vec4[](vec4(0.0), vec4(1.0))); + */ + if (element_type->is_unsized_array()) { + /* This is the first parameter so just get the type */ + element_type = result->type; + } else if (element_type != result->type) { + _mesa_glsl_error(loc, state, "type error in array constructor: " + "expected: %s, found %s", + element_type->name, + result->type->name); + return ir_rvalue::error_value(ctx); + } + } else if (result->type != constructor_type->fields.array) { + _mesa_glsl_error(loc, state, "type error in array constructor: " + "expected: %s, found %s", + constructor_type->fields.array->name, + result->type->name); + return ir_rvalue::error_value(ctx); + } else { + element_type = result->type; + } + + /* Attempt to convert the parameter to a constant valued expression. + * After doing so, track whether or not all the parameters to the + * constructor are trivially constant valued expressions. 
+ */ + ir_rvalue *const constant = result->constant_expression_value(); + + if (constant != NULL) + result = constant; + else + all_parameters_are_constant = false; + + ir->replace_with(result); + } + + if (constructor_type->fields.array->is_unsized_array()) { + constructor_type = + glsl_type::get_array_instance(element_type, + parameter_count); + assert(constructor_type != NULL); + assert(constructor_type->length == parameter_count); + } + + if (all_parameters_are_constant) + return new(ctx) ir_constant(constructor_type, &actual_parameters); + + ir_variable *var = new(ctx) ir_variable(constructor_type, "array_ctor", + ir_var_temporary); + instructions->push_tail(var); + + int i = 0; + foreach_in_list(ir_rvalue, rhs, &actual_parameters) { + ir_rvalue *lhs = new(ctx) ir_dereference_array(var, + new(ctx) ir_constant(i)); + + ir_instruction *assignment = new(ctx) ir_assignment(lhs, rhs, NULL); + instructions->push_tail(assignment); + + i++; + } + + return new(ctx) ir_dereference_variable(var); +} + + +/** + * Try to convert a record constructor to a constant expression + */ +static ir_constant * +constant_record_constructor(const glsl_type *constructor_type, + exec_list *parameters, void *mem_ctx) +{ + foreach_in_list(ir_instruction, node, parameters) { + ir_constant *constant = node->as_constant(); + if (constant == NULL) + return NULL; + node->replace_with(constant); + } + + return new(mem_ctx) ir_constant(constructor_type, parameters); +} + + +/** + * Determine if a list consists of a single scalar r-value + */ +bool +single_scalar_parameter(exec_list *parameters) +{ + const ir_rvalue *const p = (ir_rvalue *) parameters->head; + assert(((ir_rvalue *)p)->as_rvalue() != NULL); + + return (p->type->is_scalar() && p->next->is_tail_sentinel()); +} + + +/** + * Generate inline code for a vector constructor + * + * The generated constructor code will consist of a temporary variable + * declaration of the same type as the constructor. 
A sequence of assignments + * from constructor parameters to the temporary will follow. + * + * \return + * An \c ir_dereference_variable of the temprorary generated in the constructor + * body. + */ +ir_rvalue * +emit_inline_vector_constructor(const glsl_type *type, + exec_list *instructions, + exec_list *parameters, + void *ctx) +{ + assert(!parameters->is_empty()); + + ir_variable *var = new(ctx) ir_variable(type, "vec_ctor", ir_var_temporary); + instructions->push_tail(var); + + /* There are three kinds of vector constructors. + * + * - Construct a vector from a single scalar by replicating that scalar to + * all components of the vector. + * + * - Construct a vector from at least a matrix. This case should already + * have been taken care of in ast_function_expression::hir by breaking + * down the matrix into a series of column vectors. + * + * - Construct a vector from an arbirary combination of vectors and + * scalars. The components of the constructor parameters are assigned + * to the vector in order until the vector is full. + */ + const unsigned lhs_components = type->components(); + if (single_scalar_parameter(parameters)) { + ir_rvalue *first_param = (ir_rvalue *)parameters->head; + ir_rvalue *rhs = new(ctx) ir_swizzle(first_param, 0, 0, 0, 0, + lhs_components); + ir_dereference_variable *lhs = new(ctx) ir_dereference_variable(var); + const unsigned mask = (1U << lhs_components) - 1; + + assert(rhs->type == lhs->type); + + ir_instruction *inst = new(ctx) ir_assignment(lhs, rhs, NULL, mask); + instructions->push_tail(inst); + } else { + unsigned base_component = 0; + unsigned base_lhs_component = 0; + ir_constant_data data; + unsigned constant_mask = 0, constant_components = 0; + + memset(&data, 0, sizeof(data)); + + foreach_in_list(ir_rvalue, param, parameters) { + unsigned rhs_components = param->type->components(); + + /* Do not try to assign more components to the vector than it has! 
+ */ + if ((rhs_components + base_lhs_component) > lhs_components) { + rhs_components = lhs_components - base_lhs_component; + } + + const ir_constant *const c = param->as_constant(); + if (c != NULL) { + for (unsigned i = 0; i < rhs_components; i++) { + switch (c->type->base_type) { + case GLSL_TYPE_UINT: + data.u[i + base_component] = c->get_uint_component(i); + break; + case GLSL_TYPE_INT: + data.i[i + base_component] = c->get_int_component(i); + break; + case GLSL_TYPE_FLOAT: + data.f[i + base_component] = c->get_float_component(i); + break; + case GLSL_TYPE_DOUBLE: + data.d[i + base_component] = c->get_double_component(i); + break; + case GLSL_TYPE_BOOL: + data.b[i + base_component] = c->get_bool_component(i); + break; + default: + assert(!"Should not get here."); + break; + } + } + + /* Mask of fields to be written in the assignment. + */ + constant_mask |= ((1U << rhs_components) - 1) << base_lhs_component; + constant_components += rhs_components; + + base_component += rhs_components; + } + /* Advance the component index by the number of components + * that were just assigned. + */ + base_lhs_component += rhs_components; + } + + if (constant_mask != 0) { + ir_dereference *lhs = new(ctx) ir_dereference_variable(var); + const glsl_type *rhs_type = glsl_type::get_instance(var->type->base_type, + constant_components, + 1); + ir_rvalue *rhs = new(ctx) ir_constant(rhs_type, &data); + + ir_instruction *inst = + new(ctx) ir_assignment(lhs, rhs, NULL, constant_mask); + instructions->push_tail(inst); + } + + base_component = 0; + foreach_in_list(ir_rvalue, param, parameters) { + unsigned rhs_components = param->type->components(); + + /* Do not try to assign more components to the vector than it has! + */ + if ((rhs_components + base_component) > lhs_components) { + rhs_components = lhs_components - base_component; + } + + /* If we do not have any components left to copy, break out of the + * loop. 
This can happen when initializing a vec4 with a mat3 as the + * mat3 would have been broken into a series of column vectors. + */ + if (rhs_components == 0) { + break; + } + + const ir_constant *const c = param->as_constant(); + if (c == NULL) { + /* Mask of fields to be written in the assignment. + */ + const unsigned write_mask = ((1U << rhs_components) - 1) + << base_component; + + ir_dereference *lhs = new(ctx) ir_dereference_variable(var); + + /* Generate a swizzle so that LHS and RHS sizes match. + */ + ir_rvalue *rhs = + new(ctx) ir_swizzle(param, 0, 1, 2, 3, rhs_components); + + ir_instruction *inst = + new(ctx) ir_assignment(lhs, rhs, NULL, write_mask); + instructions->push_tail(inst); + } + + /* Advance the component index by the number of components that were + * just assigned. + */ + base_component += rhs_components; + } + } + return new(ctx) ir_dereference_variable(var); +} + + +/** + * Generate assignment of a portion of a vector to a portion of a matrix column + * + * \param src_base First component of the source to be used in assignment + * \param column Column of destination to be assiged + * \param row_base First component of the destination column to be assigned + * \param count Number of components to be assigned + * + * \note + * \c src_base + \c count must be less than or equal to the number of components + * in the source vector. + */ +ir_instruction * +assign_to_matrix_column(ir_variable *var, unsigned column, unsigned row_base, + ir_rvalue *src, unsigned src_base, unsigned count, + void *mem_ctx) +{ + ir_constant *col_idx = new(mem_ctx) ir_constant(column); + ir_dereference *column_ref = new(mem_ctx) ir_dereference_array(var, col_idx); + + assert(column_ref->type->components() >= (row_base + count)); + assert(src->type->components() >= (src_base + count)); + + /* Generate a swizzle that extracts the number of components from the source + * that are to be assigned to the column of the matrix. 
+ */ + if (count < src->type->vector_elements) { + src = new(mem_ctx) ir_swizzle(src, + src_base + 0, src_base + 1, + src_base + 2, src_base + 3, + count); + } + + /* Mask of fields to be written in the assignment. + */ + const unsigned write_mask = ((1U << count) - 1) << row_base; + + return new(mem_ctx) ir_assignment(column_ref, src, NULL, write_mask); +} + + +/** + * Generate inline code for a matrix constructor + * + * The generated constructor code will consist of a temporary variable + * declaration of the same type as the constructor. A sequence of assignments + * from constructor parameters to the temporary will follow. + * + * \return + * An \c ir_dereference_variable of the temprorary generated in the constructor + * body. + */ +ir_rvalue * +emit_inline_matrix_constructor(const glsl_type *type, + exec_list *instructions, + exec_list *parameters, + void *ctx) +{ + assert(!parameters->is_empty()); + + ir_variable *var = new(ctx) ir_variable(type, "mat_ctor", ir_var_temporary); + instructions->push_tail(var); + + /* There are three kinds of matrix constructors. + * + * - Construct a matrix from a single scalar by replicating that scalar to + * along the diagonal of the matrix and setting all other components to + * zero. + * + * - Construct a matrix from an arbirary combination of vectors and + * scalars. The components of the constructor parameters are assigned + * to the matrix in column-major order until the matrix is full. + * + * - Construct a matrix from a single matrix. The source matrix is copied + * to the upper left portion of the constructed matrix, and the remaining + * elements take values from the identity matrix. + */ + ir_rvalue *const first_param = (ir_rvalue *) parameters->head; + if (single_scalar_parameter(parameters)) { + /* Assign the scalar to the X component of a vec4, and fill the remaining + * components with zero. 
+ */ + glsl_base_type param_base_type = first_param->type->base_type; + assert(param_base_type == GLSL_TYPE_FLOAT || + param_base_type == GLSL_TYPE_DOUBLE); + ir_variable *rhs_var = + new(ctx) ir_variable(glsl_type::get_instance(param_base_type, 4, 1), + "mat_ctor_vec", + ir_var_temporary); + instructions->push_tail(rhs_var); + + ir_constant_data zero; + for (unsigned i = 0; i < 4; i++) + if (param_base_type == GLSL_TYPE_FLOAT) + zero.f[i] = 0.0; + else + zero.d[i] = 0.0; + + ir_instruction *inst = + new(ctx) ir_assignment(new(ctx) ir_dereference_variable(rhs_var), + new(ctx) ir_constant(rhs_var->type, &zero), + NULL); + instructions->push_tail(inst); + + ir_dereference *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var); + + inst = new(ctx) ir_assignment(rhs_ref, first_param, NULL, 0x01); + instructions->push_tail(inst); + + /* Assign the temporary vector to each column of the destination matrix + * with a swizzle that puts the X component on the diagonal of the + * matrix. In some cases this may mean that the X component does not + * get assigned into the column at all (i.e., when the matrix has more + * columns than rows). 
+ */ + static const unsigned rhs_swiz[4][4] = { + { 0, 1, 1, 1 }, + { 1, 0, 1, 1 }, + { 1, 1, 0, 1 }, + { 1, 1, 1, 0 } + }; + + const unsigned cols_to_init = MIN2(type->matrix_columns, + type->vector_elements); + for (unsigned i = 0; i < cols_to_init; i++) { + ir_constant *const col_idx = new(ctx) ir_constant(i); + ir_rvalue *const col_ref = new(ctx) ir_dereference_array(var, col_idx); + + ir_rvalue *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var); + ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, rhs_swiz[i], + type->vector_elements); + + inst = new(ctx) ir_assignment(col_ref, rhs, NULL); + instructions->push_tail(inst); + } + + for (unsigned i = cols_to_init; i < type->matrix_columns; i++) { + ir_constant *const col_idx = new(ctx) ir_constant(i); + ir_rvalue *const col_ref = new(ctx) ir_dereference_array(var, col_idx); + + ir_rvalue *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var); + ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, 1, 1, 1, 1, + type->vector_elements); + + inst = new(ctx) ir_assignment(col_ref, rhs, NULL); + instructions->push_tail(inst); + } + } else if (first_param->type->is_matrix()) { + /* From page 50 (56 of the PDF) of the GLSL 1.50 spec: + * + * "If a matrix is constructed from a matrix, then each component + * (column i, row j) in the result that has a corresponding + * component (column i, row j) in the argument will be initialized + * from there. All other components will be initialized to the + * identity matrix. If a matrix argument is given to a matrix + * constructor, it is an error to have any other arguments." + */ + assert(first_param->next->is_tail_sentinel()); + ir_rvalue *const src_matrix = first_param; + + /* If the source matrix is smaller, pre-initialize the relavent parts of + * the destination matrix to the identity matrix. 
+ */ + if ((src_matrix->type->matrix_columns < var->type->matrix_columns) + || (src_matrix->type->vector_elements < var->type->vector_elements)) { + + /* If the source matrix has fewer rows, every column of the destination + * must be initialized. Otherwise only the columns in the destination + * that do not exist in the source must be initialized. + */ + unsigned col = + (src_matrix->type->vector_elements < var->type->vector_elements) + ? 0 : src_matrix->type->matrix_columns; + + const glsl_type *const col_type = var->type->column_type(); + for (/* empty */; col < var->type->matrix_columns; col++) { + ir_constant_data ident; + + ident.f[0] = 0.0; + ident.f[1] = 0.0; + ident.f[2] = 0.0; + ident.f[3] = 0.0; + + ident.f[col] = 1.0; + + ir_rvalue *const rhs = new(ctx) ir_constant(col_type, &ident); + + ir_rvalue *const lhs = + new(ctx) ir_dereference_array(var, new(ctx) ir_constant(col)); + + ir_instruction *inst = new(ctx) ir_assignment(lhs, rhs, NULL); + instructions->push_tail(inst); + } + } + + /* Assign columns from the source matrix to the destination matrix. + * + * Since the parameter will be used in the RHS of multiple assignments, + * generate a temporary and copy the paramter there. 
+ */ + ir_variable *const rhs_var = + new(ctx) ir_variable(first_param->type, "mat_ctor_mat", + ir_var_temporary); + instructions->push_tail(rhs_var); + + ir_dereference *const rhs_var_ref = + new(ctx) ir_dereference_variable(rhs_var); + ir_instruction *const inst = + new(ctx) ir_assignment(rhs_var_ref, first_param, NULL); + instructions->push_tail(inst); + + const unsigned last_row = MIN2(src_matrix->type->vector_elements, + var->type->vector_elements); + const unsigned last_col = MIN2(src_matrix->type->matrix_columns, + var->type->matrix_columns); + + unsigned swiz[4] = { 0, 0, 0, 0 }; + for (unsigned i = 1; i < last_row; i++) + swiz[i] = i; + + const unsigned write_mask = (1U << last_row) - 1; + + for (unsigned i = 0; i < last_col; i++) { + ir_dereference *const lhs = + new(ctx) ir_dereference_array(var, new(ctx) ir_constant(i)); + ir_rvalue *const rhs_col = + new(ctx) ir_dereference_array(rhs_var, new(ctx) ir_constant(i)); + + /* If one matrix has columns that are smaller than the columns of the + * other matrix, wrap the column access of the larger with a swizzle + * so that the LHS and RHS of the assignment have the same size (and + * therefore have the same type). + * + * It would be perfectly valid to unconditionally generate the + * swizzles, this this will typically result in a more compact IR tree. 
+ */ + ir_rvalue *rhs; + if (lhs->type->vector_elements != rhs_col->type->vector_elements) { + rhs = new(ctx) ir_swizzle(rhs_col, swiz, last_row); + } else { + rhs = rhs_col; + } + + ir_instruction *inst = + new(ctx) ir_assignment(lhs, rhs, NULL, write_mask); + instructions->push_tail(inst); + } + } else { + const unsigned cols = type->matrix_columns; + const unsigned rows = type->vector_elements; + unsigned remaining_slots = rows * cols; + unsigned col_idx = 0; + unsigned row_idx = 0; + + foreach_in_list(ir_rvalue, rhs, parameters) { + unsigned rhs_components = rhs->type->components(); + unsigned rhs_base = 0; + + if (remaining_slots == 0) + break; + + /* Since the parameter might be used in the RHS of two assignments, + * generate a temporary and copy the paramter there. + */ + ir_variable *rhs_var = + new(ctx) ir_variable(rhs->type, "mat_ctor_vec", ir_var_temporary); + instructions->push_tail(rhs_var); + + ir_dereference *rhs_var_ref = + new(ctx) ir_dereference_variable(rhs_var); + ir_instruction *inst = new(ctx) ir_assignment(rhs_var_ref, rhs, NULL); + instructions->push_tail(inst); + + do { + /* Assign the current parameter to as many components of the matrix + * as it will fill. + * + * NOTE: A single vector parameter can span two matrix columns. A + * single vec4, for example, can completely fill a mat2. + */ + unsigned count = MIN2(rows - row_idx, + rhs_components - rhs_base); + + rhs_var_ref = new(ctx) ir_dereference_variable(rhs_var); + ir_instruction *inst = assign_to_matrix_column(var, col_idx, + row_idx, + rhs_var_ref, + rhs_base, + count, ctx); + instructions->push_tail(inst); + rhs_base += count; + row_idx += count; + remaining_slots -= count; + + /* Sometimes, there is still data left in the parameters and + * components left to be set in the destination but in other + * column. 
+ */ + if (row_idx >= rows) { + row_idx = 0; + col_idx++; + } + } while(remaining_slots > 0 && rhs_base < rhs_components); + } + } + + return new(ctx) ir_dereference_variable(var); +} + + +ir_rvalue * +emit_inline_record_constructor(const glsl_type *type, + exec_list *instructions, + exec_list *parameters, + void *mem_ctx) +{ + ir_variable *const var = + new(mem_ctx) ir_variable(type, "record_ctor", ir_var_temporary); + ir_dereference_variable *const d = new(mem_ctx) ir_dereference_variable(var); + + instructions->push_tail(var); + + exec_node *node = parameters->head; + for (unsigned i = 0; i < type->length; i++) { + assert(!node->is_tail_sentinel()); + + ir_dereference *const lhs = + new(mem_ctx) ir_dereference_record(d->clone(mem_ctx, NULL), + type->fields.structure[i].name); + + ir_rvalue *const rhs = ((ir_instruction *) node)->as_rvalue(); + assert(rhs != NULL); + + ir_instruction *const assign = new(mem_ctx) ir_assignment(lhs, rhs, NULL); + + instructions->push_tail(assign); + node = node->next; + } + + return d; +} + + +static ir_rvalue * +process_record_constructor(exec_list *instructions, + const glsl_type *constructor_type, + YYLTYPE *loc, exec_list *parameters, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + exec_list actual_parameters; + + process_parameters(instructions, &actual_parameters, + parameters, state); + + exec_node *node = actual_parameters.head; + for (unsigned i = 0; i < constructor_type->length; i++) { + ir_rvalue *ir = (ir_rvalue *) node; + + if (node->is_tail_sentinel()) { + _mesa_glsl_error(loc, state, + "insufficient parameters to constructor for `%s'", + constructor_type->name); + return ir_rvalue::error_value(ctx); + } + + if (apply_implicit_conversion(constructor_type->fields.structure[i].type, + ir, state)) { + node->replace_with(ir); + } else { + _mesa_glsl_error(loc, state, + "parameter type mismatch in constructor for `%s.%s' " + "(%s vs %s)", + constructor_type->name, + 
constructor_type->fields.structure[i].name, + ir->type->name, + constructor_type->fields.structure[i].type->name); + return ir_rvalue::error_value(ctx);; + } + + node = node->next; + } + + if (!node->is_tail_sentinel()) { + _mesa_glsl_error(loc, state, "too many parameters in constructor " + "for `%s'", constructor_type->name); + return ir_rvalue::error_value(ctx); + } + + ir_rvalue *const constant = + constant_record_constructor(constructor_type, &actual_parameters, + state); + + return (constant != NULL) + ? constant + : emit_inline_record_constructor(constructor_type, instructions, + &actual_parameters, state); +} + +ir_rvalue * +ast_function_expression::handle_method(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + const ast_expression *field = subexpressions[0]; + ir_rvalue *op; + ir_rvalue *result; + void *ctx = state; + /* Handle "method calls" in GLSL 1.20 - namely, array.length() */ + YYLTYPE loc = get_location(); + state->check_version(120, 300, &loc, "methods not supported"); + + const char *method; + method = field->primary_expression.identifier; + + op = field->subexpressions[0]->hir(instructions, state); + if (strcmp(method, "length") == 0) { + if (!this->expressions.is_empty()) { + _mesa_glsl_error(&loc, state, "length method takes no arguments"); + goto fail; + } + + if (op->type->is_array()) { + if (op->type->is_unsized_array()) { + if (!state->has_shader_storage_buffer_objects()) { + _mesa_glsl_error(&loc, state, "length called on unsized array" + " only available with " + "ARB_shader_storage_buffer_object"); + } + /* Calculate length of an unsized array in run-time */ + result = new(ctx) ir_expression(ir_unop_ssbo_unsized_array_length, op); + } else { + result = new(ctx) ir_constant(op->type->array_size()); + } + } else if (op->type->is_vector()) { + if (state->has_420pack()) { + /* .length() returns int. 
*/ + result = new(ctx) ir_constant((int) op->type->vector_elements); + } else { + _mesa_glsl_error(&loc, state, "length method on matrix only available" + "with ARB_shading_language_420pack"); + goto fail; + } + } else if (op->type->is_matrix()) { + if (state->has_420pack()) { + /* .length() returns int. */ + result = new(ctx) ir_constant((int) op->type->matrix_columns); + } else { + _mesa_glsl_error(&loc, state, "length method on matrix only available" + "with ARB_shading_language_420pack"); + goto fail; + } + } else { + _mesa_glsl_error(&loc, state, "length called on scalar."); + goto fail; + } + } else { + _mesa_glsl_error(&loc, state, "unknown method: `%s'", method); + goto fail; + } + return result; +fail: + return ir_rvalue::error_value(ctx); +} + +ir_rvalue * +ast_function_expression::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + /* There are three sorts of function calls. + * + * 1. constructors - The first subexpression is an ast_type_specifier. + * 2. methods - Only the .length() method of array types. + * 3. functions - Calls to regular old functions. + * + */ + if (is_constructor()) { + const ast_type_specifier *type = (ast_type_specifier *) subexpressions[0]; + YYLTYPE loc = type->get_location(); + const char *name; + + const glsl_type *const constructor_type = type->glsl_type(& name, state); + + /* constructor_type can be NULL if a variable with the same name as the + * structure has come into scope. + */ + if (constructor_type == NULL) { + _mesa_glsl_error(& loc, state, "unknown type `%s' (structure name " + "may be shadowed by a variable with the same name)", + type->type_name); + return ir_rvalue::error_value(ctx); + } + + + /* Constructors for opaque types are illegal. 
+ */ + if (constructor_type->contains_opaque()) { + _mesa_glsl_error(& loc, state, "cannot construct opaque type `%s'", + constructor_type->name); + return ir_rvalue::error_value(ctx); + } + + if (constructor_type->is_array()) { + if (!state->check_version(120, 300, &loc, + "array constructors forbidden")) { + return ir_rvalue::error_value(ctx); + } + + return process_array_constructor(instructions, constructor_type, + & loc, &this->expressions, state); + } + + + /* There are two kinds of constructor calls. Constructors for arrays and + * structures must have the exact number of arguments with matching types + * in the correct order. These constructors follow essentially the same + * type matching rules as functions. + * + * Constructors for built-in language types, such as mat4 and vec2, are + * free form. The only requirements are that the parameters must provide + * enough values of the correct scalar type and that no arguments are + * given past the last used argument. + * + * When using the C-style initializer syntax from GLSL 4.20, constructors + * must have the exact number of arguments with matching types in the + * correct order. + */ + if (constructor_type->is_record()) { + return process_record_constructor(instructions, constructor_type, + &loc, &this->expressions, + state); + } + + if (!constructor_type->is_numeric() && !constructor_type->is_boolean()) + return ir_rvalue::error_value(ctx); + + /* Total number of components of the type being constructed. */ + const unsigned type_components = constructor_type->components(); + + /* Number of components from parameters that have actually been + * consumed. This is used to perform several kinds of error checking. 
+ */ + unsigned components_used = 0; + + unsigned matrix_parameters = 0; + unsigned nonmatrix_parameters = 0; + exec_list actual_parameters; + + foreach_list_typed(ast_node, ast, link, &this->expressions) { + ir_rvalue *result = ast->hir(instructions, state); + + /* From page 50 (page 56 of the PDF) of the GLSL 1.50 spec: + * + * "It is an error to provide extra arguments beyond this + * last used argument." + */ + if (components_used >= type_components) { + _mesa_glsl_error(& loc, state, "too many parameters to `%s' " + "constructor", + constructor_type->name); + return ir_rvalue::error_value(ctx); + } + + if (!result->type->is_numeric() && !result->type->is_boolean()) { + _mesa_glsl_error(& loc, state, "cannot construct `%s' from a " + "non-numeric data type", + constructor_type->name); + return ir_rvalue::error_value(ctx); + } + + /* Count the number of matrix and nonmatrix parameters. This + * is used below to enforce some of the constructor rules. + */ + if (result->type->is_matrix()) + matrix_parameters++; + else + nonmatrix_parameters++; + + actual_parameters.push_tail(result); + components_used += result->type->components(); + } + + /* From page 28 (page 34 of the PDF) of the GLSL 1.10 spec: + * + * "It is an error to construct matrices from other matrices. This + * is reserved for future use." + */ + if (matrix_parameters > 0 + && constructor_type->is_matrix() + && !state->check_version(120, 100, &loc, + "cannot construct `%s' from a matrix", + constructor_type->name)) { + return ir_rvalue::error_value(ctx); + } + + /* From page 50 (page 56 of the PDF) of the GLSL 1.50 spec: + * + * "If a matrix argument is given to a matrix constructor, it is + * an error to have any other arguments." 
+ */ + if ((matrix_parameters > 0) + && ((matrix_parameters + nonmatrix_parameters) > 1) + && constructor_type->is_matrix()) { + _mesa_glsl_error(& loc, state, "for matrix `%s' constructor, " + "matrix must be only parameter", + constructor_type->name); + return ir_rvalue::error_value(ctx); + } + + /* From page 28 (page 34 of the PDF) of the GLSL 1.10 spec: + * + * "In these cases, there must be enough components provided in the + * arguments to provide an initializer for every component in the + * constructed value." + */ + if (components_used < type_components && components_used != 1 + && matrix_parameters == 0) { + _mesa_glsl_error(& loc, state, "too few components to construct " + "`%s'", + constructor_type->name); + return ir_rvalue::error_value(ctx); + } + + /* Matrices can never be consumed as is by any constructor but matrix + * constructors. If the constructor type is not matrix, always break the + * matrix up into a series of column vectors. + */ + if (!constructor_type->is_matrix()) { + foreach_in_list_safe(ir_rvalue, matrix, &actual_parameters) { + if (!matrix->type->is_matrix()) + continue; + + /* Create a temporary containing the matrix. */ + ir_variable *var = new(ctx) ir_variable(matrix->type, "matrix_tmp", + ir_var_temporary); + instructions->push_tail(var); + instructions->push_tail(new(ctx) ir_assignment(new(ctx) + ir_dereference_variable(var), matrix, NULL)); + var->constant_value = matrix->constant_expression_value(); + + /* Replace the matrix with dereferences of its columns. 
*/ + for (int i = 0; i < matrix->type->matrix_columns; i++) { + matrix->insert_before(new (ctx) ir_dereference_array(var, + new(ctx) ir_constant(i))); + } + matrix->remove(); + } + } + + bool all_parameters_are_constant = true; + + /* Type cast each parameter and, if possible, fold constants.*/ + foreach_in_list_safe(ir_rvalue, ir, &actual_parameters) { + const glsl_type *desired_type = + glsl_type::get_instance(constructor_type->base_type, + ir->type->vector_elements, + ir->type->matrix_columns); + ir_rvalue *result = convert_component(ir, desired_type); + + /* Attempt to convert the parameter to a constant valued expression. + * After doing so, track whether or not all the parameters to the + * constructor are trivially constant valued expressions. + */ + ir_rvalue *const constant = result->constant_expression_value(); + + if (constant != NULL) + result = constant; + else + all_parameters_are_constant = false; + + if (result != ir) { + ir->replace_with(result); + } + } + + /* If all of the parameters are trivially constant, create a + * constant representing the complete collection of parameters. 
+ */ + if (all_parameters_are_constant) { + return new(ctx) ir_constant(constructor_type, &actual_parameters); + } else if (constructor_type->is_scalar()) { + return dereference_component((ir_rvalue *) actual_parameters.head, + 0); + } else if (constructor_type->is_vector()) { + return emit_inline_vector_constructor(constructor_type, + instructions, + &actual_parameters, + ctx); + } else { + assert(constructor_type->is_matrix()); + return emit_inline_matrix_constructor(constructor_type, + instructions, + &actual_parameters, + ctx); + } + } else if (subexpressions[0]->oper == ast_field_selection) { + return handle_method(instructions, state); + } else { + const ast_expression *id = subexpressions[0]; + const char *func_name; + YYLTYPE loc = get_location(); + exec_list actual_parameters; + ir_variable *sub_var = NULL; + ir_rvalue *array_idx = NULL; + + process_parameters(instructions, &actual_parameters, &this->expressions, + state); + + if (id->oper == ast_array_index) { + array_idx = generate_array_index(ctx, instructions, state, loc, + id->subexpressions[0], + id->subexpressions[1], &func_name, + &actual_parameters); + } else { + func_name = id->primary_expression.identifier; + } + + ir_function_signature *sig = + match_function_by_name(func_name, &actual_parameters, state); + + ir_rvalue *value = NULL; + if (sig == NULL) { + sig = match_subroutine_by_name(func_name, &actual_parameters, state, &sub_var); + } + + if (sig == NULL) { + no_matching_function_error(func_name, &loc, &actual_parameters, state); + value = ir_rvalue::error_value(ctx); + } else if (!verify_parameter_modes(state, sig, actual_parameters, this->expressions)) { + /* an error has already been emitted */ + value = ir_rvalue::error_value(ctx); + } else { + value = generate_call(instructions, sig, &actual_parameters, sub_var, array_idx, state); + if (!value) { + ir_variable *const tmp = new(ctx) ir_variable(glsl_type::void_type, + "void_var", + ir_var_temporary); + instructions->push_tail(tmp); + 
value = new(ctx) ir_dereference_variable(tmp); + } + } + + return value; + } + + unreachable("not reached"); +} + +bool +ast_function_expression::has_sequence_subexpression() const +{ + foreach_list_typed(const ast_node, ast, link, &this->expressions) { + if (ast->has_sequence_subexpression()) + return true; + } + + return false; +} + +ir_rvalue * +ast_aggregate_initializer::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + YYLTYPE loc = this->get_location(); + + if (!this->constructor_type) { + _mesa_glsl_error(&loc, state, "type of C-style initializer unknown"); + return ir_rvalue::error_value(ctx); + } + const glsl_type *const constructor_type = this->constructor_type; + + if (!state->has_420pack()) { + _mesa_glsl_error(&loc, state, "C-style initialization requires the " + "GL_ARB_shading_language_420pack extension"); + return ir_rvalue::error_value(ctx); + } + + if (constructor_type->is_array()) { + return process_array_constructor(instructions, constructor_type, &loc, + &this->expressions, state); + } + + if (constructor_type->is_record()) { + return process_record_constructor(instructions, constructor_type, &loc, + &this->expressions, state); + } + + return process_vec_mat_constructor(instructions, constructor_type, &loc, + &this->expressions, state); +} diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp new file mode 100644 index 0000000..dfd3196 --- /dev/null +++ b/src/compiler/glsl/ast_to_hir.cpp @@ -0,0 +1,7583 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject 
to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file ast_to_hir.cpp + * Convert abstract syntax to high-level intermediate representation (HIR). + * + * During the conversion to HIR, the majority of the semantic checking is + * performed on the program. This includes: + * + * * Symbol table management + * * Type checking + * * Function binding + * + * The majority of this work could be done during parsing, and the parser could + * probably generate HIR directly. However, this results in frequent changes + * to the parser code. Since we do not assume that every system this compiler + * is built on will have Flex and Bison installed, we have to store the code + * generated by these tools in our version control system. In other parts of + * the system we've seen problems where a parser was changed but the generated + * code was not committed, merge conflicts were created because two developers + * had slightly different versions of Bison installed, etc. + * + * I have also noticed that running Bison generated parsers in GDB is very + * irritating. When you get a segfault on '$$ = $1->foo', you can't very + * well 'print $1' in GDB. + * + * As a result, my preference is to put as little C code as possible in the + * parser (and lexer) sources. 
+ */ + +#include "glsl_symbol_table.h" +#include "glsl_parser_extras.h" +#include "ast.h" +#include "compiler/glsl_types.h" +#include "program/hash_table.h" +#include "main/shaderobj.h" +#include "ir.h" +#include "ir_builder.h" + +using namespace ir_builder; + +static void +detect_conflicting_assignments(struct _mesa_glsl_parse_state *state, + exec_list *instructions); +static void +remove_per_vertex_blocks(exec_list *instructions, + _mesa_glsl_parse_state *state, ir_variable_mode mode); + +/** + * Visitor class that finds the first instance of any write-only variable that + * is ever read, if any + */ +class read_from_write_only_variable_visitor : public ir_hierarchical_visitor +{ +public: + read_from_write_only_variable_visitor() : found(NULL) + { + } + + virtual ir_visitor_status visit(ir_dereference_variable *ir) + { + if (this->in_assignee) + return visit_continue; + + ir_variable *var = ir->variable_referenced(); + /* We can have image_write_only set on both images and buffer variables, + * but in the former there is a distinction between reads from + * the variable itself (write_only) and from the memory they point to + * (image_write_only), while in the case of buffer variables there is + * no such distinction, that is why this check here is limited to + * buffer variables alone. 
+ */ + if (!var || var->data.mode != ir_var_shader_storage) + return visit_continue; + + if (var->data.image_write_only) { + found = var; + return visit_stop; + } + + return visit_continue; + } + + ir_variable *get_variable() { + return found; + } + + virtual ir_visitor_status visit_enter(ir_expression *ir) + { + /* .length() doesn't actually read anything */ + if (ir->operation == ir_unop_ssbo_unsized_array_length) + return visit_continue_with_parent; + + return visit_continue; + } + +private: + ir_variable *found; +}; + +void +_mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state) +{ + _mesa_glsl_initialize_variables(instructions, state); + + state->symbols->separate_function_namespace = state->language_version == 110; + + state->current_function = NULL; + + state->toplevel_ir = instructions; + + state->gs_input_prim_type_specified = false; + state->tcs_output_vertices_specified = false; + state->cs_input_local_size_specified = false; + + /* Section 4.2 of the GLSL 1.20 specification states: + * "The built-in functions are scoped in a scope outside the global scope + * users declare global variables in. That is, a shader's global scope, + * available for user-defined functions and global variables, is nested + * inside the scope containing the built-in functions." + * + * Since built-in functions like ftransform() access built-in variables, + * it follows that those must be in the outer scope as well. + * + * We push scope here to create this nesting effect...but don't pop. + * This way, a shader's globals are still in the symbol table for use + * by the linker. + */ + state->symbols->push_scope(); + + foreach_list_typed (ast_node, ast, link, & state->translation_unit) + ast->hir(instructions, state); + + detect_recursion_unlinked(state, instructions); + detect_conflicting_assignments(state, instructions); + + state->toplevel_ir = NULL; + + /* Move all of the variable declarations to the front of the IR list, and + * reverse the order. 
This has the (intended!) side effect that vertex + * shader inputs and fragment shader outputs will appear in the IR in the + * same order that they appeared in the shader code. This results in the + * locations being assigned in the declared order. Many (arguably buggy) + * applications depend on this behavior, and it matches what nearly all + * other drivers do. + */ + foreach_in_list_safe(ir_instruction, node, instructions) { + ir_variable *const var = node->as_variable(); + + if (var == NULL) + continue; + + var->remove(); + instructions->push_head(var); + } + + /* Figure out if gl_FragCoord is actually used in fragment shader */ + ir_variable *const var = state->symbols->get_variable("gl_FragCoord"); + if (var != NULL) + state->fs_uses_gl_fragcoord = var->data.used; + + /* From section 7.1 (Built-In Language Variables) of the GLSL 4.10 spec: + * + * If multiple shaders using members of a built-in block belonging to + * the same interface are linked together in the same program, they + * must all redeclare the built-in block in the same way, as described + * in section 4.3.7 "Interface Blocks" for interface block matching, or + * a link error will result. + * + * The phrase "using members of a built-in block" implies that if two + * shaders are linked together and one of them *does not use* any members + * of the built-in block, then that shader does not need to have a matching + * redeclaration of the built-in block. + * + * This appears to be a clarification to the behaviour established for + * gl_PerVertex by GLSL 1.50, therefore implement it regardless of GLSL + * version. + * + * The definition of "interface" in section 4.3.7 that applies here is as + * follows: + * + * The boundary between adjacent programmable pipeline stages: This + * spans all the outputs in all compilation units of the first stage + * and all the inputs in all compilation units of the second stage. + * + * Therefore this rule applies to both inter- and intra-stage linking. 
+ * + * The easiest way to implement this is to check whether the shader uses + * gl_PerVertex right after ast-to-ir conversion, and if it doesn't, simply + * remove all the relevant variable declaration from the IR, so that the + * linker won't see them and complain about mismatches. + */ + remove_per_vertex_blocks(instructions, state, ir_var_shader_in); + remove_per_vertex_blocks(instructions, state, ir_var_shader_out); + + /* Check that we don't have reads from write-only variables */ + read_from_write_only_variable_visitor v; + v.run(instructions); + ir_variable *error_var = v.get_variable(); + if (error_var) { + /* It would be nice to have proper location information, but for that + * we would need to check this as we process each kind of AST node + */ + YYLTYPE loc; + memset(&loc, 0, sizeof(loc)); + _mesa_glsl_error(&loc, state, "Read from write-only variable `%s'", + error_var->name); + } +} + + +static ir_expression_operation +get_conversion_operation(const glsl_type *to, const glsl_type *from, + struct _mesa_glsl_parse_state *state) +{ + switch (to->base_type) { + case GLSL_TYPE_FLOAT: + switch (from->base_type) { + case GLSL_TYPE_INT: return ir_unop_i2f; + case GLSL_TYPE_UINT: return ir_unop_u2f; + case GLSL_TYPE_DOUBLE: return ir_unop_d2f; + default: return (ir_expression_operation)0; + } + + case GLSL_TYPE_UINT: + if (!state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) + return (ir_expression_operation)0; + switch (from->base_type) { + case GLSL_TYPE_INT: return ir_unop_i2u; + default: return (ir_expression_operation)0; + } + + case GLSL_TYPE_DOUBLE: + if (!state->has_double()) + return (ir_expression_operation)0; + switch (from->base_type) { + case GLSL_TYPE_INT: return ir_unop_i2d; + case GLSL_TYPE_UINT: return ir_unop_u2d; + case GLSL_TYPE_FLOAT: return ir_unop_f2d; + default: return (ir_expression_operation)0; + } + + default: return (ir_expression_operation)0; + } +} + + +/** + * If a conversion is available, convert one operand to a 
different type + * + * The \c from \c ir_rvalue is converted "in place". + * + * \param to Type that the operand it to be converted to + * \param from Operand that is being converted + * \param state GLSL compiler state + * + * \return + * If a conversion is possible (or unnecessary), \c true is returned. + * Otherwise \c false is returned. + */ +bool +apply_implicit_conversion(const glsl_type *to, ir_rvalue * &from, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + if (to->base_type == from->type->base_type) + return true; + + /* Prior to GLSL 1.20, there are no implicit conversions */ + if (!state->is_version(120, 0)) + return false; + + /* From page 27 (page 33 of the PDF) of the GLSL 1.50 spec: + * + * "There are no implicit array or structure conversions. For + * example, an array of int cannot be implicitly converted to an + * array of float. + */ + if (!to->is_numeric() || !from->type->is_numeric()) + return false; + + /* We don't actually want the specific type `to`, we want a type + * with the same base type as `to`, but the same vector width as + * `from`. + */ + to = glsl_type::get_instance(to->base_type, from->type->vector_elements, + from->type->matrix_columns); + + ir_expression_operation op = get_conversion_operation(to, from->type, state); + if (op) { + from = new(ctx) ir_expression(op, to, from, NULL); + return true; + } else { + return false; + } +} + + +static const struct glsl_type * +arithmetic_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b, + bool multiply, + struct _mesa_glsl_parse_state *state, YYLTYPE *loc) +{ + const glsl_type *type_a = value_a->type; + const glsl_type *type_b = value_b->type; + + /* From GLSL 1.50 spec, page 56: + * + * "The arithmetic binary operators add (+), subtract (-), + * multiply (*), and divide (/) operate on integer and + * floating-point scalars, vectors, and matrices." 
+ */ + if (!type_a->is_numeric() || !type_b->is_numeric()) { + _mesa_glsl_error(loc, state, + "operands to arithmetic operators must be numeric"); + return glsl_type::error_type; + } + + + /* "If one operand is floating-point based and the other is + * not, then the conversions from Section 4.1.10 "Implicit + * Conversions" are applied to the non-floating-point-based operand." + */ + if (!apply_implicit_conversion(type_a, value_b, state) + && !apply_implicit_conversion(type_b, value_a, state)) { + _mesa_glsl_error(loc, state, + "could not implicitly convert operands to " + "arithmetic operator"); + return glsl_type::error_type; + } + type_a = value_a->type; + type_b = value_b->type; + + /* "If the operands are integer types, they must both be signed or + * both be unsigned." + * + * From this rule and the preceeding conversion it can be inferred that + * both types must be GLSL_TYPE_FLOAT, or GLSL_TYPE_UINT, or GLSL_TYPE_INT. + * The is_numeric check above already filtered out the case where either + * type is not one of these, so now the base types need only be tested for + * equality. + */ + if (type_a->base_type != type_b->base_type) { + _mesa_glsl_error(loc, state, + "base type mismatch for arithmetic operator"); + return glsl_type::error_type; + } + + /* "All arithmetic binary operators result in the same fundamental type + * (signed integer, unsigned integer, or floating-point) as the + * operands they operate on, after operand type conversion. After + * conversion, the following cases are valid + * + * * The two operands are scalars. In this case the operation is + * applied, resulting in a scalar." + */ + if (type_a->is_scalar() && type_b->is_scalar()) + return type_a; + + /* "* One operand is a scalar, and the other is a vector or matrix. + * In this case, the scalar operation is applied independently to each + * component of the vector or matrix, resulting in the same size + * vector or matrix." 
+ */ + if (type_a->is_scalar()) { + if (!type_b->is_scalar()) + return type_b; + } else if (type_b->is_scalar()) { + return type_a; + } + + /* All of the combinations of <scalar, scalar>, <vector, scalar>, + * <scalar, vector>, <scalar, matrix>, and <matrix, scalar> have been + * handled. + */ + assert(!type_a->is_scalar()); + assert(!type_b->is_scalar()); + + /* "* The two operands are vectors of the same size. In this case, the + * operation is done component-wise resulting in the same size + * vector." + */ + if (type_a->is_vector() && type_b->is_vector()) { + if (type_a == type_b) { + return type_a; + } else { + _mesa_glsl_error(loc, state, + "vector size mismatch for arithmetic operator"); + return glsl_type::error_type; + } + } + + /* All of the combinations of <scalar, scalar>, <vector, scalar>, + * <scalar, vector>, <scalar, matrix>, <matrix, scalar>, and + * <vector, vector> have been handled. At least one of the operands must + * be matrix. Further, since there are no integer matrix types, the base + * type of both operands must be float. + */ + assert(type_a->is_matrix() || type_b->is_matrix()); + assert(type_a->base_type == GLSL_TYPE_FLOAT || + type_a->base_type == GLSL_TYPE_DOUBLE); + assert(type_b->base_type == GLSL_TYPE_FLOAT || + type_b->base_type == GLSL_TYPE_DOUBLE); + + /* "* The operator is add (+), subtract (-), or divide (/), and the + * operands are matrices with the same number of rows and the same + * number of columns. In this case, the operation is done component- + * wise resulting in the same size matrix." + * * The operator is multiply (*), where both operands are matrices or + * one operand is a vector and the other a matrix. A right vector + * operand is treated as a column vector and a left vector operand as a + * row vector. In all these cases, it is required that the number of + * columns of the left operand is equal to the number of rows of the + * right operand. 
Then, the multiply (*) operation does a linear + * algebraic multiply, yielding an object that has the same number of + * rows as the left operand and the same number of columns as the right + * operand. Section 5.10 "Vector and Matrix Operations" explains in + * more detail how vectors and matrices are operated on." + */ + if (! multiply) { + if (type_a == type_b) + return type_a; + } else { + const glsl_type *type = glsl_type::get_mul_type(type_a, type_b); + + if (type == glsl_type::error_type) { + _mesa_glsl_error(loc, state, + "size mismatch for matrix multiplication"); + } + + return type; + } + + + /* "All other cases are illegal." + */ + _mesa_glsl_error(loc, state, "type mismatch"); + return glsl_type::error_type; +} + + +static const struct glsl_type * +unary_arithmetic_result_type(const struct glsl_type *type, + struct _mesa_glsl_parse_state *state, YYLTYPE *loc) +{ + /* From GLSL 1.50 spec, page 57: + * + * "The arithmetic unary operators negate (-), post- and pre-increment + * and decrement (-- and ++) operate on integer or floating-point + * values (including vectors and matrices). All unary operators work + * component-wise on their operands. These result with the same type + * they operated on." + */ + if (!type->is_numeric()) { + _mesa_glsl_error(loc, state, + "operands to arithmetic operators must be numeric"); + return glsl_type::error_type; + } + + return type; +} + +/** + * \brief Return the result type of a bit-logic operation. + * + * If the given types to the bit-logic operator are invalid, return + * glsl_type::error_type. 
+ * + * \param value_a LHS of bit-logic op + * \param value_b RHS of bit-logic op + */ +static const struct glsl_type * +bit_logic_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b, + ast_operators op, + struct _mesa_glsl_parse_state *state, YYLTYPE *loc) +{ + const glsl_type *type_a = value_a->type; + const glsl_type *type_b = value_b->type; + + if (!state->check_bitwise_operations_allowed(loc)) { + return glsl_type::error_type; + } + + /* From page 50 (page 56 of PDF) of GLSL 1.30 spec: + * + * "The bitwise operators and (&), exclusive-or (^), and inclusive-or + * (|). The operands must be of type signed or unsigned integers or + * integer vectors." + */ + if (!type_a->is_integer()) { + _mesa_glsl_error(loc, state, "LHS of `%s' must be an integer", + ast_expression::operator_string(op)); + return glsl_type::error_type; + } + if (!type_b->is_integer()) { + _mesa_glsl_error(loc, state, "RHS of `%s' must be an integer", + ast_expression::operator_string(op)); + return glsl_type::error_type; + } + + /* Prior to GLSL 4.0 / GL_ARB_gpu_shader5, implicit conversions didn't + * make sense for bitwise operations, as they don't operate on floats. + * + * GLSL 4.0 added implicit int -> uint conversions, which are relevant + * here. It wasn't clear whether or not we should apply them to bitwise + * operations. However, Khronos has decided that they should in future + * language revisions. Applications also rely on this behavior. We opt + * to apply them in general, but issue a portability warning. 
+ * + * See https://www.khronos.org/bugzilla/show_bug.cgi?id=1405 + */ + if (type_a->base_type != type_b->base_type) { + if (!apply_implicit_conversion(type_a, value_b, state) + && !apply_implicit_conversion(type_b, value_a, state)) { + _mesa_glsl_error(loc, state, + "could not implicitly convert operands to " + "`%s` operator", + ast_expression::operator_string(op)); + return glsl_type::error_type; + } else { + _mesa_glsl_warning(loc, state, + "some implementations may not support implicit " + "int -> uint conversions for `%s' operators; " + "consider casting explicitly for portability", + ast_expression::operator_string(op)); + } + type_a = value_a->type; + type_b = value_b->type; + } + + /* "The fundamental types of the operands (signed or unsigned) must + * match," + */ + if (type_a->base_type != type_b->base_type) { + _mesa_glsl_error(loc, state, "operands of `%s' must have the same " + "base type", ast_expression::operator_string(op)); + return glsl_type::error_type; + } + + /* "The operands cannot be vectors of differing size." */ + if (type_a->is_vector() && + type_b->is_vector() && + type_a->vector_elements != type_b->vector_elements) { + _mesa_glsl_error(loc, state, "operands of `%s' cannot be vectors of " + "different sizes", ast_expression::operator_string(op)); + return glsl_type::error_type; + } + + /* "If one operand is a scalar and the other a vector, the scalar is + * applied component-wise to the vector, resulting in the same type as + * the vector. The fundamental types of the operands [...] will be the + * resulting fundamental type." 
+ */ + if (type_a->is_scalar()) + return type_b; + else + return type_a; +} + +static const struct glsl_type * +modulus_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b, + struct _mesa_glsl_parse_state *state, YYLTYPE *loc) +{ + const glsl_type *type_a = value_a->type; + const glsl_type *type_b = value_b->type; + + if (!state->check_version(130, 300, loc, "operator '%%' is reserved")) { + return glsl_type::error_type; + } + + /* Section 5.9 (Expressions) of the GLSL 4.00 specification says: + * + * "The operator modulus (%) operates on signed or unsigned integers or + * integer vectors." + */ + if (!type_a->is_integer()) { + _mesa_glsl_error(loc, state, "LHS of operator %% must be an integer"); + return glsl_type::error_type; + } + if (!type_b->is_integer()) { + _mesa_glsl_error(loc, state, "RHS of operator %% must be an integer"); + return glsl_type::error_type; + } + + /* "If the fundamental types in the operands do not match, then the + * conversions from section 4.1.10 "Implicit Conversions" are applied + * to create matching types." + * + * Note that GLSL 4.00 (and GL_ARB_gpu_shader5) introduced implicit + * int -> uint conversion rules. Prior to that, there were no implicit + * conversions. So it's harmless to apply them universally - no implicit + * conversions will exist. If the types don't match, we'll receive false, + * and raise an error, satisfying the GLSL 1.50 spec, page 56: + * + * "The operand types must both be signed or unsigned." + */ + if (!apply_implicit_conversion(type_a, value_b, state) && + !apply_implicit_conversion(type_b, value_a, state)) { + _mesa_glsl_error(loc, state, + "could not implicitly convert operands to " + "modulus (%%) operator"); + return glsl_type::error_type; + } + type_a = value_a->type; + type_b = value_b->type; + + /* "The operands cannot be vectors of differing size. 
If one operand is + * a scalar and the other vector, then the scalar is applied component- + * wise to the vector, resulting in the same type as the vector. If both + * are vectors of the same size, the result is computed component-wise." + */ + if (type_a->is_vector()) { + if (!type_b->is_vector() + || (type_a->vector_elements == type_b->vector_elements)) + return type_a; + } else + return type_b; + + /* "The operator modulus (%) is not defined for any other data types + * (non-integer types)." + */ + _mesa_glsl_error(loc, state, "type mismatch"); + return glsl_type::error_type; +} + + +static const struct glsl_type * +relational_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b, + struct _mesa_glsl_parse_state *state, YYLTYPE *loc) +{ + const glsl_type *type_a = value_a->type; + const glsl_type *type_b = value_b->type; + + /* From GLSL 1.50 spec, page 56: + * "The relational operators greater than (>), less than (<), greater + * than or equal (>=), and less than or equal (<=) operate only on + * scalar integer and scalar floating-point expressions." + */ + if (!type_a->is_numeric() + || !type_b->is_numeric() + || !type_a->is_scalar() + || !type_b->is_scalar()) { + _mesa_glsl_error(loc, state, + "operands to relational operators must be scalar and " + "numeric"); + return glsl_type::error_type; + } + + /* "Either the operands' types must match, or the conversions from + * Section 4.1.10 "Implicit Conversions" will be applied to the integer + * operand, after which the types must match." 
+ */ + if (!apply_implicit_conversion(type_a, value_b, state) + && !apply_implicit_conversion(type_b, value_a, state)) { + _mesa_glsl_error(loc, state, + "could not implicitly convert operands to " + "relational operator"); + return glsl_type::error_type; + } + type_a = value_a->type; + type_b = value_b->type; + + if (type_a->base_type != type_b->base_type) { + _mesa_glsl_error(loc, state, "base type mismatch"); + return glsl_type::error_type; + } + + /* "The result is scalar Boolean." + */ + return glsl_type::bool_type; +} + +/** + * \brief Return the result type of a bit-shift operation. + * + * If the given types to the bit-shift operator are invalid, return + * glsl_type::error_type. + * + * \param type_a Type of LHS of bit-shift op + * \param type_b Type of RHS of bit-shift op + */ +static const struct glsl_type * +shift_result_type(const struct glsl_type *type_a, + const struct glsl_type *type_b, + ast_operators op, + struct _mesa_glsl_parse_state *state, YYLTYPE *loc) +{ + if (!state->check_bitwise_operations_allowed(loc)) { + return glsl_type::error_type; + } + + /* From page 50 (page 56 of the PDF) of the GLSL 1.30 spec: + * + * "The shift operators (<<) and (>>). For both operators, the operands + * must be signed or unsigned integers or integer vectors. One operand + * can be signed while the other is unsigned." + */ + if (!type_a->is_integer()) { + _mesa_glsl_error(loc, state, "LHS of operator %s must be an integer or " + "integer vector", ast_expression::operator_string(op)); + return glsl_type::error_type; + + } + if (!type_b->is_integer()) { + _mesa_glsl_error(loc, state, "RHS of operator %s must be an integer or " + "integer vector", ast_expression::operator_string(op)); + return glsl_type::error_type; + } + + /* "If the first operand is a scalar, the second operand has to be + * a scalar as well." 
+ */ + if (type_a->is_scalar() && !type_b->is_scalar()) { + _mesa_glsl_error(loc, state, "if the first operand of %s is scalar, the " + "second must be scalar as well", + ast_expression::operator_string(op)); + return glsl_type::error_type; + } + + /* If both operands are vectors, check that they have same number of + * elements. + */ + if (type_a->is_vector() && + type_b->is_vector() && + type_a->vector_elements != type_b->vector_elements) { + _mesa_glsl_error(loc, state, "vector operands to operator %s must " + "have same number of elements", + ast_expression::operator_string(op)); + return glsl_type::error_type; + } + + /* "In all cases, the resulting type will be the same type as the left + * operand." + */ + return type_a; +} + +/** + * Returns the innermost array index expression in an rvalue tree. + * This is the largest indexing level -- if an array of blocks, then + * it is the block index rather than an indexing expression for an + * array-typed member of an array of blocks. + */ +static ir_rvalue * +find_innermost_array_index(ir_rvalue *rv) +{ + ir_dereference_array *last = NULL; + while (rv) { + if (rv->as_dereference_array()) { + last = rv->as_dereference_array(); + rv = last->array; + } else if (rv->as_dereference_record()) + rv = rv->as_dereference_record()->record; + else if (rv->as_swizzle()) + rv = rv->as_swizzle()->val; + else + rv = NULL; + } + + if (last) + return last->array_index; + + return NULL; +} + +/** + * Validates that a value can be assigned to a location with a specified type + * + * Validates that \c rhs can be assigned to some location. If the types are + * not an exact match but an automatic conversion is possible, \c rhs will be + * converted. + * + * \return + * \c NULL if \c rhs cannot be assigned to a location with type \c lhs_type. + * Otherwise the actual RHS to be assigned will be returned. This may be + * \c rhs, or it may be \c rhs after some type conversion. 
+ * + * \note + * In addition to being used for assignments, this function is used to + * type-check return values. + */ +static ir_rvalue * +validate_assignment(struct _mesa_glsl_parse_state *state, + YYLTYPE loc, ir_rvalue *lhs, + ir_rvalue *rhs, bool is_initializer) +{ + /* If there is already some error in the RHS, just return it. Anything + * else will lead to an avalanche of error message back to the user. + */ + if (rhs->type->is_error()) + return rhs; + + /* In the Tessellation Control Shader: + * If a per-vertex output variable is used as an l-value, it is an error + * if the expression indicating the vertex number is not the identifier + * `gl_InvocationID`. + */ + if (state->stage == MESA_SHADER_TESS_CTRL) { + ir_variable *var = lhs->variable_referenced(); + if (var->data.mode == ir_var_shader_out && !var->data.patch) { + ir_rvalue *index = find_innermost_array_index(lhs); + ir_variable *index_var = index ? index->variable_referenced() : NULL; + if (!index_var || strcmp(index_var->name, "gl_InvocationID") != 0) { + _mesa_glsl_error(&loc, state, + "Tessellation control shader outputs can only " + "be indexed by gl_InvocationID"); + return NULL; + } + } + } + + /* If the types are identical, the assignment can trivially proceed. + */ + if (rhs->type == lhs->type) + return rhs; + + /* If the array element types are the same and the LHS is unsized, + * the assignment is okay for initializers embedded in variable + * declarations. + * + * Note: Whole-array assignments are not permitted in GLSL 1.10, but this + * is handled by ir_dereference::is_lvalue. 
+ */ + const glsl_type *lhs_t = lhs->type; + const glsl_type *rhs_t = rhs->type; + bool unsized_array = false; + while(lhs_t->is_array()) { + if (rhs_t == lhs_t) + break; /* the rest of the inner arrays match so break out early */ + if (!rhs_t->is_array()) { + unsized_array = false; + break; /* number of dimensions mismatch */ + } + if (lhs_t->length == rhs_t->length) { + lhs_t = lhs_t->fields.array; + rhs_t = rhs_t->fields.array; + continue; + } else if (lhs_t->is_unsized_array()) { + unsized_array = true; + } else { + unsized_array = false; + break; /* sized array mismatch */ + } + lhs_t = lhs_t->fields.array; + rhs_t = rhs_t->fields.array; + } + if (unsized_array) { + if (is_initializer) { + return rhs; + } else { + _mesa_glsl_error(&loc, state, + "implicitly sized arrays cannot be assigned"); + return NULL; + } + } + + /* Check for implicit conversion in GLSL 1.20 */ + if (apply_implicit_conversion(lhs->type, rhs, state)) { + if (rhs->type == lhs->type) + return rhs; + } + + _mesa_glsl_error(&loc, state, + "%s of type %s cannot be assigned to " + "variable of type %s", + is_initializer ? 
"initializer" : "value", + rhs->type->name, lhs->type->name); + + return NULL; +} + +static void +mark_whole_array_access(ir_rvalue *access) +{ + ir_dereference_variable *deref = access->as_dereference_variable(); + + if (deref && deref->var) { + deref->var->data.max_array_access = deref->type->length - 1; + } +} + +static bool +do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state, + const char *non_lvalue_description, + ir_rvalue *lhs, ir_rvalue *rhs, + ir_rvalue **out_rvalue, bool needs_rvalue, + bool is_initializer, + YYLTYPE lhs_loc) +{ + void *ctx = state; + bool error_emitted = (lhs->type->is_error() || rhs->type->is_error()); + + ir_variable *lhs_var = lhs->variable_referenced(); + if (lhs_var) + lhs_var->data.assigned = true; + + if (!error_emitted) { + if (non_lvalue_description != NULL) { + _mesa_glsl_error(&lhs_loc, state, + "assignment to %s", + non_lvalue_description); + error_emitted = true; + } else if (lhs_var != NULL && (lhs_var->data.read_only || + (lhs_var->data.mode == ir_var_shader_storage && + lhs_var->data.image_read_only))) { + /* We can have image_read_only set on both images and buffer variables, + * but in the former there is a distinction between assignments to + * the variable itself (read_only) and to the memory they point to + * (image_read_only), while in the case of buffer variables there is + * no such distinction, that is why this check here is limited to + * buffer variables alone. + */ + _mesa_glsl_error(&lhs_loc, state, + "assignment to read-only variable '%s'", + lhs_var->name); + error_emitted = true; + } else if (lhs->type->is_array() && + !state->check_version(120, 300, &lhs_loc, + "whole array assignment forbidden")) { + /* From page 32 (page 38 of the PDF) of the GLSL 1.10 spec: + * + * "Other binary or unary expressions, non-dereferenced + * arrays, function names, swizzles with repeated fields, + * and constants cannot be l-values." 
+ * + * The restriction on arrays is lifted in GLSL 1.20 and GLSL ES 3.00. + */ + error_emitted = true; + } else if (!lhs->is_lvalue()) { + _mesa_glsl_error(& lhs_loc, state, "non-lvalue in assignment"); + error_emitted = true; + } + } + + ir_rvalue *new_rhs = + validate_assignment(state, lhs_loc, lhs, rhs, is_initializer); + if (new_rhs != NULL) { + rhs = new_rhs; + + /* If the LHS array was not declared with a size, it takes it size from + * the RHS. If the LHS is an l-value and a whole array, it must be a + * dereference of a variable. Any other case would require that the LHS + * is either not an l-value or not a whole array. + */ + if (lhs->type->is_unsized_array()) { + ir_dereference *const d = lhs->as_dereference(); + + assert(d != NULL); + + ir_variable *const var = d->variable_referenced(); + + assert(var != NULL); + + if (var->data.max_array_access >= unsigned(rhs->type->array_size())) { + /* FINISHME: This should actually log the location of the RHS. */ + _mesa_glsl_error(& lhs_loc, state, "array size must be > %u due to " + "previous access", + var->data.max_array_access); + } + + var->type = glsl_type::get_array_instance(lhs->type->fields.array, + rhs->type->array_size()); + d->type = var->type; + } + if (lhs->type->is_array()) { + mark_whole_array_access(rhs); + mark_whole_array_access(lhs); + } + } + + /* Most callers of do_assignment (assign, add_assign, pre_inc/dec, + * but not post_inc) need the converted assigned value as an rvalue + * to handle things like: + * + * i = j += 1; + */ + if (needs_rvalue) { + ir_variable *var = new(ctx) ir_variable(rhs->type, "assignment_tmp", + ir_var_temporary); + instructions->push_tail(var); + instructions->push_tail(assign(var, rhs)); + + if (!error_emitted) { + ir_dereference_variable *deref_var = new(ctx) ir_dereference_variable(var); + instructions->push_tail(new(ctx) ir_assignment(lhs, deref_var)); + } + ir_rvalue *rvalue = new(ctx) ir_dereference_variable(var); + + *out_rvalue = rvalue; + } else { + if 
(!error_emitted) + instructions->push_tail(new(ctx) ir_assignment(lhs, rhs)); + *out_rvalue = NULL; + } + + return error_emitted; +} + +static ir_rvalue * +get_lvalue_copy(exec_list *instructions, ir_rvalue *lvalue) +{ + void *ctx = ralloc_parent(lvalue); + ir_variable *var; + + var = new(ctx) ir_variable(lvalue->type, "_post_incdec_tmp", + ir_var_temporary); + instructions->push_tail(var); + + instructions->push_tail(new(ctx) ir_assignment(new(ctx) ir_dereference_variable(var), + lvalue)); + + return new(ctx) ir_dereference_variable(var); +} + + +ir_rvalue * +ast_node::hir(exec_list *instructions, struct _mesa_glsl_parse_state *state) +{ + (void) instructions; + (void) state; + + return NULL; +} + +bool +ast_node::has_sequence_subexpression() const +{ + return false; +} + +void +ast_function_expression::hir_no_rvalue(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + (void)hir(instructions, state); +} + +void +ast_aggregate_initializer::hir_no_rvalue(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + (void)hir(instructions, state); +} + +static ir_rvalue * +do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1) +{ + int join_op; + ir_rvalue *cmp = NULL; + + if (operation == ir_binop_all_equal) + join_op = ir_binop_logic_and; + else + join_op = ir_binop_logic_or; + + switch (op0->type->base_type) { + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_DOUBLE: + return new(mem_ctx) ir_expression(operation, op0, op1); + + case GLSL_TYPE_ARRAY: { + for (unsigned int i = 0; i < op0->type->length; i++) { + ir_rvalue *e0, *e1, *result; + + e0 = new(mem_ctx) ir_dereference_array(op0->clone(mem_ctx, NULL), + new(mem_ctx) ir_constant(i)); + e1 = new(mem_ctx) ir_dereference_array(op1->clone(mem_ctx, NULL), + new(mem_ctx) ir_constant(i)); + result = do_comparison(mem_ctx, operation, e0, e1); + + if (cmp) { + cmp = new(mem_ctx) ir_expression(join_op, cmp, result); 
+ } else { + cmp = result; + } + } + + mark_whole_array_access(op0); + mark_whole_array_access(op1); + break; + } + + case GLSL_TYPE_STRUCT: { + for (unsigned int i = 0; i < op0->type->length; i++) { + ir_rvalue *e0, *e1, *result; + const char *field_name = op0->type->fields.structure[i].name; + + e0 = new(mem_ctx) ir_dereference_record(op0->clone(mem_ctx, NULL), + field_name); + e1 = new(mem_ctx) ir_dereference_record(op1->clone(mem_ctx, NULL), + field_name); + result = do_comparison(mem_ctx, operation, e0, e1); + + if (cmp) { + cmp = new(mem_ctx) ir_expression(join_op, cmp, result); + } else { + cmp = result; + } + } + break; + } + + case GLSL_TYPE_ERROR: + case GLSL_TYPE_VOID: + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_ATOMIC_UINT: + case GLSL_TYPE_SUBROUTINE: + /* I assume a comparison of a struct containing a sampler just + * ignores the sampler present in the type. + */ + break; + } + + if (cmp == NULL) + cmp = new(mem_ctx) ir_constant(true); + + return cmp; +} + +/* For logical operations, we want to ensure that the operands are + * scalar booleans. If it isn't, emit an error and return a constant + * boolean to avoid triggering cascading error messages. 
+ */ +ir_rvalue * +get_scalar_boolean_operand(exec_list *instructions, + struct _mesa_glsl_parse_state *state, + ast_expression *parent_expr, + int operand, + const char *operand_name, + bool *error_emitted) +{ + ast_expression *expr = parent_expr->subexpressions[operand]; + void *ctx = state; + ir_rvalue *val = expr->hir(instructions, state); + + if (val->type->is_boolean() && val->type->is_scalar()) + return val; + + if (!*error_emitted) { + YYLTYPE loc = expr->get_location(); + _mesa_glsl_error(&loc, state, "%s of `%s' must be scalar boolean", + operand_name, + parent_expr->operator_string(parent_expr->oper)); + *error_emitted = true; + } + + return new(ctx) ir_constant(true); +} + +/** + * If name refers to a builtin array whose maximum allowed size is less than + * size, report an error and return true. Otherwise return false. + */ +void +check_builtin_array_max_size(const char *name, unsigned size, + YYLTYPE loc, struct _mesa_glsl_parse_state *state) +{ + if ((strcmp("gl_TexCoord", name) == 0) + && (size > state->Const.MaxTextureCoords)) { + /* From page 54 (page 60 of the PDF) of the GLSL 1.20 spec: + * + * "The size [of gl_TexCoord] can be at most + * gl_MaxTextureCoords." + */ + _mesa_glsl_error(&loc, state, "`gl_TexCoord' array size cannot " + "be larger than gl_MaxTextureCoords (%u)", + state->Const.MaxTextureCoords); + } else if (strcmp("gl_ClipDistance", name) == 0 + && size > state->Const.MaxClipPlanes) { + /* From section 7.1 (Vertex Shader Special Variables) of the + * GLSL 1.30 spec: + * + * "The gl_ClipDistance array is predeclared as unsized and + * must be sized by the shader either redeclaring it with a + * size or indexing it only with integral constant + * expressions. ... The size can be at most + * gl_MaxClipDistances." 
+ */ + _mesa_glsl_error(&loc, state, "`gl_ClipDistance' array size cannot " + "be larger than gl_MaxClipDistances (%u)", + state->Const.MaxClipPlanes); + } +} + +/** + * Create the constant 1, of a which is appropriate for incrementing and + * decrementing values of the given GLSL type. For example, if type is vec4, + * this creates a constant value of 1.0 having type float. + * + * If the given type is invalid for increment and decrement operators, return + * a floating point 1--the error will be detected later. + */ +static ir_rvalue * +constant_one_for_inc_dec(void *ctx, const glsl_type *type) +{ + switch (type->base_type) { + case GLSL_TYPE_UINT: + return new(ctx) ir_constant((unsigned) 1); + case GLSL_TYPE_INT: + return new(ctx) ir_constant(1); + default: + case GLSL_TYPE_FLOAT: + return new(ctx) ir_constant(1.0f); + } +} + +ir_rvalue * +ast_expression::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + return do_hir(instructions, state, true); +} + +void +ast_expression::hir_no_rvalue(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + do_hir(instructions, state, false); +} + +ir_rvalue * +ast_expression::do_hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state, + bool needs_rvalue) +{ + void *ctx = state; + static const int operations[AST_NUM_OPERATORS] = { + -1, /* ast_assign doesn't convert to ir_expression. */ + -1, /* ast_plus doesn't convert to ir_expression. */ + ir_unop_neg, + ir_binop_add, + ir_binop_sub, + ir_binop_mul, + ir_binop_div, + ir_binop_mod, + ir_binop_lshift, + ir_binop_rshift, + ir_binop_less, + ir_binop_greater, + ir_binop_lequal, + ir_binop_gequal, + ir_binop_all_equal, + ir_binop_any_nequal, + ir_binop_bit_and, + ir_binop_bit_xor, + ir_binop_bit_or, + ir_unop_bit_not, + ir_binop_logic_and, + ir_binop_logic_xor, + ir_binop_logic_or, + ir_unop_logic_not, + + /* Note: The following block of expression types actually convert + * to multiple IR instructions. 
+ */ + ir_binop_mul, /* ast_mul_assign */ + ir_binop_div, /* ast_div_assign */ + ir_binop_mod, /* ast_mod_assign */ + ir_binop_add, /* ast_add_assign */ + ir_binop_sub, /* ast_sub_assign */ + ir_binop_lshift, /* ast_ls_assign */ + ir_binop_rshift, /* ast_rs_assign */ + ir_binop_bit_and, /* ast_and_assign */ + ir_binop_bit_xor, /* ast_xor_assign */ + ir_binop_bit_or, /* ast_or_assign */ + + -1, /* ast_conditional doesn't convert to ir_expression. */ + ir_binop_add, /* ast_pre_inc. */ + ir_binop_sub, /* ast_pre_dec. */ + ir_binop_add, /* ast_post_inc. */ + ir_binop_sub, /* ast_post_dec. */ + -1, /* ast_field_selection doesn't conv to ir_expression. */ + -1, /* ast_array_index doesn't convert to ir_expression. */ + -1, /* ast_function_call doesn't conv to ir_expression. */ + -1, /* ast_identifier doesn't convert to ir_expression. */ + -1, /* ast_int_constant doesn't convert to ir_expression. */ + -1, /* ast_uint_constant doesn't conv to ir_expression. */ + -1, /* ast_float_constant doesn't conv to ir_expression. */ + -1, /* ast_bool_constant doesn't conv to ir_expression. */ + -1, /* ast_sequence doesn't convert to ir_expression. 
*/ + }; + ir_rvalue *result = NULL; + ir_rvalue *op[3]; + const struct glsl_type *type; /* a temporary variable for switch cases */ + bool error_emitted = false; + YYLTYPE loc; + + loc = this->get_location(); + + switch (this->oper) { + case ast_aggregate: + assert(!"ast_aggregate: Should never get here."); + break; + + case ast_assign: { + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + + error_emitted = + do_assignment(instructions, state, + this->subexpressions[0]->non_lvalue_description, + op[0], op[1], &result, needs_rvalue, false, + this->subexpressions[0]->get_location()); + break; + } + + case ast_plus: + op[0] = this->subexpressions[0]->hir(instructions, state); + + type = unary_arithmetic_result_type(op[0]->type, state, & loc); + + error_emitted = type->is_error(); + + result = op[0]; + break; + + case ast_neg: + op[0] = this->subexpressions[0]->hir(instructions, state); + + type = unary_arithmetic_result_type(op[0]->type, state, & loc); + + error_emitted = type->is_error(); + + result = new(ctx) ir_expression(operations[this->oper], type, + op[0], NULL); + break; + + case ast_add: + case ast_sub: + case ast_mul: + case ast_div: + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + + type = arithmetic_result_type(op[0], op[1], + (this->oper == ast_mul), + state, & loc); + error_emitted = type->is_error(); + + result = new(ctx) ir_expression(operations[this->oper], type, + op[0], op[1]); + break; + + case ast_mod: + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + + type = modulus_result_type(op[0], op[1], state, &loc); + + assert(operations[this->oper] == ir_binop_mod); + + result = new(ctx) ir_expression(operations[this->oper], type, + op[0], op[1]); + error_emitted = type->is_error(); + break; + + case ast_lshift: + case ast_rshift: + if 
(!state->check_bitwise_operations_allowed(&loc)) { + error_emitted = true; + } + + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + type = shift_result_type(op[0]->type, op[1]->type, this->oper, state, + &loc); + result = new(ctx) ir_expression(operations[this->oper], type, + op[0], op[1]); + error_emitted = op[0]->type->is_error() || op[1]->type->is_error(); + break; + + case ast_less: + case ast_greater: + case ast_lequal: + case ast_gequal: + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + + type = relational_result_type(op[0], op[1], state, & loc); + + /* The relational operators must either generate an error or result + * in a scalar boolean. See page 57 of the GLSL 1.50 spec. + */ + assert(type->is_error() + || ((type->base_type == GLSL_TYPE_BOOL) + && type->is_scalar())); + + result = new(ctx) ir_expression(operations[this->oper], type, + op[0], op[1]); + error_emitted = type->is_error(); + break; + + case ast_nequal: + case ast_equal: + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + + /* From page 58 (page 64 of the PDF) of the GLSL 1.50 spec: + * + * "The equality operators equal (==), and not equal (!=) + * operate on all types. They result in a scalar Boolean. If + * the operand types do not match, then there must be a + * conversion from Section 4.1.10 "Implicit Conversions" + * applied to one operand that can make them match, in which + * case this conversion is done." + */ + + if (op[0]->type == glsl_type::void_type || op[1]->type == glsl_type::void_type) { + _mesa_glsl_error(& loc, state, "`%s': wrong operand types: " + "no operation `%1$s' exists that takes a left-hand " + "operand of type 'void' or a right operand of type " + "'void'", (this->oper == ast_equal) ? 
"==" : "!="); + error_emitted = true; + } else if ((!apply_implicit_conversion(op[0]->type, op[1], state) + && !apply_implicit_conversion(op[1]->type, op[0], state)) + || (op[0]->type != op[1]->type)) { + _mesa_glsl_error(& loc, state, "operands of `%s' must have the same " + "type", (this->oper == ast_equal) ? "==" : "!="); + error_emitted = true; + } else if ((op[0]->type->is_array() || op[1]->type->is_array()) && + !state->check_version(120, 300, &loc, + "array comparisons forbidden")) { + error_emitted = true; + } else if ((op[0]->type->contains_opaque() || + op[1]->type->contains_opaque())) { + _mesa_glsl_error(&loc, state, "opaque type comparisons forbidden"); + error_emitted = true; + } + + if (error_emitted) { + result = new(ctx) ir_constant(false); + } else { + result = do_comparison(ctx, operations[this->oper], op[0], op[1]); + assert(result->type == glsl_type::bool_type); + } + break; + + case ast_bit_and: + case ast_bit_xor: + case ast_bit_or: + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + type = bit_logic_result_type(op[0], op[1], this->oper, state, &loc); + result = new(ctx) ir_expression(operations[this->oper], type, + op[0], op[1]); + error_emitted = op[0]->type->is_error() || op[1]->type->is_error(); + break; + + case ast_bit_not: + op[0] = this->subexpressions[0]->hir(instructions, state); + + if (!state->check_bitwise_operations_allowed(&loc)) { + error_emitted = true; + } + + if (!op[0]->type->is_integer()) { + _mesa_glsl_error(&loc, state, "operand of `~' must be an integer"); + error_emitted = true; + } + + type = error_emitted ? 
glsl_type::error_type : op[0]->type; + result = new(ctx) ir_expression(ir_unop_bit_not, type, op[0], NULL); + break; + + case ast_logic_and: { + exec_list rhs_instructions; + op[0] = get_scalar_boolean_operand(instructions, state, this, 0, + "LHS", &error_emitted); + op[1] = get_scalar_boolean_operand(&rhs_instructions, state, this, 1, + "RHS", &error_emitted); + + if (rhs_instructions.is_empty()) { + result = new(ctx) ir_expression(ir_binop_logic_and, op[0], op[1]); + type = result->type; + } else { + ir_variable *const tmp = new(ctx) ir_variable(glsl_type::bool_type, + "and_tmp", + ir_var_temporary); + instructions->push_tail(tmp); + + ir_if *const stmt = new(ctx) ir_if(op[0]); + instructions->push_tail(stmt); + + stmt->then_instructions.append_list(&rhs_instructions); + ir_dereference *const then_deref = new(ctx) ir_dereference_variable(tmp); + ir_assignment *const then_assign = + new(ctx) ir_assignment(then_deref, op[1]); + stmt->then_instructions.push_tail(then_assign); + + ir_dereference *const else_deref = new(ctx) ir_dereference_variable(tmp); + ir_assignment *const else_assign = + new(ctx) ir_assignment(else_deref, new(ctx) ir_constant(false)); + stmt->else_instructions.push_tail(else_assign); + + result = new(ctx) ir_dereference_variable(tmp); + type = tmp->type; + } + break; + } + + case ast_logic_or: { + exec_list rhs_instructions; + op[0] = get_scalar_boolean_operand(instructions, state, this, 0, + "LHS", &error_emitted); + op[1] = get_scalar_boolean_operand(&rhs_instructions, state, this, 1, + "RHS", &error_emitted); + + if (rhs_instructions.is_empty()) { + result = new(ctx) ir_expression(ir_binop_logic_or, op[0], op[1]); + type = result->type; + } else { + ir_variable *const tmp = new(ctx) ir_variable(glsl_type::bool_type, + "or_tmp", + ir_var_temporary); + instructions->push_tail(tmp); + + ir_if *const stmt = new(ctx) ir_if(op[0]); + instructions->push_tail(stmt); + + ir_dereference *const then_deref = new(ctx) ir_dereference_variable(tmp); + 
ir_assignment *const then_assign = + new(ctx) ir_assignment(then_deref, new(ctx) ir_constant(true)); + stmt->then_instructions.push_tail(then_assign); + + stmt->else_instructions.append_list(&rhs_instructions); + ir_dereference *const else_deref = new(ctx) ir_dereference_variable(tmp); + ir_assignment *const else_assign = + new(ctx) ir_assignment(else_deref, op[1]); + stmt->else_instructions.push_tail(else_assign); + + result = new(ctx) ir_dereference_variable(tmp); + type = tmp->type; + } + break; + } + + case ast_logic_xor: + /* From page 33 (page 39 of the PDF) of the GLSL 1.10 spec: + * + * "The logical binary operators and (&&), or ( | | ), and + * exclusive or (^^). They operate only on two Boolean + * expressions and result in a Boolean expression." + */ + op[0] = get_scalar_boolean_operand(instructions, state, this, 0, "LHS", + &error_emitted); + op[1] = get_scalar_boolean_operand(instructions, state, this, 1, "RHS", + &error_emitted); + + result = new(ctx) ir_expression(operations[this->oper], glsl_type::bool_type, + op[0], op[1]); + break; + + case ast_logic_not: + op[0] = get_scalar_boolean_operand(instructions, state, this, 0, + "operand", &error_emitted); + + result = new(ctx) ir_expression(operations[this->oper], glsl_type::bool_type, + op[0], NULL); + break; + + case ast_mul_assign: + case ast_div_assign: + case ast_add_assign: + case ast_sub_assign: { + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + + type = arithmetic_result_type(op[0], op[1], + (this->oper == ast_mul_assign), + state, & loc); + + ir_rvalue *temp_rhs = new(ctx) ir_expression(operations[this->oper], type, + op[0], op[1]); + + error_emitted = + do_assignment(instructions, state, + this->subexpressions[0]->non_lvalue_description, + op[0]->clone(ctx, NULL), temp_rhs, + &result, needs_rvalue, false, + this->subexpressions[0]->get_location()); + + /* GLSL 1.10 does not allow array assignment. 
However, we don't have to + * explicitly test for this because none of the binary expression + * operators allow array operands either. + */ + + break; + } + + case ast_mod_assign: { + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + + type = modulus_result_type(op[0], op[1], state, &loc); + + assert(operations[this->oper] == ir_binop_mod); + + ir_rvalue *temp_rhs; + temp_rhs = new(ctx) ir_expression(operations[this->oper], type, + op[0], op[1]); + + error_emitted = + do_assignment(instructions, state, + this->subexpressions[0]->non_lvalue_description, + op[0]->clone(ctx, NULL), temp_rhs, + &result, needs_rvalue, false, + this->subexpressions[0]->get_location()); + break; + } + + case ast_ls_assign: + case ast_rs_assign: { + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + type = shift_result_type(op[0]->type, op[1]->type, this->oper, state, + &loc); + ir_rvalue *temp_rhs = new(ctx) ir_expression(operations[this->oper], + type, op[0], op[1]); + error_emitted = + do_assignment(instructions, state, + this->subexpressions[0]->non_lvalue_description, + op[0]->clone(ctx, NULL), temp_rhs, + &result, needs_rvalue, false, + this->subexpressions[0]->get_location()); + break; + } + + case ast_and_assign: + case ast_xor_assign: + case ast_or_assign: { + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + type = bit_logic_result_type(op[0], op[1], this->oper, state, &loc); + ir_rvalue *temp_rhs = new(ctx) ir_expression(operations[this->oper], + type, op[0], op[1]); + error_emitted = + do_assignment(instructions, state, + this->subexpressions[0]->non_lvalue_description, + op[0]->clone(ctx, NULL), temp_rhs, + &result, needs_rvalue, false, + this->subexpressions[0]->get_location()); + break; + } + + case ast_conditional: { + /* From page 59 (page 65 of the PDF) of the 
GLSL 1.50 spec: + * + * "The ternary selection operator (?:). It operates on three + * expressions (exp1 ? exp2 : exp3). This operator evaluates the + * first expression, which must result in a scalar Boolean." + */ + op[0] = get_scalar_boolean_operand(instructions, state, this, 0, + "condition", &error_emitted); + + /* The :? operator is implemented by generating an anonymous temporary + * followed by an if-statement. The last instruction in each branch of + * the if-statement assigns a value to the anonymous temporary. This + * temporary is the r-value of the expression. + */ + exec_list then_instructions; + exec_list else_instructions; + + op[1] = this->subexpressions[1]->hir(&then_instructions, state); + op[2] = this->subexpressions[2]->hir(&else_instructions, state); + + /* From page 59 (page 65 of the PDF) of the GLSL 1.50 spec: + * + * "The second and third expressions can be any type, as + * long their types match, or there is a conversion in + * Section 4.1.10 "Implicit Conversions" that can be applied + * to one of the expressions to make their types match. This + * resulting matching type is the type of the entire + * expression." + */ + if ((!apply_implicit_conversion(op[1]->type, op[2], state) + && !apply_implicit_conversion(op[2]->type, op[1], state)) + || (op[1]->type != op[2]->type)) { + YYLTYPE loc = this->subexpressions[1]->get_location(); + + _mesa_glsl_error(& loc, state, "second and third operands of ?: " + "operator must have matching types"); + error_emitted = true; + type = glsl_type::error_type; + } else { + type = op[1]->type; + } + + /* From page 33 (page 39 of the PDF) of the GLSL 1.10 spec: + * + * "The second and third expressions must be the same type, but can + * be of any type other than an array." 
+ */ + if (type->is_array() && + !state->check_version(120, 300, &loc, + "second and third operands of ?: operator " + "cannot be arrays")) { + error_emitted = true; + } + + /* From section 4.1.7 of the GLSL 4.50 spec (Opaque Types): + * + * "Except for array indexing, structure member selection, and + * parentheses, opaque variables are not allowed to be operands in + * expressions; such use results in a compile-time error." + */ + if (type->contains_opaque()) { + _mesa_glsl_error(&loc, state, "opaque variables cannot be operands " + "of the ?: operator"); + error_emitted = true; + } + + ir_constant *cond_val = op[0]->constant_expression_value(); + + if (then_instructions.is_empty() + && else_instructions.is_empty() + && cond_val != NULL) { + result = cond_val->value.b[0] ? op[1] : op[2]; + } else { + /* The copy to conditional_tmp reads the whole array. */ + if (type->is_array()) { + mark_whole_array_access(op[1]); + mark_whole_array_access(op[2]); + } + + ir_variable *const tmp = + new(ctx) ir_variable(type, "conditional_tmp", ir_var_temporary); + instructions->push_tail(tmp); + + ir_if *const stmt = new(ctx) ir_if(op[0]); + instructions->push_tail(stmt); + + then_instructions.move_nodes_to(& stmt->then_instructions); + ir_dereference *const then_deref = + new(ctx) ir_dereference_variable(tmp); + ir_assignment *const then_assign = + new(ctx) ir_assignment(then_deref, op[1]); + stmt->then_instructions.push_tail(then_assign); + + else_instructions.move_nodes_to(& stmt->else_instructions); + ir_dereference *const else_deref = + new(ctx) ir_dereference_variable(tmp); + ir_assignment *const else_assign = + new(ctx) ir_assignment(else_deref, op[2]); + stmt->else_instructions.push_tail(else_assign); + + result = new(ctx) ir_dereference_variable(tmp); + } + break; + } + + case ast_pre_inc: + case ast_pre_dec: { + this->non_lvalue_description = (this->oper == ast_pre_inc) + ? 
"pre-increment operation" : "pre-decrement operation"; + + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = constant_one_for_inc_dec(ctx, op[0]->type); + + type = arithmetic_result_type(op[0], op[1], false, state, & loc); + + ir_rvalue *temp_rhs; + temp_rhs = new(ctx) ir_expression(operations[this->oper], type, + op[0], op[1]); + + error_emitted = + do_assignment(instructions, state, + this->subexpressions[0]->non_lvalue_description, + op[0]->clone(ctx, NULL), temp_rhs, + &result, needs_rvalue, false, + this->subexpressions[0]->get_location()); + break; + } + + case ast_post_inc: + case ast_post_dec: { + this->non_lvalue_description = (this->oper == ast_post_inc) + ? "post-increment operation" : "post-decrement operation"; + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = constant_one_for_inc_dec(ctx, op[0]->type); + + error_emitted = op[0]->type->is_error() || op[1]->type->is_error(); + + type = arithmetic_result_type(op[0], op[1], false, state, & loc); + + ir_rvalue *temp_rhs; + temp_rhs = new(ctx) ir_expression(operations[this->oper], type, + op[0], op[1]); + + /* Get a temporary of a copy of the lvalue before it's modified. + * This may get thrown away later. 
+ */ + result = get_lvalue_copy(instructions, op[0]->clone(ctx, NULL)); + + ir_rvalue *junk_rvalue; + error_emitted = + do_assignment(instructions, state, + this->subexpressions[0]->non_lvalue_description, + op[0]->clone(ctx, NULL), temp_rhs, + &junk_rvalue, false, false, + this->subexpressions[0]->get_location()); + + break; + } + + case ast_field_selection: + result = _mesa_ast_field_selection_to_hir(this, instructions, state); + break; + + case ast_array_index: { + YYLTYPE index_loc = subexpressions[1]->get_location(); + + op[0] = subexpressions[0]->hir(instructions, state); + op[1] = subexpressions[1]->hir(instructions, state); + + result = _mesa_ast_array_index_to_hir(ctx, state, op[0], op[1], + loc, index_loc); + + if (result->type->is_error()) + error_emitted = true; + + break; + } + + case ast_unsized_array_dim: + assert(!"ast_unsized_array_dim: Should never get here."); + break; + + case ast_function_call: + /* Should *NEVER* get here. ast_function_call should always be handled + * by ast_function_expression::hir. + */ + assert(0); + break; + + case ast_identifier: { + /* ast_identifier can appear several places in a full abstract syntax + * tree. This particular use must be at location specified in the grammar + * as 'variable_identifier'. 
+ */ + ir_variable *var = + state->symbols->get_variable(this->primary_expression.identifier); + + if (var != NULL) { + var->data.used = true; + result = new(ctx) ir_dereference_variable(var); + } else { + _mesa_glsl_error(& loc, state, "`%s' undeclared", + this->primary_expression.identifier); + + result = ir_rvalue::error_value(ctx); + error_emitted = true; + } + break; + } + + case ast_int_constant: + result = new(ctx) ir_constant(this->primary_expression.int_constant); + break; + + case ast_uint_constant: + result = new(ctx) ir_constant(this->primary_expression.uint_constant); + break; + + case ast_float_constant: + result = new(ctx) ir_constant(this->primary_expression.float_constant); + break; + + case ast_bool_constant: + result = new(ctx) ir_constant(bool(this->primary_expression.bool_constant)); + break; + + case ast_double_constant: + result = new(ctx) ir_constant(this->primary_expression.double_constant); + break; + + case ast_sequence: { + /* It should not be possible to generate a sequence in the AST without + * any expressions in it. + */ + assert(!this->expressions.is_empty()); + + /* The r-value of a sequence is the last expression in the sequence. If + * the other expressions in the sequence do not have side-effects (and + * therefore add instructions to the instruction list), they get dropped + * on the floor. + */ + exec_node *previous_tail_pred = NULL; + YYLTYPE previous_operand_loc = loc; + + foreach_list_typed (ast_node, ast, link, &this->expressions) { + /* If one of the operands of comma operator does not generate any + * code, we want to emit a warning. At each pass through the loop + * previous_tail_pred will point to the last instruction in the + * stream *before* processing the previous operand. Naturally, + * instructions->tail_pred will point to the last instruction in the + * stream *after* processing the previous operand. If the two + * pointers match, then the previous operand had no effect. 
+ * + * The warning behavior here differs slightly from GCC. GCC will + * only emit a warning if none of the left-hand operands have an + * effect. However, it will emit a warning for each. I believe that + * there are some cases in C (especially with GCC extensions) where + * it is useful to have an intermediate step in a sequence have no + * effect, but I don't think these cases exist in GLSL. Either way, + * it would be a giant hassle to replicate that behavior. + */ + if (previous_tail_pred == instructions->tail_pred) { + _mesa_glsl_warning(&previous_operand_loc, state, + "left-hand operand of comma expression has " + "no effect"); + } + + /* tail_pred is directly accessed instead of using the get_tail() + * method for performance reasons. get_tail() has extra code to + * return NULL when the list is empty. We don't care about that + * here, so using tail_pred directly is fine. + */ + previous_tail_pred = instructions->tail_pred; + previous_operand_loc = ast->get_location(); + + result = ast->hir(instructions, state); + } + + /* Any errors should have already been emitted in the loop above. + */ + error_emitted = true; + break; + } + } + type = NULL; /* use result->type, not type. 
*/ + assert(result != NULL || !needs_rvalue); + + if (result && result->type->is_error() && !error_emitted) + _mesa_glsl_error(& loc, state, "type mismatch"); + + return result; +} + +bool +ast_expression::has_sequence_subexpression() const +{ + switch (this->oper) { + case ast_plus: + case ast_neg: + case ast_bit_not: + case ast_logic_not: + case ast_pre_inc: + case ast_pre_dec: + case ast_post_inc: + case ast_post_dec: + return this->subexpressions[0]->has_sequence_subexpression(); + + case ast_assign: + case ast_add: + case ast_sub: + case ast_mul: + case ast_div: + case ast_mod: + case ast_lshift: + case ast_rshift: + case ast_less: + case ast_greater: + case ast_lequal: + case ast_gequal: + case ast_nequal: + case ast_equal: + case ast_bit_and: + case ast_bit_xor: + case ast_bit_or: + case ast_logic_and: + case ast_logic_or: + case ast_logic_xor: + case ast_array_index: + case ast_mul_assign: + case ast_div_assign: + case ast_add_assign: + case ast_sub_assign: + case ast_mod_assign: + case ast_ls_assign: + case ast_rs_assign: + case ast_and_assign: + case ast_xor_assign: + case ast_or_assign: + return this->subexpressions[0]->has_sequence_subexpression() || + this->subexpressions[1]->has_sequence_subexpression(); + + case ast_conditional: + return this->subexpressions[0]->has_sequence_subexpression() || + this->subexpressions[1]->has_sequence_subexpression() || + this->subexpressions[2]->has_sequence_subexpression(); + + case ast_sequence: + return true; + + case ast_field_selection: + case ast_identifier: + case ast_int_constant: + case ast_uint_constant: + case ast_float_constant: + case ast_bool_constant: + case ast_double_constant: + return false; + + case ast_aggregate: + unreachable("ast_aggregate: Should never get here."); + + case ast_function_call: + unreachable("should be handled by ast_function_expression::hir"); + + case ast_unsized_array_dim: + unreachable("ast_unsized_array_dim: Should never get here."); + } + + return false; +} + +ir_rvalue * 
+ast_expression_statement::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + /* It is possible to have expression statements that don't have an + * expression. This is the solitary semicolon: + * + * for (i = 0; i < 5; i++) + * ; + * + * In this case the expression will be NULL. Test for NULL and don't do + * anything in that case. + */ + if (expression != NULL) + expression->hir_no_rvalue(instructions, state); + + /* Statements do not have r-values. + */ + return NULL; +} + + +ir_rvalue * +ast_compound_statement::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + if (new_scope) + state->symbols->push_scope(); + + foreach_list_typed (ast_node, ast, link, &this->statements) + ast->hir(instructions, state); + + if (new_scope) + state->symbols->pop_scope(); + + /* Compound statements do not have r-values. + */ + return NULL; +} + +/** + * Evaluate the given exec_node (which should be an ast_node representing + * a single array dimension) and return its integer value. + */ +static unsigned +process_array_size(exec_node *node, + struct _mesa_glsl_parse_state *state) +{ + exec_list dummy_instructions; + + ast_node *array_size = exec_node_data(ast_node, node, link); + + /** + * Dimensions other than the outermost dimension can be unsized if they + * are immediately sized by a constructor or initializer. 
+ */ + if (((ast_expression*)array_size)->oper == ast_unsized_array_dim) + return 0; + + ir_rvalue *const ir = array_size->hir(& dummy_instructions, state); + YYLTYPE loc = array_size->get_location(); + + if (ir == NULL) { + _mesa_glsl_error(& loc, state, + "array size could not be resolved"); + return 0; + } + + if (!ir->type->is_integer()) { + _mesa_glsl_error(& loc, state, + "array size must be integer type"); + return 0; + } + + if (!ir->type->is_scalar()) { + _mesa_glsl_error(& loc, state, + "array size must be scalar type"); + return 0; + } + + ir_constant *const size = ir->constant_expression_value(); + if (size == NULL || array_size->has_sequence_subexpression()) { + _mesa_glsl_error(& loc, state, "array size must be a " + "constant valued expression"); + return 0; + } + + if (size->value.i[0] <= 0) { + _mesa_glsl_error(& loc, state, "array size must be > 0"); + return 0; + } + + assert(size->type == ir->type); + + /* If the array size is const (and we've verified that + * it is) then no instructions should have been emitted + * when we converted it to HIR. If they were emitted, + * then either the array size isn't const after all, or + * we are emitting unnecessary instructions. + */ + assert(dummy_instructions.is_empty()); + + return size->value.u[0]; +} + +static const glsl_type * +process_array_type(YYLTYPE *loc, const glsl_type *base, + ast_array_specifier *array_specifier, + struct _mesa_glsl_parse_state *state) +{ + const glsl_type *array_type = base; + + if (array_specifier != NULL) { + if (base->is_array()) { + + /* From page 19 (page 25) of the GLSL 1.20 spec: + * + * "Only one-dimensional arrays may be declared." 
+ */ + if (!state->check_arrays_of_arrays_allowed(loc)) { + return glsl_type::error_type; + } + } + + for (exec_node *node = array_specifier->array_dimensions.tail_pred; + !node->is_head_sentinel(); node = node->prev) { + unsigned array_size = process_array_size(node, state); + array_type = glsl_type::get_array_instance(array_type, array_size); + } + } + + return array_type; +} + +static bool +precision_qualifier_allowed(const glsl_type *type) +{ + /* Precision qualifiers apply to floating point, integer and opaque + * types. + * + * Section 4.5.2 (Precision Qualifiers) of the GLSL 1.30 spec says: + * "Any floating point or any integer declaration can have the type + * preceded by one of these precision qualifiers [...] Literal + * constants do not have precision qualifiers. Neither do Boolean + * variables. + * + * Section 4.5 (Precision and Precision Qualifiers) of the GLSL 1.30 + * spec also says: + * + * "Precision qualifiers are added for code portability with OpenGL + * ES, not for functionality. They have the same syntax as in OpenGL + * ES." + * + * Section 8 (Built-In Functions) of the GLSL ES 1.00 spec says: + * + * "uniform lowp sampler2D sampler; + * highp vec2 coord; + * ... + * lowp vec4 col = texture2D (sampler, coord); + * // texture2D returns lowp" + * + * From this, we infer that GLSL 1.30 (and later) should allow precision + * qualifiers on sampler types just like float and integer types. 
+ */ + return (type->is_float() + || type->is_integer() + || type->contains_opaque()) + && !type->without_array()->is_record(); +} + +const glsl_type * +ast_type_specifier::glsl_type(const char **name, + struct _mesa_glsl_parse_state *state) const +{ + const struct glsl_type *type; + + type = state->symbols->get_type(this->type_name); + *name = this->type_name; + + YYLTYPE loc = this->get_location(); + type = process_array_type(&loc, type, this->array_specifier, state); + + return type; +} + +/** + * From the OpenGL ES 3.0 spec, 4.5.4 Default Precision Qualifiers: + * + * "The precision statement + * + * precision precision-qualifier type; + * + * can be used to establish a default precision qualifier. The type field can + * be either int or float or any of the sampler types, (...) If type is float, + * the directive applies to non-precision-qualified floating point type + * (scalar, vector, and matrix) declarations. If type is int, the directive + * applies to all non-precision-qualified integer type (scalar, vector, signed, + * and unsigned) declarations." + * + * We use the symbol table to keep the values of the default precisions for + * each 'type' in each scope and we use the 'type' string from the precision + * statement as key in the symbol table. When we want to retrieve the default + * precision associated with a given glsl_type we need to know the type string + * associated with it. This is what this function returns. + */ +static const char * +get_type_name_for_precision_qualifier(const glsl_type *type) +{ + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + return "float"; + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + return "int"; + case GLSL_TYPE_ATOMIC_UINT: + return "atomic_uint"; + case GLSL_TYPE_IMAGE: + /* fallthrough */ + case GLSL_TYPE_SAMPLER: { + const unsigned type_idx = + type->sampler_array + 2 * type->sampler_shadow; + const unsigned offset = type->base_type == GLSL_TYPE_SAMPLER ? 
0 : 4; + assert(type_idx < 4); + switch (type->sampler_type) { + case GLSL_TYPE_FLOAT: + switch (type->sampler_dimensionality) { + case GLSL_SAMPLER_DIM_1D: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "sampler1D", "sampler1DArray", + "sampler1DShadow", "sampler1DArrayShadow" + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_2D: { + static const char *const names[8] = { + "sampler2D", "sampler2DArray", + "sampler2DShadow", "sampler2DArrayShadow", + "image2D", "image2DArray", NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_3D: { + static const char *const names[8] = { + "sampler3D", NULL, NULL, NULL, + "image3D", NULL, NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_CUBE: { + static const char *const names[8] = { + "samplerCube", "samplerCubeArray", + "samplerCubeShadow", "samplerCubeArrayShadow", + "imageCube", NULL, NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_MS: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "sampler2DMS", "sampler2DMSArray", NULL, NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_RECT: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "samplerRect", NULL, "samplerRectShadow", NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_BUF: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "samplerBuffer", NULL, NULL, NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_EXTERNAL: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "samplerExternalOES", NULL, NULL, NULL + }; + return names[type_idx]; + } + default: + unreachable("Unsupported sampler/image dimensionality"); + } /* sampler/image float dimensionality */ + break; + case GLSL_TYPE_INT: + switch (type->sampler_dimensionality) { + case 
GLSL_SAMPLER_DIM_1D: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "isampler1D", "isampler1DArray", NULL, NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_2D: { + static const char *const names[8] = { + "isampler2D", "isampler2DArray", NULL, NULL, + "iimage2D", "iimage2DArray", NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_3D: { + static const char *const names[8] = { + "isampler3D", NULL, NULL, NULL, + "iimage3D", NULL, NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_CUBE: { + static const char *const names[8] = { + "isamplerCube", "isamplerCubeArray", NULL, NULL, + "iimageCube", NULL, NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_MS: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "isampler2DMS", "isampler2DMSArray", NULL, NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_RECT: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "isamplerRect", NULL, "isamplerRectShadow", NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_BUF: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "isamplerBuffer", NULL, NULL, NULL + }; + return names[type_idx]; + } + default: + unreachable("Unsupported isampler/iimage dimensionality"); + } /* sampler/image int dimensionality */ + break; + case GLSL_TYPE_UINT: + switch (type->sampler_dimensionality) { + case GLSL_SAMPLER_DIM_1D: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "usampler1D", "usampler1DArray", NULL, NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_2D: { + static const char *const names[8] = { + "usampler2D", "usampler2DArray", NULL, NULL, + "uimage2D", "uimage2DArray", NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_3D: { + 
static const char *const names[8] = { + "usampler3D", NULL, NULL, NULL, + "uimage3D", NULL, NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_CUBE: { + static const char *const names[8] = { + "usamplerCube", "usamplerCubeArray", NULL, NULL, + "uimageCube", NULL, NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_MS: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "usampler2DMS", "usampler2DMSArray", NULL, NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_RECT: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "usamplerRect", NULL, "usamplerRectShadow", NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_BUF: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "usamplerBuffer", NULL, NULL, NULL + }; + return names[type_idx]; + } + default: + unreachable("Unsupported usampler/uimage dimensionality"); + } /* sampler/image uint dimensionality */ + break; + default: + unreachable("Unsupported sampler/image type"); + } /* sampler/image type */ + break; + } /* GLSL_TYPE_SAMPLER/GLSL_TYPE_IMAGE */ + break; + default: + unreachable("Unsupported type"); + } /* base type */ +} + +static unsigned +select_gles_precision(unsigned qual_precision, + const glsl_type *type, + struct _mesa_glsl_parse_state *state, YYLTYPE *loc) +{ + /* Precision qualifiers do not have any meaning in Desktop GLSL. + * In GLES we take the precision from the type qualifier if present, + * otherwise, if the type of the variable allows precision qualifiers at + * all, we look for the default precision qualifier for that type in the + * current scope. 
+ */ + assert(state->es_shader); + + unsigned precision = GLSL_PRECISION_NONE; + if (qual_precision) { + precision = qual_precision; + } else if (precision_qualifier_allowed(type)) { + const char *type_name = + get_type_name_for_precision_qualifier(type->without_array()); + assert(type_name != NULL); + + precision = + state->symbols->get_default_precision_qualifier(type_name); + if (precision == ast_precision_none) { + _mesa_glsl_error(loc, state, + "No precision specified in this scope for type `%s'", + type->name); + } + } + return precision; +} + +const glsl_type * +ast_fully_specified_type::glsl_type(const char **name, + struct _mesa_glsl_parse_state *state) const +{ + return this->specifier->glsl_type(name, state); +} + +/** + * Determine whether a toplevel variable declaration declares a varying. This + * function operates by examining the variable's mode and the shader target, + * so it correctly identifies linkage variables regardless of whether they are + * declared using the deprecated "varying" syntax or the new "in/out" syntax. + * + * Passing a non-toplevel variable declaration (e.g. a function parameter) to + * this function will produce undefined results. + */ +static bool +is_varying_var(ir_variable *var, gl_shader_stage target) +{ + switch (target) { + case MESA_SHADER_VERTEX: + return var->data.mode == ir_var_shader_out; + case MESA_SHADER_FRAGMENT: + return var->data.mode == ir_var_shader_in; + default: + return var->data.mode == ir_var_shader_out || var->data.mode == ir_var_shader_in; + } +} + + +/** + * Matrix layout qualifiers are only allowed on certain types + */ +static void +validate_matrix_layout_for_type(struct _mesa_glsl_parse_state *state, + YYLTYPE *loc, + const glsl_type *type, + ir_variable *var) +{ + if (var && !var->is_in_buffer_block()) { + /* Layout qualifiers may only apply to interface blocks and fields in + * them. 
+ */ + _mesa_glsl_error(loc, state, + "uniform block layout qualifiers row_major and " + "column_major may not be applied to variables " + "outside of uniform blocks"); + } else if (!type->without_array()->is_matrix()) { + /* The OpenGL ES 3.0 conformance tests did not originally allow + * matrix layout qualifiers on non-matrices. However, the OpenGL + * 4.4 and OpenGL ES 3.0 (revision TBD) specifications were + * amended to specifically allow these layouts on all types. Emit + * a warning so that people know their code may not be portable. + */ + _mesa_glsl_warning(loc, state, + "uniform block layout qualifiers row_major and " + "column_major applied to non-matrix types may " + "be rejected by older compilers"); + } +} + +static bool +process_qualifier_constant(struct _mesa_glsl_parse_state *state, + YYLTYPE *loc, + const char *qual_indentifier, + ast_expression *const_expression, + unsigned *value) +{ + exec_list dummy_instructions; + + if (const_expression == NULL) { + *value = 0; + return true; + } + + ir_rvalue *const ir = const_expression->hir(&dummy_instructions, state); + + ir_constant *const const_int = ir->constant_expression_value(); + if (const_int == NULL || !const_int->type->is_integer()) { + _mesa_glsl_error(loc, state, "%s must be an integral constant " + "expression", qual_indentifier); + return false; + } + + if (const_int->value.i[0] < 0) { + _mesa_glsl_error(loc, state, "%s layout qualifier is invalid (%d < 0)", + qual_indentifier, const_int->value.u[0]); + return false; + } + + /* If the location is const (and we've verified that + * it is) then no instructions should have been emitted + * when we converted it to HIR. If they were emitted, + * then either the location isn't const after all, or + * we are emitting unnecessary instructions. 
+ */ + assert(dummy_instructions.is_empty()); + + *value = const_int->value.u[0]; + return true; +} + +static bool +validate_stream_qualifier(YYLTYPE *loc, struct _mesa_glsl_parse_state *state, + unsigned stream) +{ + if (stream >= state->ctx->Const.MaxVertexStreams) { + _mesa_glsl_error(loc, state, + "invalid stream specified %d is larger than " + "MAX_VERTEX_STREAMS - 1 (%d).", + stream, state->ctx->Const.MaxVertexStreams - 1); + return false; + } + + return true; +} + +static void +apply_explicit_binding(struct _mesa_glsl_parse_state *state, + YYLTYPE *loc, + ir_variable *var, + const glsl_type *type, + const ast_type_qualifier *qual) +{ + if (!qual->flags.q.uniform && !qual->flags.q.buffer) { + _mesa_glsl_error(loc, state, + "the \"binding\" qualifier only applies to uniforms and " + "shader storage buffer objects"); + return; + } + + unsigned qual_binding; + if (!process_qualifier_constant(state, loc, "binding", qual->binding, + &qual_binding)) { + return; + } + + const struct gl_context *const ctx = state->ctx; + unsigned elements = type->is_array() ? type->arrays_of_arrays_size() : 1; + unsigned max_index = qual_binding + elements - 1; + const glsl_type *base_type = type->without_array(); + + if (base_type->is_interface()) { + /* UBOs. From page 60 of the GLSL 4.20 specification: + * "If the binding point for any uniform block instance is less than zero, + * or greater than or equal to the implementation-dependent maximum + * number of uniform buffer bindings, a compilation error will occur. + * When the binding identifier is used with a uniform block instanced as + * an array of size N, all elements of the array from binding through + * binding + N – 1 must be within this range." + * + * The implementation-dependent maximum is GL_MAX_UNIFORM_BUFFER_BINDINGS. 
+ */ + if (qual->flags.q.uniform && + max_index >= ctx->Const.MaxUniformBufferBindings) { + _mesa_glsl_error(loc, state, "layout(binding = %u) for %d UBOs exceeds " + "the maximum number of UBO binding points (%d)", + qual_binding, elements, + ctx->Const.MaxUniformBufferBindings); + return; + } + + /* SSBOs. From page 67 of the GLSL 4.30 specification: + * "If the binding point for any uniform or shader storage block instance + * is less than zero, or greater than or equal to the + * implementation-dependent maximum number of uniform buffer bindings, a + * compile-time error will occur. When the binding identifier is used + * with a uniform or shader storage block instanced as an array of size + * N, all elements of the array from binding through binding + N – 1 must + * be within this range." + */ + if (qual->flags.q.buffer && + max_index >= ctx->Const.MaxShaderStorageBufferBindings) { + _mesa_glsl_error(loc, state, "layout(binding = %u) for %d SSBOs exceeds " + "the maximum number of SSBO binding points (%d)", + qual_binding, elements, + ctx->Const.MaxShaderStorageBufferBindings); + return; + } + } else if (base_type->is_sampler()) { + /* Samplers. From page 63 of the GLSL 4.20 specification: + * "If the binding is less than zero, or greater than or equal to the + * implementation-dependent maximum supported number of units, a + * compilation error will occur. When the binding identifier is used + * with an array of size N, all elements of the array from binding + * through binding + N - 1 must be within this range." 
+ */ + unsigned limit = ctx->Const.MaxCombinedTextureImageUnits; + + if (max_index >= limit) { + /* qual_binding and elements are unsigned, hence %u. */ + _mesa_glsl_error(loc, state, "layout(binding = %u) for %u samplers " + "exceeds the maximum number of texture image units " + "(%u)", qual_binding, elements, limit); + + return; + } + } else if (base_type->contains_atomic()) { + assert(ctx->Const.MaxAtomicBufferBindings <= MAX_COMBINED_ATOMIC_BUFFERS); + if (qual_binding >= ctx->Const.MaxAtomicBufferBindings) { + _mesa_glsl_error(loc, state, "layout(binding = %u) exceeds the " + "maximum number of atomic counter buffer bindings " + "(%u)", qual_binding, + ctx->Const.MaxAtomicBufferBindings); + + return; + } + } else if ((state->is_version(420, 310) || + state->ARB_shading_language_420pack_enable) && + base_type->is_image()) { + assert(ctx->Const.MaxImageUnits <= MAX_IMAGE_UNITS); + if (max_index >= ctx->Const.MaxImageUnits) { + _mesa_glsl_error(loc, state, "Image binding %u exceeds the " + "maximum number of image units (%d)", max_index, + ctx->Const.MaxImageUnits); + return; + } + + } else { + _mesa_glsl_error(loc, state, + "the \"binding\" qualifier only applies to uniform " + "blocks, opaque variables, or arrays thereof"); + return; + } + + var->data.explicit_binding = true; + var->data.binding = qual_binding; + + return; +} + + +static glsl_interp_qualifier +interpret_interpolation_qualifier(const struct ast_type_qualifier *qual, + ir_variable_mode mode, + struct _mesa_glsl_parse_state *state, + YYLTYPE *loc) +{ + glsl_interp_qualifier interpolation; + if (qual->flags.q.flat) + interpolation = INTERP_QUALIFIER_FLAT; + else if (qual->flags.q.noperspective) + interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + else if (qual->flags.q.smooth) + interpolation = INTERP_QUALIFIER_SMOOTH; + else + interpolation = INTERP_QUALIFIER_NONE; + + if (interpolation != INTERP_QUALIFIER_NONE) { + if (mode != ir_var_shader_in && mode != ir_var_shader_out) { + _mesa_glsl_error(loc, state, + "interpolation qualifier `%s' can only be 
applied to " + "shader inputs or outputs.", + interpolation_string(interpolation)); + + } + + if ((state->stage == MESA_SHADER_VERTEX && mode == ir_var_shader_in) || + (state->stage == MESA_SHADER_FRAGMENT && mode == ir_var_shader_out)) { + _mesa_glsl_error(loc, state, + "interpolation qualifier `%s' cannot be applied to " + "vertex shader inputs or fragment shader outputs", + interpolation_string(interpolation)); + } + } + + return interpolation; +} + + +static void +apply_explicit_location(const struct ast_type_qualifier *qual, + ir_variable *var, + struct _mesa_glsl_parse_state *state, + YYLTYPE *loc) +{ + bool fail = false; + + unsigned qual_location; + if (!process_qualifier_constant(state, loc, "location", qual->location, + &qual_location)) { + return; + } + + /* Checks for GL_ARB_explicit_uniform_location. */ + if (qual->flags.q.uniform) { + if (!state->check_explicit_uniform_location_allowed(loc, var)) + return; + + const struct gl_context *const ctx = state->ctx; + unsigned max_loc = qual_location + var->type->uniform_locations() - 1; + + if (max_loc >= ctx->Const.MaxUserAssignableUniformLocations) { + _mesa_glsl_error(loc, state, "location(s) consumed by uniform %s " + ">= MAX_UNIFORM_LOCATIONS (%u)", var->name, + ctx->Const.MaxUserAssignableUniformLocations); + return; + } + + var->data.explicit_location = true; + var->data.location = qual_location; + return; + } + + /* Between GL_ARB_explicit_attrib_location an + * GL_ARB_separate_shader_objects, the inputs and outputs of any shader + * stage can be assigned explicit locations. 
The checking here associates + * the correct extension with the correct stage's input / output: + * + * input output + * ----- ------ + * vertex explicit_loc sso + * tess control sso sso + * tess eval sso sso + * geometry sso sso + * fragment sso explicit_loc + */ + switch (state->stage) { + case MESA_SHADER_VERTEX: + if (var->data.mode == ir_var_shader_in) { + if (!state->check_explicit_attrib_location_allowed(loc, var)) + return; + + break; + } + + if (var->data.mode == ir_var_shader_out) { + if (!state->check_separate_shader_objects_allowed(loc, var)) + return; + + break; + } + + fail = true; + break; + + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: + case MESA_SHADER_GEOMETRY: + if (var->data.mode == ir_var_shader_in || var->data.mode == ir_var_shader_out) { + if (!state->check_separate_shader_objects_allowed(loc, var)) + return; + + break; + } + + fail = true; + break; + + case MESA_SHADER_FRAGMENT: + if (var->data.mode == ir_var_shader_in) { + if (!state->check_separate_shader_objects_allowed(loc, var)) + return; + + break; + } + + if (var->data.mode == ir_var_shader_out) { + if (!state->check_explicit_attrib_location_allowed(loc, var)) + return; + + break; + } + + fail = true; + break; + + case MESA_SHADER_COMPUTE: + _mesa_glsl_error(loc, state, + "compute shader variables cannot be given " + "explicit locations"); + return; + }; + + if (fail) { + _mesa_glsl_error(loc, state, + "%s cannot be given an explicit location in %s shader", + mode_string(var), + _mesa_shader_stage_to_string(state->stage)); + } else { + var->data.explicit_location = true; + + switch (state->stage) { + case MESA_SHADER_VERTEX: + var->data.location = (var->data.mode == ir_var_shader_in) + ? 
(qual_location + VERT_ATTRIB_GENERIC0) + : (qual_location + VARYING_SLOT_VAR0); + break; + + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: + case MESA_SHADER_GEOMETRY: + if (var->data.patch) + var->data.location = qual_location + VARYING_SLOT_PATCH0; + else + var->data.location = qual_location + VARYING_SLOT_VAR0; + break; + + case MESA_SHADER_FRAGMENT: + var->data.location = (var->data.mode == ir_var_shader_out) + ? (qual_location + FRAG_RESULT_DATA0) + : (qual_location + VARYING_SLOT_VAR0); + break; + case MESA_SHADER_COMPUTE: + assert(!"Unexpected shader type"); + break; + } + + /* Check if index was set for the uniform instead of the function */ + if (qual->flags.q.explicit_index && qual->flags.q.subroutine) { + _mesa_glsl_error(loc, state, "an index qualifier can only be " + "used with subroutine functions"); + return; + } + + unsigned qual_index; + if (qual->flags.q.explicit_index && + process_qualifier_constant(state, loc, "index", qual->index, + &qual_index)) { + /* From the GLSL 4.30 specification, section 4.4.2 (Output + * Layout Qualifiers): + * + * "It is also a compile-time error if a fragment shader + * sets a layout index to less than 0 or greater than 1." + * + * Older specifications don't mandate a behavior; we take + * this as a clarification and always generate the error. 
+ */ + if (qual_index > 1) { + _mesa_glsl_error(loc, state, + "explicit index may only be 0 or 1"); + } else { + var->data.explicit_index = true; + var->data.index = qual_index; + } + } + } +} + +static void +apply_image_qualifier_to_variable(const struct ast_type_qualifier *qual, + ir_variable *var, + struct _mesa_glsl_parse_state *state, + YYLTYPE *loc) +{ + const glsl_type *base_type = var->type->without_array(); + + if (base_type->is_image()) { + if (var->data.mode != ir_var_uniform && + var->data.mode != ir_var_function_in) { + _mesa_glsl_error(loc, state, "image variables may only be declared as " + "function parameters or uniform-qualified " + "global variables"); + } + + var->data.image_read_only |= qual->flags.q.read_only; + var->data.image_write_only |= qual->flags.q.write_only; + var->data.image_coherent |= qual->flags.q.coherent; + var->data.image_volatile |= qual->flags.q._volatile; + var->data.image_restrict |= qual->flags.q.restrict_flag; + var->data.read_only = true; + + if (qual->flags.q.explicit_image_format) { + if (var->data.mode == ir_var_function_in) { + _mesa_glsl_error(loc, state, "format qualifiers cannot be " + "used on image function parameters"); + } + + if (qual->image_base_type != base_type->sampler_type) { + _mesa_glsl_error(loc, state, "format qualifier doesn't match the " + "base data type of the image"); + } + + var->data.image_format = qual->image_format; + } else { + if (var->data.mode == ir_var_uniform) { + if (state->es_shader) { + _mesa_glsl_error(loc, state, "all image uniforms " + "must have a format layout qualifier"); + + } else if (!qual->flags.q.write_only) { + _mesa_glsl_error(loc, state, "image uniforms not qualified with " + "`writeonly' must have a format layout " + "qualifier"); + } + } + + var->data.image_format = GL_NONE; + } + + /* From page 70 of the GLSL ES 3.1 specification: + * + * "Except for image variables qualified with the format qualifiers + * r32f, r32i, and r32ui, image variables must specify either 
memory + * qualifier readonly or the memory qualifier writeonly." + */ + if (state->es_shader && + var->data.image_format != GL_R32F && + var->data.image_format != GL_R32I && + var->data.image_format != GL_R32UI && + !var->data.image_read_only && + !var->data.image_write_only) { + _mesa_glsl_error(loc, state, "image variables of format other than " + "r32f, r32i or r32ui must be qualified `readonly' or " + "`writeonly'"); + } + + } else if (qual->flags.q.read_only || + qual->flags.q.write_only || + qual->flags.q.coherent || + qual->flags.q._volatile || + qual->flags.q.restrict_flag || + qual->flags.q.explicit_image_format) { + _mesa_glsl_error(loc, state, "memory qualifiers may only be applied to " + "images"); + } +} + +static inline const char* +get_layout_qualifier_string(bool origin_upper_left, bool pixel_center_integer) +{ + if (origin_upper_left && pixel_center_integer) + return "origin_upper_left, pixel_center_integer"; + else if (origin_upper_left) + return "origin_upper_left"; + else if (pixel_center_integer) + return "pixel_center_integer"; + else + return " "; +} + +static inline bool +is_conflicting_fragcoord_redeclaration(struct _mesa_glsl_parse_state *state, + const struct ast_type_qualifier *qual) +{ + /* If gl_FragCoord was previously declared, and the qualifiers were + * different in any way, return true. 
+ */ + if (state->fs_redeclares_gl_fragcoord) { + return (state->fs_pixel_center_integer != qual->flags.q.pixel_center_integer + || state->fs_origin_upper_left != qual->flags.q.origin_upper_left); + } + + return false; +} + +static inline void +validate_array_dimensions(const glsl_type *t, + struct _mesa_glsl_parse_state *state, + YYLTYPE *loc) { + if (t->is_array()) { + t = t->fields.array; + while (t->is_array()) { + if (t->is_unsized_array()) { + _mesa_glsl_error(loc, state, + "only the outermost array dimension can " + "be unsized", + t->name); + break; + } + t = t->fields.array; + } + } +} + +static void +apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual, + ir_variable *var, + struct _mesa_glsl_parse_state *state, + YYLTYPE *loc) +{ + if (var->name != NULL && strcmp(var->name, "gl_FragCoord") == 0) { + + /* Section 4.3.8.1, page 39 of GLSL 1.50 spec says: + * + * "Within any shader, the first redeclarations of gl_FragCoord + * must appear before any use of gl_FragCoord." + * + * Generate a compiler error if above condition is not met by the + * fragment shader. + */ + ir_variable *earlier = state->symbols->get_variable("gl_FragCoord"); + if (earlier != NULL && + earlier->data.used && + !state->fs_redeclares_gl_fragcoord) { + _mesa_glsl_error(loc, state, + "gl_FragCoord used before its first redeclaration " + "in fragment shader"); + } + + /* Make sure all gl_FragCoord redeclarations specify the same layout + * qualifiers. 
+ */ + if (is_conflicting_fragcoord_redeclaration(state, qual)) { + const char *const qual_string = + get_layout_qualifier_string(qual->flags.q.origin_upper_left, + qual->flags.q.pixel_center_integer); + + const char *const state_string = + get_layout_qualifier_string(state->fs_origin_upper_left, + state->fs_pixel_center_integer); + + _mesa_glsl_error(loc, state, + "gl_FragCoord redeclared with different layout " + "qualifiers (%s) and (%s) ", + state_string, + qual_string); + } + state->fs_origin_upper_left = qual->flags.q.origin_upper_left; + state->fs_pixel_center_integer = qual->flags.q.pixel_center_integer; + state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers = + !qual->flags.q.origin_upper_left && !qual->flags.q.pixel_center_integer; + state->fs_redeclares_gl_fragcoord = + state->fs_origin_upper_left || + state->fs_pixel_center_integer || + state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers; + } + + var->data.pixel_center_integer = qual->flags.q.pixel_center_integer; + var->data.origin_upper_left = qual->flags.q.origin_upper_left; + if ((qual->flags.q.origin_upper_left || qual->flags.q.pixel_center_integer) + && (strcmp(var->name, "gl_FragCoord") != 0)) { + const char *const qual_string = (qual->flags.q.origin_upper_left) + ? 
"origin_upper_left" : "pixel_center_integer"; + + _mesa_glsl_error(loc, state, + "layout qualifier `%s' can only be applied to " + "fragment shader input `gl_FragCoord'", + qual_string); + } + + if (qual->flags.q.explicit_location) { + apply_explicit_location(qual, var, state, loc); + } else if (qual->flags.q.explicit_index) { + if (!qual->flags.q.subroutine_def) + _mesa_glsl_error(loc, state, + "explicit index requires explicit location"); + } + + if (qual->flags.q.explicit_binding) { + apply_explicit_binding(state, loc, var, var->type, qual); + } + + if (state->stage == MESA_SHADER_GEOMETRY && + qual->flags.q.out && qual->flags.q.stream) { + unsigned qual_stream; + if (process_qualifier_constant(state, loc, "stream", qual->stream, + &qual_stream) && + validate_stream_qualifier(loc, state, qual_stream)) { + var->data.stream = qual_stream; + } + } + + if (var->type->contains_atomic()) { + if (var->data.mode == ir_var_uniform) { + if (var->data.explicit_binding) { + unsigned *offset = + &state->atomic_counter_offsets[var->data.binding]; + + if (*offset % ATOMIC_COUNTER_SIZE) + _mesa_glsl_error(loc, state, + "misaligned atomic counter offset"); + + var->data.offset = *offset; + *offset += var->type->atomic_size(); + + } else { + _mesa_glsl_error(loc, state, + "atomic counters require explicit binding point"); + } + } else if (var->data.mode != ir_var_function_in) { + _mesa_glsl_error(loc, state, "atomic counters may only be declared as " + "function parameters or uniform-qualified " + "global variables"); + } + } + + /* Is the 'layout' keyword used with parameters that allow relaxed checking. + * Many implementations of GL_ARB_fragment_coord_conventions_enable and some + * implementations (only Mesa?) GL_ARB_explicit_attrib_location_enable + * allowed the layout qualifier to be used with 'varying' and 'attribute'. + * These extensions and all following extensions that add the 'layout' + * keyword have been modified to require the use of 'in' or 'out'. 
+ * + * The following extension do not allow the deprecated keywords: + * + * GL_AMD_conservative_depth + * GL_ARB_conservative_depth + * GL_ARB_gpu_shader5 + * GL_ARB_separate_shader_objects + * GL_ARB_tessellation_shader + * GL_ARB_transform_feedback3 + * GL_ARB_uniform_buffer_object + * + * It is unknown whether GL_EXT_shader_image_load_store or GL_NV_gpu_shader5 + * allow layout with the deprecated keywords. + */ + const bool relaxed_layout_qualifier_checking = + state->ARB_fragment_coord_conventions_enable; + + const bool uses_deprecated_qualifier = qual->flags.q.attribute + || qual->flags.q.varying; + if (qual->has_layout() && uses_deprecated_qualifier) { + if (relaxed_layout_qualifier_checking) { + _mesa_glsl_warning(loc, state, + "`layout' qualifier may not be used with " + "`attribute' or `varying'"); + } else { + _mesa_glsl_error(loc, state, + "`layout' qualifier may not be used with " + "`attribute' or `varying'"); + } + } + + /* Layout qualifiers for gl_FragDepth, which are enabled by extension + * AMD_conservative_depth. 
+ */ + int depth_layout_count = qual->flags.q.depth_any + + qual->flags.q.depth_greater + + qual->flags.q.depth_less + + qual->flags.q.depth_unchanged; + if (depth_layout_count > 0 + && !state->AMD_conservative_depth_enable + && !state->ARB_conservative_depth_enable) { + _mesa_glsl_error(loc, state, + "extension GL_AMD_conservative_depth or " + "GL_ARB_conservative_depth must be enabled " + "to use depth layout qualifiers"); + } else if (depth_layout_count > 0 + && strcmp(var->name, "gl_FragDepth") != 0) { + _mesa_glsl_error(loc, state, + "depth layout qualifiers can be applied only to " + "gl_FragDepth"); + } else if (depth_layout_count > 1 + && strcmp(var->name, "gl_FragDepth") == 0) { + _mesa_glsl_error(loc, state, + "at most one depth layout qualifier can be applied to " + "gl_FragDepth"); + } + if (qual->flags.q.depth_any) + var->data.depth_layout = ir_depth_layout_any; + else if (qual->flags.q.depth_greater) + var->data.depth_layout = ir_depth_layout_greater; + else if (qual->flags.q.depth_less) + var->data.depth_layout = ir_depth_layout_less; + else if (qual->flags.q.depth_unchanged) + var->data.depth_layout = ir_depth_layout_unchanged; + else + var->data.depth_layout = ir_depth_layout_none; + + if (qual->flags.q.std140 || + qual->flags.q.std430 || + qual->flags.q.packed || + qual->flags.q.shared) { + _mesa_glsl_error(loc, state, + "uniform and shader storage block layout qualifiers " + "std140, std430, packed, and shared can only be " + "applied to uniform or shader storage blocks, not " + "members"); + } + + if (qual->flags.q.row_major || qual->flags.q.column_major) { + validate_matrix_layout_for_type(state, loc, var->type, var); + } + + /* From section 4.4.1.3 of the GLSL 4.50 specification (Fragment Shader + * Inputs): + * + * "Fragment shaders also allow the following layout qualifier on in only + * (not with variable declarations) + * layout-qualifier-id + * early_fragment_tests + * [...]" + */ + if (qual->flags.q.early_fragment_tests) { + 
_mesa_glsl_error(loc, state, "early_fragment_tests layout qualifier only " + "valid in fragment shader input layout declaration."); + } +} + +static void +apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, + ir_variable *var, + struct _mesa_glsl_parse_state *state, + YYLTYPE *loc, + bool is_parameter) +{ + STATIC_ASSERT(sizeof(qual->flags.q) <= sizeof(qual->flags.i)); + + if (qual->flags.q.invariant) { + if (var->data.used) { + _mesa_glsl_error(loc, state, + "variable `%s' may not be redeclared " + "`invariant' after being used", + var->name); + } else { + var->data.invariant = 1; + } + } + + if (qual->flags.q.precise) { + if (var->data.used) { + _mesa_glsl_error(loc, state, + "variable `%s' may not be redeclared " + "`precise' after being used", + var->name); + } else { + var->data.precise = 1; + } + } + + if (qual->flags.q.subroutine && !qual->flags.q.uniform) { + _mesa_glsl_error(loc, state, + "`subroutine' may only be applied to uniforms, " + "subroutine type declarations, or function definitions"); + } + + if (qual->flags.q.constant || qual->flags.q.attribute + || qual->flags.q.uniform + || (qual->flags.q.varying && (state->stage == MESA_SHADER_FRAGMENT))) + var->data.read_only = 1; + + if (qual->flags.q.centroid) + var->data.centroid = 1; + + if (qual->flags.q.sample) + var->data.sample = 1; + + /* Precision qualifiers do not hold any meaning in Desktop GLSL */ + if (state->es_shader) { + var->data.precision = + select_gles_precision(qual->precision, var->type, state, loc); + } + + if (qual->flags.q.patch) + var->data.patch = 1; + + if (qual->flags.q.attribute && state->stage != MESA_SHADER_VERTEX) { + var->type = glsl_type::error_type; + _mesa_glsl_error(loc, state, + "`attribute' variables may not be declared in the " + "%s shader", + _mesa_shader_stage_to_string(state->stage)); + } + + /* Disallow layout qualifiers which may only appear on layout declarations. 
*/ + if (qual->flags.q.prim_type) { + _mesa_glsl_error(loc, state, + "Primitive type may only be specified on GS input or output " + "layout declaration, not on variables."); + } + + /* Section 6.1.1 (Function Calling Conventions) of the GLSL 1.10 spec says: + * + * "However, the const qualifier cannot be used with out or inout." + * + * The same section of the GLSL 4.40 spec further clarifies this saying: + * + * "The const qualifier cannot be used with out or inout, or a + * compile-time error results." + */ + if (is_parameter && qual->flags.q.constant && qual->flags.q.out) { + _mesa_glsl_error(loc, state, + "`const' may not be applied to `out' or `inout' " + "function parameters"); + } + + /* If there is no qualifier that changes the mode of the variable, leave + * the setting alone. + */ + assert(var->data.mode != ir_var_temporary); + if (qual->flags.q.in && qual->flags.q.out) + var->data.mode = ir_var_function_inout; + else if (qual->flags.q.in) + var->data.mode = is_parameter ? ir_var_function_in : ir_var_shader_in; + else if (qual->flags.q.attribute + || (qual->flags.q.varying && (state->stage == MESA_SHADER_FRAGMENT))) + var->data.mode = ir_var_shader_in; + else if (qual->flags.q.out) + var->data.mode = is_parameter ? ir_var_function_out : ir_var_shader_out; + else if (qual->flags.q.varying && (state->stage == MESA_SHADER_VERTEX)) + var->data.mode = ir_var_shader_out; + else if (qual->flags.q.uniform) + var->data.mode = ir_var_uniform; + else if (qual->flags.q.buffer) + var->data.mode = ir_var_shader_storage; + else if (qual->flags.q.shared_storage) + var->data.mode = ir_var_shader_shared; + + if (!is_parameter && is_varying_var(var, state->stage)) { + /* User-defined ins/outs are not permitted in compute shaders. 
*/ + if (state->stage == MESA_SHADER_COMPUTE) { + _mesa_glsl_error(loc, state, + "user-defined input and output variables are not " + "permitted in compute shaders"); + } + + /* This variable is being used to link data between shader stages (in + * pre-glsl-1.30 parlance, it's a "varying"). Check that it has a type + * that is allowed for such purposes. + * + * From page 25 (page 31 of the PDF) of the GLSL 1.10 spec: + * + * "The varying qualifier can be used only with the data types + * float, vec2, vec3, vec4, mat2, mat3, and mat4, or arrays of + * these." + * + * This was relaxed in GLSL version 1.30 and GLSL ES version 3.00. From + * page 31 (page 37 of the PDF) of the GLSL 1.30 spec: + * + * "Fragment inputs can only be signed and unsigned integers and + * integer vectors, float, floating-point vectors, matrices, or + * arrays of these. Structures cannot be input. + * + * Similar text exists in the section on vertex shader outputs. + * + * Similar text exists in the GLSL ES 3.00 spec, except that the GLSL ES + * 3.00 spec allows structs as well. Varying structs are also allowed + * in GLSL 1.50. 
+ */ + switch (var->type->get_scalar_type()->base_type) { + case GLSL_TYPE_FLOAT: + /* Ok in all GLSL versions */ + break; + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + if (state->is_version(130, 300)) + break; + _mesa_glsl_error(loc, state, + "varying variables must be of base type float in %s", + state->get_version_string()); + break; + case GLSL_TYPE_STRUCT: + if (state->is_version(150, 300)) + break; + _mesa_glsl_error(loc, state, + "varying variables may not be of type struct"); + break; + case GLSL_TYPE_DOUBLE: + break; + default: + _mesa_glsl_error(loc, state, "illegal type for a varying variable"); + break; + } + } + + if (state->all_invariant && (state->current_function == NULL)) { + switch (state->stage) { + case MESA_SHADER_VERTEX: + if (var->data.mode == ir_var_shader_out) + var->data.invariant = true; + break; + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: + case MESA_SHADER_GEOMETRY: + if ((var->data.mode == ir_var_shader_in) + || (var->data.mode == ir_var_shader_out)) + var->data.invariant = true; + break; + case MESA_SHADER_FRAGMENT: + if (var->data.mode == ir_var_shader_in) + var->data.invariant = true; + break; + case MESA_SHADER_COMPUTE: + /* Invariance isn't meaningful in compute shaders. */ + break; + } + } + + var->data.interpolation = + interpret_interpolation_qualifier(qual, (ir_variable_mode) var->data.mode, + state, loc); + + /* Does the declaration use the deprecated 'attribute' or 'varying' + * keywords? + */ + const bool uses_deprecated_qualifier = qual->flags.q.attribute + || qual->flags.q.varying; + + + /* Validate auxiliary storage qualifiers */ + + /* From section 4.3.4 of the GLSL 1.30 spec: + * "It is an error to use centroid in in a vertex shader." + * + * From section 4.3.4 of the GLSL ES 3.00 spec: + * "It is an error to use centroid in or interpolation qualifiers in + * a vertex shader input." 
+ */ + + /* Section 4.3.6 of the GLSL 1.30 specification states: + * "It is an error to use centroid out in a fragment shader." + * + * The GL_ARB_shading_language_420pack extension specification states: + * "It is an error to use auxiliary storage qualifiers or interpolation + * qualifiers on an output in a fragment shader." + */ + if (qual->flags.q.sample && (!is_varying_var(var, state->stage) || uses_deprecated_qualifier)) { + _mesa_glsl_error(loc, state, + "sample qualifier may only be used on `in` or `out` " + "variables between shader stages"); + } + if (qual->flags.q.centroid && !is_varying_var(var, state->stage)) { + _mesa_glsl_error(loc, state, + "centroid qualifier may only be used with `in', " + "`out' or `varying' variables between shader stages"); + } + + if (qual->flags.q.shared_storage && state->stage != MESA_SHADER_COMPUTE) { + _mesa_glsl_error(loc, state, + "the shared storage qualifiers can only be used with " + "compute shaders"); + } + + apply_image_qualifier_to_variable(qual, var, state, loc); +} + +/** + * Get the variable that is being redeclared by this declaration + * + * Semantic checks to verify the validity of the redeclaration are also + * performed. If semantic checks fail, compilation error will be emitted via + * \c _mesa_glsl_error, but a non-\c NULL pointer will still be returned. + * + * \returns + * A pointer to an existing variable in the current scope if the declaration + * is a redeclaration, \c NULL otherwise. + */ +static ir_variable * +get_variable_being_redeclared(ir_variable *var, YYLTYPE loc, + struct _mesa_glsl_parse_state *state, + bool allow_all_redeclarations) +{ + /* Check if this declaration is actually a re-declaration, either to + * resize an array or add qualifiers to an existing variable. + * + * This is allowed for variables in the current scope, or when at + * global scope (for built-ins in the implicit outer scope). 
+ */ + ir_variable *earlier = state->symbols->get_variable(var->name); + if (earlier == NULL || + (state->current_function != NULL && + !state->symbols->name_declared_this_scope(var->name))) { + return NULL; + } + + + /* From page 24 (page 30 of the PDF) of the GLSL 1.50 spec, + * + * "It is legal to declare an array without a size and then + * later re-declare the same name as an array of the same + * type and specify a size." + */ + if (earlier->type->is_unsized_array() && var->type->is_array() + && (var->type->fields.array == earlier->type->fields.array)) { + /* FINISHME: This doesn't match the qualifiers on the two + * FINISHME: declarations. It's not 100% clear whether this is + * FINISHME: required or not. + */ + + const unsigned size = unsigned(var->type->array_size()); + check_builtin_array_max_size(var->name, size, loc, state); + if ((size > 0) && (size <= earlier->data.max_array_access)) { + _mesa_glsl_error(& loc, state, "array size must be > %u due to " + "previous access", + earlier->data.max_array_access); + } + + earlier->type = var->type; + delete var; + var = NULL; + } else if ((state->ARB_fragment_coord_conventions_enable || + state->is_version(150, 0)) + && strcmp(var->name, "gl_FragCoord") == 0 + && earlier->type == var->type + && var->data.mode == ir_var_shader_in) { + /* Allow redeclaration of gl_FragCoord for ARB_fcc layout + * qualifiers. 
+ */ + earlier->data.origin_upper_left = var->data.origin_upper_left; + earlier->data.pixel_center_integer = var->data.pixel_center_integer; + + /* According to section 4.3.7 of the GLSL 1.30 spec, + * the following built-in varaibles can be redeclared with an + * interpolation qualifier: + * * gl_FrontColor + * * gl_BackColor + * * gl_FrontSecondaryColor + * * gl_BackSecondaryColor + * * gl_Color + * * gl_SecondaryColor + */ + } else if (state->is_version(130, 0) + && (strcmp(var->name, "gl_FrontColor") == 0 + || strcmp(var->name, "gl_BackColor") == 0 + || strcmp(var->name, "gl_FrontSecondaryColor") == 0 + || strcmp(var->name, "gl_BackSecondaryColor") == 0 + || strcmp(var->name, "gl_Color") == 0 + || strcmp(var->name, "gl_SecondaryColor") == 0) + && earlier->type == var->type + && earlier->data.mode == var->data.mode) { + earlier->data.interpolation = var->data.interpolation; + + /* Layout qualifiers for gl_FragDepth. */ + } else if ((state->AMD_conservative_depth_enable || + state->ARB_conservative_depth_enable) + && strcmp(var->name, "gl_FragDepth") == 0 + && earlier->type == var->type + && earlier->data.mode == var->data.mode) { + + /** From the AMD_conservative_depth spec: + * Within any shader, the first redeclarations of gl_FragDepth + * must appear before any use of gl_FragDepth. + */ + if (earlier->data.used) { + _mesa_glsl_error(&loc, state, + "the first redeclaration of gl_FragDepth " + "must appear before any use of gl_FragDepth"); + } + + /* Prevent inconsistent redeclaration of depth layout qualifier. 
*/ + if (earlier->data.depth_layout != ir_depth_layout_none + && earlier->data.depth_layout != var->data.depth_layout) { + _mesa_glsl_error(&loc, state, + "gl_FragDepth: depth layout is declared here " + "as '%s, but it was previously declared as " + "'%s'", + depth_layout_string(var->data.depth_layout), + depth_layout_string(earlier->data.depth_layout)); + } + + earlier->data.depth_layout = var->data.depth_layout; + + } else if (allow_all_redeclarations) { + if (earlier->data.mode != var->data.mode) { + _mesa_glsl_error(&loc, state, + "redeclaration of `%s' with incorrect qualifiers", + var->name); + } else if (earlier->type != var->type) { + _mesa_glsl_error(&loc, state, + "redeclaration of `%s' has incorrect type", + var->name); + } + } else { + _mesa_glsl_error(&loc, state, "`%s' redeclared", var->name); + } + + return earlier; +} + +/** + * Generate the IR for an initializer in a variable declaration + */ +ir_rvalue * +process_initializer(ir_variable *var, ast_declaration *decl, + ast_fully_specified_type *type, + exec_list *initializer_instructions, + struct _mesa_glsl_parse_state *state) +{ + ir_rvalue *result = NULL; + + YYLTYPE initializer_loc = decl->initializer->get_location(); + + /* From page 24 (page 30 of the PDF) of the GLSL 1.10 spec: + * + * "All uniform variables are read-only and are initialized either + * directly by an application via API commands, or indirectly by + * OpenGL." + */ + if (var->data.mode == ir_var_uniform) { + state->check_version(120, 0, &initializer_loc, + "cannot initialize uniform %s", + var->name); + } + + /* Section 4.3.7 "Buffer Variables" of the GLSL 4.30 spec: + * + * "Buffer variables cannot have initializers." + */ + if (var->data.mode == ir_var_shader_storage) { + _mesa_glsl_error(&initializer_loc, state, + "cannot initialize buffer variable %s", + var->name); + } + + /* From section 4.1.7 of the GLSL 4.40 spec: + * + * "Opaque variables [...] 
are initialized only through the + * OpenGL API; they cannot be declared with an initializer in a + * shader." + */ + if (var->type->contains_opaque()) { + _mesa_glsl_error(&initializer_loc, state, + "cannot initialize opaque variable %s", + var->name); + } + + if ((var->data.mode == ir_var_shader_in) && (state->current_function == NULL)) { + _mesa_glsl_error(&initializer_loc, state, + "cannot initialize %s shader input / %s %s", + _mesa_shader_stage_to_string(state->stage), + (state->stage == MESA_SHADER_VERTEX) + ? "attribute" : "varying", + var->name); + } + + if (var->data.mode == ir_var_shader_out && state->current_function == NULL) { + _mesa_glsl_error(&initializer_loc, state, + "cannot initialize %s shader output %s", + _mesa_shader_stage_to_string(state->stage), + var->name); + } + + /* If the initializer is an ast_aggregate_initializer, recursively store + * type information from the LHS into it, so that its hir() function can do + * type checking. + */ + if (decl->initializer->oper == ast_aggregate) + _mesa_ast_set_aggregate_type(var->type, decl->initializer); + + ir_dereference *const lhs = new(state) ir_dereference_variable(var); + ir_rvalue *rhs = decl->initializer->hir(initializer_instructions, state); + + /* Calculate the constant value if this is a const or uniform + * declaration. + * + * Section 4.3 (Storage Qualifiers) of the GLSL ES 1.00.17 spec says: + * + * "Declarations of globals without a storage qualifier, or with + * just the const qualifier, may include initializers, in which case + * they will be initialized before the first line of main() is + * executed. Such initializers must be a constant expression." + * + * The same section of the GLSL ES 3.00.4 spec has similar language. 
+ */ + if (type->qualifier.flags.q.constant + || type->qualifier.flags.q.uniform + || (state->es_shader && state->current_function == NULL)) { + ir_rvalue *new_rhs = validate_assignment(state, initializer_loc, + lhs, rhs, true); + if (new_rhs != NULL) { + rhs = new_rhs; + + /* Section 4.3.3 (Constant Expressions) of the GLSL ES 3.00.4 spec + * says: + * + * "A constant expression is one of + * + * ... + * + * - an expression formed by an operator on operands that are + * all constant expressions, including getting an element of + * a constant array, or a field of a constant structure, or + * components of a constant vector. However, the sequence + * operator ( , ) and the assignment operators ( =, +=, ...) + * are not included in the operators that can create a + * constant expression." + * + * Section 12.43 (Sequence operator and constant expressions) says: + * + * "Should the following construct be allowed? + * + * float a[2,3]; + * + * The expression within the brackets uses the sequence operator + * (',') and returns the integer 3 so the construct is declaring + * a single-dimensional array of size 3. In some languages, the + * construct declares a two-dimensional array. It would be + * preferable to make this construct illegal to avoid confusion. + * + * One possibility is to change the definition of the sequence + * operator so that it does not return a constant-expression and + * hence cannot be used to declare an array size. + * + * RESOLUTION: The result of a sequence operator is not a + * constant-expression." + * + * Section 4.3.3 (Constant Expressions) of the GLSL 4.30.9 spec + * contains language almost identical to the section 4.3.3 in the + * GLSL ES 3.00.4 spec. This is a new limitation for these GLSL + * versions. 
+ */ + ir_constant *constant_value = rhs->constant_expression_value(); + if (!constant_value || + (state->is_version(430, 300) && + decl->initializer->has_sequence_subexpression())) { + const char *const variable_mode = + (type->qualifier.flags.q.constant) + ? "const" + : ((type->qualifier.flags.q.uniform) ? "uniform" : "global"); + + /* If ARB_shading_language_420pack is enabled, initializers of + * const-qualified local variables do not have to be constant + * expressions. Const-qualified global variables must still be + * initialized with constant expressions. + */ + if (!state->has_420pack() + || state->current_function == NULL) { + _mesa_glsl_error(& initializer_loc, state, + "initializer of %s variable `%s' must be a " + "constant expression", + variable_mode, + decl->identifier); + if (var->type->is_numeric()) { + /* Reduce cascading errors. */ + var->constant_value = type->qualifier.flags.q.constant + ? ir_constant::zero(state, var->type) : NULL; + } + } + } else { + rhs = constant_value; + var->constant_value = type->qualifier.flags.q.constant + ? constant_value : NULL; + } + } else { + if (var->type->is_numeric()) { + /* Reduce cascading errors. */ + var->constant_value = type->qualifier.flags.q.constant + ? ir_constant::zero(state, var->type) : NULL; + } + } + } + + if (rhs && !rhs->type->is_error()) { + bool temp = var->data.read_only; + if (type->qualifier.flags.q.constant) + var->data.read_only = false; + + /* Never emit code to initialize a uniform. + */ + const glsl_type *initializer_type; + if (!type->qualifier.flags.q.uniform) { + do_assignment(initializer_instructions, state, + NULL, + lhs, rhs, + &result, true, + true, + type->get_location()); + initializer_type = result->type; + } else + initializer_type = rhs->type; + + var->constant_initializer = rhs->constant_expression_value(); + var->data.has_initializer = true; + + /* If the declared variable is an unsized array, it must inherrit + * its full type from the initializer. 
A declaration such as + * + * uniform float a[] = float[](1.0, 2.0, 3.0, 3.0); + * + * becomes + * + * uniform float a[4] = float[](1.0, 2.0, 3.0, 3.0); + * + * The assignment generated in the if-statement (below) will also + * automatically handle this case for non-uniforms. + * + * If the declared variable is not an array, the types must + * already match exactly. As a result, the type assignment + * here can be done unconditionally. For non-uniforms the call + * to do_assignment can change the type of the initializer (via + * the implicit conversion rules). For uniforms the initializer + * must be a constant expression, and the type of that expression + * was validated above. + */ + var->type = initializer_type; + + var->data.read_only = temp; + } + + return result; +} + +static void +validate_layout_qualifier_vertex_count(struct _mesa_glsl_parse_state *state, + YYLTYPE loc, ir_variable *var, + unsigned num_vertices, + unsigned *size, + const char *var_category) +{ + if (var->type->is_unsized_array()) { + /* Section 4.3.8.1 (Input Layout Qualifiers) of the GLSL 1.50 spec says: + * + * All geometry shader input unsized array declarations will be + * sized by an earlier input layout qualifier, when present, as per + * the following table. + * + * Followed by a table mapping each allowed input layout qualifier to + * the corresponding input length. + * + * Similarly for tessellation control shader outputs. + */ + if (num_vertices != 0) + var->type = glsl_type::get_array_instance(var->type->fields.array, + num_vertices); + } else { + /* Section 4.3.8.1 (Input Layout Qualifiers) of the GLSL 1.50 spec + * includes the following examples of compile-time errors: + * + * // code sequence within one shader... 
+ * in vec4 Color1[]; // size unknown + * ...Color1.length()...// illegal, length() unknown + * in vec4 Color2[2]; // size is 2 + * ...Color1.length()...// illegal, Color1 still has no size + * in vec4 Color3[3]; // illegal, input sizes are inconsistent + * layout(lines) in; // legal, input size is 2, matching + * in vec4 Color4[3]; // illegal, contradicts layout + * ... + * + * To detect the case illustrated by Color3, we verify that the size of + * an explicitly-sized array matches the size of any previously declared + * explicitly-sized array. To detect the case illustrated by Color4, we + * verify that the size of an explicitly-sized array is consistent with + * any previously declared input layout. + */ + if (num_vertices != 0 && var->type->length != num_vertices) { + _mesa_glsl_error(&loc, state, + "%s size contradicts previously declared layout " + "(size is %u, but layout requires a size of %u)", + var_category, var->type->length, num_vertices); + } else if (*size != 0 && var->type->length != *size) { + _mesa_glsl_error(&loc, state, + "%s sizes are inconsistent (size is %u, but a " + "previous declaration has size %u)", + var_category, var->type->length, *size); + } else { + *size = var->type->length; + } + } +} + +static void +handle_tess_ctrl_shader_output_decl(struct _mesa_glsl_parse_state *state, + YYLTYPE loc, ir_variable *var) +{ + unsigned num_vertices = 0; + + if (state->tcs_output_vertices_specified) { + if (!state->out_qualifier->vertices-> + process_qualifier_constant(state, "vertices", + &num_vertices, false)) { + return; + } + + if (num_vertices > state->Const.MaxPatchVertices) { + _mesa_glsl_error(&loc, state, "vertices (%d) exceeds " + "GL_MAX_PATCH_VERTICES", num_vertices); + return; + } + } + + if (!var->type->is_array() && !var->data.patch) { + _mesa_glsl_error(&loc, state, + "tessellation control shader outputs must be arrays"); + + /* To avoid cascading failures, short circuit the checks below. 
*/ + return; + } + + if (var->data.patch) + return; + + validate_layout_qualifier_vertex_count(state, loc, var, num_vertices, + &state->tcs_output_size, + "tessellation control shader output"); +} + +/** + * Do additional processing necessary for tessellation control/evaluation shader + * input declarations. This covers both interface block arrays and bare input + * variables. + */ +static void +handle_tess_shader_input_decl(struct _mesa_glsl_parse_state *state, + YYLTYPE loc, ir_variable *var) +{ + if (!var->type->is_array() && !var->data.patch) { + _mesa_glsl_error(&loc, state, + "per-vertex tessellation shader inputs must be arrays"); + /* Avoid cascading failures. */ + return; + } + + if (var->data.patch) + return; + + /* Unsized arrays are implicitly sized to gl_MaxPatchVertices. */ + if (var->type->is_unsized_array()) { + var->type = glsl_type::get_array_instance(var->type->fields.array, + state->Const.MaxPatchVertices); + } +} + + +/** + * Do additional processing necessary for geometry shader input declarations + * (this covers both interface blocks arrays and bare input variables). + */ +static void +handle_geometry_shader_input_decl(struct _mesa_glsl_parse_state *state, + YYLTYPE loc, ir_variable *var) +{ + unsigned num_vertices = 0; + + if (state->gs_input_prim_type_specified) { + num_vertices = vertices_per_prim(state->in_qualifier->prim_type); + } + + /* Geometry shader input variables must be arrays. Caller should have + * reported an error for this. + */ + if (!var->type->is_array()) { + assert(state->error); + + /* To avoid cascading failures, short circuit the checks below. 
*/ + return; + } + + validate_layout_qualifier_vertex_count(state, loc, var, num_vertices, + &state->gs_input_size, + "geometry shader input"); +} + +void +validate_identifier(const char *identifier, YYLTYPE loc, + struct _mesa_glsl_parse_state *state) +{ + /* From page 15 (page 21 of the PDF) of the GLSL 1.10 spec, + * + * "Identifiers starting with "gl_" are reserved for use by + * OpenGL, and may not be declared in a shader as either a + * variable or a function." + */ + if (is_gl_identifier(identifier)) { + _mesa_glsl_error(&loc, state, + "identifier `%s' uses reserved `gl_' prefix", + identifier); + } else if (strstr(identifier, "__")) { + /* From page 14 (page 20 of the PDF) of the GLSL 1.10 + * spec: + * + * "In addition, all identifiers containing two + * consecutive underscores (__) are reserved as + * possible future keywords." + * + * The intention is that names containing __ are reserved for internal + * use by the implementation, and names prefixed with GL_ are reserved + * for use by Khronos. Names simply containing __ are dangerous to use, + * but should be allowed. + * + * A future version of the GLSL specification will clarify this. + */ + _mesa_glsl_warning(&loc, state, + "identifier `%s' uses reserved `__' string", + identifier); + } +} + +ir_rvalue * +ast_declarator_list::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + const struct glsl_type *decl_type; + const char *type_name = NULL; + ir_rvalue *result = NULL; + YYLTYPE loc = this->get_location(); + + /* From page 46 (page 52 of the PDF) of the GLSL 1.50 spec: + * + * "To ensure that a particular output variable is invariant, it is + * necessary to use the invariant qualifier. 
It can either be used to + * qualify a previously declared variable as being invariant + * + * invariant gl_Position; // make existing gl_Position be invariant" + * + * In these cases the parser will set the 'invariant' flag in the declarator + * list, and the type will be NULL. + */ + if (this->invariant) { + assert(this->type == NULL); + + if (state->current_function != NULL) { + _mesa_glsl_error(& loc, state, + "all uses of `invariant' keyword must be at global " + "scope"); + } + + foreach_list_typed (ast_declaration, decl, link, &this->declarations) { + assert(decl->array_specifier == NULL); + assert(decl->initializer == NULL); + + ir_variable *const earlier = + state->symbols->get_variable(decl->identifier); + if (earlier == NULL) { + _mesa_glsl_error(& loc, state, + "undeclared variable `%s' cannot be marked " + "invariant", decl->identifier); + } else if (!is_varying_var(earlier, state->stage)) { + _mesa_glsl_error(&loc, state, + "`%s' cannot be marked invariant; interfaces between " + "shader stages only.", decl->identifier); + } else if (earlier->data.used) { + _mesa_glsl_error(& loc, state, + "variable `%s' may not be redeclared " + "`invariant' after being used", + earlier->name); + } else { + earlier->data.invariant = true; + } + } + + /* Invariant redeclarations do not have r-values. 
+ */ + return NULL; + } + + if (this->precise) { + assert(this->type == NULL); + + foreach_list_typed (ast_declaration, decl, link, &this->declarations) { + assert(decl->array_specifier == NULL); + assert(decl->initializer == NULL); + + ir_variable *const earlier = + state->symbols->get_variable(decl->identifier); + if (earlier == NULL) { + _mesa_glsl_error(& loc, state, + "undeclared variable `%s' cannot be marked " + "precise", decl->identifier); + } else if (state->current_function != NULL && + !state->symbols->name_declared_this_scope(decl->identifier)) { + /* Note: we have to check if we're in a function, since + * builtins are treated as having come from another scope. + */ + _mesa_glsl_error(& loc, state, + "variable `%s' from an outer scope may not be " + "redeclared `precise' in this scope", + earlier->name); + } else if (earlier->data.used) { + _mesa_glsl_error(& loc, state, + "variable `%s' may not be redeclared " + "`precise' after being used", + earlier->name); + } else { + earlier->data.precise = true; + } + } + + /* Precise redeclarations do not have r-values either. */ + return NULL; + } + + assert(this->type != NULL); + assert(!this->invariant); + assert(!this->precise); + + /* The type specifier may contain a structure definition. Process that + * before any of the variable declarations. + */ + (void) this->type->specifier->hir(instructions, state); + + decl_type = this->type->glsl_type(& type_name, state); + + /* Section 4.3.7 "Buffer Variables" of the GLSL 4.30 spec: + * "Buffer variables may only be declared inside interface blocks + * (section 4.3.9 “Interface Blocks”), which are then referred to as + * shader storage blocks. It is a compile-time error to declare buffer + * variables at global scope (outside a block)." 
+ */ + if (type->qualifier.flags.q.buffer && !decl_type->is_interface()) { + _mesa_glsl_error(&loc, state, + "buffer variables cannot be declared outside " + "interface blocks"); + } + + /* An offset-qualified atomic counter declaration sets the default + * offset for the next declaration within the same atomic counter + * buffer. + */ + if (decl_type && decl_type->contains_atomic()) { + if (type->qualifier.flags.q.explicit_binding && + type->qualifier.flags.q.explicit_offset) { + unsigned qual_binding; + unsigned qual_offset; + if (process_qualifier_constant(state, &loc, "binding", + type->qualifier.binding, + &qual_binding) + && process_qualifier_constant(state, &loc, "offset", + type->qualifier.offset, + &qual_offset)) { + state->atomic_counter_offsets[qual_binding] = qual_offset; + } + } + } + + if (this->declarations.is_empty()) { + /* If there is no structure involved in the program text, there are two + * possible scenarios: + * + * - The program text contained something like 'vec4;'. This is an + * empty declaration. It is valid but weird. Emit a warning. + * + * - The program text contained something like 'S;' and 'S' is not the + * name of a known structure type. This is both invalid and weird. + * Emit an error. + * + * - The program text contained something like 'mediump float;' + * when the programmer probably meant 'precision mediump + * float;' Emit a warning with a description of what they + * probably meant to do. + * + * Note that if decl_type is NULL and there is a structure involved, + * there must have been some sort of error with the structure. In this + * case we assume that an error was already generated on this line of + * code for the structure. There is no need to generate an additional, + * confusing error. 
+ */ + assert(this->type->specifier->structure == NULL || decl_type != NULL + || state->error); + + if (decl_type == NULL) { + _mesa_glsl_error(&loc, state, + "invalid type `%s' in empty declaration", + type_name); + } else if (decl_type->base_type == GLSL_TYPE_ATOMIC_UINT) { + /* Empty atomic counter declarations are allowed and useful + * to set the default offset qualifier. + */ + return NULL; + } else if (this->type->qualifier.precision != ast_precision_none) { + if (this->type->specifier->structure != NULL) { + _mesa_glsl_error(&loc, state, + "precision qualifiers can't be applied " + "to structures"); + } else { + static const char *const precision_names[] = { + "highp", + "highp", + "mediump", + "lowp" + }; + + _mesa_glsl_warning(&loc, state, + "empty declaration with precision qualifier, " + "to set the default precision, use " + "`precision %s %s;'", + precision_names[this->type->qualifier.precision], + type_name); + } + } else if (this->type->specifier->structure == NULL) { + _mesa_glsl_warning(&loc, state, "empty declaration"); + } + } + + foreach_list_typed (ast_declaration, decl, link, &this->declarations) { + const struct glsl_type *var_type; + ir_variable *var; + const char *identifier = decl->identifier; + /* FINISHME: Emit a warning if a variable declaration shadows a + * FINISHME: declaration at a higher scope. 
+ */ + + if ((decl_type == NULL) || decl_type->is_void()) { + if (type_name != NULL) { + _mesa_glsl_error(& loc, state, + "invalid type `%s' in declaration of `%s'", + type_name, decl->identifier); + } else { + _mesa_glsl_error(& loc, state, + "invalid type in declaration of `%s'", + decl->identifier); + } + continue; + } + + if (this->type->qualifier.flags.q.subroutine) { + const glsl_type *t; + const char *name; + + t = state->symbols->get_type(this->type->specifier->type_name); + if (!t) + _mesa_glsl_error(& loc, state, + "invalid type in declaration of `%s'", + decl->identifier); + name = ralloc_asprintf(ctx, "%s_%s", _mesa_shader_stage_to_subroutine_prefix(state->stage), decl->identifier); + + identifier = name; + + } + var_type = process_array_type(&loc, decl_type, decl->array_specifier, + state); + + var = new(ctx) ir_variable(var_type, identifier, ir_var_auto); + + /* The 'varying in' and 'varying out' qualifiers can only be used with + * ARB_geometry_shader4 and EXT_geometry_shader4, which we don't support + * yet. + */ + if (this->type->qualifier.flags.q.varying) { + if (this->type->qualifier.flags.q.in) { + _mesa_glsl_error(& loc, state, + "`varying in' qualifier in declaration of " + "`%s' only valid for geometry shaders using " + "ARB_geometry_shader4 or EXT_geometry_shader4", + decl->identifier); + } else if (this->type->qualifier.flags.q.out) { + _mesa_glsl_error(& loc, state, + "`varying out' qualifier in declaration of " + "`%s' only valid for geometry shaders using " + "ARB_geometry_shader4 or EXT_geometry_shader4", + decl->identifier); + } + } + + /* From page 22 (page 28 of the PDF) of the GLSL 1.10 specification; + * + * "Global variables can only use the qualifiers const, + * attribute, uniform, or varying. Only one may be + * specified. + * + * Local variables can only use the qualifier const." + * + * This is relaxed in GLSL 1.30 and GLSL ES 3.00. It is also relaxed by + * any extension that adds the 'layout' keyword. 
+ */ + if (!state->is_version(130, 300) + && !state->has_explicit_attrib_location() + && !state->has_separate_shader_objects() + && !state->ARB_fragment_coord_conventions_enable) { + if (this->type->qualifier.flags.q.out) { + _mesa_glsl_error(& loc, state, + "`out' qualifier in declaration of `%s' " + "only valid for function parameters in %s", + decl->identifier, state->get_version_string()); + } + if (this->type->qualifier.flags.q.in) { + _mesa_glsl_error(& loc, state, + "`in' qualifier in declaration of `%s' " + "only valid for function parameters in %s", + decl->identifier, state->get_version_string()); + } + /* FINISHME: Test for other invalid qualifiers. */ + } + + apply_type_qualifier_to_variable(& this->type->qualifier, var, state, + & loc, false); + apply_layout_qualifier_to_variable(&this->type->qualifier, var, state, + &loc); + + if (this->type->qualifier.flags.q.invariant) { + if (!is_varying_var(var, state->stage)) { + _mesa_glsl_error(&loc, state, + "`%s' cannot be marked invariant; interfaces between " + "shader stages only", var->name); + } + } + + if (state->current_function != NULL) { + const char *mode = NULL; + const char *extra = ""; + + /* There is no need to check for 'inout' here because the parser will + * only allow that in function parameter lists. 
+ */ + if (this->type->qualifier.flags.q.attribute) { + mode = "attribute"; + } else if (this->type->qualifier.flags.q.subroutine) { + mode = "subroutine uniform"; + } else if (this->type->qualifier.flags.q.uniform) { + mode = "uniform"; + } else if (this->type->qualifier.flags.q.varying) { + mode = "varying"; + } else if (this->type->qualifier.flags.q.in) { + mode = "in"; + extra = " or in function parameter list"; + } else if (this->type->qualifier.flags.q.out) { + mode = "out"; + extra = " or in function parameter list"; + } + + if (mode) { + _mesa_glsl_error(& loc, state, + "%s variable `%s' must be declared at " + "global scope%s", + mode, var->name, extra); + } + } else if (var->data.mode == ir_var_shader_in) { + var->data.read_only = true; + + if (state->stage == MESA_SHADER_VERTEX) { + bool error_emitted = false; + + /* From page 31 (page 37 of the PDF) of the GLSL 1.50 spec: + * + * "Vertex shader inputs can only be float, floating-point + * vectors, matrices, signed and unsigned integers and integer + * vectors. Vertex shader inputs can also form arrays of these + * types, but not structures." + * + * From page 31 (page 27 of the PDF) of the GLSL 1.30 spec: + * + * "Vertex shader inputs can only be float, floating-point + * vectors, matrices, signed and unsigned integers and integer + * vectors. They cannot be arrays or structures." + * + * From page 23 (page 29 of the PDF) of the GLSL 1.20 spec: + * + * "The attribute qualifier can be used only with float, + * floating-point vectors, and matrices. Attribute variables + * cannot be declared as arrays or structures." + * + * From page 33 (page 39 of the PDF) of the GLSL ES 3.00 spec: + * + * "Vertex shader inputs can only be float, floating-point + * vectors, matrices, signed and unsigned integers and integer + * vectors. Vertex shader inputs cannot be arrays or + * structures." 
+ */ + const glsl_type *check_type = var->type->without_array(); + + switch (check_type->base_type) { + case GLSL_TYPE_FLOAT: + break; + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + if (state->is_version(120, 300)) + break; + case GLSL_TYPE_DOUBLE: + if (check_type->base_type == GLSL_TYPE_DOUBLE && (state->is_version(410, 0) || state->ARB_vertex_attrib_64bit_enable)) + break; + /* FALLTHROUGH */ + default: + _mesa_glsl_error(& loc, state, + "vertex shader input / attribute cannot have " + "type %s`%s'", + var->type->is_array() ? "array of " : "", + check_type->name); + error_emitted = true; + } + + if (!error_emitted && var->type->is_array() && + !state->check_version(150, 0, &loc, + "vertex shader input / attribute " + "cannot have array type")) { + error_emitted = true; + } + } else if (state->stage == MESA_SHADER_GEOMETRY) { + /* From section 4.3.4 (Inputs) of the GLSL 1.50 spec: + * + * Geometry shader input variables get the per-vertex values + * written out by vertex shader output variables of the same + * names. Since a geometry shader operates on a set of + * vertices, each input varying variable (or input block, see + * interface blocks below) needs to be declared as an array. 
+ */ + if (!var->type->is_array()) { + _mesa_glsl_error(&loc, state, + "geometry shader inputs must be arrays"); + } + + handle_geometry_shader_input_decl(state, loc, var); + } else if (state->stage == MESA_SHADER_FRAGMENT) { + /* From section 4.3.4 (Input Variables) of the GLSL ES 3.10 spec: + * + * It is a compile-time error to declare a fragment shader + * input with, or that contains, any of the following types: + * + * * A boolean type + * * An opaque type + * * An array of arrays + * * An array of structures + * * A structure containing an array + * * A structure containing a structure + */ + if (state->es_shader) { + const glsl_type *check_type = var->type->without_array(); + if (check_type->is_boolean() || + check_type->contains_opaque()) { + _mesa_glsl_error(&loc, state, + "fragment shader input cannot have type %s", + check_type->name); + } + if (var->type->is_array() && + var->type->fields.array->is_array()) { + _mesa_glsl_error(&loc, state, + "%s shader output " + "cannot have an array of arrays", + _mesa_shader_stage_to_string(state->stage)); + } + if (var->type->is_array() && + var->type->fields.array->is_record()) { + _mesa_glsl_error(&loc, state, + "fragment shader input " + "cannot have an array of structs"); + } + if (var->type->is_record()) { + for (unsigned i = 0; i < var->type->length; i++) { + if (var->type->fields.structure[i].type->is_array() || + var->type->fields.structure[i].type->is_record()) + _mesa_glsl_error(&loc, state, + "fragement shader input cannot have " + "a struct that contains an " + "array or struct"); + } + } + } + } else if (state->stage == MESA_SHADER_TESS_CTRL || + state->stage == MESA_SHADER_TESS_EVAL) { + handle_tess_shader_input_decl(state, loc, var); + } + } else if (var->data.mode == ir_var_shader_out) { + const glsl_type *check_type = var->type->without_array(); + + /* From section 4.3.6 (Output variables) of the GLSL 4.40 spec: + * + * It is a compile-time error to declare a vertex, tessellation + * evaluation, 
tessellation control, or geometry shader output + * that contains any of the following: + * + * * A Boolean type (bool, bvec2 ...) + * * An opaque type + */ + if (check_type->is_boolean() || check_type->contains_opaque()) + _mesa_glsl_error(&loc, state, + "%s shader output cannot have type %s", + _mesa_shader_stage_to_string(state->stage), + check_type->name); + + /* From section 4.3.6 (Output variables) of the GLSL 4.40 spec: + * + * It is a compile-time error to declare a fragment shader output + * that contains any of the following: + * + * * A Boolean type (bool, bvec2 ...) + * * A double-precision scalar or vector (double, dvec2 ...) + * * An opaque type + * * Any matrix type + * * A structure + */ + if (state->stage == MESA_SHADER_FRAGMENT) { + if (check_type->is_record() || check_type->is_matrix()) + _mesa_glsl_error(&loc, state, + "fragment shader output " + "cannot have struct or matrix type"); + switch (check_type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + break; + default: + _mesa_glsl_error(&loc, state, + "fragment shader output cannot have " + "type %s", check_type->name); + } + } + + /* From section 4.3.6 (Output Variables) of the GLSL ES 3.10 spec: + * + * It is a compile-time error to declare a vertex shader output + * with, or that contains, any of the following types: + * + * * A boolean type + * * An opaque type + * * An array of arrays + * * An array of structures + * * A structure containing an array + * * A structure containing a structure + * + * It is a compile-time error to declare a fragment shader output + * with, or that contains, any of the following types: + * + * * A boolean type + * * An opaque type + * * A matrix + * * A structure + * * An array of array + */ + if (state->es_shader) { + if (var->type->is_array() && + var->type->fields.array->is_array()) { + _mesa_glsl_error(&loc, state, + "%s shader output " + "cannot have an array of arrays", + _mesa_shader_stage_to_string(state->stage)); + 
} + if (state->stage == MESA_SHADER_VERTEX) { + if (var->type->is_array() && + var->type->fields.array->is_record()) { + _mesa_glsl_error(&loc, state, + "vertex shader output " + "cannot have an array of structs"); + } + if (var->type->is_record()) { + for (unsigned i = 0; i < var->type->length; i++) { + if (var->type->fields.structure[i].type->is_array() || + var->type->fields.structure[i].type->is_record()) + _mesa_glsl_error(&loc, state, + "vertex shader output cannot have a " + "struct that contains an " + "array or struct"); + } + } + } + } + + if (state->stage == MESA_SHADER_TESS_CTRL) { + handle_tess_ctrl_shader_output_decl(state, loc, var); + } + } else if (var->type->contains_subroutine()) { + /* declare subroutine uniforms as hidden */ + var->data.how_declared = ir_var_hidden; + } + + /* Integer fragment inputs must be qualified with 'flat'. In GLSL ES, + * so must integer vertex outputs. + * + * From section 4.3.4 ("Inputs") of the GLSL 1.50 spec: + * "Fragment shader inputs that are signed or unsigned integers or + * integer vectors must be qualified with the interpolation qualifier + * flat." + * + * From section 4.3.4 ("Input Variables") of the GLSL 3.00 ES spec: + * "Fragment shader inputs that are, or contain, signed or unsigned + * integers or integer vectors must be qualified with the + * interpolation qualifier flat." + * + * From section 4.3.6 ("Output Variables") of the GLSL 3.00 ES spec: + * "Vertex shader outputs that are, or contain, signed or unsigned + * integers or integer vectors must be qualified with the + * interpolation qualifier flat." + * + * Note that prior to GLSL 1.50, this requirement applied to vertex + * outputs rather than fragment inputs. That creates problems in the + * presence of geometry shaders, so we adopt the GLSL 1.50 rule for all + * desktop GL shaders. For GLSL ES shaders, we follow the spec and + * apply the restriction to both vertex outputs and fragment inputs. 
+ * + * Note also that the desktop GLSL specs are missing the text "or + * contain"; this is presumably an oversight, since there is no + * reasonable way to interpolate a fragment shader input that contains + * an integer. + */ + if (state->is_version(130, 300) && + var->type->contains_integer() && + var->data.interpolation != INTERP_QUALIFIER_FLAT && + ((state->stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_in) + || (state->stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_out + && state->es_shader))) { + const char *var_type = (state->stage == MESA_SHADER_VERTEX) ? + "vertex output" : "fragment input"; + _mesa_glsl_error(&loc, state, "if a %s is (or contains) " + "an integer, then it must be qualified with 'flat'", + var_type); + } + + /* Double fragment inputs must be qualified with 'flat'. */ + if (var->type->contains_double() && + var->data.interpolation != INTERP_QUALIFIER_FLAT && + state->stage == MESA_SHADER_FRAGMENT && + var->data.mode == ir_var_shader_in) { + _mesa_glsl_error(&loc, state, "if a fragment input is (or contains) " + "a double, then it must be qualified with 'flat'", + var_type); + } + + /* Interpolation qualifiers cannot be applied to 'centroid' and + * 'centroid varying'. + * + * From page 29 (page 35 of the PDF) of the GLSL 1.30 spec: + * "interpolation qualifiers may only precede the qualifiers in, + * centroid in, out, or centroid out in a declaration. They do not apply + * to the deprecated storage qualifiers varying or centroid varying." + * + * These deprecated storage qualifiers do not exist in GLSL ES 3.00. 
+ */ + if (state->is_version(130, 0) + && this->type->qualifier.has_interpolation() + && this->type->qualifier.flags.q.varying) { + + const char *i = this->type->qualifier.interpolation_string(); + assert(i != NULL); + const char *s; + if (this->type->qualifier.flags.q.centroid) + s = "centroid varying"; + else + s = "varying"; + + _mesa_glsl_error(&loc, state, + "qualifier '%s' cannot be applied to the " + "deprecated storage qualifier '%s'", i, s); + } + + + /* Interpolation qualifiers can only apply to vertex shader outputs and + * fragment shader inputs. + * + * From page 29 (page 35 of the PDF) of the GLSL 1.30 spec: + * "Outputs from a vertex shader (out) and inputs to a fragment + * shader (in) can be further qualified with one or more of these + * interpolation qualifiers" + * + * From page 31 (page 37 of the PDF) of the GLSL ES 3.00 spec: + * "These interpolation qualifiers may only precede the qualifiers + * in, centroid in, out, or centroid out in a declaration. They do + * not apply to inputs into a vertex shader or outputs from a + * fragment shader." + */ + if (state->is_version(130, 300) + && this->type->qualifier.has_interpolation()) { + + const char *i = this->type->qualifier.interpolation_string(); + assert(i != NULL); + + switch (state->stage) { + case MESA_SHADER_VERTEX: + if (this->type->qualifier.flags.q.in) { + _mesa_glsl_error(&loc, state, + "qualifier '%s' cannot be applied to vertex " + "shader inputs", i); + } + break; + case MESA_SHADER_FRAGMENT: + if (this->type->qualifier.flags.q.out) { + _mesa_glsl_error(&loc, state, + "qualifier '%s' cannot be applied to fragment " + "shader outputs", i); + } + break; + default: + break; + } + } + + + /* From section 4.3.4 of the GLSL 4.00 spec: + * "Input variables may not be declared using the patch in qualifier + * in tessellation control or geometry shaders." 
+ * + * From section 4.3.6 of the GLSL 4.00 spec: + * "It is an error to use patch out in a vertex, tessellation + * evaluation, or geometry shader." + * + * This doesn't explicitly forbid using them in a fragment shader, but + * that's probably just an oversight. + */ + if (state->stage != MESA_SHADER_TESS_EVAL + && this->type->qualifier.flags.q.patch + && this->type->qualifier.flags.q.in) { + + _mesa_glsl_error(&loc, state, "'patch in' can only be used in a " + "tessellation evaluation shader"); + } + + if (state->stage != MESA_SHADER_TESS_CTRL + && this->type->qualifier.flags.q.patch + && this->type->qualifier.flags.q.out) { + + _mesa_glsl_error(&loc, state, "'patch out' can only be used in a " + "tessellation control shader"); + } + + /* Precision qualifiers exists only in GLSL versions 1.00 and >= 1.30. + */ + if (this->type->qualifier.precision != ast_precision_none) { + state->check_precision_qualifiers_allowed(&loc); + } + + + /* If a precision qualifier is allowed on a type, it is allowed on + * an array of that type. + */ + if (!(this->type->qualifier.precision == ast_precision_none + || precision_qualifier_allowed(var->type->without_array()))) { + + _mesa_glsl_error(&loc, state, + "precision qualifiers apply only to floating point" + ", integer and opaque types"); + } + + /* From section 4.1.7 of the GLSL 4.40 spec: + * + * "[Opaque types] can only be declared as function + * parameters or uniform-qualified variables." + */ + if (var_type->contains_opaque() && + !this->type->qualifier.flags.q.uniform) { + _mesa_glsl_error(&loc, state, + "opaque variables must be declared uniform"); + } + + /* Process the initializer and add its instructions to a temporary + * list. This list will be added to the instruction stream (below) after + * the declaration is added. This is done because in some cases (such as + * redeclarations) the declaration may not actually be added to the + * instruction stream. 
+ */ + exec_list initializer_instructions; + + /* Examine var name here since var may get deleted in the next call */ + bool var_is_gl_id = is_gl_identifier(var->name); + + ir_variable *earlier = + get_variable_being_redeclared(var, decl->get_location(), state, + false /* allow_all_redeclarations */); + if (earlier != NULL) { + if (var_is_gl_id && + earlier->data.how_declared == ir_var_declared_in_block) { + _mesa_glsl_error(&loc, state, + "`%s' has already been redeclared using " + "gl_PerVertex", earlier->name); + } + earlier->data.how_declared = ir_var_declared_normally; + } + + if (decl->initializer != NULL) { + result = process_initializer((earlier == NULL) ? var : earlier, + decl, this->type, + &initializer_instructions, state); + } else { + validate_array_dimensions(var_type, state, &loc); + } + + /* From page 23 (page 29 of the PDF) of the GLSL 1.10 spec: + * + * "It is an error to write to a const variable outside of + * its declaration, so they must be initialized when + * declared." + */ + if (this->type->qualifier.flags.q.constant && decl->initializer == NULL) { + _mesa_glsl_error(& loc, state, + "const declaration of `%s' must be initialized", + decl->identifier); + } + + if (state->es_shader) { + const glsl_type *const t = (earlier == NULL) + ? var->type : earlier->type; + + if (t->is_unsized_array()) + /* Section 10.17 of the GLSL ES 1.00 specification states that + * unsized array declarations have been removed from the language. + * Arrays that are sized using an initializer are still explicitly + * sized. However, GLSL ES 1.00 does not allow array + * initializers. That is only allowed in GLSL ES 3.00. 
+ * + * Section 4.1.9 (Arrays) of the GLSL ES 3.00 spec says: + * + * "An array type can also be formed without specifying a size + * if the definition includes an initializer: + * + * float x[] = float[2] (1.0, 2.0); // declares an array of size 2 + * float y[] = float[] (1.0, 2.0, 3.0); // declares an array of size 3 + * + * float a[5]; + * float b[] = a;" + */ + _mesa_glsl_error(& loc, state, + "unsized array declarations are not allowed in " + "GLSL ES"); + } + + /* If the declaration is not a redeclaration, there are a few additional + * semantic checks that must be applied. In addition, variable that was + * created for the declaration should be added to the IR stream. + */ + if (earlier == NULL) { + validate_identifier(decl->identifier, loc, state); + + /* Add the variable to the symbol table. Note that the initializer's + * IR was already processed earlier (though it hasn't been emitted + * yet), without the variable in scope. + * + * This differs from most C-like languages, but it follows the GLSL + * specification. From page 28 (page 34 of the PDF) of the GLSL 1.50 + * spec: + * + * "Within a declaration, the scope of a name starts immediately + * after the initializer if present or immediately after the name + * being declared if not." + */ + if (!state->symbols->add_variable(var)) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(&loc, state, "name `%s' already taken in the " + "current scope", decl->identifier); + continue; + } + + /* Push the variable declaration to the top. It means that all the + * variable declarations will appear in a funny last-to-first order, + * but otherwise we run into trouble if a function is prototyped, a + * global var is decled, then the function is defined with usage of + * the global var. See glslparsertest's CorrectModule.frag. + */ + instructions->push_head(var); + } + + instructions->append_list(&initializer_instructions); + } + + + /* Generally, variable declarations do not have r-values. 
However, + * one is used for the declaration in + * + * while (bool b = some_condition()) { + * ... + * } + * + * so we return the rvalue from the last seen declaration here. + */ + return result; +} + + +ir_rvalue * +ast_parameter_declarator::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + const struct glsl_type *type; + const char *name = NULL; + YYLTYPE loc = this->get_location(); + + type = this->type->glsl_type(& name, state); + + if (type == NULL) { + if (name != NULL) { + _mesa_glsl_error(& loc, state, + "invalid type `%s' in declaration of `%s'", + name, this->identifier); + } else { + _mesa_glsl_error(& loc, state, + "invalid type in declaration of `%s'", + this->identifier); + } + + type = glsl_type::error_type; + } + + /* From page 62 (page 68 of the PDF) of the GLSL 1.50 spec: + * + * "Functions that accept no input arguments need not use void in the + * argument list because prototypes (or definitions) are required and + * therefore there is no ambiguity when an empty argument list "( )" is + * declared. The idiom "(void)" as a parameter list is provided for + * convenience." + * + * Placing this check here prevents a void parameter being set up + * for a function, which avoids tripping up checks for main taking + * parameters and lookups of an unnamed symbol. + */ + if (type->is_void()) { + if (this->identifier != NULL) + _mesa_glsl_error(& loc, state, + "named parameter cannot have type `void'"); + + is_void = true; + return NULL; + } + + if (formal_parameter && (this->identifier == NULL)) { + _mesa_glsl_error(& loc, state, "formal parameter lacks a name"); + return NULL; + } + + /* This only handles "vec4 foo[..]". The earlier specifier->glsl_type(...) + * call already handled the "vec4[..] foo" case. 
+ */ + type = process_array_type(&loc, type, this->array_specifier, state); + + if (!type->is_error() && type->is_unsized_array()) { + _mesa_glsl_error(&loc, state, "arrays passed as parameters must have " + "a declared size"); + type = glsl_type::error_type; + } + + is_void = false; + ir_variable *var = new(ctx) + ir_variable(type, this->identifier, ir_var_function_in); + + /* Apply any specified qualifiers to the parameter declaration. Note that + * for function parameters the default mode is 'in'. + */ + apply_type_qualifier_to_variable(& this->type->qualifier, var, state, & loc, + true); + + /* From section 4.1.7 of the GLSL 4.40 spec: + * + * "Opaque variables cannot be treated as l-values; hence cannot + * be used as out or inout function parameters, nor can they be + * assigned into." + */ + if ((var->data.mode == ir_var_function_inout || var->data.mode == ir_var_function_out) + && type->contains_opaque()) { + _mesa_glsl_error(&loc, state, "out and inout parameters cannot " + "contain opaque variables"); + type = glsl_type::error_type; + } + + /* From page 39 (page 45 of the PDF) of the GLSL 1.10 spec: + * + * "When calling a function, expressions that do not evaluate to + * l-values cannot be passed to parameters declared as out or inout." + * + * From page 32 (page 38 of the PDF) of the GLSL 1.10 spec: + * + * "Other binary or unary expressions, non-dereferenced arrays, + * function names, swizzles with repeated fields, and constants + * cannot be l-values." + * + * So for GLSL 1.10, passing an array as an out or inout parameter is not + * allowed. This restriction is removed in GLSL 1.20, and in GLSL ES. + */ + if ((var->data.mode == ir_var_function_inout || var->data.mode == ir_var_function_out) + && type->is_array() + && !state->check_version(120, 100, &loc, + "arrays cannot be out or inout parameters")) { + type = glsl_type::error_type; + } + + instructions->push_tail(var); + + /* Parameter declarations do not have r-values. 
+ */ + return NULL; +} + + +void +ast_parameter_declarator::parameters_to_hir(exec_list *ast_parameters, + bool formal, + exec_list *ir_parameters, + _mesa_glsl_parse_state *state) +{ + ast_parameter_declarator *void_param = NULL; + unsigned count = 0; + + foreach_list_typed (ast_parameter_declarator, param, link, ast_parameters) { + param->formal_parameter = formal; + param->hir(ir_parameters, state); + + if (param->is_void) + void_param = param; + + count++; + } + + if ((void_param != NULL) && (count > 1)) { + YYLTYPE loc = void_param->get_location(); + + _mesa_glsl_error(& loc, state, + "`void' parameter must be only parameter"); + } +} + + +void +emit_function(_mesa_glsl_parse_state *state, ir_function *f) +{ + /* IR invariants disallow function declarations or definitions + * nested within other function definitions. But there is no + * requirement about the relative order of function declarations + * and definitions with respect to one another. So simply insert + * the new ir_function block at the end of the toplevel instruction + * list. + */ + state->toplevel_ir->push_tail(f); +} + + +ir_rvalue * +ast_function::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + ir_function *f = NULL; + ir_function_signature *sig = NULL; + exec_list hir_parameters; + YYLTYPE loc = this->get_location(); + + const char *const name = identifier; + + /* New functions are always added to the top-level IR instruction stream, + * so this instruction list pointer is ignored. See also emit_function + * (called below). + */ + (void) instructions; + + /* From page 21 (page 27 of the PDF) of the GLSL 1.20 spec, + * + * "Function declarations (prototypes) cannot occur inside of functions; + * they must be at global scope, or for the built-in functions, outside + * the global scope." + * + * From page 27 (page 33 of the PDF) of the GLSL ES 1.00.16 spec, + * + * "User defined functions may only be defined within the global scope." 
+ * + * Note that this language does not appear in GLSL 1.10. + */ + if ((state->current_function != NULL) && + state->is_version(120, 100)) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(&loc, state, + "declaration of function `%s' not allowed within " + "function body", name); + } + + validate_identifier(name, this->get_location(), state); + + /* Convert the list of function parameters to HIR now so that they can be + * used below to compare this function's signature with previously seen + * signatures for functions with the same name. + */ + ast_parameter_declarator::parameters_to_hir(& this->parameters, + is_definition, + & hir_parameters, state); + + const char *return_type_name; + const glsl_type *return_type = + this->return_type->glsl_type(& return_type_name, state); + + if (!return_type) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(&loc, state, + "function `%s' has undeclared return type `%s'", + name, return_type_name); + return_type = glsl_type::error_type; + } + + /* ARB_shader_subroutine states: + * "Subroutine declarations cannot be prototyped. It is an error to prepend + * subroutine(...) to a function declaration." + */ + if (this->return_type->qualifier.flags.q.subroutine_def && !is_definition) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(&loc, state, + "function declaration `%s' cannot have subroutine prepended", + name); + } + + /* From page 56 (page 62 of the PDF) of the GLSL 1.30 spec: + * "No qualifier is allowed on the return type of a function." + */ + if (this->return_type->has_qualifiers(state)) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(& loc, state, + "function `%s' return type has qualifiers", name); + } + + /* Section 6.1 (Function Definitions) of the GLSL 1.20 spec says: + * + * "Arrays are allowed as arguments and as the return type. In both + * cases, the array must be explicitly sized." 
+ */ + if (return_type->is_unsized_array()) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(& loc, state, + "function `%s' return type array must be explicitly " + "sized", name); + } + + /* From section 4.1.7 of the GLSL 4.40 spec: + * + * "[Opaque types] can only be declared as function parameters + * or uniform-qualified variables." + */ + if (return_type->contains_opaque()) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(&loc, state, + "function `%s' return type can't contain an opaque type", + name); + } + + /* Create an ir_function if one doesn't already exist. */ + f = state->symbols->get_function(name); + if (f == NULL) { + f = new(ctx) ir_function(name); + if (!this->return_type->qualifier.flags.q.subroutine) { + if (!state->symbols->add_function(f)) { + /* This function name shadows a non-function use of the same name. */ + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(&loc, state, "function name `%s' conflicts with " + "non-function", name); + return NULL; + } + } + emit_function(state, f); + } + + /* From GLSL ES 3.0 spec, chapter 6.1 "Function Definitions", page 71: + * + * "A shader cannot redefine or overload built-in functions." + * + * While in GLSL ES 1.0 specification, chapter 8 "Built-in Functions": + * + * "User code can overload the built-in functions but cannot redefine + * them." + */ + if (state->es_shader && state->language_version >= 300) { + /* Local shader has no exact candidates; check the built-ins. 
*/ + _mesa_glsl_initialize_builtin_functions(); + if (_mesa_glsl_find_builtin_function_by_name(name)) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(& loc, state, + "A shader cannot redefine or overload built-in " + "function `%s' in GLSL ES 3.00", name); + return NULL; + } + } + + /* Verify that this function's signature either doesn't match a previously + * seen signature for a function with the same name, or, if a match is found, + * that the previously seen signature does not have an associated definition. + */ + if (state->es_shader || f->has_user_signature()) { + sig = f->exact_matching_signature(state, &hir_parameters); + if (sig != NULL) { + const char *badvar = sig->qualifiers_match(&hir_parameters); + if (badvar != NULL) { + YYLTYPE loc = this->get_location(); + + _mesa_glsl_error(&loc, state, "function `%s' parameter `%s' " + "qualifiers don't match prototype", name, badvar); + } + + if (sig->return_type != return_type) { + YYLTYPE loc = this->get_location(); + + _mesa_glsl_error(&loc, state, "function `%s' return type doesn't " + "match prototype", name); + } + + if (sig->is_defined) { + if (is_definition) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(& loc, state, "function `%s' redefined", name); + } else { + /* We just encountered a prototype that exactly matches a + * function that's already been defined. This is redundant, + * and we should ignore it. + */ + return NULL; + } + } + } + } + + /* Verify the return type of main() */ + if (strcmp(name, "main") == 0) { + if (! return_type->is_void()) { + YYLTYPE loc = this->get_location(); + + _mesa_glsl_error(& loc, state, "main() must return void"); + } + + if (!hir_parameters.is_empty()) { + YYLTYPE loc = this->get_location(); + + _mesa_glsl_error(& loc, state, "main() must not take any parameters"); + } + } + + /* Finish storing the information about this new function in its signature. 
+ */ + if (sig == NULL) { + sig = new(ctx) ir_function_signature(return_type); + f->add_signature(sig); + } + + sig->replace_parameters(&hir_parameters); + signature = sig; + + if (this->return_type->qualifier.flags.q.subroutine_def) { + int idx; + + if (this->return_type->qualifier.flags.q.explicit_index) { + unsigned qual_index; + if (process_qualifier_constant(state, &loc, "index", + this->return_type->qualifier.index, + &qual_index)) { + if (!state->has_explicit_uniform_location()) { + _mesa_glsl_error(&loc, state, "subroutine index requires " + "GL_ARB_explicit_uniform_location or " + "GLSL 4.30"); + } else if (qual_index >= MAX_SUBROUTINES) { + _mesa_glsl_error(&loc, state, + "invalid subroutine index (%d) index must " + "be a number between 0 and " + "GL_MAX_SUBROUTINES - 1 (%d)", qual_index, + MAX_SUBROUTINES - 1); + } else { + f->subroutine_index = qual_index; + } + } + } + + f->num_subroutine_types = this->return_type->qualifier.subroutine_list->declarations.length(); + f->subroutine_types = ralloc_array(state, const struct glsl_type *, + f->num_subroutine_types); + idx = 0; + foreach_list_typed(ast_declaration, decl, link, &this->return_type->qualifier.subroutine_list->declarations) { + const struct glsl_type *type; + /* the subroutine type must be already declared */ + type = state->symbols->get_type(decl->identifier); + if (!type) { + _mesa_glsl_error(& loc, state, "unknown type '%s' in subroutine function definition", decl->identifier); + } + f->subroutine_types[idx++] = type; + } + state->subroutines = (ir_function **)reralloc(state, state->subroutines, + ir_function *, + state->num_subroutines + 1); + state->subroutines[state->num_subroutines] = f; + state->num_subroutines++; + + } + + if (this->return_type->qualifier.flags.q.subroutine) { + if (!state->symbols->add_type(this->identifier, glsl_type::get_subroutine_instance(this->identifier))) { + _mesa_glsl_error(& loc, state, "type '%s' previously defined", this->identifier); + return NULL; + } + 
state->subroutine_types = (ir_function **)reralloc(state, state->subroutine_types, + ir_function *, + state->num_subroutine_types + 1); + state->subroutine_types[state->num_subroutine_types] = f; + state->num_subroutine_types++; + + f->is_subroutine = true; + } + + /* Function declarations (prototypes) do not have r-values. + */ + return NULL; +} + + +ir_rvalue * +ast_function_definition::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + prototype->is_definition = true; + prototype->hir(instructions, state); + + ir_function_signature *signature = prototype->signature; + if (signature == NULL) + return NULL; + + assert(state->current_function == NULL); + state->current_function = signature; + state->found_return = false; + + /* Duplicate parameters declared in the prototype as concrete variables. + * Add these to the symbol table. + */ + state->symbols->push_scope(); + foreach_in_list(ir_variable, var, &signature->parameters) { + assert(var->as_variable() != NULL); + + /* The only way a parameter would "exist" is if two parameters have + * the same name. + */ + if (state->symbols->name_declared_this_scope(var->name)) { + YYLTYPE loc = this->get_location(); + + _mesa_glsl_error(& loc, state, "parameter `%s' redeclared", var->name); + } else { + state->symbols->add_variable(var); + } + } + + /* Convert the body of the function to HIR. */ + this->body->hir(&signature->body, state); + signature->is_defined = true; + + state->symbols->pop_scope(); + + assert(state->current_function == signature); + state->current_function = NULL; + + if (!signature->return_type->is_void() && !state->found_return) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(& loc, state, "function `%s' has non-void return type " + "%s, but no return statement", + signature->function_name(), + signature->return_type->name); + } + + /* Function definitions do not have r-values. 
+ */ + return NULL; +} + + +ir_rvalue * +ast_jump_statement::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + + switch (mode) { + case ast_return: { + ir_return *inst; + assert(state->current_function); + + if (opt_return_value) { + ir_rvalue *ret = opt_return_value->hir(instructions, state); + + /* The value of the return type can be NULL if the shader says + * 'return foo();' and foo() is a function that returns void. + * + * NOTE: The GLSL spec doesn't say that this is an error. The type + * of the return value is void. If the return type of the function is + * also void, then this should compile without error. Seriously. + */ + const glsl_type *const ret_type = + (ret == NULL) ? glsl_type::void_type : ret->type; + + /* Implicit conversions are not allowed for return values prior to + * ARB_shading_language_420pack. + */ + if (state->current_function->return_type != ret_type) { + YYLTYPE loc = this->get_location(); + + if (state->has_420pack()) { + if (!apply_implicit_conversion(state->current_function->return_type, + ret, state)) { + _mesa_glsl_error(& loc, state, + "could not implicitly convert return value " + "to %s, in function `%s'", + state->current_function->return_type->name, + state->current_function->function_name()); + } + } else { + _mesa_glsl_error(& loc, state, + "`return' with wrong type %s, in function `%s' " + "returning %s", + ret_type->name, + state->current_function->function_name(), + state->current_function->return_type->name); + } + } else if (state->current_function->return_type->base_type == + GLSL_TYPE_VOID) { + YYLTYPE loc = this->get_location(); + + /* The ARB_shading_language_420pack, GLSL ES 3.0, and GLSL 4.20 + * specs add a clarification: + * + * "A void function can only use return without a return argument, even if + * the return argument has void type. 
Return statements only accept values: + * + * void func1() { } + * void func2() { return func1(); } // illegal return statement" + */ + _mesa_glsl_error(& loc, state, + "void functions can only use `return' without a " + "return argument"); + } + + inst = new(ctx) ir_return(ret); + } else { + if (state->current_function->return_type->base_type != + GLSL_TYPE_VOID) { + YYLTYPE loc = this->get_location(); + + _mesa_glsl_error(& loc, state, + "`return' with no value, in function %s returning " + "non-void", + state->current_function->function_name()); + } + inst = new(ctx) ir_return; + } + + state->found_return = true; + instructions->push_tail(inst); + break; + } + + case ast_discard: + if (state->stage != MESA_SHADER_FRAGMENT) { + YYLTYPE loc = this->get_location(); + + _mesa_glsl_error(& loc, state, + "`discard' may only appear in a fragment shader"); + } + instructions->push_tail(new(ctx) ir_discard); + break; + + case ast_break: + case ast_continue: + if (mode == ast_continue && + state->loop_nesting_ast == NULL) { + YYLTYPE loc = this->get_location(); + + _mesa_glsl_error(& loc, state, "continue may only appear in a loop"); + } else if (mode == ast_break && + state->loop_nesting_ast == NULL && + state->switch_state.switch_nesting_ast == NULL) { + YYLTYPE loc = this->get_location(); + + _mesa_glsl_error(& loc, state, + "break may only appear in a loop or a switch"); + } else { + /* For a loop, inline the for loop expression again, since we don't + * know where near the end of the loop body the normal copy of it is + * going to be placed. Same goes for the condition for a do-while + * loop. 
+ */ + if (state->loop_nesting_ast != NULL && + mode == ast_continue && !state->switch_state.is_switch_innermost) { + if (state->loop_nesting_ast->rest_expression) { + state->loop_nesting_ast->rest_expression->hir(instructions, + state); + } + if (state->loop_nesting_ast->mode == + ast_iteration_statement::ast_do_while) { + state->loop_nesting_ast->condition_to_hir(instructions, state); + } + } + + if (state->switch_state.is_switch_innermost && + mode == ast_continue) { + /* Set 'continue_inside' to true. */ + ir_rvalue *const true_val = new (ctx) ir_constant(true); + ir_dereference_variable *deref_continue_inside_var = + new(ctx) ir_dereference_variable(state->switch_state.continue_inside); + instructions->push_tail(new(ctx) ir_assignment(deref_continue_inside_var, + true_val)); + + /* Break out from the switch, continue for the loop will + * be called right after switch. */ + ir_loop_jump *const jump = + new(ctx) ir_loop_jump(ir_loop_jump::jump_break); + instructions->push_tail(jump); + + } else if (state->switch_state.is_switch_innermost && + mode == ast_break) { + /* Force break out of switch by inserting a break. */ + ir_loop_jump *const jump = + new(ctx) ir_loop_jump(ir_loop_jump::jump_break); + instructions->push_tail(jump); + } else { + ir_loop_jump *const jump = + new(ctx) ir_loop_jump((mode == ast_break) + ? ir_loop_jump::jump_break + : ir_loop_jump::jump_continue); + instructions->push_tail(jump); + } + } + + break; + } + + /* Jump instructions do not have r-values. + */ + return NULL; +} + + +ir_rvalue * +ast_selection_statement::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + + ir_rvalue *const condition = this->condition->hir(instructions, state); + + /* From page 66 (page 72 of the PDF) of the GLSL 1.50 spec: + * + * "Any expression whose type evaluates to a Boolean can be used as the + * conditional expression bool-expression. Vector types are not accepted + * as the expression to if." 
+ * + * The checks are separated so that higher quality diagnostics can be + * generated for cases where both rules are violated. + */ + if (!condition->type->is_boolean() || !condition->type->is_scalar()) { + YYLTYPE loc = this->condition->get_location(); + + _mesa_glsl_error(& loc, state, "if-statement condition must be scalar " + "boolean"); + } + + ir_if *const stmt = new(ctx) ir_if(condition); + + if (then_statement != NULL) { + state->symbols->push_scope(); + then_statement->hir(& stmt->then_instructions, state); + state->symbols->pop_scope(); + } + + if (else_statement != NULL) { + state->symbols->push_scope(); + else_statement->hir(& stmt->else_instructions, state); + state->symbols->pop_scope(); + } + + instructions->push_tail(stmt); + + /* if-statements do not have r-values. + */ + return NULL; +} + + +ir_rvalue * +ast_switch_statement::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + + ir_rvalue *const test_expression = + this->test_expression->hir(instructions, state); + + /* From page 66 (page 55 of the PDF) of the GLSL 1.50 spec: + * + * "The type of init-expression in a switch statement must be a + * scalar integer." + */ + if (!test_expression->type->is_scalar() || + !test_expression->type->is_integer()) { + YYLTYPE loc = this->test_expression->get_location(); + + _mesa_glsl_error(& loc, + state, + "switch-statement expression must be scalar " + "integer"); + } + + /* Track the switch-statement nesting in a stack-like manner. + */ + struct glsl_switch_state saved = state->switch_state; + + state->switch_state.is_switch_innermost = true; + state->switch_state.switch_nesting_ast = this; + state->switch_state.labels_ht = hash_table_ctor(0, hash_table_pointer_hash, + hash_table_pointer_compare); + state->switch_state.previous_default = NULL; + + /* Initalize is_fallthru state to false. 
+ */ + ir_rvalue *const is_fallthru_val = new (ctx) ir_constant(false); + state->switch_state.is_fallthru_var = + new(ctx) ir_variable(glsl_type::bool_type, + "switch_is_fallthru_tmp", + ir_var_temporary); + instructions->push_tail(state->switch_state.is_fallthru_var); + + ir_dereference_variable *deref_is_fallthru_var = + new(ctx) ir_dereference_variable(state->switch_state.is_fallthru_var); + instructions->push_tail(new(ctx) ir_assignment(deref_is_fallthru_var, + is_fallthru_val)); + + /* Initialize continue_inside state to false. + */ + state->switch_state.continue_inside = + new(ctx) ir_variable(glsl_type::bool_type, + "continue_inside_tmp", + ir_var_temporary); + instructions->push_tail(state->switch_state.continue_inside); + + ir_rvalue *const false_val = new (ctx) ir_constant(false); + ir_dereference_variable *deref_continue_inside_var = + new(ctx) ir_dereference_variable(state->switch_state.continue_inside); + instructions->push_tail(new(ctx) ir_assignment(deref_continue_inside_var, + false_val)); + + state->switch_state.run_default = + new(ctx) ir_variable(glsl_type::bool_type, + "run_default_tmp", + ir_var_temporary); + instructions->push_tail(state->switch_state.run_default); + + /* Loop around the switch is used for flow control. */ + ir_loop * loop = new(ctx) ir_loop(); + instructions->push_tail(loop); + + /* Cache test expression. + */ + test_to_hir(&loop->body_instructions, state); + + /* Emit code for body of switch stmt. + */ + body->hir(&loop->body_instructions, state); + + /* Insert a break at the end to exit loop. */ + ir_loop_jump *jump = new(ctx) ir_loop_jump(ir_loop_jump::jump_break); + loop->body_instructions.push_tail(jump); + + /* If we are inside loop, check if continue got called inside switch. 
*/ + if (state->loop_nesting_ast != NULL) { + ir_dereference_variable *deref_continue_inside = + new(ctx) ir_dereference_variable(state->switch_state.continue_inside); + ir_if *irif = new(ctx) ir_if(deref_continue_inside); + ir_loop_jump *jump = new(ctx) ir_loop_jump(ir_loop_jump::jump_continue); + + if (state->loop_nesting_ast != NULL) { + if (state->loop_nesting_ast->rest_expression) { + state->loop_nesting_ast->rest_expression->hir(&irif->then_instructions, + state); + } + if (state->loop_nesting_ast->mode == + ast_iteration_statement::ast_do_while) { + state->loop_nesting_ast->condition_to_hir(&irif->then_instructions, state); + } + } + irif->then_instructions.push_tail(jump); + instructions->push_tail(irif); + } + + hash_table_dtor(state->switch_state.labels_ht); + + state->switch_state = saved; + + /* Switch statements do not have r-values. */ + return NULL; +} + + +void +ast_switch_statement::test_to_hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + + /* Cache value of test expression. */ + ir_rvalue *const test_val = + test_expression->hir(instructions, + state); + + state->switch_state.test_var = new(ctx) ir_variable(test_val->type, + "switch_test_tmp", + ir_var_temporary); + ir_dereference_variable *deref_test_var = + new(ctx) ir_dereference_variable(state->switch_state.test_var); + + instructions->push_tail(state->switch_state.test_var); + instructions->push_tail(new(ctx) ir_assignment(deref_test_var, test_val)); +} + + +ir_rvalue * +ast_switch_body::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + if (stmts != NULL) + stmts->hir(instructions, state); + + /* Switch bodies do not have r-values. 
*/ + return NULL; +} + +ir_rvalue * +ast_case_statement_list::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + exec_list default_case, after_default, tmp; + + foreach_list_typed (ast_case_statement, case_stmt, link, & this->cases) { + case_stmt->hir(&tmp, state); + + /* Default case. */ + if (state->switch_state.previous_default && default_case.is_empty()) { + default_case.append_list(&tmp); + continue; + } + + /* If default case found, append 'after_default' list. */ + if (!default_case.is_empty()) + after_default.append_list(&tmp); + else + instructions->append_list(&tmp); + } + + /* Handle the default case. This is done here because default might not be + * the last case. We need to add checks against following cases first to see + * if default should be chosen or not. + */ + if (!default_case.is_empty()) { + + ir_rvalue *const true_val = new (state) ir_constant(true); + ir_dereference_variable *deref_run_default_var = + new(state) ir_dereference_variable(state->switch_state.run_default); + + /* Choose to run default case initially, following conditional + * assignments might change this. + */ + ir_assignment *const init_var = + new(state) ir_assignment(deref_run_default_var, true_val); + instructions->push_tail(init_var); + + /* Default case was the last one, no checks required. */ + if (after_default.is_empty()) { + instructions->append_list(&default_case); + return NULL; + } + + foreach_in_list(ir_instruction, ir, &after_default) { + ir_assignment *assign = ir->as_assignment(); + + if (!assign) + continue; + + /* Clone the check between case label and init expression. 
*/ + ir_expression *exp = (ir_expression*) assign->condition; + ir_expression *clone = exp->clone(state, NULL); + + ir_dereference_variable *deref_var = + new(state) ir_dereference_variable(state->switch_state.run_default); + ir_rvalue *const false_val = new (state) ir_constant(false); + + ir_assignment *const set_false = + new(state) ir_assignment(deref_var, false_val, clone); + + instructions->push_tail(set_false); + } + + /* Append default case and all cases after it. */ + instructions->append_list(&default_case); + instructions->append_list(&after_default); + } + + /* Case statements do not have r-values. */ + return NULL; +} + +ir_rvalue * +ast_case_statement::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + labels->hir(instructions, state); + + /* Guard case statements depending on fallthru state. */ + ir_dereference_variable *const deref_fallthru_guard = + new(state) ir_dereference_variable(state->switch_state.is_fallthru_var); + ir_if *const test_fallthru = new(state) ir_if(deref_fallthru_guard); + + foreach_list_typed (ast_node, stmt, link, & this->stmts) + stmt->hir(& test_fallthru->then_instructions, state); + + instructions->push_tail(test_fallthru); + + /* Case statements do not have r-values. */ + return NULL; +} + + +ir_rvalue * +ast_case_label_list::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + foreach_list_typed (ast_case_label, label, link, & this->labels) + label->hir(instructions, state); + + /* Case labels do not have r-values. */ + return NULL; +} + +ir_rvalue * +ast_case_label::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + + ir_dereference_variable *deref_fallthru_var = + new(ctx) ir_dereference_variable(state->switch_state.is_fallthru_var); + + ir_rvalue *const true_val = new(ctx) ir_constant(true); + + /* If not default case, ... 
*/ + if (this->test_value != NULL) { + /* Conditionally set fallthru state based on + * comparison of cached test expression value to case label. + */ + ir_rvalue *const label_rval = this->test_value->hir(instructions, state); + ir_constant *label_const = label_rval->constant_expression_value(); + + if (!label_const) { + YYLTYPE loc = this->test_value->get_location(); + + _mesa_glsl_error(& loc, state, + "switch statement case label must be a " + "constant expression"); + + /* Stuff a dummy value in to allow processing to continue. */ + label_const = new(ctx) ir_constant(0); + } else { + ast_expression *previous_label = (ast_expression *) + hash_table_find(state->switch_state.labels_ht, + (void *)(uintptr_t)label_const->value.u[0]); + + if (previous_label) { + YYLTYPE loc = this->test_value->get_location(); + _mesa_glsl_error(& loc, state, "duplicate case value"); + + loc = previous_label->get_location(); + _mesa_glsl_error(& loc, state, "this is the previous case label"); + } else { + hash_table_insert(state->switch_state.labels_ht, + this->test_value, + (void *)(uintptr_t)label_const->value.u[0]); + } + } + + ir_dereference_variable *deref_test_var = + new(ctx) ir_dereference_variable(state->switch_state.test_var); + + ir_expression *test_cond = new(ctx) ir_expression(ir_binop_all_equal, + label_const, + deref_test_var); + + /* + * From GLSL 4.40 specification section 6.2 ("Selection"): + * + * "The type of the init-expression value in a switch statement must + * be a scalar int or uint. The type of the constant-expression value + * in a case label also must be a scalar int or uint. When any pair + * of these values is tested for "equal value" and the types do not + * match, an implicit conversion will be done to convert the int to a + * uint (see section 4.1.10 “Implicit Conversions”) before the compare + * is done." 
+ */ + if (label_const->type != state->switch_state.test_var->type) { + YYLTYPE loc = this->test_value->get_location(); + + const glsl_type *type_a = label_const->type; + const glsl_type *type_b = state->switch_state.test_var->type; + + /* Check if int->uint implicit conversion is supported. */ + bool integer_conversion_supported = + glsl_type::int_type->can_implicitly_convert_to(glsl_type::uint_type, + state); + + if ((!type_a->is_integer() || !type_b->is_integer()) || + !integer_conversion_supported) { + _mesa_glsl_error(&loc, state, "type mismatch with switch " + "init-expression and case label (%s != %s)", + type_a->name, type_b->name); + } else { + /* Conversion of the case label. */ + if (type_a->base_type == GLSL_TYPE_INT) { + if (!apply_implicit_conversion(glsl_type::uint_type, + test_cond->operands[0], state)) + _mesa_glsl_error(&loc, state, "implicit type conversion error"); + } else { + /* Conversion of the init-expression value. */ + if (!apply_implicit_conversion(glsl_type::uint_type, + test_cond->operands[1], state)) + _mesa_glsl_error(&loc, state, "implicit type conversion error"); + } + } + } + + ir_assignment *set_fallthru_on_test = + new(ctx) ir_assignment(deref_fallthru_var, true_val, test_cond); + + instructions->push_tail(set_fallthru_on_test); + } else { /* default case */ + if (state->switch_state.previous_default) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(& loc, state, + "multiple default labels in one switch"); + + loc = state->switch_state.previous_default->get_location(); + _mesa_glsl_error(& loc, state, "this is the first default label"); + } + state->switch_state.previous_default = this; + + /* Set fallthru condition on 'run_default' bool. 
*/ + ir_dereference_variable *deref_run_default = + new(ctx) ir_dereference_variable(state->switch_state.run_default); + ir_rvalue *const cond_true = new(ctx) ir_constant(true); + ir_expression *test_cond = new(ctx) ir_expression(ir_binop_all_equal, + cond_true, + deref_run_default); + + /* Set falltrhu state. */ + ir_assignment *set_fallthru = + new(ctx) ir_assignment(deref_fallthru_var, true_val, test_cond); + + instructions->push_tail(set_fallthru); + } + + /* Case statements do not have r-values. */ + return NULL; +} + +void +ast_iteration_statement::condition_to_hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + + if (condition != NULL) { + ir_rvalue *const cond = + condition->hir(instructions, state); + + if ((cond == NULL) + || !cond->type->is_boolean() || !cond->type->is_scalar()) { + YYLTYPE loc = condition->get_location(); + + _mesa_glsl_error(& loc, state, + "loop condition must be scalar boolean"); + } else { + /* As the first code in the loop body, generate a block that looks + * like 'if (!condition) break;' as the loop termination condition. + */ + ir_rvalue *const not_cond = + new(ctx) ir_expression(ir_unop_logic_not, cond); + + ir_if *const if_stmt = new(ctx) ir_if(not_cond); + + ir_jump *const break_stmt = + new(ctx) ir_loop_jump(ir_loop_jump::jump_break); + + if_stmt->then_instructions.push_tail(break_stmt); + instructions->push_tail(if_stmt); + } + } +} + + +ir_rvalue * +ast_iteration_statement::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + + /* For-loops and while-loops start a new scope, but do-while loops do not. + */ + if (mode != ast_do_while) + state->symbols->push_scope(); + + if (init_statement != NULL) + init_statement->hir(instructions, state); + + ir_loop *const stmt = new(ctx) ir_loop(); + instructions->push_tail(stmt); + + /* Track the current loop nesting. 
*/ + ast_iteration_statement *nesting_ast = state->loop_nesting_ast; + + state->loop_nesting_ast = this; + + /* Likewise, indicate that following code is closest to a loop, + * NOT closest to a switch. + */ + bool saved_is_switch_innermost = state->switch_state.is_switch_innermost; + state->switch_state.is_switch_innermost = false; + + if (mode != ast_do_while) + condition_to_hir(&stmt->body_instructions, state); + + if (body != NULL) + body->hir(& stmt->body_instructions, state); + + if (rest_expression != NULL) + rest_expression->hir(& stmt->body_instructions, state); + + if (mode == ast_do_while) + condition_to_hir(&stmt->body_instructions, state); + + if (mode != ast_do_while) + state->symbols->pop_scope(); + + /* Restore previous nesting before returning. */ + state->loop_nesting_ast = nesting_ast; + state->switch_state.is_switch_innermost = saved_is_switch_innermost; + + /* Loops do not have r-values. + */ + return NULL; +} + + +/** + * Determine if the given type is valid for establishing a default precision + * qualifier. + * + * From GLSL ES 3.00 section 4.5.4 ("Default Precision Qualifiers"): + * + * "The precision statement + * + * precision precision-qualifier type; + * + * can be used to establish a default precision qualifier. The type field + * can be either int or float or any of the sampler types, and the + * precision-qualifier can be lowp, mediump, or highp." + * + * GLSL ES 1.00 has similar language. GLSL 1.30 doesn't allow precision + * qualifiers on sampler types, but this seems like an oversight (since the + * intention of including these in GLSL 1.30 is to allow compatibility with ES + * shaders). So we allow int, float, and all sampler types regardless of GLSL + * version. + */ +static bool +is_valid_default_precision_type(const struct glsl_type *const type) +{ + if (type == NULL) + return false; + + switch (type->base_type) { + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + /* "int" and "float" are valid, but vectors and matrices are not. 
*/ + return type->vector_elements == 1 && type->matrix_columns == 1; + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_ATOMIC_UINT: + return true; + default: + return false; + } +} + + +ir_rvalue * +ast_type_specifier::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + if (this->default_precision == ast_precision_none && this->structure == NULL) + return NULL; + + YYLTYPE loc = this->get_location(); + + /* If this is a precision statement, check that the type to which it is + * applied is either float or int. + * + * From section 4.5.3 of the GLSL 1.30 spec: + * "The precision statement + * precision precision-qualifier type; + * can be used to establish a default precision qualifier. The type + * field can be either int or float [...]. Any other types or + * qualifiers will result in an error. + */ + if (this->default_precision != ast_precision_none) { + if (!state->check_precision_qualifiers_allowed(&loc)) + return NULL; + + if (this->structure != NULL) { + _mesa_glsl_error(&loc, state, + "precision qualifiers do not apply to structures"); + return NULL; + } + + if (this->array_specifier != NULL) { + _mesa_glsl_error(&loc, state, + "default precision statements do not apply to " + "arrays"); + return NULL; + } + + const struct glsl_type *const type = + state->symbols->get_type(this->type_name); + if (!is_valid_default_precision_type(type)) { + _mesa_glsl_error(&loc, state, + "default precision statements apply only to " + "float, int, and opaque types"); + return NULL; + } + + if (state->es_shader) { + /* Section 4.5.3 (Default Precision Qualifiers) of the GLSL ES 1.00 + * spec says: + * + * "Non-precision qualified declarations will use the precision + * qualifier specified in the most recent precision statement + * that is still in scope. The precision statement has the same + * scoping rules as variable declarations. 
If it is declared + * inside a compound statement, its effect stops at the end of + * the innermost statement it was declared in. Precision + * statements in nested scopes override precision statements in + * outer scopes. Multiple precision statements for the same basic + * type can appear inside the same scope, with later statements + * overriding earlier statements within that scope." + * + * Default precision specifications follow the same scope rules as + * variables. So, we can track the state of the default precision + * qualifiers in the symbol table, and the rules will just work. This + * is a slight abuse of the symbol table, but it has the semantics + * that we want. + */ + state->symbols->add_default_precision_qualifier(this->type_name, + this->default_precision); + } + + /* FINISHME: Translate precision statements into IR. */ + return NULL; + } + + /* _mesa_ast_set_aggregate_type() sets the <structure> field so that + * process_record_constructor() can do type-checking on C-style initializer + * expressions of structs, but ast_struct_specifier should only be translated + * to HIR if it is declaring the type of a structure. + * + * The ->is_declaration field is false for initializers of variables + * declared separately from the struct's type definition. + * + * struct S { ... }; (is_declaration = true) + * struct T { ... } t = { ... }; (is_declaration = true) + * S s = { ... }; (is_declaration = false) + */ + if (this->structure != NULL && this->structure->is_declaration) + return this->structure->hir(instructions, state); + + return NULL; +} + + +/** + * Process a structure or interface block tree into an array of structure fields + * + * After parsing, where there are some syntax differnces, structures and + * interface blocks are almost identical. They are similar enough that the + * AST for each can be processed the same way into a set of + * \c glsl_struct_field to describe the members. 
+ * + * If we're processing an interface block, var_mode should be the type of the + * interface block (ir_var_shader_in, ir_var_shader_out, ir_var_uniform or + * ir_var_shader_storage). If we're processing a structure, var_mode should be + * ir_var_auto. + * + * \return + * The number of fields processed. A pointer to the array structure fields is + * stored in \c *fields_ret. + */ +static unsigned +ast_process_struct_or_iface_block_members(exec_list *instructions, + struct _mesa_glsl_parse_state *state, + exec_list *declarations, + glsl_struct_field **fields_ret, + bool is_interface, + enum glsl_matrix_layout matrix_layout, + bool allow_reserved_names, + ir_variable_mode var_mode, + ast_type_qualifier *layout, + unsigned block_stream, + unsigned expl_location) +{ + unsigned decl_count = 0; + + /* Make an initial pass over the list of fields to determine how + * many there are. Each element in this list is an ast_declarator_list. + * This means that we actually need to count the number of elements in the + * 'declarations' list in each of the elements. + */ + foreach_list_typed (ast_declarator_list, decl_list, link, declarations) { + decl_count += decl_list->declarations.length(); + } + + /* Allocate storage for the fields and process the field + * declarations. As the declarations are processed, try to also convert + * the types to HIR. This ensures that structure definitions embedded in + * other structure definitions or in interface blocks are processed. 
+ */ + glsl_struct_field *const fields = ralloc_array(state, glsl_struct_field, + decl_count); + + bool first_member = true; + bool first_member_has_explicit_location; + + unsigned i = 0; + foreach_list_typed (ast_declarator_list, decl_list, link, declarations) { + const char *type_name; + YYLTYPE loc = decl_list->get_location(); + + decl_list->type->specifier->hir(instructions, state); + + /* Section 10.9 of the GLSL ES 1.00 specification states that + * embedded structure definitions have been removed from the language. + */ + if (state->es_shader && decl_list->type->specifier->structure != NULL) { + _mesa_glsl_error(&loc, state, "embedded structure definitions are " + "not allowed in GLSL ES 1.00"); + } + + const glsl_type *decl_type = + decl_list->type->glsl_type(& type_name, state); + + const struct ast_type_qualifier *const qual = + &decl_list->type->qualifier; + + /* From section 4.3.9 of the GLSL 4.40 spec: + * + * "[In interface blocks] opaque types are not allowed." + * + * It should be impossible for decl_type to be NULL here. Cases that + * might naturally lead to decl_type being NULL, especially for the + * is_interface case, will have resulted in compilation having + * already halted due to a syntax error. + */ + assert(decl_type); + + if (is_interface && decl_type->contains_opaque()) { + _mesa_glsl_error(&loc, state, + "uniform/buffer in non-default interface block contains " + "opaque variable"); + } + + if (decl_type->contains_atomic()) { + /* From section 4.1.7.3 of the GLSL 4.40 spec: + * + * "Members of structures cannot be declared as atomic counter + * types." + */ + _mesa_glsl_error(&loc, state, "atomic counter in structure, " + "shader storage block or uniform block"); + } + + if (decl_type->contains_image()) { + /* FINISHME: Same problem as with atomic counters. + * FINISHME: Request clarification from Khronos and add + * FINISHME: spec quotation here. 
+ */ + _mesa_glsl_error(&loc, state, + "image in structure, shader storage block or " + "uniform block"); + } + + if (qual->flags.q.explicit_binding) { + _mesa_glsl_error(&loc, state, + "binding layout qualifier cannot be applied " + "to struct or interface block members"); + } + + if (is_interface) { + if (!first_member) { + if (!layout->flags.q.explicit_location && + ((first_member_has_explicit_location && + !qual->flags.q.explicit_location) || + (!first_member_has_explicit_location && + qual->flags.q.explicit_location))) { + _mesa_glsl_error(&loc, state, + "when block-level location layout qualifier " + "is not supplied either all members must " + "have a location layout qualifier or all " + "members must not have a location layout " + "qualifier"); + } + } else { + first_member = false; + first_member_has_explicit_location = + qual->flags.q.explicit_location; + } + } + + if (qual->flags.q.std140 || + qual->flags.q.std430 || + qual->flags.q.packed || + qual->flags.q.shared) { + _mesa_glsl_error(&loc, state, + "uniform/shader storage block layout qualifiers " + "std140, std430, packed, and shared can only be " + "applied to uniform/shader storage blocks, not " + "members"); + } + + if (qual->flags.q.constant) { + _mesa_glsl_error(&loc, state, + "const storage qualifier cannot be applied " + "to struct or interface block members"); + } + + /* From Section 4.4.2.3 (Geometry Outputs) of the GLSL 4.50 spec: + * + * "A block member may be declared with a stream identifier, but + * the specified stream must match the stream associated with the + * containing block." 
+ */ + if (qual->flags.q.explicit_stream) { + unsigned qual_stream; + if (process_qualifier_constant(state, &loc, "stream", + qual->stream, &qual_stream) && + qual_stream != block_stream) { + _mesa_glsl_error(&loc, state, "stream layout qualifier on " + "interface block member does not match " + "the interface block (%u vs %u)", qual_stream, + block_stream); + } + } + + if (qual->flags.q.uniform && qual->has_interpolation()) { + _mesa_glsl_error(&loc, state, + "interpolation qualifiers cannot be used " + "with uniform interface blocks"); + } + + if ((qual->flags.q.uniform || !is_interface) && + qual->has_auxiliary_storage()) { + _mesa_glsl_error(&loc, state, + "auxiliary storage qualifiers cannot be used " + "in uniform blocks or structures."); + } + + if (qual->flags.q.row_major || qual->flags.q.column_major) { + if (!qual->flags.q.uniform && !qual->flags.q.buffer) { + _mesa_glsl_error(&loc, state, + "row_major and column_major can only be " + "applied to interface blocks"); + } else + validate_matrix_layout_for_type(state, &loc, decl_type, NULL); + } + + if (qual->flags.q.read_only && qual->flags.q.write_only) { + _mesa_glsl_error(&loc, state, "buffer variable can't be both " + "readonly and writeonly."); + } + + foreach_list_typed (ast_declaration, decl, link, + &decl_list->declarations) { + YYLTYPE loc = decl->get_location(); + + if (!allow_reserved_names) + validate_identifier(decl->identifier, loc, state); + + const struct glsl_type *field_type = + process_array_type(&loc, decl_type, decl->array_specifier, state); + validate_array_dimensions(field_type, state, &loc); + fields[i].type = field_type; + fields[i].name = decl->identifier; + fields[i].interpolation = + interpret_interpolation_qualifier(qual, var_mode, state, &loc); + fields[i].centroid = qual->flags.q.centroid ? 1 : 0; + fields[i].sample = qual->flags.q.sample ? 1 : 0; + fields[i].patch = qual->flags.q.patch ? 
1 : 0; + fields[i].precision = qual->precision; + + if (qual->flags.q.explicit_location) { + unsigned qual_location; + if (process_qualifier_constant(state, &loc, "location", + qual->location, &qual_location)) { + fields[i].location = VARYING_SLOT_VAR0 + qual_location; + expl_location = fields[i].location + + fields[i].type->count_attribute_slots(false); + } + } else { + if (layout && layout->flags.q.explicit_location) { + fields[i].location = expl_location; + expl_location += fields[i].type->count_attribute_slots(false); + } else { + fields[i].location = -1; + } + } + + /* Propogate row- / column-major information down the fields of the + * structure or interface block. Structures need this data because + * the structure may contain a structure that contains ... a matrix + * that need the proper layout. + */ + if (field_type->without_array()->is_matrix() + || field_type->without_array()->is_record()) { + /* If no layout is specified for the field, inherit the layout + * from the block. + */ + fields[i].matrix_layout = matrix_layout; + + if (qual->flags.q.row_major) + fields[i].matrix_layout = GLSL_MATRIX_LAYOUT_ROW_MAJOR; + else if (qual->flags.q.column_major) + fields[i].matrix_layout = GLSL_MATRIX_LAYOUT_COLUMN_MAJOR; + + /* If we're processing an interface block, the matrix layout must + * be decided by this point. + */ + assert(!is_interface + || fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR + || fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR); + } + + /* Image qualifiers are allowed on buffer variables, which can only + * be defined inside shader storage buffer objects + */ + if (layout && var_mode == ir_var_shader_storage) { + /* For readonly and writeonly qualifiers the field definition, + * if set, overwrites the layout qualifier. 
+ */ + if (qual->flags.q.read_only) { + fields[i].image_read_only = true; + fields[i].image_write_only = false; + } else if (qual->flags.q.write_only) { + fields[i].image_read_only = false; + fields[i].image_write_only = true; + } else { + fields[i].image_read_only = layout->flags.q.read_only; + fields[i].image_write_only = layout->flags.q.write_only; + } + + /* For other qualifiers, we set the flag if either the layout + * qualifier or the field qualifier are set + */ + fields[i].image_coherent = qual->flags.q.coherent || + layout->flags.q.coherent; + fields[i].image_volatile = qual->flags.q._volatile || + layout->flags.q._volatile; + fields[i].image_restrict = qual->flags.q.restrict_flag || + layout->flags.q.restrict_flag; + } + + i++; + } + } + + assert(i == decl_count); + + *fields_ret = fields; + return decl_count; +} + + +ir_rvalue * +ast_struct_specifier::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + YYLTYPE loc = this->get_location(); + + /* Section 4.1.8 (Structures) of the GLSL 1.10 spec says: + * + * "Anonymous structures are not supported; so embedded structures must + * have a declarator. A name given to an embedded struct is scoped at + * the same level as the struct it is embedded in." + * + * The same section of the GLSL 1.20 spec says: + * + * "Anonymous structures are not supported. Embedded structures are not + * supported. + * + * struct S { float f; }; + * struct T { + * S; // Error: anonymous structures disallowed + * struct { ... }; // Error: embedded structures disallowed + * S s; // Okay: nested structures with name are allowed + * };" + * + * The GLSL ES 1.00 and 3.00 specs have similar langauge and examples. So, + * we allow embedded structures in 1.10 only. 
+ */ + if (state->language_version != 110 && state->struct_specifier_depth != 0) + _mesa_glsl_error(&loc, state, + "embedded structure declarations are not allowed"); + + state->struct_specifier_depth++; + + unsigned expl_location = 0; + if (layout && layout->flags.q.explicit_location) { + if (!process_qualifier_constant(state, &loc, "location", + layout->location, &expl_location)) { + return NULL; + } else { + expl_location = VARYING_SLOT_VAR0 + expl_location; + } + } + + glsl_struct_field *fields; + unsigned decl_count = + ast_process_struct_or_iface_block_members(instructions, + state, + &this->declarations, + &fields, + false, + GLSL_MATRIX_LAYOUT_INHERITED, + false /* allow_reserved_names */, + ir_var_auto, + layout, + 0, /* for interface only */ + expl_location); + + validate_identifier(this->name, loc, state); + + const glsl_type *t = + glsl_type::get_record_instance(fields, decl_count, this->name); + + if (!state->symbols->add_type(name, t)) { + _mesa_glsl_error(& loc, state, "struct `%s' previously defined", name); + } else { + const glsl_type **s = reralloc(state, state->user_structures, + const glsl_type *, + state->num_user_structures + 1); + if (s != NULL) { + s[state->num_user_structures] = t; + state->user_structures = s; + state->num_user_structures++; + } + } + + state->struct_specifier_depth--; + + /* Structure type definitions do not have r-values. + */ + return NULL; +} + + +/** + * Visitor class which detects whether a given interface block has been used. 
+ */ +class interface_block_usage_visitor : public ir_hierarchical_visitor +{ +public: + interface_block_usage_visitor(ir_variable_mode mode, const glsl_type *block) + : mode(mode), block(block), found(false) + { + } + + virtual ir_visitor_status visit(ir_dereference_variable *ir) + { + if (ir->var->data.mode == mode && ir->var->get_interface_type() == block) { + found = true; + return visit_stop; + } + return visit_continue; + } + + bool usage_found() const + { + return this->found; + } + +private: + ir_variable_mode mode; + const glsl_type *block; + bool found; +}; + +static bool +is_unsized_array_last_element(ir_variable *v) +{ + const glsl_type *interface_type = v->get_interface_type(); + int length = interface_type->length; + + assert(v->type->is_unsized_array()); + + /* Check if it is the last element of the interface */ + if (strcmp(interface_type->fields.structure[length-1].name, v->name) == 0) + return true; + return false; +} + +ir_rvalue * +ast_interface_block::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + YYLTYPE loc = this->get_location(); + + /* Interface blocks must be declared at global scope */ + if (state->current_function != NULL) { + _mesa_glsl_error(&loc, state, + "Interface block `%s' must be declared " + "at global scope", + this->block_name); + } + + if (!this->layout.flags.q.buffer && + this->layout.flags.q.std430) { + _mesa_glsl_error(&loc, state, + "std430 storage block layout qualifier is supported " + "only for shader storage blocks"); + } + + /* The ast_interface_block has a list of ast_declarator_lists. We + * need to turn those into ir_variables with an association + * with this uniform block. 
+ */ + enum glsl_interface_packing packing; + if (this->layout.flags.q.shared) { + packing = GLSL_INTERFACE_PACKING_SHARED; + } else if (this->layout.flags.q.packed) { + packing = GLSL_INTERFACE_PACKING_PACKED; + } else if (this->layout.flags.q.std430) { + packing = GLSL_INTERFACE_PACKING_STD430; + } else { + /* The default layout is std140. + */ + packing = GLSL_INTERFACE_PACKING_STD140; + } + + ir_variable_mode var_mode; + const char *iface_type_name; + if (this->layout.flags.q.in) { + var_mode = ir_var_shader_in; + iface_type_name = "in"; + } else if (this->layout.flags.q.out) { + var_mode = ir_var_shader_out; + iface_type_name = "out"; + } else if (this->layout.flags.q.uniform) { + var_mode = ir_var_uniform; + iface_type_name = "uniform"; + } else if (this->layout.flags.q.buffer) { + var_mode = ir_var_shader_storage; + iface_type_name = "buffer"; + } else { + var_mode = ir_var_auto; + iface_type_name = "UNKNOWN"; + assert(!"interface block layout qualifier not found!"); + } + + enum glsl_matrix_layout matrix_layout = GLSL_MATRIX_LAYOUT_INHERITED; + if (this->layout.flags.q.row_major) + matrix_layout = GLSL_MATRIX_LAYOUT_ROW_MAJOR; + else if (this->layout.flags.q.column_major) + matrix_layout = GLSL_MATRIX_LAYOUT_COLUMN_MAJOR; + + bool redeclaring_per_vertex = strcmp(this->block_name, "gl_PerVertex") == 0; + exec_list declared_variables; + glsl_struct_field *fields; + + /* Treat an interface block as one level of nesting, so that embedded struct + * specifiers will be disallowed. + */ + state->struct_specifier_depth++; + + /* For blocks that accept memory qualifiers (i.e. 
shader storage), verify + * that we don't have incompatible qualifiers + */ + if (this->layout.flags.q.read_only && this->layout.flags.q.write_only) { + _mesa_glsl_error(&loc, state, + "Interface block sets both readonly and writeonly"); + } + + unsigned qual_stream; + if (!process_qualifier_constant(state, &loc, "stream", this->layout.stream, + &qual_stream) || + !validate_stream_qualifier(&loc, state, qual_stream)) { + /* If the stream qualifier is invalid it doesn't make sense to continue + * on and try to compare stream layouts on member variables against it + * so just return early. + */ + return NULL; + } + + unsigned expl_location = 0; + if (layout.flags.q.explicit_location) { + if (!process_qualifier_constant(state, &loc, "location", + layout.location, &expl_location)) { + return NULL; + } else { + expl_location = VARYING_SLOT_VAR0 + expl_location; + } + } + + unsigned int num_variables = + ast_process_struct_or_iface_block_members(&declared_variables, + state, + &this->declarations, + &fields, + true, + matrix_layout, + redeclaring_per_vertex, + var_mode, + &this->layout, + qual_stream, + expl_location); + + state->struct_specifier_depth--; + + if (!redeclaring_per_vertex) { + validate_identifier(this->block_name, loc, state); + + /* From section 4.3.9 ("Interface Blocks") of the GLSL 4.50 spec: + * + * "Block names have no other use within a shader beyond interface + * matching; it is a compile-time error to use a block name at global + * scope for anything other than as a block name." + */ + ir_variable *var = state->symbols->get_variable(this->block_name); + if (var && !var->type->is_interface()) { + _mesa_glsl_error(&loc, state, "Block name `%s' is " + "already used in the scope.", + this->block_name); + } + } + + const glsl_type *earlier_per_vertex = NULL; + if (redeclaring_per_vertex) { + /* Find the previous declaration of gl_PerVertex. 
If we're redeclaring + * the named interface block gl_in, we can find it by looking at the + * previous declaration of gl_in. Otherwise we can find it by looking + * at the previous decalartion of any of the built-in outputs, + * e.g. gl_Position. + * + * Also check that the instance name and array-ness of the redeclaration + * are correct. + */ + switch (var_mode) { + case ir_var_shader_in: + if (ir_variable *earlier_gl_in = + state->symbols->get_variable("gl_in")) { + earlier_per_vertex = earlier_gl_in->get_interface_type(); + } else { + _mesa_glsl_error(&loc, state, + "redeclaration of gl_PerVertex input not allowed " + "in the %s shader", + _mesa_shader_stage_to_string(state->stage)); + } + if (this->instance_name == NULL || + strcmp(this->instance_name, "gl_in") != 0 || this->array_specifier == NULL || + !this->array_specifier->is_single_dimension()) { + _mesa_glsl_error(&loc, state, + "gl_PerVertex input must be redeclared as " + "gl_in[]"); + } + break; + case ir_var_shader_out: + if (ir_variable *earlier_gl_Position = + state->symbols->get_variable("gl_Position")) { + earlier_per_vertex = earlier_gl_Position->get_interface_type(); + } else if (ir_variable *earlier_gl_out = + state->symbols->get_variable("gl_out")) { + earlier_per_vertex = earlier_gl_out->get_interface_type(); + } else { + _mesa_glsl_error(&loc, state, + "redeclaration of gl_PerVertex output not " + "allowed in the %s shader", + _mesa_shader_stage_to_string(state->stage)); + } + if (state->stage == MESA_SHADER_TESS_CTRL) { + if (this->instance_name == NULL || + strcmp(this->instance_name, "gl_out") != 0 || this->array_specifier == NULL) { + _mesa_glsl_error(&loc, state, + "gl_PerVertex output must be redeclared as " + "gl_out[]"); + } + } else { + if (this->instance_name != NULL) { + _mesa_glsl_error(&loc, state, + "gl_PerVertex output may not be redeclared with " + "an instance name"); + } + } + break; + default: + _mesa_glsl_error(&loc, state, + "gl_PerVertex must be declared as an input 
or an " + "output"); + break; + } + + if (earlier_per_vertex == NULL) { + /* An error has already been reported. Bail out to avoid null + * dereferences later in this function. + */ + return NULL; + } + + /* Copy locations from the old gl_PerVertex interface block. */ + for (unsigned i = 0; i < num_variables; i++) { + int j = earlier_per_vertex->field_index(fields[i].name); + if (j == -1) { + _mesa_glsl_error(&loc, state, + "redeclaration of gl_PerVertex must be a subset " + "of the built-in members of gl_PerVertex"); + } else { + fields[i].location = + earlier_per_vertex->fields.structure[j].location; + fields[i].interpolation = + earlier_per_vertex->fields.structure[j].interpolation; + fields[i].centroid = + earlier_per_vertex->fields.structure[j].centroid; + fields[i].sample = + earlier_per_vertex->fields.structure[j].sample; + fields[i].patch = + earlier_per_vertex->fields.structure[j].patch; + fields[i].precision = + earlier_per_vertex->fields.structure[j].precision; + } + } + + /* From section 7.1 ("Built-in Language Variables") of the GLSL 4.10 + * spec: + * + * If a built-in interface block is redeclared, it must appear in + * the shader before any use of any member included in the built-in + * declaration, or a compilation error will result. + * + * This appears to be a clarification to the behaviour established for + * gl_PerVertex by GLSL 1.50, therefore we implement this behaviour + * regardless of GLSL version. 
+ */ + interface_block_usage_visitor v(var_mode, earlier_per_vertex); + v.run(instructions); + if (v.usage_found()) { + _mesa_glsl_error(&loc, state, + "redeclaration of a built-in interface block must " + "appear before any use of any member of the " + "interface block"); + } + } + + const glsl_type *block_type = + glsl_type::get_interface_instance(fields, + num_variables, + packing, + this->block_name); + + if (!state->symbols->add_interface(block_type->name, block_type, var_mode)) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(&loc, state, "interface block `%s' with type `%s' " + "already taken in the current scope", + this->block_name, iface_type_name); + } + + /* Since interface blocks cannot contain statements, it should be + * impossible for the block to generate any instructions. + */ + assert(declared_variables.is_empty()); + + /* From section 4.3.4 (Inputs) of the GLSL 1.50 spec: + * + * Geometry shader input variables get the per-vertex values written + * out by vertex shader output variables of the same names. Since a + * geometry shader operates on a set of vertices, each input varying + * variable (or input block, see interface blocks below) needs to be + * declared as an array. 
+ */ + if (state->stage == MESA_SHADER_GEOMETRY && this->array_specifier == NULL && + var_mode == ir_var_shader_in) { + _mesa_glsl_error(&loc, state, "geometry shader inputs must be arrays"); + } else if ((state->stage == MESA_SHADER_TESS_CTRL || + state->stage == MESA_SHADER_TESS_EVAL) && + this->array_specifier == NULL && + var_mode == ir_var_shader_in) { + _mesa_glsl_error(&loc, state, "per-vertex tessellation shader inputs must be arrays"); + } else if (state->stage == MESA_SHADER_TESS_CTRL && + this->array_specifier == NULL && + var_mode == ir_var_shader_out) { + _mesa_glsl_error(&loc, state, "tessellation control shader outputs must be arrays"); + } + + + /* Page 39 (page 45 of the PDF) of section 4.3.7 in the GLSL ES 3.00 spec + * says: + * + * "If an instance name (instance-name) is used, then it puts all the + * members inside a scope within its own name space, accessed with the + * field selector ( . ) operator (analogously to structures)." + */ + if (this->instance_name) { + if (redeclaring_per_vertex) { + /* When a built-in in an unnamed interface block is redeclared, + * get_variable_being_redeclared() calls + * check_builtin_array_max_size() to make sure that built-in array + * variables aren't redeclared to illegal sizes. But we're looking + * at a redeclaration of a named built-in interface block. So we + * have to manually call check_builtin_array_max_size() for all parts + * of the interface that are arrays. 
+ */ + for (unsigned i = 0; i < num_variables; i++) { + if (fields[i].type->is_array()) { + const unsigned size = fields[i].type->array_size(); + check_builtin_array_max_size(fields[i].name, size, loc, state); + } + } + } else { + validate_identifier(this->instance_name, loc, state); + } + + ir_variable *var; + + if (this->array_specifier != NULL) { + const glsl_type *block_array_type = + process_array_type(&loc, block_type, this->array_specifier, state); + + /* Section 4.3.7 (Interface Blocks) of the GLSL 1.50 spec says: + * + * For uniform blocks declared an array, each individual array + * element corresponds to a separate buffer object backing one + * instance of the block. As the array size indicates the number + * of buffer objects needed, uniform block array declarations + * must specify an array size. + * + * And a few paragraphs later: + * + * Geometry shader input blocks must be declared as arrays and + * follow the array declaration and linking rules for all + * geometry shader inputs. All other input and output block + * arrays must specify an array size. + * + * The same applies to tessellation shaders. + * + * The upshot of this is that the only circumstance where an + * interface array size *doesn't* need to be specified is on a + * geometry shader input, tessellation control shader input, + * tessellation control shader output, and tessellation evaluation + * shader input. 
+ */ + if (block_array_type->is_unsized_array()) { + bool allow_inputs = state->stage == MESA_SHADER_GEOMETRY || + state->stage == MESA_SHADER_TESS_CTRL || + state->stage == MESA_SHADER_TESS_EVAL; + bool allow_outputs = state->stage == MESA_SHADER_TESS_CTRL; + + if (this->layout.flags.q.in) { + if (!allow_inputs) + _mesa_glsl_error(&loc, state, + "unsized input block arrays not allowed in " + "%s shader", + _mesa_shader_stage_to_string(state->stage)); + } else if (this->layout.flags.q.out) { + if (!allow_outputs) + _mesa_glsl_error(&loc, state, + "unsized output block arrays not allowed in " + "%s shader", + _mesa_shader_stage_to_string(state->stage)); + } else { + /* by elimination, this is a uniform block array */ + _mesa_glsl_error(&loc, state, + "unsized uniform block arrays not allowed in " + "%s shader", + _mesa_shader_stage_to_string(state->stage)); + } + } + + /* From section 4.3.9 (Interface Blocks) of the GLSL ES 3.10 spec: + * + * * Arrays of arrays of blocks are not allowed + */ + if (state->es_shader && block_array_type->is_array() && + block_array_type->fields.array->is_array()) { + _mesa_glsl_error(&loc, state, + "arrays of arrays interface blocks are " + "not allowed"); + } + + var = new(state) ir_variable(block_array_type, + this->instance_name, + var_mode); + } else { + var = new(state) ir_variable(block_type, + this->instance_name, + var_mode); + } + + var->data.matrix_layout = matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED + ? 
GLSL_MATRIX_LAYOUT_COLUMN_MAJOR : matrix_layout; + + if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform) + var->data.read_only = true; + + if (state->stage == MESA_SHADER_GEOMETRY && var_mode == ir_var_shader_in) + handle_geometry_shader_input_decl(state, loc, var); + else if ((state->stage == MESA_SHADER_TESS_CTRL || + state->stage == MESA_SHADER_TESS_EVAL) && var_mode == ir_var_shader_in) + handle_tess_shader_input_decl(state, loc, var); + else if (state->stage == MESA_SHADER_TESS_CTRL && var_mode == ir_var_shader_out) + handle_tess_ctrl_shader_output_decl(state, loc, var); + + for (unsigned i = 0; i < num_variables; i++) { + if (fields[i].type->is_unsized_array()) { + if (var_mode == ir_var_shader_storage) { + if (i != (num_variables - 1)) { + _mesa_glsl_error(&loc, state, "unsized array `%s' definition: " + "only last member of a shader storage block " + "can be defined as unsized array", + fields[i].name); + } + } else { + /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays": + * + * "If an array is declared as the last member of a shader storage + * block and the size is not specified at compile-time, it is + * sized at run-time. In all other cases, arrays are sized only + * at compile-time." 
+ */ + if (state->es_shader) { + _mesa_glsl_error(&loc, state, "unsized array `%s' definition: " + "only last member of a shader storage block " + "can be defined as unsized array", + fields[i].name); + } + } + } + } + + if (ir_variable *earlier = + state->symbols->get_variable(this->instance_name)) { + if (!redeclaring_per_vertex) { + _mesa_glsl_error(&loc, state, "`%s' redeclared", + this->instance_name); + } + earlier->data.how_declared = ir_var_declared_normally; + earlier->type = var->type; + earlier->reinit_interface_type(block_type); + delete var; + } else { + if (this->layout.flags.q.explicit_binding) { + apply_explicit_binding(state, &loc, var, var->type, + &this->layout); + } + + var->data.stream = qual_stream; + if (layout.flags.q.explicit_location) { + var->data.location = expl_location; + var->data.explicit_location = true; + } + + state->symbols->add_variable(var); + instructions->push_tail(var); + } + } else { + /* In order to have an array size, the block must also be declared with + * an instance name. 
+ */ + assert(this->array_specifier == NULL); + + for (unsigned i = 0; i < num_variables; i++) { + ir_variable *var = + new(state) ir_variable(fields[i].type, + ralloc_strdup(state, fields[i].name), + var_mode); + var->data.interpolation = fields[i].interpolation; + var->data.centroid = fields[i].centroid; + var->data.sample = fields[i].sample; + var->data.patch = fields[i].patch; + var->data.stream = qual_stream; + var->data.location = fields[i].location; + if (fields[i].location != -1) + var->data.explicit_location = true; + var->init_interface_type(block_type); + + if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform) + var->data.read_only = true; + + /* Precision qualifiers do not have any meaning in Desktop GLSL */ + if (state->es_shader) { + var->data.precision = + select_gles_precision(fields[i].precision, fields[i].type, + state, &loc); + } + + if (fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED) { + var->data.matrix_layout = matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED + ? 
GLSL_MATRIX_LAYOUT_COLUMN_MAJOR : matrix_layout; + } else { + var->data.matrix_layout = fields[i].matrix_layout; + } + + if (var->data.mode == ir_var_shader_storage) { + var->data.image_read_only = fields[i].image_read_only; + var->data.image_write_only = fields[i].image_write_only; + var->data.image_coherent = fields[i].image_coherent; + var->data.image_volatile = fields[i].image_volatile; + var->data.image_restrict = fields[i].image_restrict; + } + + /* Examine var name here since var may get deleted in the next call */ + bool var_is_gl_id = is_gl_identifier(var->name); + + if (redeclaring_per_vertex) { + ir_variable *earlier = + get_variable_being_redeclared(var, loc, state, + true /* allow_all_redeclarations */); + if (!var_is_gl_id || earlier == NULL) { + _mesa_glsl_error(&loc, state, + "redeclaration of gl_PerVertex can only " + "include built-in variables"); + } else if (earlier->data.how_declared == ir_var_declared_normally) { + _mesa_glsl_error(&loc, state, + "`%s' has already been redeclared", + earlier->name); + } else { + earlier->data.how_declared = ir_var_declared_in_block; + earlier->reinit_interface_type(block_type); + } + continue; + } + + if (state->symbols->get_variable(var->name) != NULL) + _mesa_glsl_error(&loc, state, "`%s' redeclared", var->name); + + /* Propagate the "binding" keyword into this UBO/SSBO's fields. + * The UBO declaration itself doesn't get an ir_variable unless it + * has an instance name. This is ugly. 
+ */ + if (this->layout.flags.q.explicit_binding) { + apply_explicit_binding(state, &loc, var, + var->get_interface_type(), &this->layout); + } + + if (var->type->is_unsized_array()) { + if (var->is_in_shader_storage_block()) { + if (!is_unsized_array_last_element(var)) { + _mesa_glsl_error(&loc, state, "unsized array `%s' definition: " + "only last member of a shader storage block " + "can be defined as unsized array", + var->name); + } + var->data.from_ssbo_unsized_array = true; + } else { + /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays": + * + * "If an array is declared as the last member of a shader storage + * block and the size is not specified at compile-time, it is + * sized at run-time. In all other cases, arrays are sized only + * at compile-time." + */ + if (state->es_shader) { + _mesa_glsl_error(&loc, state, "unsized array `%s' definition: " + "only last member of a shader storage block " + "can be defined as unsized array", + var->name); + } + } + } + + state->symbols->add_variable(var); + instructions->push_tail(var); + } + + if (redeclaring_per_vertex && block_type != earlier_per_vertex) { + /* From section 7.1 ("Built-in Language Variables") of the GLSL 4.10 spec: + * + * It is also a compilation error ... to redeclare a built-in + * block and then use a member from that built-in block that was + * not included in the redeclaration. + * + * This appears to be a clarification to the behaviour established + * for gl_PerVertex by GLSL 1.50, therefore we implement this + * behaviour regardless of GLSL version. + * + * To prevent the shader from using a member that was not included in + * the redeclaration, we disable any ir_variables that are still + * associated with the old declaration of gl_PerVertex (since we've + * already updated all of the variables contained in the new + * gl_PerVertex to point to it). 
+ * + * As a side effect this will prevent + * validate_intrastage_interface_blocks() from getting confused and + * thinking there are conflicting definitions of gl_PerVertex in the + * shader. + */ + foreach_in_list_safe(ir_instruction, node, instructions) { + ir_variable *const var = node->as_variable(); + if (var != NULL && + var->get_interface_type() == earlier_per_vertex && + var->data.mode == var_mode) { + if (var->data.how_declared == ir_var_declared_normally) { + _mesa_glsl_error(&loc, state, + "redeclaration of gl_PerVertex cannot " + "follow a redeclaration of `%s'", + var->name); + } + state->symbols->disable_variable(var->name); + var->remove(); + } + } + } + } + + return NULL; +} + + +ir_rvalue * +ast_tcs_output_layout::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + YYLTYPE loc = this->get_location(); + + unsigned num_vertices; + if (!state->out_qualifier->vertices-> + process_qualifier_constant(state, "vertices", &num_vertices, + false)) { + /* return here to stop cascading incorrect error messages */ + return NULL; + } + + /* If any shader outputs occurred before this declaration and specified an + * array size, make sure the size they specified is consistent with the + * primitive type. + */ + if (state->tcs_output_size != 0 && state->tcs_output_size != num_vertices) { + _mesa_glsl_error(&loc, state, + "this tessellation control shader output layout " + "specifies %u vertices, but a previous output " + "is declared with size %u", + num_vertices, state->tcs_output_size); + return NULL; + } + + state->tcs_output_vertices_specified = true; + + /* If any shader outputs occurred before this declaration and did not + * specify an array size, their size is determined now. + */ + foreach_in_list (ir_instruction, node, instructions) { + ir_variable *var = node->as_variable(); + if (var == NULL || var->data.mode != ir_var_shader_out) + continue; + + /* Note: Not all tessellation control shader output are arrays. 
*/ + if (!var->type->is_unsized_array() || var->data.patch) + continue; + + if (var->data.max_array_access >= num_vertices) { + _mesa_glsl_error(&loc, state, + "this tessellation control shader output layout " + "specifies %u vertices, but an access to element " + "%u of output `%s' already exists", num_vertices, + var->data.max_array_access, var->name); + } else { + var->type = glsl_type::get_array_instance(var->type->fields.array, + num_vertices); + } + } + + return NULL; +} + + +ir_rvalue * +ast_gs_input_layout::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + YYLTYPE loc = this->get_location(); + + /* If any geometry input layout declaration preceded this one, make sure it + * was consistent with this one. + */ + if (state->gs_input_prim_type_specified && + state->in_qualifier->prim_type != this->prim_type) { + _mesa_glsl_error(&loc, state, + "geometry shader input layout does not match" + " previous declaration"); + return NULL; + } + + /* If any shader inputs occurred before this declaration and specified an + * array size, make sure the size they specified is consistent with the + * primitive type. + */ + unsigned num_vertices = vertices_per_prim(this->prim_type); + if (state->gs_input_size != 0 && state->gs_input_size != num_vertices) { + _mesa_glsl_error(&loc, state, + "this geometry shader input layout implies %u vertices" + " per primitive, but a previous input is declared" + " with size %u", num_vertices, state->gs_input_size); + return NULL; + } + + state->gs_input_prim_type_specified = true; + + /* If any shader inputs occurred before this declaration and did not + * specify an array size, their size is determined now. + */ + foreach_in_list(ir_instruction, node, instructions) { + ir_variable *var = node->as_variable(); + if (var == NULL || var->data.mode != ir_var_shader_in) + continue; + + /* Note: gl_PrimitiveIDIn has mode ir_var_shader_in, but it's not an + * array; skip it. 
+ */ + + if (var->type->is_unsized_array()) { + if (var->data.max_array_access >= num_vertices) { + _mesa_glsl_error(&loc, state, + "this geometry shader input layout implies %u" + " vertices, but an access to element %u of input" + " `%s' already exists", num_vertices, + var->data.max_array_access, var->name); + } else { + var->type = glsl_type::get_array_instance(var->type->fields.array, + num_vertices); + } + } + } + + return NULL; +} + + +ir_rvalue * +ast_cs_input_layout::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + YYLTYPE loc = this->get_location(); + + /* From the ARB_compute_shader specification: + * + * If the local size of the shader in any dimension is greater + * than the maximum size supported by the implementation for that + * dimension, a compile-time error results. + * + * It is not clear from the spec how the error should be reported if + * the total size of the work group exceeds + * MAX_COMPUTE_WORK_GROUP_INVOCATIONS, but it seems reasonable to + * report it at compile time as well. 
+ */ + GLuint64 total_invocations = 1; + unsigned qual_local_size[3]; + for (int i = 0; i < 3; i++) { + + char *local_size_str = ralloc_asprintf(NULL, "invalid local_size_%c", + 'x' + i); + /* Infer a local_size of 1 for unspecified dimensions */ + if (this->local_size[i] == NULL) { + qual_local_size[i] = 1; + } else if (!this->local_size[i]-> + process_qualifier_constant(state, local_size_str, + &qual_local_size[i], false)) { + ralloc_free(local_size_str); + return NULL; + } + ralloc_free(local_size_str); + + if (qual_local_size[i] > state->ctx->Const.MaxComputeWorkGroupSize[i]) { + _mesa_glsl_error(&loc, state, + "local_size_%c exceeds MAX_COMPUTE_WORK_GROUP_SIZE" + " (%d)", 'x' + i, + state->ctx->Const.MaxComputeWorkGroupSize[i]); + break; + } + total_invocations *= qual_local_size[i]; + if (total_invocations > + state->ctx->Const.MaxComputeWorkGroupInvocations) { + _mesa_glsl_error(&loc, state, + "product of local_sizes exceeds " + "MAX_COMPUTE_WORK_GROUP_INVOCATIONS (%d)", + state->ctx->Const.MaxComputeWorkGroupInvocations); + break; + } + } + + /* If any compute input layout declaration preceded this one, make sure it + * was consistent with this one. + */ + if (state->cs_input_local_size_specified) { + for (int i = 0; i < 3; i++) { + if (state->cs_input_local_size[i] != qual_local_size[i]) { + _mesa_glsl_error(&loc, state, + "compute shader input layout does not match" + " previous declaration"); + return NULL; + } + } + } + + state->cs_input_local_size_specified = true; + for (int i = 0; i < 3; i++) + state->cs_input_local_size[i] = qual_local_size[i]; + + /* We may now declare the built-in constant gl_WorkGroupSize (see + * builtin_variable_generator::generate_constants() for why we didn't + * declare it earlier). 
+ */ + ir_variable *var = new(state->symbols) + ir_variable(glsl_type::uvec3_type, "gl_WorkGroupSize", ir_var_auto); + var->data.how_declared = ir_var_declared_implicitly; + var->data.read_only = true; + instructions->push_tail(var); + state->symbols->add_variable(var); + ir_constant_data data; + memset(&data, 0, sizeof(data)); + for (int i = 0; i < 3; i++) + data.u[i] = qual_local_size[i]; + var->constant_value = new(var) ir_constant(glsl_type::uvec3_type, &data); + var->constant_initializer = + new(var) ir_constant(glsl_type::uvec3_type, &data); + var->data.has_initializer = true; + + return NULL; +} + + +static void +detect_conflicting_assignments(struct _mesa_glsl_parse_state *state, + exec_list *instructions) +{ + bool gl_FragColor_assigned = false; + bool gl_FragData_assigned = false; + bool gl_FragSecondaryColor_assigned = false; + bool gl_FragSecondaryData_assigned = false; + bool user_defined_fs_output_assigned = false; + ir_variable *user_defined_fs_output = NULL; + + /* It would be nice to have proper location information. */ + YYLTYPE loc; + memset(&loc, 0, sizeof(loc)); + + foreach_in_list(ir_instruction, node, instructions) { + ir_variable *var = node->as_variable(); + + if (!var || !var->data.assigned) + continue; + + if (strcmp(var->name, "gl_FragColor") == 0) + gl_FragColor_assigned = true; + else if (strcmp(var->name, "gl_FragData") == 0) + gl_FragData_assigned = true; + else if (strcmp(var->name, "gl_SecondaryFragColorEXT") == 0) + gl_FragSecondaryColor_assigned = true; + else if (strcmp(var->name, "gl_SecondaryFragDataEXT") == 0) + gl_FragSecondaryData_assigned = true; + else if (!is_gl_identifier(var->name)) { + if (state->stage == MESA_SHADER_FRAGMENT && + var->data.mode == ir_var_shader_out) { + user_defined_fs_output_assigned = true; + user_defined_fs_output = var; + } + } + } + + /* From the GLSL 1.30 spec: + * + * "If a shader statically assigns a value to gl_FragColor, it + * may not assign a value to any element of gl_FragData. 
If a + * shader statically writes a value to any element of + * gl_FragData, it may not assign a value to + * gl_FragColor. That is, a shader may assign values to either + * gl_FragColor or gl_FragData, but not both. Multiple shaders + * linked together must also consistently write just one of + * these variables. Similarly, if user declared output + * variables are in use (statically assigned to), then the + * built-in variables gl_FragColor and gl_FragData may not be + * assigned to. These incorrect usages all generate compile + * time errors." + */ + if (gl_FragColor_assigned && gl_FragData_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragColor' and `gl_FragData'"); + } else if (gl_FragColor_assigned && user_defined_fs_output_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragColor' and `%s'", + user_defined_fs_output->name); + } else if (gl_FragSecondaryColor_assigned && gl_FragSecondaryData_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragSecondaryColorEXT' and" + " `gl_FragSecondaryDataEXT'"); + } else if (gl_FragColor_assigned && gl_FragSecondaryData_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragColor' and" + " `gl_FragSecondaryDataEXT'"); + } else if (gl_FragData_assigned && gl_FragSecondaryColor_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragData' and" + " `gl_FragSecondaryColorEXT'"); + } else if (gl_FragData_assigned && user_defined_fs_output_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragData' and `%s'", + user_defined_fs_output->name); + } + + if ((gl_FragSecondaryColor_assigned || gl_FragSecondaryData_assigned) && + !state->EXT_blend_func_extended_enable) { + _mesa_glsl_error(&loc, state, + "Dual source blending requires EXT_blend_func_extended"); + } +} + + +static void +remove_per_vertex_blocks(exec_list 
*instructions, + _mesa_glsl_parse_state *state, ir_variable_mode mode) +{ + /* Find the gl_PerVertex interface block of the appropriate (in/out) mode, + * if it exists in this shader type. + */ + const glsl_type *per_vertex = NULL; + switch (mode) { + case ir_var_shader_in: + if (ir_variable *gl_in = state->symbols->get_variable("gl_in")) + per_vertex = gl_in->get_interface_type(); + break; + case ir_var_shader_out: + if (ir_variable *gl_Position = + state->symbols->get_variable("gl_Position")) { + per_vertex = gl_Position->get_interface_type(); + } + break; + default: + assert(!"Unexpected mode"); + break; + } + + /* If we didn't find a built-in gl_PerVertex interface block, then we don't + * need to do anything. + */ + if (per_vertex == NULL) + return; + + /* If the interface block is used by the shader, then we don't need to do + * anything. + */ + interface_block_usage_visitor v(mode, per_vertex); + v.run(instructions); + if (v.usage_found()) + return; + + /* Remove any ir_variable declarations that refer to the interface block + * we're removing. 
+ */ + foreach_in_list_safe(ir_instruction, node, instructions) { + ir_variable *const var = node->as_variable(); + if (var != NULL && var->get_interface_type() == per_vertex && + var->data.mode == mode) { + state->symbols->disable_variable(var->name); + var->remove(); + } + } +} diff --git a/src/compiler/glsl/ast_type.cpp b/src/compiler/glsl/ast_type.cpp new file mode 100644 index 0000000..e0e3311 --- /dev/null +++ b/src/compiler/glsl/ast_type.cpp @@ -0,0 +1,548 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "ast.h" + +void +ast_type_specifier::print(void) const +{ + if (structure) { + structure->print(); + } else { + printf("%s ", type_name); + } + + if (array_specifier) { + array_specifier->print(); + } +} + +bool +ast_fully_specified_type::has_qualifiers(_mesa_glsl_parse_state *state) const +{ + /* 'subroutine' isnt a real qualifier. 
*/ + ast_type_qualifier subroutine_only; + subroutine_only.flags.i = 0; + subroutine_only.flags.q.subroutine = 1; + subroutine_only.flags.q.subroutine_def = 1; + if (state->has_explicit_uniform_location()) { + subroutine_only.flags.q.explicit_index = 1; + } + return (this->qualifier.flags.i & ~subroutine_only.flags.i) != 0; +} + +bool ast_type_qualifier::has_interpolation() const +{ + return this->flags.q.smooth + || this->flags.q.flat + || this->flags.q.noperspective; +} + +bool +ast_type_qualifier::has_layout() const +{ + return this->flags.q.origin_upper_left + || this->flags.q.pixel_center_integer + || this->flags.q.depth_any + || this->flags.q.depth_greater + || this->flags.q.depth_less + || this->flags.q.depth_unchanged + || this->flags.q.std140 + || this->flags.q.std430 + || this->flags.q.shared + || this->flags.q.column_major + || this->flags.q.row_major + || this->flags.q.packed + || this->flags.q.explicit_location + || this->flags.q.explicit_image_format + || this->flags.q.explicit_index + || this->flags.q.explicit_binding + || this->flags.q.explicit_offset + || this->flags.q.explicit_stream; +} + +bool +ast_type_qualifier::has_storage() const +{ + return this->flags.q.constant + || this->flags.q.attribute + || this->flags.q.varying + || this->flags.q.in + || this->flags.q.out + || this->flags.q.uniform + || this->flags.q.buffer + || this->flags.q.shared_storage; +} + +bool +ast_type_qualifier::has_auxiliary_storage() const +{ + return this->flags.q.centroid + || this->flags.q.sample + || this->flags.q.patch; +} + +const char* +ast_type_qualifier::interpolation_string() const +{ + if (this->flags.q.smooth) + return "smooth"; + else if (this->flags.q.flat) + return "flat"; + else if (this->flags.q.noperspective) + return "noperspective"; + else + return NULL; +} + +/** + * This function merges both duplicate identifies within a single layout and + * multiple layout qualifiers on a single variable declaration. 
The + * is_single_layout_merge param is used differentiate between the two. + */ +bool +ast_type_qualifier::merge_qualifier(YYLTYPE *loc, + _mesa_glsl_parse_state *state, + const ast_type_qualifier &q, + bool is_single_layout_merge) +{ + ast_type_qualifier ubo_mat_mask; + ubo_mat_mask.flags.i = 0; + ubo_mat_mask.flags.q.row_major = 1; + ubo_mat_mask.flags.q.column_major = 1; + + ast_type_qualifier ubo_layout_mask; + ubo_layout_mask.flags.i = 0; + ubo_layout_mask.flags.q.std140 = 1; + ubo_layout_mask.flags.q.packed = 1; + ubo_layout_mask.flags.q.shared = 1; + ubo_layout_mask.flags.q.std430 = 1; + + ast_type_qualifier ubo_binding_mask; + ubo_binding_mask.flags.i = 0; + ubo_binding_mask.flags.q.explicit_binding = 1; + ubo_binding_mask.flags.q.explicit_offset = 1; + + ast_type_qualifier stream_layout_mask; + stream_layout_mask.flags.i = 0; + stream_layout_mask.flags.q.stream = 1; + + /* Uniform block layout qualifiers get to overwrite each + * other (rightmost having priority), while all other + * qualifiers currently don't allow duplicates. + */ + ast_type_qualifier allowed_duplicates_mask; + allowed_duplicates_mask.flags.i = + ubo_mat_mask.flags.i | + ubo_layout_mask.flags.i | + ubo_binding_mask.flags.i; + + /* Geometry shaders can have several layout qualifiers + * assigning different stream values. 
+ */ + if (state->stage == MESA_SHADER_GEOMETRY) + allowed_duplicates_mask.flags.i |= + stream_layout_mask.flags.i; + + if (is_single_layout_merge && !state->has_enhanced_layouts() && + (this->flags.i & q.flags.i & ~allowed_duplicates_mask.flags.i) != 0) { + _mesa_glsl_error(loc, state, + "duplicate layout qualifiers used"); + return false; + } + + if (q.flags.q.prim_type) { + if (this->flags.q.prim_type && this->prim_type != q.prim_type) { + _mesa_glsl_error(loc, state, + "conflicting primitive type qualifiers used"); + return false; + } + this->prim_type = q.prim_type; + } + + if (q.flags.q.max_vertices) { + if (this->max_vertices) { + this->max_vertices->merge_qualifier(q.max_vertices); + } else { + this->max_vertices = q.max_vertices; + } + } + + if (q.flags.q.subroutine_def) { + if (this->flags.q.subroutine_def) { + _mesa_glsl_error(loc, state, + "conflicting subroutine qualifiers used"); + } else { + this->subroutine_list = q.subroutine_list; + } + } + + if (q.flags.q.invocations) { + if (this->invocations) { + this->invocations->merge_qualifier(q.invocations); + } else { + this->invocations = q.invocations; + } + } + + if (state->stage == MESA_SHADER_GEOMETRY && + state->has_explicit_attrib_stream()) { + if (!this->flags.q.explicit_stream) { + if (q.flags.q.stream) { + this->flags.q.stream = 1; + this->stream = q.stream; + } else if (!this->flags.q.stream && this->flags.q.out) { + /* Assign default global stream value */ + this->flags.q.stream = 1; + this->stream = state->out_qualifier->stream; + } + } + } + + if (q.flags.q.vertices) { + if (this->vertices) { + this->vertices->merge_qualifier(q.vertices); + } else { + this->vertices = q.vertices; + } + } + + if (q.flags.q.vertex_spacing) { + if (this->flags.q.vertex_spacing && this->vertex_spacing != q.vertex_spacing) { + _mesa_glsl_error(loc, state, + "conflicting vertex spacing used"); + return false; + } + this->vertex_spacing = q.vertex_spacing; + } + + if (q.flags.q.ordering) { + if 
(this->flags.q.ordering && this->ordering != q.ordering) { + _mesa_glsl_error(loc, state, + "conflicting ordering used"); + return false; + } + this->ordering = q.ordering; + } + + if (q.flags.q.point_mode) { + if (this->flags.q.point_mode && this->point_mode != q.point_mode) { + _mesa_glsl_error(loc, state, + "conflicting point mode used"); + return false; + } + this->point_mode = q.point_mode; + } + + if ((q.flags.i & ubo_mat_mask.flags.i) != 0) + this->flags.i &= ~ubo_mat_mask.flags.i; + if ((q.flags.i & ubo_layout_mask.flags.i) != 0) + this->flags.i &= ~ubo_layout_mask.flags.i; + + for (int i = 0; i < 3; i++) { + if (q.flags.q.local_size & (1 << i)) { + if (this->local_size[i]) { + this->local_size[i]->merge_qualifier(q.local_size[i]); + } else { + this->local_size[i] = q.local_size[i]; + } + } + } + + this->flags.i |= q.flags.i; + + if (q.flags.q.explicit_location) + this->location = q.location; + + if (q.flags.q.explicit_index) + this->index = q.index; + + if (q.flags.q.explicit_binding) + this->binding = q.binding; + + if (q.flags.q.explicit_offset) + this->offset = q.offset; + + if (q.precision != ast_precision_none) + this->precision = q.precision; + + if (q.flags.q.explicit_image_format) { + this->image_format = q.image_format; + this->image_base_type = q.image_base_type; + } + + return true; +} + +bool +ast_type_qualifier::merge_out_qualifier(YYLTYPE *loc, + _mesa_glsl_parse_state *state, + const ast_type_qualifier &q, + ast_node* &node, bool create_node) +{ + void *mem_ctx = state; + const bool r = this->merge_qualifier(loc, state, q, false); + + if (state->stage == MESA_SHADER_GEOMETRY) { + if (q.flags.q.prim_type) { + /* Make sure this is a valid output primitive type. 
*/ + switch (q.prim_type) { + case GL_POINTS: + case GL_LINE_STRIP: + case GL_TRIANGLE_STRIP: + break; + default: + _mesa_glsl_error(loc, state, "invalid geometry shader output " + "primitive type"); + break; + } + } + + /* Allow future assigments of global out's stream id value */ + this->flags.q.explicit_stream = 0; + } else if (state->stage == MESA_SHADER_TESS_CTRL) { + if (create_node) { + node = new(mem_ctx) ast_tcs_output_layout(*loc); + } + } else { + _mesa_glsl_error(loc, state, "out layout qualifiers only valid in " + "tessellation control or geometry shaders"); + } + + return r; +} + +bool +ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc, + _mesa_glsl_parse_state *state, + const ast_type_qualifier &q, + ast_node* &node, bool create_node) +{ + void *mem_ctx = state; + bool create_gs_ast = false; + bool create_cs_ast = false; + ast_type_qualifier valid_in_mask; + valid_in_mask.flags.i = 0; + + switch (state->stage) { + case MESA_SHADER_TESS_EVAL: + if (q.flags.q.prim_type) { + /* Make sure this is a valid input primitive type. */ + switch (q.prim_type) { + case GL_TRIANGLES: + case GL_QUADS: + case GL_ISOLINES: + break; + default: + _mesa_glsl_error(loc, state, + "invalid tessellation evaluation " + "shader input primitive type"); + break; + } + } + + valid_in_mask.flags.q.prim_type = 1; + valid_in_mask.flags.q.vertex_spacing = 1; + valid_in_mask.flags.q.ordering = 1; + valid_in_mask.flags.q.point_mode = 1; + break; + case MESA_SHADER_GEOMETRY: + if (q.flags.q.prim_type) { + /* Make sure this is a valid input primitive type. 
*/ + switch (q.prim_type) { + case GL_POINTS: + case GL_LINES: + case GL_LINES_ADJACENCY: + case GL_TRIANGLES: + case GL_TRIANGLES_ADJACENCY: + break; + default: + _mesa_glsl_error(loc, state, + "invalid geometry shader input primitive type"); + break; + } + } + + create_gs_ast |= + q.flags.q.prim_type && + !state->in_qualifier->flags.q.prim_type; + + valid_in_mask.flags.q.prim_type = 1; + valid_in_mask.flags.q.invocations = 1; + break; + case MESA_SHADER_FRAGMENT: + valid_in_mask.flags.q.early_fragment_tests = 1; + break; + case MESA_SHADER_COMPUTE: + create_cs_ast |= + q.flags.q.local_size != 0 && + state->in_qualifier->flags.q.local_size == 0; + + valid_in_mask.flags.q.local_size = 7; + break; + default: + _mesa_glsl_error(loc, state, + "input layout qualifiers only valid in " + "geometry, fragment and compute shaders"); + break; + } + + /* Generate an error when invalid input layout qualifiers are used. */ + if ((q.flags.i & ~valid_in_mask.flags.i) != 0) { + _mesa_glsl_error(loc, state, + "invalid input layout qualifiers used"); + return false; + } + + /* Input layout qualifiers can be specified multiple + * times in separate declarations, as long as they match. + */ + if (this->flags.q.prim_type) { + if (q.flags.q.prim_type && + this->prim_type != q.prim_type) { + _mesa_glsl_error(loc, state, + "conflicting input primitive %s specified", + state->stage == MESA_SHADER_GEOMETRY ? 
+ "type" : "mode"); + } + } else if (q.flags.q.prim_type) { + state->in_qualifier->flags.q.prim_type = 1; + state->in_qualifier->prim_type = q.prim_type; + } + + if (q.flags.q.invocations) { + this->flags.q.invocations = 1; + if (this->invocations) { + this->invocations->merge_qualifier(q.invocations); + } else { + this->invocations = q.invocations; + } + } + + if (q.flags.q.early_fragment_tests) { + state->fs_early_fragment_tests = true; + } + + if (this->flags.q.vertex_spacing) { + if (q.flags.q.vertex_spacing && + this->vertex_spacing != q.vertex_spacing) { + _mesa_glsl_error(loc, state, + "conflicting vertex spacing specified"); + } + } else if (q.flags.q.vertex_spacing) { + this->flags.q.vertex_spacing = 1; + this->vertex_spacing = q.vertex_spacing; + } + + if (this->flags.q.ordering) { + if (q.flags.q.ordering && + this->ordering != q.ordering) { + _mesa_glsl_error(loc, state, + "conflicting ordering specified"); + } + } else if (q.flags.q.ordering) { + this->flags.q.ordering = 1; + this->ordering = q.ordering; + } + + if (this->flags.q.point_mode) { + if (q.flags.q.point_mode && + this->point_mode != q.point_mode) { + _mesa_glsl_error(loc, state, + "conflicting point mode specified"); + } + } else if (q.flags.q.point_mode) { + this->flags.q.point_mode = 1; + this->point_mode = q.point_mode; + } + + if (create_node) { + if (create_gs_ast) { + node = new(mem_ctx) ast_gs_input_layout(*loc, q.prim_type); + } else if (create_cs_ast) { + node = new(mem_ctx) ast_cs_input_layout(*loc, q.local_size); + } + } + + return true; +} + +bool +ast_layout_expression::process_qualifier_constant(struct _mesa_glsl_parse_state *state, + const char *qual_indentifier, + unsigned *value, + bool can_be_zero) +{ + int min_value = 0; + bool first_pass = true; + *value = 0; + + if (!can_be_zero) + min_value = 1; + + for (exec_node *node = layout_const_expressions.head; + !node->is_tail_sentinel(); node = node->next) { + + exec_list dummy_instructions; + ast_node *const_expression = 
exec_node_data(ast_node, node, link); + + ir_rvalue *const ir = const_expression->hir(&dummy_instructions, state); + + ir_constant *const const_int = ir->constant_expression_value(); + if (const_int == NULL || !const_int->type->is_integer()) { + YYLTYPE loc = const_expression->get_location(); + _mesa_glsl_error(&loc, state, "%s must be an integral constant " + "expression", qual_indentifier); + return false; + } + + if (const_int->value.i[0] < min_value) { + YYLTYPE loc = const_expression->get_location(); + _mesa_glsl_error(&loc, state, "%s layout qualifier is invalid " + "(%d < %d)", qual_indentifier, + const_int->value.i[0], min_value); + return false; + } + + if (!first_pass && *value != const_int->value.u[0]) { + YYLTYPE loc = const_expression->get_location(); + _mesa_glsl_error(&loc, state, "%s layout qualifier does not " + "match previous declaration (%d vs %d)", + qual_indentifier, *value, const_int->value.i[0]); + return false; + } else { + first_pass = false; + *value = const_int->value.u[0]; + } + + /* If the location is const (and we've verified that + * it is) then no instructions should have been emitted + * when we converted it to HIR. If they were emitted, + * then either the location isn't const after all, or + * we are emitting unnecessary instructions. 
+ */ + assert(dummy_instructions.is_empty()); + } + + return true; +} diff --git a/src/compiler/glsl/blob.c b/src/compiler/glsl/blob.c new file mode 100644 index 0000000..dd4341b --- /dev/null +++ b/src/compiler/glsl/blob.c @@ -0,0 +1,323 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <string.h> + +#include "main/macros.h" +#include "util/ralloc.h" +#include "blob.h" + +#define BLOB_INITIAL_SIZE 4096 + +/* Ensure that \blob will be able to fit an additional object of size + * \additional. The growing (if any) will occur by doubling the existing + * allocation. 
+ */ +static bool +grow_to_fit(struct blob *blob, size_t additional) +{ + size_t to_allocate; + uint8_t *new_data; + + if (blob->size + additional <= blob->allocated) + return true; + + if (blob->allocated == 0) + to_allocate = BLOB_INITIAL_SIZE; + else + to_allocate = blob->allocated * 2; + + to_allocate = MAX2(to_allocate, blob->allocated + additional); + + new_data = reralloc_size(blob, blob->data, to_allocate); + if (new_data == NULL) + return false; + + blob->data = new_data; + blob->allocated = to_allocate; + + return true; +} + +/* Align the blob->size so that reading or writing a value at (blob->data + + * blob->size) will result in an access aligned to a granularity of \alignment + * bytes. + * + * \return True unless allocation fails + */ +static bool +align_blob(struct blob *blob, size_t alignment) +{ + const size_t new_size = ALIGN(blob->size, alignment); + + if (! grow_to_fit (blob, new_size - blob->size)) + return false; + + blob->size = new_size; + + return true; +} + +static void +align_blob_reader(struct blob_reader *blob, size_t alignment) +{ + blob->current = blob->data + ALIGN(blob->current - blob->data, alignment); +} + +struct blob * +blob_create(void *mem_ctx) +{ + struct blob *blob; + + blob = ralloc(mem_ctx, struct blob); + if (blob == NULL) + return NULL; + + blob->data = NULL; + blob->allocated = 0; + blob->size = 0; + + return blob; +} + +bool +blob_overwrite_bytes(struct blob *blob, + size_t offset, + const void *bytes, + size_t to_write) +{ + /* Detect an attempt to overwrite data out of bounds. */ + if (offset < 0 || blob->size - offset < to_write) + return false; + + memcpy(blob->data + offset, bytes, to_write); + + return true; +} + +bool +blob_write_bytes(struct blob *blob, const void *bytes, size_t to_write) +{ + if (! 
grow_to_fit(blob, to_write)) + return false; + + memcpy(blob->data + blob->size, bytes, to_write); + blob->size += to_write; + + return true; +} + +uint8_t * +blob_reserve_bytes(struct blob *blob, size_t to_write) +{ + uint8_t *ret; + + if (! grow_to_fit (blob, to_write)) + return NULL; + + ret = blob->data + blob->size; + blob->size += to_write; + + return ret; +} + +bool +blob_write_uint32(struct blob *blob, uint32_t value) +{ + align_blob(blob, sizeof(value)); + + return blob_write_bytes(blob, &value, sizeof(value)); +} + +bool +blob_overwrite_uint32 (struct blob *blob, + size_t offset, + uint32_t value) +{ + return blob_overwrite_bytes(blob, offset, &value, sizeof(value)); +} + +bool +blob_write_uint64(struct blob *blob, uint64_t value) +{ + align_blob(blob, sizeof(value)); + + return blob_write_bytes(blob, &value, sizeof(value)); +} + +bool +blob_write_intptr(struct blob *blob, intptr_t value) +{ + align_blob(blob, sizeof(value)); + + return blob_write_bytes(blob, &value, sizeof(value)); +} + +bool +blob_write_string(struct blob *blob, const char *str) +{ + return blob_write_bytes(blob, str, strlen(str) + 1); +} + +void +blob_reader_init(struct blob_reader *blob, uint8_t *data, size_t size) +{ + blob->data = data; + blob->end = data + size; + blob->current = data; + blob->overrun = false; +} + +/* Check that an object of size \size can be read from this blob. + * + * If not, set blob->overrun to indicate that we attempted to read too far. + */ +static bool +ensure_can_read(struct blob_reader *blob, size_t size) +{ + if (blob->current < blob->end && blob->end - blob->current >= size) + return true; + + blob->overrun = true; + + return false; +} + +void * +blob_read_bytes(struct blob_reader *blob, size_t size) +{ + void *ret; + + if (! 
ensure_can_read (blob, size)) + return NULL; + + ret = blob->current; + + blob->current += size; + + return ret; +} + +void +blob_copy_bytes(struct blob_reader *blob, uint8_t *dest, size_t size) +{ + uint8_t *bytes; + + bytes = blob_read_bytes(blob, size); + if (bytes == NULL) + return; + + memcpy(dest, bytes, size); +} + +/* These next three read functions have identical form. If we add any beyond + * these first three we should probably switch to generating these with a + * preprocessor macro. +*/ +uint32_t +blob_read_uint32(struct blob_reader *blob) +{ + uint32_t ret; + int size = sizeof(ret); + + align_blob_reader(blob, size); + + if (! ensure_can_read(blob, size)) + return 0; + + ret = *((uint32_t*) blob->current); + + blob->current += size; + + return ret; +} + +uint64_t +blob_read_uint64(struct blob_reader *blob) +{ + uint64_t ret; + int size = sizeof(ret); + + align_blob_reader(blob, size); + + if (! ensure_can_read(blob, size)) + return 0; + + ret = *((uint64_t*) blob->current); + + blob->current += size; + + return ret; +} + +intptr_t +blob_read_intptr(struct blob_reader *blob) +{ + intptr_t ret; + int size = sizeof(ret); + + align_blob_reader(blob, size); + + if (! ensure_can_read(blob, size)) + return 0; + + ret = *((intptr_t *) blob->current); + + blob->current += size; + + return ret; +} + +char * +blob_read_string(struct blob_reader *blob) +{ + int size; + char *ret; + uint8_t *nul; + + /* If we're already at the end, then this is an overrun. */ + if (blob->current >= blob->end) { + blob->overrun = true; + return NULL; + } + + /* Similarly, if there is no zero byte in the data remaining in this blob, + * we also consider that an overrun. 
+ */ + nul = memchr(blob->current, 0, blob->end - blob->current); + + if (nul == NULL) { + blob->overrun = true; + return NULL; + } + + size = nul - blob->current + 1; + + assert(ensure_can_read(blob, size)); + + ret = (char *) blob->current; + + blob->current += size; + + return ret; +} diff --git a/src/compiler/glsl/blob.h b/src/compiler/glsl/blob.h new file mode 100644 index 0000000..ec903ec --- /dev/null +++ b/src/compiler/glsl/blob.h @@ -0,0 +1,289 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once +#ifndef BLOB_H +#define BLOB_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> + +/* The blob functions implement a simple, low-level API for serializing and + * deserializing. + * + * All objects written to a blob will be serialized directly, (without any + * additional meta-data to describe the data written). 
Therefore, it is the + * caller's responsibility to ensure that any data can be read later, (either + * by knowing exactly what data is expected, or by writing to the blob + * sufficient meta-data to describe what has been written). + * + * A blob is efficient in that it dynamically grows by doubling in size, so + * allocation costs are logarithmic. + */ + +struct blob { + /* The data actually written to the blob. */ + uint8_t *data; + + /** Number of bytes that have been allocated for \c data. */ + size_t allocated; + + /** The number of bytes that have actual data written to them. */ + size_t size; +}; + +/* When done reading, the caller can ensure that everything was consumed by + * checking the following: + * + * 1. blob->current should be equal to blob->end, (if not, too little was + * read). + * + * 2. blob->overrun should be false, (otherwise, too much was read). + */ +struct blob_reader { + /** Start of the data being read. */ + uint8_t *data; + /** One past the last byte of the data (data + size). */ + uint8_t *end; + /** Position of the next byte to be read. */ + uint8_t *current; + /** Set when a read was attempted that the remaining data could not satisfy. */ + bool overrun; +}; + +/** + * Create a new, empty blob, belonging to \mem_ctx. + * + * \return The new blob, (or NULL in case of allocation failure). + */ +struct blob * +blob_create (void *mem_ctx); + +/** + * Add some unstructured, fixed-size data to a blob. + * + * \return True unless allocation failed. + */ +bool +blob_write_bytes (struct blob *blob, const void *bytes, size_t to_write); + +/** + * Reserve space in \blob for a number of bytes. + * + * Space will be allocated within the blob for these bytes, but the bytes will + * be left uninitialized. The caller is expected to use the return value to + * write directly (and immediately) to these bytes. + * + * \note The return value is valid immediately upon return, but can be + * invalidated by any other call to a blob function. So the caller should call + * blob_reserve_bytes immediately before writing through the returned pointer. 
+ * + * This function is intended to be used when interfacing with an existing API + * that is not aware of the blob API, (so that blob_write_bytes cannot be + * called). + * + * \return A pointer to space allocated within \blob to which \to_write bytes + * can be written, (or NULL in case of any allocation error). + */ +uint8_t * +blob_reserve_bytes (struct blob *blob, size_t to_write); + +/** + * Overwrite some data previously written to the blob. + * + * Writes data to an existing portion of the blob at an offset of \offset. + * This data range must have previously been written to the blob by one of the + * blob_write_* calls. + * + * For example usage, see blob_overwrite_uint32 + * + * \return True unless the requested offset or offset+to_write lie outside + * the current blob's size. + */ +bool +blob_overwrite_bytes (struct blob *blob, + size_t offset, + const void *bytes, + size_t to_write); + +/** + * Add a uint32_t to a blob. + * + * \note This function will only write to a uint32_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be added to the + * blob if this write follows some unaligned write (such as + * blob_write_string). + * + * \return True unless allocation failed. + */ +bool +blob_write_uint32 (struct blob *blob, uint32_t value); + +/** + * Overwrite a uint32_t previously written to the blob. + * + * Writes a uint32_t value to an existing portion of the blob at an offset of + * \offset. This data range must have previously been written to the blob by + * one of the blob_write_* calls. + * + * + * The expected usage is something like the following pattern: + * + * size_t offset; + * + * offset = blob->size; + * blob_write_uint32 (blob, 0); // placeholder + * ... various blob write calls, writing N items ... + * blob_overwrite_uint32 (blob, offset, N); + * + * \return True unless the requested position or position+to_write lie outside + * the current blob's size. 
+ */ +bool +blob_overwrite_uint32 (struct blob *blob, + size_t offset, + uint32_t value); + +/** + * Add a uint64_t to a blob. + * + * \note This function will only write to a uint64_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be added to the + * blob if this write follows some unaligned write (such as + * blob_write_string). + * + * \return True unless allocation failed. + */ +bool +blob_write_uint64 (struct blob *blob, uint64_t value); + +/** + * Add an intptr_t to a blob. + * + * \note This function will only write to an intptr_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be added to the + * blob if this write follows some unaligned write (such as + * blob_write_string). + * + * \return True unless allocation failed. + */ +bool +blob_write_intptr (struct blob *blob, intptr_t value); + +/** + * Add a NULL-terminated string to a blob, (including the NULL terminator). + * + * \return True unless allocation failed. + */ +bool +blob_write_string (struct blob *blob, const char *str); + +/** + * Start reading a blob, (initializing the contents of \blob for reading). + * + * After this call, the caller can use the various blob_read_* functions to + * read elements from the data array. + * + * For all of the blob_read_* functions, if there is insufficient data + * remaining, the functions will do nothing, (perhaps returning default values + * such as 0). The caller can detect this by noting that the blob_reader's + * current value is unchanged before and after the call. + */ +void +blob_reader_init (struct blob_reader *blob, uint8_t *data, size_t size); + +/** + * Read some unstructured, fixed-size data from the current location, (and + * update the current location to just past this data). + * + * \note The memory returned belongs to the data underlying the blob reader. The + * caller must copy the data in order to use it after the lifetime of the data + * underlying the blob reader. 
+ * + * \return The bytes read (see note above about memory lifetime). + */ +void * +blob_read_bytes (struct blob_reader *blob, size_t size); + +/** + * Read some unstructured, fixed-size data from the current location, copying + * it to \dest (and update the current location to just past this data) + */ +void +blob_copy_bytes (struct blob_reader *blob, uint8_t *dest, size_t size); + +/** + * Read a uint32_t from the current location, (and update the current location + * to just past this uint32_t). + * + * \note This function will only read from a uint32_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be skipped. + * + * \return The uint32_t read + */ +uint32_t +blob_read_uint32 (struct blob_reader *blob); + +/** + * Read a uint64_t from the current location, (and update the current location + * to just past this uint64_t). + * + * \note This function will only read from a uint64_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be skipped. + * + * \return The uint64_t read + */ +uint64_t +blob_read_uint64 (struct blob_reader *blob); + +/** + * Read an intptr_t value from the current location, (and update the + * current location to just past this intptr_t). + * + * \note This function will only read from an intptr_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be skipped. + * + * \return The intptr_t read + */ +intptr_t +blob_read_intptr (struct blob_reader *blob); + +/** + * Read a NULL-terminated string from the current location, (and update the + * current location to just past this string). + * + * \note The memory returned belongs to the data underlying the blob reader. The + * caller must copy the string in order to use the string after the lifetime + * of the data underlying the blob reader. + * + * \return The string read (see note above about memory lifetime). 
However, if + * there is no NULL byte remaining within the blob, this function returns + * NULL. + */ +char * +blob_read_string (struct blob_reader *blob); + +#ifdef __cplusplus +} +#endif + +#endif /* BLOB_H */ diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp new file mode 100644 index 0000000..95e86df --- /dev/null +++ b/src/compiler/glsl/builtin_functions.cpp @@ -0,0 +1,5502 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file builtin_functions.cpp + * + * Support for GLSL built-in functions. + * + * This file is split into several main components: + * + * 1. Availability predicates + * + * A series of small functions that check whether the current shader + * supports the version/extensions required to expose a built-in. + * + * 2. Core builtin_builder class functionality + * + * 3. 
Lists of built-in functions + * + * The builtin_builder::create_builtins() function contains lists of all + * built-in function signatures, where they're available, what types they + * take, and so on. + * + * 4. Implementations of built-in function signatures + * + * A series of functions which create ir_function_signatures and emit IR + * via ir_builder to implement them. + * + * 5. External API + * + * A few functions the rest of the compiler can use to interact with the + * built-in function module. For example, searching for a built-in by + * name and parameters. + */ + +#include <stdarg.h> +#include <stdio.h> +#include "main/core.h" /* for struct gl_shader */ +#include "main/shaderobj.h" +#include "ir_builder.h" +#include "glsl_parser_extras.h" +#include "program/prog_instruction.h" +#include <math.h> + +#define M_PIf ((float) M_PI) +#define M_PI_2f ((float) M_PI_2) +#define M_PI_4f ((float) M_PI_4) + +using namespace ir_builder; + +/** + * Availability predicates: + * @{ + */ +static bool +always_available(const _mesa_glsl_parse_state *) +{ + return true; +} + +static bool +compatibility_vs_only(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_VERTEX && + state->language_version <= 130 && + !state->es_shader; +} + +static bool +fs_only(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_FRAGMENT; +} + +static bool +gs_only(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_GEOMETRY; +} + +static bool +v110(const _mesa_glsl_parse_state *state) +{ + return !state->es_shader; +} + +static bool +v110_fs_only(const _mesa_glsl_parse_state *state) +{ + return !state->es_shader && state->stage == MESA_SHADER_FRAGMENT; +} + +static bool +v120(const _mesa_glsl_parse_state *state) +{ + return state->is_version(120, 300); +} + +static bool +v130(const _mesa_glsl_parse_state *state) +{ + return state->is_version(130, 300); +} + +static bool +v130_fs_only(const _mesa_glsl_parse_state *state) +{ 
+ return state->is_version(130, 300) && + state->stage == MESA_SHADER_FRAGMENT; +} + +static bool +v140(const _mesa_glsl_parse_state *state) +{ + return state->is_version(140, 0); +} + +static bool +v140_or_es3(const _mesa_glsl_parse_state *state) +{ + return state->is_version(140, 300); +} + +static bool +v400_fs_only(const _mesa_glsl_parse_state *state) +{ + return state->is_version(400, 0) && + state->stage == MESA_SHADER_FRAGMENT; +} + +static bool +es31(const _mesa_glsl_parse_state *state) +{ + return state->is_version(0, 310); +} + +static bool +texture_rectangle(const _mesa_glsl_parse_state *state) +{ + return state->ARB_texture_rectangle_enable; +} + +static bool +texture_external(const _mesa_glsl_parse_state *state) +{ + return state->OES_EGL_image_external_enable; +} + +/** True if texturing functions with explicit LOD are allowed. */ +static bool +lod_exists_in_stage(const _mesa_glsl_parse_state *state) +{ + /* Texturing functions with "Lod" in their name exist: + * - In the vertex shader stage (for all languages) + * - In any stage for GLSL 1.30+ or GLSL ES 3.00 + * - In any stage for desktop GLSL with ARB_shader_texture_lod enabled. + * + * Since ARB_shader_texture_lod can only be enabled on desktop GLSL, we + * don't need to explicitly check state->es_shader. 
+ */ + return state->stage == MESA_SHADER_VERTEX || + state->is_version(130, 300) || + state->ARB_shader_texture_lod_enable; +} + +static bool +v110_lod(const _mesa_glsl_parse_state *state) +{ + return !state->es_shader && lod_exists_in_stage(state); +} + +static bool +shader_texture_lod(const _mesa_glsl_parse_state *state) +{ + return state->ARB_shader_texture_lod_enable; +} + +static bool +shader_texture_lod_and_rect(const _mesa_glsl_parse_state *state) +{ + return state->ARB_shader_texture_lod_enable && + state->ARB_texture_rectangle_enable; +} + +static bool +shader_bit_encoding(const _mesa_glsl_parse_state *state) +{ + return state->is_version(330, 300) || + state->ARB_shader_bit_encoding_enable || + state->ARB_gpu_shader5_enable; +} + +static bool +shader_integer_mix(const _mesa_glsl_parse_state *state) +{ + return state->is_version(450, 310) || + (v130(state) && state->EXT_shader_integer_mix_enable); +} + +static bool +shader_packing_or_es3(const _mesa_glsl_parse_state *state) +{ + return state->ARB_shading_language_packing_enable || + state->is_version(420, 300); +} + +static bool +shader_packing_or_es3_or_gpu_shader5(const _mesa_glsl_parse_state *state) +{ + return state->ARB_shading_language_packing_enable || + state->ARB_gpu_shader5_enable || + state->is_version(400, 300); +} + +static bool +gpu_shader5(const _mesa_glsl_parse_state *state) +{ + return state->is_version(400, 0) || state->ARB_gpu_shader5_enable; +} + +static bool +gpu_shader5_or_es31(const _mesa_glsl_parse_state *state) +{ + return state->is_version(400, 310) || state->ARB_gpu_shader5_enable; +} + +static bool +shader_packing_or_es31_or_gpu_shader5(const _mesa_glsl_parse_state *state) +{ + return state->ARB_shading_language_packing_enable || + state->ARB_gpu_shader5_enable || + state->is_version(400, 310); +} + +static bool +fs_gpu_shader5(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_FRAGMENT && + (state->is_version(400, 0) || state->ARB_gpu_shader5_enable); 
+} + /* The static helpers below are availability predicates: each inspects the parse state (stage, language version, enabled extensions) and returns true when the test in its body holds. Several are passed as builtin_available_predicate arguments further down, e.g. in create_intrinsics(). NOTE(review): is_version(a, b) presumably means "desktop GLSL >= a, or GLSL ES >= b", with 0 meaning "never" -- confirm against _mesa_glsl_parse_state. */ + /** lod_exists_in_stage() must hold and EXT_texture_array must be enabled. */ +static bool +texture_array_lod(const _mesa_glsl_parse_state *state) +{ + return lod_exists_in_stage(state) && + state->EXT_texture_array_enable; +} + /** Fragment stage only, with EXT_texture_array enabled. */ +static bool +fs_texture_array(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_FRAGMENT && + state->EXT_texture_array_enable; +} + /** True when EXT_texture_array is enabled, in any stage. */ +static bool +texture_array(const _mesa_glsl_parse_state *state) +{ + return state->EXT_texture_array_enable; +} + /** is_version(150, 310) or ARB_texture_multisample enabled. */ +static bool +texture_multisample(const _mesa_glsl_parse_state *state) +{ + return state->is_version(150, 310) || + state->ARB_texture_multisample_enable; +} + /** is_version(150, 320), ARB_texture_multisample, or OES_texture_storage_multisample_2d_array enabled. */ +static bool +texture_multisample_array(const _mesa_glsl_parse_state *state) +{ + return state->is_version(150, 320) || + state->ARB_texture_multisample_enable || + state->OES_texture_storage_multisample_2d_array_enable; +} + /** texture_multisample() plus EXT_shader_samples_identical enabled. */ +static bool +texture_samples_identical(const _mesa_glsl_parse_state *state) +{ + return texture_multisample(state) && + state->EXT_shader_samples_identical_enable; +} + /** texture_multisample_array() plus EXT_shader_samples_identical enabled. */ +static bool +texture_samples_identical_array(const _mesa_glsl_parse_state *state) +{ + return texture_multisample_array(state) && + state->EXT_shader_samples_identical_enable; +} + /** Fragment stage only: is_version(400, 0) or ARB_texture_cube_map_array enabled. */ +static bool +fs_texture_cube_map_array(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_FRAGMENT && + (state->is_version(400, 0) || + state->ARB_texture_cube_map_array_enable); +} + /** Same test as fs_texture_cube_map_array() without the stage restriction. */ +static bool +texture_cube_map_array(const _mesa_glsl_parse_state *state) +{ + return state->is_version(400, 0) || + state->ARB_texture_cube_map_array_enable; +} + /** is_version(430, 0) or ARB_texture_query_levels enabled. */ +static bool +texture_query_levels(const _mesa_glsl_parse_state *state) +{ + return state->is_version(430, 0) || + state->ARB_texture_query_levels_enable; +} + /** Fragment stage only, with ARB_texture_query_lod enabled (no version test). */ +static bool +texture_query_lod(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_FRAGMENT && + state->ARB_texture_query_lod_enable; +} + /** is_version(400, 0), ARB_texture_gather, or ARB_gpu_shader5 enabled. */ +static bool +texture_gather(const _mesa_glsl_parse_state *state) +{ + return state->is_version(400, 0) || + 
state->ARB_texture_gather_enable || + state->ARB_gpu_shader5_enable; +} + /** Like texture_gather(), but the version test is is_version(400, 310). */ +static bool +texture_gather_or_es31(const _mesa_glsl_parse_state *state) +{ + return state->is_version(400, 310) || + state->ARB_texture_gather_enable || + state->ARB_gpu_shader5_enable; +} + +/* Only ARB_texture_gather (or is_version(0, 310)) but not GLSL 4.0 or + * ARB_gpu_shader5. Used for relaxation of const offset requirements. + */ +static bool +texture_gather_only_or_es31(const _mesa_glsl_parse_state *state) +{ + return !state->is_version(400, 0) && + !state->ARB_gpu_shader5_enable && + (state->ARB_texture_gather_enable || + state->is_version(0, 310)); +} + +/* Desktop GL or OES_standard_derivatives + fragment shader only */ +static bool +fs_oes_derivatives(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_FRAGMENT && + (state->is_version(110, 300) || + state->OES_standard_derivatives_enable); +} + /** Fragment stage only: is_version(450, 0) or ARB_derivative_control enabled. */ +static bool +fs_derivative_control(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_FRAGMENT && + (state->is_version(450, 0) || + state->ARB_derivative_control_enable); +} + /** Non-ES shaders only, and lod_exists_in_stage() must hold. */ +static bool +tex1d_lod(const _mesa_glsl_parse_state *state) +{ + return !state->es_shader && lod_exists_in_stage(state); +} + +/** True if sampler3D exists */ +static bool +tex3d(const _mesa_glsl_parse_state *state) +{ + /* sampler3D exists in all desktop GLSL versions, GLSL ES 1.00 with the + * OES_texture_3D extension, and in GLSL ES 3.00. 
+ */ + return !state->es_shader || + state->OES_texture_3D_enable || + state->language_version >= 300; +} + /** Fragment stage only: non-ES shader or OES_texture_3D enabled. Unlike tex3d(), language_version is not consulted here. */ +static bool +fs_tex3d(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_FRAGMENT && + (!state->es_shader || state->OES_texture_3D_enable); +} + /** tex3d() and lod_exists_in_stage() must both hold. */ +static bool +tex3d_lod(const _mesa_glsl_parse_state *state) +{ + return tex3d(state) && lod_exists_in_stage(state); +} + /** Delegates to state->has_atomic_counters(). */ +static bool +shader_atomic_counters(const _mesa_glsl_parse_state *state) +{ + return state->has_atomic_counters(); +} + /** True when ARB_shader_clock is enabled. */ +static bool +shader_clock(const _mesa_glsl_parse_state *state) +{ + return state->ARB_shader_clock_enable; +} + /** Delegates to state->has_shader_storage_buffer_objects(). */ +static bool +shader_storage_buffer_object(const _mesa_glsl_parse_state *state) +{ + return state->has_shader_storage_buffer_objects(); +} + /** True when AMD_shader_trinary_minmax is enabled. */ +static bool +shader_trinary_minmax(const _mesa_glsl_parse_state *state) +{ + return state->AMD_shader_trinary_minmax_enable; +} + /** is_version(420, 310) or ARB_shader_image_load_store enabled. */ +static bool +shader_image_load_store(const _mesa_glsl_parse_state *state) +{ + return (state->is_version(420, 310) || + state->ARB_shader_image_load_store_enable); +} + /** Same extension as shader_image_load_store(), but the version test is is_version(420, 0). */ +static bool +shader_image_atomic(const _mesa_glsl_parse_state *state) +{ + return (state->is_version(420, 0) || + state->ARB_shader_image_load_store_enable); +} + /** is_version(430, 310) or ARB_shader_image_size enabled. */ +static bool +shader_image_size(const _mesa_glsl_parse_state *state) +{ + return state->is_version(430, 310) || + state->ARB_shader_image_size_enable; +} + /** is_version(450, 0) or ARB_shader_texture_image_samples enabled. */ +static bool +shader_samples(const _mesa_glsl_parse_state *state) +{ + return state->is_version(450, 0) || + state->ARB_shader_texture_image_samples_enable; +} + /** Both gpu_shader5() and gs_only() must hold. */ +static bool +gs_streams(const _mesa_glsl_parse_state *state) +{ + return gpu_shader5(state) && gs_only(state); +} + /** Delegates to state->has_double(). */ +static bool +fp64(const _mesa_glsl_parse_state *state) +{ + return state->has_double(); +} + /** True when the stage being compiled is MESA_SHADER_COMPUTE. */ +static bool +compute_shader(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_COMPUTE; +} + /** compute_shader() or shader_storage_buffer_object(). */ +static bool +buffer_atomics_supported(const _mesa_glsl_parse_state *state) +{ + return 
compute_shader(state) || shader_storage_buffer_object(state); +} + /** Compute stage or tessellation control stage. */ +static bool +barrier_supported(const _mesa_glsl_parse_state *state) +{ + return compute_shader(state) || + state->stage == MESA_SHADER_TESS_CTRL; +} + +/** @} */ + +/******************************************************************************/ + +namespace { + +/** + * builtin_builder: A singleton object representing the core of the built-in + * function module. + * + * It generates IR for every built-in function signature, and organizes them + * into functions. + */ +class builtin_builder { +public: + builtin_builder(); + ~builtin_builder(); + + void initialize(); /* one-time setup; returns immediately once mem_ctx is non-NULL */ + void release(); /* frees mem_ctx and shader and resets both to NULL */ + ir_function_signature *find(_mesa_glsl_parse_state *state, + const char *name, exec_list *actual_parameters); /* NULL when \p name is unknown or no signature matches */ + + /** + * A shader to hold all the built-in signatures; created by this module. + * + * This includes signatures for every built-in, regardless of version or + * enabled extensions. The availability predicate associated with each + * signature allows matching_signature() to filter out the irrelevant ones. + */ + gl_shader *shader; + +private: + void *mem_ctx; /* ralloc context: NULL from the constructor, created by initialize() */ + + /** Global variables used by built-in functions. */ + ir_variable *gl_ModelViewProjectionMatrix; + ir_variable *gl_Vertex; + /** Setup steps, called in this order by initialize(). */ + void create_shader(); + void create_intrinsics(); + void create_builtins(); + + /** + * IR builder helpers: + * + * These convenience functions assist in emitting IR, but don't necessarily + * fit in ir_builder itself. Many of them rely on having a mem_ctx class + * member available. 
+ */ + ir_variable *in_var(const glsl_type *type, const char *name); + ir_variable *out_var(const glsl_type *type, const char *name); + ir_constant *imm(float f, unsigned vector_elements=1); + ir_constant *imm(bool b, unsigned vector_elements=1); + ir_constant *imm(int i, unsigned vector_elements=1); + ir_constant *imm(unsigned u, unsigned vector_elements=1); + ir_constant *imm(double d, unsigned vector_elements=1); + ir_constant *imm(const glsl_type *type, const ir_constant_data &); + ir_dereference_variable *var_ref(ir_variable *var); + ir_dereference_array *array_ref(ir_variable *var, int i); + ir_swizzle *matrix_elt(ir_variable *var, int col, int row); + + ir_expression *asin_expr(ir_variable *x); + void do_atan(ir_factory &body, const glsl_type *type, ir_variable *res, operand y_over_x); + + /** + * Call function \param f with parameters specified as the linked + * list \param params of \c ir_variable objects. \param ret should + * point to the ir_variable that will hold the function return + * value, or be \c NULL if the function has void return type. + */ + ir_call *call(ir_function *f, ir_variable *ret, exec_list params); + + /** Create a new function and add the given signatures. */ + void add_function(const char *name, ...); + /** Pointer to a builtin_builder member function that takes (image type, argument count, flags) and returns a signature. */ + typedef ir_function_signature *(builtin_builder::*image_prototype_ctr)(const glsl_type *image_type, + unsigned num_arguments, + unsigned flags); + /** Bit flags for image functions; each enumerator is a distinct single bit. */ + enum image_function_flags { + IMAGE_FUNCTION_EMIT_STUB = (1 << 0), + IMAGE_FUNCTION_RETURNS_VOID = (1 << 1), + IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE = (1 << 2), + IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE = (1 << 3), + IMAGE_FUNCTION_READ_ONLY = (1 << 4), + IMAGE_FUNCTION_WRITE_ONLY = (1 << 5), + IMAGE_FUNCTION_AVAIL_ATOMIC = (1 << 6), + IMAGE_FUNCTION_MS_ONLY = (1 << 7), + }; + + /** + * Create a new image built-in function for all known image types. + * \p flags is a bitfield of \c image_function_flags flags. 
+ */ + void add_image_function(const char *name, + const char *intrinsic_name, + image_prototype_ctr prototype, + unsigned num_arguments, + unsigned flags); + + /** + * Create new functions for all known image built-ins and types. + * If \p glsl is \c true, use the GLSL built-in names and emit code + * to call into the actual compiler intrinsic. If \p glsl is + * false, emit a function prototype with no body for each image + * intrinsic name. + */ + void add_image_functions(bool glsl); + + ir_function_signature *new_sig(const glsl_type *return_type, + builtin_available_predicate avail, + int num_params, ...); + + /** + * Function signature generators: + * @{ + */ + ir_function_signature *unop(builtin_available_predicate avail, + ir_expression_operation opcode, + const glsl_type *return_type, + const glsl_type *param_type); + ir_function_signature *binop(builtin_available_predicate avail, + ir_expression_operation opcode, + const glsl_type *return_type, + const glsl_type *param0_type, + const glsl_type *param1_type); + /* Declaration shorthands: Bn(X) declares member _X taking n glsl_type pointers; BAn(X) declares _X taking a builtin_available_predicate followed by n glsl_type pointers. Each expansion already ends in ';'. */ +#define B0(X) ir_function_signature *_##X(); +#define B1(X) ir_function_signature *_##X(const glsl_type *); +#define B2(X) ir_function_signature *_##X(const glsl_type *, const glsl_type *); +#define B3(X) ir_function_signature *_##X(const glsl_type *, const glsl_type *, const glsl_type *); +#define BA1(X) ir_function_signature *_##X(builtin_available_predicate, const glsl_type *); +#define BA2(X) ir_function_signature *_##X(builtin_available_predicate, const glsl_type *, const glsl_type *); + B1(radians) + B1(degrees) + B1(sin) + B1(cos) + B1(tan) + B1(asin) + B1(acos) + B1(atan2) + B1(atan) + B1(sinh) + B1(cosh) + B1(tanh) + B1(asinh) + B1(acosh) + B1(atanh) + B1(pow) + B1(exp) + B1(log) + B1(exp2) + B1(log2) + BA1(sqrt) + BA1(inversesqrt) + BA1(abs) + BA1(sign) + BA1(floor) + BA1(trunc) + BA1(round) + BA1(roundEven) + BA1(ceil) + BA1(fract) + B2(mod) + BA1(modf) + BA2(min) + BA2(max) + BA2(clamp) + BA2(mix_lrp) + ir_function_signature 
*_mix_sel(builtin_available_predicate avail, + const glsl_type *val_type, + const glsl_type *blend_type); + BA2(step) + BA2(smoothstep) + BA1(isnan) + BA1(isinf) + B1(floatBitsToInt) + B1(floatBitsToUint) + B1(intBitsToFloat) + B1(uintBitsToFloat) + ir_function_signature *_packUnorm2x16(builtin_available_predicate avail); + ir_function_signature *_packSnorm2x16(builtin_available_predicate avail); + ir_function_signature *_packUnorm4x8(builtin_available_predicate avail); + ir_function_signature *_packSnorm4x8(builtin_available_predicate avail); + ir_function_signature *_unpackUnorm2x16(builtin_available_predicate avail); + ir_function_signature *_unpackSnorm2x16(builtin_available_predicate avail); + ir_function_signature *_unpackUnorm4x8(builtin_available_predicate avail); + ir_function_signature *_unpackSnorm4x8(builtin_available_predicate avail); + ir_function_signature *_packHalf2x16(builtin_available_predicate avail); + ir_function_signature *_unpackHalf2x16(builtin_available_predicate avail); + ir_function_signature *_packDouble2x32(builtin_available_predicate avail); + ir_function_signature *_unpackDouble2x32(builtin_available_predicate avail); + /* NOTE(review): the B and BA macros already expand to a declaration ending in ';', so the extra ';' after many of the uses below adds an empty member declaration. */ + BA1(length) + BA1(distance); + BA1(dot); + BA1(cross); + BA1(normalize); + B0(ftransform); + BA1(faceforward); + BA1(reflect); + BA1(refract); + BA1(matrixCompMult); + BA1(outerProduct); + BA1(determinant_mat2); + BA1(determinant_mat3); + BA1(determinant_mat4); + BA1(inverse_mat2); + BA1(inverse_mat3); + BA1(inverse_mat4); + BA1(transpose); + BA1(lessThan); + BA1(lessThanEqual); + BA1(greaterThan); + BA1(greaterThanEqual); + BA1(equal); + BA1(notEqual); + B1(any); + B1(all); + B1(not); + BA2(textureSize); + B1(textureSamples); + +/** Flags to _texture(); each value is a distinct power of two. */ +#define TEX_PROJECT 1 +#define TEX_OFFSET 2 +#define TEX_COMPONENT 4 +#define TEX_OFFSET_NONCONST 8 +#define TEX_OFFSET_ARRAY 16 + + ir_function_signature *_texture(ir_texture_opcode opcode, + builtin_available_predicate avail, + const glsl_type *return_type, + 
const glsl_type *sampler_type, + const glsl_type *coord_type, + int flags = 0); + B0(textureCubeArrayShadow); + ir_function_signature *_texelFetch(builtin_available_predicate avail, + const glsl_type *return_type, + const glsl_type *sampler_type, + const glsl_type *coord_type, + const glsl_type *offset_type = NULL); + + B0(EmitVertex) + B0(EndPrimitive) + ir_function_signature *_EmitStreamVertex(builtin_available_predicate avail, + const glsl_type *stream_type); + ir_function_signature *_EndStreamPrimitive(builtin_available_predicate avail, + const glsl_type *stream_type); + B0(barrier) + + BA2(textureQueryLod); + B1(textureQueryLevels); + BA2(textureSamplesIdentical); + B1(dFdx); + B1(dFdy); + B1(fwidth); + B1(dFdxCoarse); + B1(dFdyCoarse); + B1(fwidthCoarse); + B1(dFdxFine); + B1(dFdyFine); + B1(fwidthFine); + B1(noise1); + B1(noise2); + B1(noise3); + B1(noise4); + + B1(bitfieldExtract) + B1(bitfieldInsert) + B1(bitfieldReverse) + B1(bitCount) + B1(findLSB) + B1(findMSB) + BA1(fma) + B2(ldexp) + B2(frexp) + B2(dfrexp) + B1(uaddCarry) + B1(usubBorrow) + B1(mulExtended) + B1(interpolateAtCentroid) + B1(interpolateAtOffset) + B1(interpolateAtSample) + /* Atomic counter signature builders. */ + ir_function_signature *_atomic_counter_intrinsic(builtin_available_predicate avail); + ir_function_signature *_atomic_counter_op(const char *intrinsic, + builtin_available_predicate avail); + /* Buffer atomic signature builders: create_intrinsics() uses the "2" intrinsic form for add/min/max/and/or/xor/exchange and the "3" form for comp_swap. */ + ir_function_signature *_atomic_intrinsic2(builtin_available_predicate avail, + const glsl_type *type); + ir_function_signature *_atomic_op2(const char *intrinsic, + builtin_available_predicate avail, + const glsl_type *type); + ir_function_signature *_atomic_intrinsic3(builtin_available_predicate avail, + const glsl_type *type); + ir_function_signature *_atomic_op3(const char *intrinsic, + builtin_available_predicate avail, + const glsl_type *type); + + B1(min3) + B1(max3) + B1(mid3) + + ir_function_signature *_image_prototype(const glsl_type *image_type, + unsigned num_arguments, + unsigned flags); + ir_function_signature 
*_image_size_prototype(const glsl_type *image_type, + unsigned num_arguments, + unsigned flags); + ir_function_signature *_image_samples_prototype(const glsl_type *image_type, + unsigned num_arguments, + unsigned flags); + ir_function_signature *_image(image_prototype_ctr prototype, + const glsl_type *image_type, + const char *intrinsic_name, + unsigned num_arguments, + unsigned flags); + + ir_function_signature *_memory_barrier_intrinsic( + builtin_available_predicate avail); + ir_function_signature *_memory_barrier(const char *intrinsic_name, + builtin_available_predicate avail); + + ir_function_signature *_shader_clock_intrinsic(builtin_available_predicate avail, + const glsl_type *type); + ir_function_signature *_shader_clock(builtin_available_predicate avail, + const glsl_type *type); + /* Remove the declaration shorthands so they do not leak past the class body. */ +#undef B0 +#undef B1 +#undef B2 +#undef B3 +#undef BA1 +#undef BA2 + /** @} */ +}; + +} /* anonymous namespace */ + +/** + * Core builtin_builder functionality: + * @{ + */ +builtin_builder::builtin_builder() + : shader(NULL), + gl_ModelViewProjectionMatrix(NULL), + gl_Vertex(NULL) +{ + mem_ctx = NULL; +} + /** Frees mem_ctx only. NOTE(review): release() additionally frees \c shader; confirm nothing relies on this destructor to free it when release() was never called. */ +builtin_builder::~builtin_builder() +{ + ralloc_free(mem_ctx); +} + /** Look up built-in \p name in shader->symbols and return the signature matching \p actual_parameters, or NULL when the function is unknown or no signature matches. Always sets state->uses_builtin_functions. */ +ir_function_signature * +builtin_builder::find(_mesa_glsl_parse_state *state, + const char *name, exec_list *actual_parameters) +{ + /* The shader currently being compiled requested a built-in function; + * it needs to link against builtin_builder::shader in order to get them. + * + * Even if we don't find a matching signature, we still need to do this so + * that the "no matching signature" error will list potential candidates + * from the available built-ins. 
+ */ + state->uses_builtin_functions = true; + + ir_function *f = shader->symbols->get_function(name); + if (f == NULL) + return NULL; + + ir_function_signature *sig = + f->matching_signature(state, actual_parameters, true); + if (sig == NULL) + return NULL; + + return sig; +} + /** One-time setup: create the ralloc context, then build the shader, the intrinsics and the built-ins, in that order. A non-NULL mem_ctx marks the work as already done. */ +void +builtin_builder::initialize() +{ + /* If already initialized, don't do it again. */ + if (mem_ctx != NULL) + return; + + mem_ctx = ralloc_context(NULL); + create_shader(); + create_intrinsics(); + create_builtins(); +} + /** Free mem_ctx and shader and reset both to NULL; with mem_ctx NULL again, a later initialize() rebuilds everything. */ +void +builtin_builder::release() +{ + ralloc_free(mem_ctx); + mem_ctx = NULL; + + ralloc_free(shader); + shader = NULL; +} + /** Create the shader that holds the built-ins, give it a symbol table allocated from mem_ctx, and register the gl_ModelViewProjectionMatrix uniform and the gl_Vertex input in that table. */ +void +builtin_builder::create_shader() +{ + /* The target doesn't actually matter. There's no target for generic + * GLSL utility code that could be linked against any stage, so just + * arbitrarily pick GL_VERTEX_SHADER. + */ + shader = _mesa_new_shader(NULL, 0, GL_VERTEX_SHADER); + shader->symbols = new(mem_ctx) glsl_symbol_table; + + gl_ModelViewProjectionMatrix = + new(mem_ctx) ir_variable(glsl_type::mat4_type, + "gl_ModelViewProjectionMatrix", + ir_var_uniform); + + shader->symbols->add_variable(gl_ModelViewProjectionMatrix); + + gl_Vertex = in_var(glsl_type::vec4_type, "gl_Vertex"); + shader->symbols->add_variable(gl_Vertex); +} + +/** @} */ + +/** + * Create ir_function and ir_function_signature objects for each + * intrinsic. 
+ */ +void +builtin_builder::create_intrinsics() +{ /* Atomic counter intrinsics: one signature each, gated on shader_atomic_counters. Every add_function() list below ends with NULL. */ + add_function("__intrinsic_atomic_read", + _atomic_counter_intrinsic(shader_atomic_counters), + NULL); + add_function("__intrinsic_atomic_increment", + _atomic_counter_intrinsic(shader_atomic_counters), + NULL); + add_function("__intrinsic_atomic_predecrement", + _atomic_counter_intrinsic(shader_atomic_counters), + NULL); + /* Buffer atomics: a uint and an int signature per operation, all gated on buffer_atomics_supported. Only comp_swap uses _atomic_intrinsic3. */ + add_function("__intrinsic_atomic_add", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_min", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_max", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_and", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_or", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_xor", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_exchange", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_comp_swap", + _atomic_intrinsic3(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic3(buffer_atomics_supported, + glsl_type::int_type), + NULL); + /* glsl == false: register the image functions under their intrinsic names. */ + add_image_functions(false); + + 
/* Memory barriers: the plain barrier is gated on shader_image_load_store, the remaining five on compute_shader. */ add_function("__intrinsic_memory_barrier", + _memory_barrier_intrinsic(shader_image_load_store), + NULL); + add_function("__intrinsic_group_memory_barrier", + _memory_barrier_intrinsic(compute_shader), + NULL); + add_function("__intrinsic_memory_barrier_atomic_counter", + _memory_barrier_intrinsic(compute_shader), + NULL); + add_function("__intrinsic_memory_barrier_buffer", + _memory_barrier_intrinsic(compute_shader), + NULL); + add_function("__intrinsic_memory_barrier_image", + _memory_barrier_intrinsic(compute_shader), + NULL); + add_function("__intrinsic_memory_barrier_shared", + _memory_barrier_intrinsic(compute_shader), + NULL); + /* Shader clock intrinsic: a single uvec2 signature gated on shader_clock. */ + add_function("__intrinsic_shader_clock", + _shader_clock_intrinsic(shader_clock, + glsl_type::uvec2_type), + NULL); +} + +/** + * Create ir_function and ir_function_signature objects for each built-in. + * + * Contains a list of every available built-in. + */ +void +builtin_builder::create_builtins() +{ +#define F(NAME) \ + add_function(#NAME, \ + _##NAME(glsl_type::float_type), \ + _##NAME(glsl_type::vec2_type), \ + _##NAME(glsl_type::vec3_type), \ + _##NAME(glsl_type::vec4_type), \ + NULL); + +#define FD(NAME) \ + add_function(#NAME, \ + _##NAME(always_available, glsl_type::float_type), \ + _##NAME(always_available, glsl_type::vec2_type), \ + _##NAME(always_available, glsl_type::vec3_type), \ + _##NAME(always_available, glsl_type::vec4_type), \ + _##NAME(fp64, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type), \ + NULL); + +#define FD130(NAME) \ + add_function(#NAME, \ + _##NAME(v130, glsl_type::float_type), \ + _##NAME(v130, glsl_type::vec2_type), \ + _##NAME(v130, glsl_type::vec3_type), \ + _##NAME(v130, glsl_type::vec4_type), \ + _##NAME(fp64, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type), \ + NULL); + +#define FDGS5(NAME) \ + 
add_function(#NAME, \ + _##NAME(gpu_shader5, glsl_type::float_type), \ + _##NAME(gpu_shader5, glsl_type::vec2_type), \ + _##NAME(gpu_shader5, glsl_type::vec3_type), \ + _##NAME(gpu_shader5, glsl_type::vec4_type), \ + _##NAME(fp64, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type), \ + NULL); + +#define FI(NAME) \ + add_function(#NAME, \ + _##NAME(glsl_type::float_type), \ + _##NAME(glsl_type::vec2_type), \ + _##NAME(glsl_type::vec3_type), \ + _##NAME(glsl_type::vec4_type), \ + _##NAME(glsl_type::int_type), \ + _##NAME(glsl_type::ivec2_type), \ + _##NAME(glsl_type::ivec3_type), \ + _##NAME(glsl_type::ivec4_type), \ + NULL); + +#define FID(NAME) \ + add_function(#NAME, \ + _##NAME(always_available, glsl_type::float_type), \ + _##NAME(always_available, glsl_type::vec2_type), \ + _##NAME(always_available, glsl_type::vec3_type), \ + _##NAME(always_available, glsl_type::vec4_type), \ + _##NAME(always_available, glsl_type::int_type), \ + _##NAME(always_available, glsl_type::ivec2_type), \ + _##NAME(always_available, glsl_type::ivec3_type), \ + _##NAME(always_available, glsl_type::ivec4_type), \ + _##NAME(fp64, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type), \ + NULL); + +#define FIUD(NAME) \ + add_function(#NAME, \ + _##NAME(always_available, glsl_type::float_type), \ + _##NAME(always_available, glsl_type::vec2_type), \ + _##NAME(always_available, glsl_type::vec3_type), \ + _##NAME(always_available, glsl_type::vec4_type), \ + \ + _##NAME(always_available, glsl_type::int_type), \ + _##NAME(always_available, glsl_type::ivec2_type), \ + _##NAME(always_available, glsl_type::ivec3_type), \ + _##NAME(always_available, glsl_type::ivec4_type), \ + \ + _##NAME(v130, glsl_type::uint_type), \ + _##NAME(v130, glsl_type::uvec2_type), \ + _##NAME(v130, glsl_type::uvec3_type), \ + 
_##NAME(v130, glsl_type::uvec4_type), \ + _##NAME(fp64, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type), \ + NULL); + +#define IU(NAME) \ + add_function(#NAME, \ + _##NAME(glsl_type::int_type), \ + _##NAME(glsl_type::ivec2_type), \ + _##NAME(glsl_type::ivec3_type), \ + _##NAME(glsl_type::ivec4_type), \ + \ + _##NAME(glsl_type::uint_type), \ + _##NAME(glsl_type::uvec2_type), \ + _##NAME(glsl_type::uvec3_type), \ + _##NAME(glsl_type::uvec4_type), \ + NULL); + +#define FIUBD(NAME) \ + add_function(#NAME, \ + _##NAME(always_available, glsl_type::float_type), \ + _##NAME(always_available, glsl_type::vec2_type), \ + _##NAME(always_available, glsl_type::vec3_type), \ + _##NAME(always_available, glsl_type::vec4_type), \ + \ + _##NAME(always_available, glsl_type::int_type), \ + _##NAME(always_available, glsl_type::ivec2_type), \ + _##NAME(always_available, glsl_type::ivec3_type), \ + _##NAME(always_available, glsl_type::ivec4_type), \ + \ + _##NAME(v130, glsl_type::uint_type), \ + _##NAME(v130, glsl_type::uvec2_type), \ + _##NAME(v130, glsl_type::uvec3_type), \ + _##NAME(v130, glsl_type::uvec4_type), \ + \ + _##NAME(always_available, glsl_type::bool_type), \ + _##NAME(always_available, glsl_type::bvec2_type), \ + _##NAME(always_available, glsl_type::bvec3_type), \ + _##NAME(always_available, glsl_type::bvec4_type), \ + \ + _##NAME(fp64, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type), \ + NULL); + +#define FIUD2_MIXED(NAME) \ + add_function(#NAME, \ + _##NAME(always_available, glsl_type::float_type, glsl_type::float_type), \ + _##NAME(always_available, glsl_type::vec2_type, glsl_type::float_type), \ + _##NAME(always_available, glsl_type::vec3_type, glsl_type::float_type), \ + _##NAME(always_available, glsl_type::vec4_type, glsl_type::float_type), \ + \ + 
_##NAME(always_available, glsl_type::vec2_type, glsl_type::vec2_type), \ + _##NAME(always_available, glsl_type::vec3_type, glsl_type::vec3_type), \ + _##NAME(always_available, glsl_type::vec4_type, glsl_type::vec4_type), \ + \ + _##NAME(always_available, glsl_type::int_type, glsl_type::int_type), \ + _##NAME(always_available, glsl_type::ivec2_type, glsl_type::int_type), \ + _##NAME(always_available, glsl_type::ivec3_type, glsl_type::int_type), \ + _##NAME(always_available, glsl_type::ivec4_type, glsl_type::int_type), \ + \ + _##NAME(always_available, glsl_type::ivec2_type, glsl_type::ivec2_type), \ + _##NAME(always_available, glsl_type::ivec3_type, glsl_type::ivec3_type), \ + _##NAME(always_available, glsl_type::ivec4_type, glsl_type::ivec4_type), \ + \ + _##NAME(v130, glsl_type::uint_type, glsl_type::uint_type), \ + _##NAME(v130, glsl_type::uvec2_type, glsl_type::uint_type), \ + _##NAME(v130, glsl_type::uvec3_type, glsl_type::uint_type), \ + _##NAME(v130, glsl_type::uvec4_type, glsl_type::uint_type), \ + \ + _##NAME(v130, glsl_type::uvec2_type, glsl_type::uvec2_type), \ + _##NAME(v130, glsl_type::uvec3_type, glsl_type::uvec3_type), \ + _##NAME(v130, glsl_type::uvec4_type, glsl_type::uvec4_type), \ + \ + _##NAME(fp64, glsl_type::double_type, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec3_type, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec4_type, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type), \ + NULL); + + F(radians) + F(degrees) + F(sin) + F(cos) + F(tan) + F(asin) + F(acos) + + add_function("atan", + _atan(glsl_type::float_type), + _atan(glsl_type::vec2_type), + _atan(glsl_type::vec3_type), + _atan(glsl_type::vec4_type), + _atan2(glsl_type::float_type), + _atan2(glsl_type::vec2_type), + 
_atan2(glsl_type::vec3_type), + _atan2(glsl_type::vec4_type), + NULL); + + F(sinh) + F(cosh) + F(tanh) + F(asinh) + F(acosh) + F(atanh) + F(pow) + F(exp) + F(log) + F(exp2) + F(log2) + FD(sqrt) + FD(inversesqrt) + FID(abs) + FID(sign) + FD(floor) + FD(trunc) + FD(round) + FD(roundEven) + FD(ceil) + FD(fract) + + add_function("mod", + _mod(glsl_type::float_type, glsl_type::float_type), + _mod(glsl_type::vec2_type, glsl_type::float_type), + _mod(glsl_type::vec3_type, glsl_type::float_type), + _mod(glsl_type::vec4_type, glsl_type::float_type), + + _mod(glsl_type::vec2_type, glsl_type::vec2_type), + _mod(glsl_type::vec3_type, glsl_type::vec3_type), + _mod(glsl_type::vec4_type, glsl_type::vec4_type), + + _mod(glsl_type::double_type, glsl_type::double_type), + _mod(glsl_type::dvec2_type, glsl_type::double_type), + _mod(glsl_type::dvec3_type, glsl_type::double_type), + _mod(glsl_type::dvec4_type, glsl_type::double_type), + + _mod(glsl_type::dvec2_type, glsl_type::dvec2_type), + _mod(glsl_type::dvec3_type, glsl_type::dvec3_type), + _mod(glsl_type::dvec4_type, glsl_type::dvec4_type), + NULL); + + FD(modf) + + FIUD2_MIXED(min) + FIUD2_MIXED(max) + FIUD2_MIXED(clamp) + + add_function("mix", + _mix_lrp(always_available, glsl_type::float_type, glsl_type::float_type), + _mix_lrp(always_available, glsl_type::vec2_type, glsl_type::float_type), + _mix_lrp(always_available, glsl_type::vec3_type, glsl_type::float_type), + _mix_lrp(always_available, glsl_type::vec4_type, glsl_type::float_type), + + _mix_lrp(always_available, glsl_type::vec2_type, glsl_type::vec2_type), + _mix_lrp(always_available, glsl_type::vec3_type, glsl_type::vec3_type), + _mix_lrp(always_available, glsl_type::vec4_type, glsl_type::vec4_type), + + _mix_lrp(fp64, glsl_type::double_type, glsl_type::double_type), + _mix_lrp(fp64, glsl_type::dvec2_type, glsl_type::double_type), + _mix_lrp(fp64, glsl_type::dvec3_type, glsl_type::double_type), + _mix_lrp(fp64, glsl_type::dvec4_type, glsl_type::double_type), + + 
_mix_lrp(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type), + _mix_lrp(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type), + _mix_lrp(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type), + + _mix_sel(v130, glsl_type::float_type, glsl_type::bool_type), + _mix_sel(v130, glsl_type::vec2_type, glsl_type::bvec2_type), + _mix_sel(v130, glsl_type::vec3_type, glsl_type::bvec3_type), + _mix_sel(v130, glsl_type::vec4_type, glsl_type::bvec4_type), + + _mix_sel(fp64, glsl_type::double_type, glsl_type::bool_type), + _mix_sel(fp64, glsl_type::dvec2_type, glsl_type::bvec2_type), + _mix_sel(fp64, glsl_type::dvec3_type, glsl_type::bvec3_type), + _mix_sel(fp64, glsl_type::dvec4_type, glsl_type::bvec4_type), + + _mix_sel(shader_integer_mix, glsl_type::int_type, glsl_type::bool_type), + _mix_sel(shader_integer_mix, glsl_type::ivec2_type, glsl_type::bvec2_type), + _mix_sel(shader_integer_mix, glsl_type::ivec3_type, glsl_type::bvec3_type), + _mix_sel(shader_integer_mix, glsl_type::ivec4_type, glsl_type::bvec4_type), + + _mix_sel(shader_integer_mix, glsl_type::uint_type, glsl_type::bool_type), + _mix_sel(shader_integer_mix, glsl_type::uvec2_type, glsl_type::bvec2_type), + _mix_sel(shader_integer_mix, glsl_type::uvec3_type, glsl_type::bvec3_type), + _mix_sel(shader_integer_mix, glsl_type::uvec4_type, glsl_type::bvec4_type), + + _mix_sel(shader_integer_mix, glsl_type::bool_type, glsl_type::bool_type), + _mix_sel(shader_integer_mix, glsl_type::bvec2_type, glsl_type::bvec2_type), + _mix_sel(shader_integer_mix, glsl_type::bvec3_type, glsl_type::bvec3_type), + _mix_sel(shader_integer_mix, glsl_type::bvec4_type, glsl_type::bvec4_type), + NULL); + + add_function("step", + _step(always_available, glsl_type::float_type, glsl_type::float_type), + _step(always_available, glsl_type::float_type, glsl_type::vec2_type), + _step(always_available, glsl_type::float_type, glsl_type::vec3_type), + _step(always_available, glsl_type::float_type, glsl_type::vec4_type), + + _step(always_available, 
glsl_type::vec2_type, glsl_type::vec2_type), + _step(always_available, glsl_type::vec3_type, glsl_type::vec3_type), + _step(always_available, glsl_type::vec4_type, glsl_type::vec4_type), + _step(fp64, glsl_type::double_type, glsl_type::double_type), + _step(fp64, glsl_type::double_type, glsl_type::dvec2_type), + _step(fp64, glsl_type::double_type, glsl_type::dvec3_type), + _step(fp64, glsl_type::double_type, glsl_type::dvec4_type), + + _step(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type), + _step(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type), + _step(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type), + NULL); + + add_function("smoothstep", + _smoothstep(always_available, glsl_type::float_type, glsl_type::float_type), + _smoothstep(always_available, glsl_type::float_type, glsl_type::vec2_type), + _smoothstep(always_available, glsl_type::float_type, glsl_type::vec3_type), + _smoothstep(always_available, glsl_type::float_type, glsl_type::vec4_type), + + _smoothstep(always_available, glsl_type::vec2_type, glsl_type::vec2_type), + _smoothstep(always_available, glsl_type::vec3_type, glsl_type::vec3_type), + _smoothstep(always_available, glsl_type::vec4_type, glsl_type::vec4_type), + _smoothstep(fp64, glsl_type::double_type, glsl_type::double_type), + _smoothstep(fp64, glsl_type::double_type, glsl_type::dvec2_type), + _smoothstep(fp64, glsl_type::double_type, glsl_type::dvec3_type), + _smoothstep(fp64, glsl_type::double_type, glsl_type::dvec4_type), + + _smoothstep(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type), + _smoothstep(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type), + _smoothstep(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type), + NULL); + + FD130(isnan) + FD130(isinf) + + F(floatBitsToInt) + F(floatBitsToUint) + add_function("intBitsToFloat", + _intBitsToFloat(glsl_type::int_type), + _intBitsToFloat(glsl_type::ivec2_type), + _intBitsToFloat(glsl_type::ivec3_type), + _intBitsToFloat(glsl_type::ivec4_type), + NULL); + 
add_function("uintBitsToFloat", + _uintBitsToFloat(glsl_type::uint_type), + _uintBitsToFloat(glsl_type::uvec2_type), + _uintBitsToFloat(glsl_type::uvec3_type), + _uintBitsToFloat(glsl_type::uvec4_type), + NULL); + + add_function("packUnorm2x16", _packUnorm2x16(shader_packing_or_es3_or_gpu_shader5), NULL); + add_function("packSnorm2x16", _packSnorm2x16(shader_packing_or_es3), NULL); + add_function("packUnorm4x8", _packUnorm4x8(shader_packing_or_es31_or_gpu_shader5), NULL); + add_function("packSnorm4x8", _packSnorm4x8(shader_packing_or_es31_or_gpu_shader5), NULL); + add_function("unpackUnorm2x16", _unpackUnorm2x16(shader_packing_or_es3_or_gpu_shader5), NULL); + add_function("unpackSnorm2x16", _unpackSnorm2x16(shader_packing_or_es3), NULL); + add_function("unpackUnorm4x8", _unpackUnorm4x8(shader_packing_or_es31_or_gpu_shader5), NULL); + add_function("unpackSnorm4x8", _unpackSnorm4x8(shader_packing_or_es31_or_gpu_shader5), NULL); + add_function("packHalf2x16", _packHalf2x16(shader_packing_or_es3), NULL); + add_function("unpackHalf2x16", _unpackHalf2x16(shader_packing_or_es3), NULL); + add_function("packDouble2x32", _packDouble2x32(fp64), NULL); + add_function("unpackDouble2x32", _unpackDouble2x32(fp64), NULL); + + + FD(length) + FD(distance) + FD(dot) + + add_function("cross", _cross(always_available, glsl_type::vec3_type), + _cross(fp64, glsl_type::dvec3_type), NULL); + + FD(normalize) + add_function("ftransform", _ftransform(), NULL); + FD(faceforward) + FD(reflect) + FD(refract) + // ... 
+ add_function("matrixCompMult", + _matrixCompMult(always_available, glsl_type::mat2_type), + _matrixCompMult(always_available, glsl_type::mat3_type), + _matrixCompMult(always_available, glsl_type::mat4_type), + _matrixCompMult(always_available, glsl_type::mat2x3_type), + _matrixCompMult(always_available, glsl_type::mat2x4_type), + _matrixCompMult(always_available, glsl_type::mat3x2_type), + _matrixCompMult(always_available, glsl_type::mat3x4_type), + _matrixCompMult(always_available, glsl_type::mat4x2_type), + _matrixCompMult(always_available, glsl_type::mat4x3_type), + _matrixCompMult(fp64, glsl_type::dmat2_type), + _matrixCompMult(fp64, glsl_type::dmat3_type), + _matrixCompMult(fp64, glsl_type::dmat4_type), + _matrixCompMult(fp64, glsl_type::dmat2x3_type), + _matrixCompMult(fp64, glsl_type::dmat2x4_type), + _matrixCompMult(fp64, glsl_type::dmat3x2_type), + _matrixCompMult(fp64, glsl_type::dmat3x4_type), + _matrixCompMult(fp64, glsl_type::dmat4x2_type), + _matrixCompMult(fp64, glsl_type::dmat4x3_type), + NULL); + add_function("outerProduct", + _outerProduct(v120, glsl_type::mat2_type), + _outerProduct(v120, glsl_type::mat3_type), + _outerProduct(v120, glsl_type::mat4_type), + _outerProduct(v120, glsl_type::mat2x3_type), + _outerProduct(v120, glsl_type::mat2x4_type), + _outerProduct(v120, glsl_type::mat3x2_type), + _outerProduct(v120, glsl_type::mat3x4_type), + _outerProduct(v120, glsl_type::mat4x2_type), + _outerProduct(v120, glsl_type::mat4x3_type), + _outerProduct(fp64, glsl_type::dmat2_type), + _outerProduct(fp64, glsl_type::dmat3_type), + _outerProduct(fp64, glsl_type::dmat4_type), + _outerProduct(fp64, glsl_type::dmat2x3_type), + _outerProduct(fp64, glsl_type::dmat2x4_type), + _outerProduct(fp64, glsl_type::dmat3x2_type), + _outerProduct(fp64, glsl_type::dmat3x4_type), + _outerProduct(fp64, glsl_type::dmat4x2_type), + _outerProduct(fp64, glsl_type::dmat4x3_type), + NULL); + add_function("determinant", + _determinant_mat2(v120, glsl_type::mat2_type), + 
_determinant_mat3(v120, glsl_type::mat3_type), + _determinant_mat4(v120, glsl_type::mat4_type), + _determinant_mat2(fp64, glsl_type::dmat2_type), + _determinant_mat3(fp64, glsl_type::dmat3_type), + _determinant_mat4(fp64, glsl_type::dmat4_type), + + NULL); + add_function("inverse", + _inverse_mat2(v140_or_es3, glsl_type::mat2_type), + _inverse_mat3(v140_or_es3, glsl_type::mat3_type), + _inverse_mat4(v140_or_es3, glsl_type::mat4_type), + _inverse_mat2(fp64, glsl_type::dmat2_type), + _inverse_mat3(fp64, glsl_type::dmat3_type), + _inverse_mat4(fp64, glsl_type::dmat4_type), + NULL); + add_function("transpose", + _transpose(v120, glsl_type::mat2_type), + _transpose(v120, glsl_type::mat3_type), + _transpose(v120, glsl_type::mat4_type), + _transpose(v120, glsl_type::mat2x3_type), + _transpose(v120, glsl_type::mat2x4_type), + _transpose(v120, glsl_type::mat3x2_type), + _transpose(v120, glsl_type::mat3x4_type), + _transpose(v120, glsl_type::mat4x2_type), + _transpose(v120, glsl_type::mat4x3_type), + _transpose(fp64, glsl_type::dmat2_type), + _transpose(fp64, glsl_type::dmat3_type), + _transpose(fp64, glsl_type::dmat4_type), + _transpose(fp64, glsl_type::dmat2x3_type), + _transpose(fp64, glsl_type::dmat2x4_type), + _transpose(fp64, glsl_type::dmat3x2_type), + _transpose(fp64, glsl_type::dmat3x4_type), + _transpose(fp64, glsl_type::dmat4x2_type), + _transpose(fp64, glsl_type::dmat4x3_type), + NULL); + FIUD(lessThan) + FIUD(lessThanEqual) + FIUD(greaterThan) + FIUD(greaterThanEqual) + FIUBD(notEqual) + FIUBD(equal) + + add_function("any", + _any(glsl_type::bvec2_type), + _any(glsl_type::bvec3_type), + _any(glsl_type::bvec4_type), + NULL); + + add_function("all", + _all(glsl_type::bvec2_type), + _all(glsl_type::bvec3_type), + _all(glsl_type::bvec4_type), + NULL); + + add_function("not", + _not(glsl_type::bvec2_type), + _not(glsl_type::bvec3_type), + _not(glsl_type::bvec4_type), + NULL); + + add_function("textureSize", + _textureSize(v130, glsl_type::int_type, 
glsl_type::sampler1D_type), + _textureSize(v130, glsl_type::int_type, glsl_type::isampler1D_type), + _textureSize(v130, glsl_type::int_type, glsl_type::usampler1D_type), + + _textureSize(v130, glsl_type::ivec2_type, glsl_type::sampler2D_type), + _textureSize(v130, glsl_type::ivec2_type, glsl_type::isampler2D_type), + _textureSize(v130, glsl_type::ivec2_type, glsl_type::usampler2D_type), + + _textureSize(v130, glsl_type::ivec3_type, glsl_type::sampler3D_type), + _textureSize(v130, glsl_type::ivec3_type, glsl_type::isampler3D_type), + _textureSize(v130, glsl_type::ivec3_type, glsl_type::usampler3D_type), + + _textureSize(v130, glsl_type::ivec2_type, glsl_type::samplerCube_type), + _textureSize(v130, glsl_type::ivec2_type, glsl_type::isamplerCube_type), + _textureSize(v130, glsl_type::ivec2_type, glsl_type::usamplerCube_type), + + _textureSize(v130, glsl_type::int_type, glsl_type::sampler1DShadow_type), + _textureSize(v130, glsl_type::ivec2_type, glsl_type::sampler2DShadow_type), + _textureSize(v130, glsl_type::ivec2_type, glsl_type::samplerCubeShadow_type), + + _textureSize(v130, glsl_type::ivec2_type, glsl_type::sampler1DArray_type), + _textureSize(v130, glsl_type::ivec2_type, glsl_type::isampler1DArray_type), + _textureSize(v130, glsl_type::ivec2_type, glsl_type::usampler1DArray_type), + _textureSize(v130, glsl_type::ivec3_type, glsl_type::sampler2DArray_type), + _textureSize(v130, glsl_type::ivec3_type, glsl_type::isampler2DArray_type), + _textureSize(v130, glsl_type::ivec3_type, glsl_type::usampler2DArray_type), + + _textureSize(v130, glsl_type::ivec2_type, glsl_type::sampler1DArrayShadow_type), + _textureSize(v130, glsl_type::ivec3_type, glsl_type::sampler2DArrayShadow_type), + + _textureSize(texture_cube_map_array, glsl_type::ivec3_type, glsl_type::samplerCubeArray_type), + _textureSize(texture_cube_map_array, glsl_type::ivec3_type, glsl_type::isamplerCubeArray_type), + _textureSize(texture_cube_map_array, glsl_type::ivec3_type, 
glsl_type::usamplerCubeArray_type), + _textureSize(texture_cube_map_array, glsl_type::ivec3_type, glsl_type::samplerCubeArrayShadow_type), + + _textureSize(v130, glsl_type::ivec2_type, glsl_type::sampler2DRect_type), + _textureSize(v130, glsl_type::ivec2_type, glsl_type::isampler2DRect_type), + _textureSize(v130, glsl_type::ivec2_type, glsl_type::usampler2DRect_type), + _textureSize(v130, glsl_type::ivec2_type, glsl_type::sampler2DRectShadow_type), + + _textureSize(v140, glsl_type::int_type, glsl_type::samplerBuffer_type), + _textureSize(v140, glsl_type::int_type, glsl_type::isamplerBuffer_type), + _textureSize(v140, glsl_type::int_type, glsl_type::usamplerBuffer_type), + _textureSize(texture_multisample, glsl_type::ivec2_type, glsl_type::sampler2DMS_type), + _textureSize(texture_multisample, glsl_type::ivec2_type, glsl_type::isampler2DMS_type), + _textureSize(texture_multisample, glsl_type::ivec2_type, glsl_type::usampler2DMS_type), + + _textureSize(texture_multisample_array, glsl_type::ivec3_type, glsl_type::sampler2DMSArray_type), + _textureSize(texture_multisample_array, glsl_type::ivec3_type, glsl_type::isampler2DMSArray_type), + _textureSize(texture_multisample_array, glsl_type::ivec3_type, glsl_type::usampler2DMSArray_type), + NULL); + + add_function("textureSamples", + _textureSamples(glsl_type::sampler2DMS_type), + _textureSamples(glsl_type::isampler2DMS_type), + _textureSamples(glsl_type::usampler2DMS_type), + + _textureSamples(glsl_type::sampler2DMSArray_type), + _textureSamples(glsl_type::isampler2DMSArray_type), + _textureSamples(glsl_type::usampler2DMSArray_type), + NULL); + + add_function("texture", + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::float_type), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::float_type), + + _texture(ir_tex, v130, glsl_type::vec4_type, 
glsl_type::sampler2D_type, glsl_type::vec2_type), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec3_type), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec3_type), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type), + + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type), + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type), + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::samplerCubeShadow_type, glsl_type::vec4_type), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::vec2_type), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::vec2_type), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type), + + _texture(ir_tex, texture_cube_map_array, glsl_type::vec4_type, glsl_type::samplerCubeArray_type, glsl_type::vec4_type), + _texture(ir_tex, texture_cube_map_array, glsl_type::ivec4_type, 
glsl_type::isamplerCubeArray_type, glsl_type::vec4_type), + _texture(ir_tex, texture_cube_map_array, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type), + + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type), + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec4_type), + /* samplerCubeArrayShadow is special; it has an extra parameter + * for the shadow comparitor since there is no vec5 type. + */ + _textureCubeArrayShadow(), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type), + + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec3_type), + + _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type), + _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::float_type), + _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::float_type), + + _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), + _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type), + _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type), + + _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type), + _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec3_type), + _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec3_type), + + _texture(ir_txb, v130_fs_only, 
glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), + _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type), + _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type), + + _texture(ir_txb, v130_fs_only, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type), + _texture(ir_txb, v130_fs_only, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type), + _texture(ir_txb, v130_fs_only, glsl_type::float_type, glsl_type::samplerCubeShadow_type, glsl_type::vec4_type), + + _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type), + _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::vec2_type), + _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::vec2_type), + + _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type), + _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type), + _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type), + + _texture(ir_txb, fs_texture_cube_map_array, glsl_type::vec4_type, glsl_type::samplerCubeArray_type, glsl_type::vec4_type), + _texture(ir_txb, fs_texture_cube_map_array, glsl_type::ivec4_type, glsl_type::isamplerCubeArray_type, glsl_type::vec4_type), + _texture(ir_txb, fs_texture_cube_map_array, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type), + + _texture(ir_txb, v130_fs_only, glsl_type::float_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type), + NULL); + + add_function("textureLod", + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type), + _texture(ir_txl, v130, glsl_type::ivec4_type, 
glsl_type::isampler1D_type, glsl_type::float_type), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::float_type), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec3_type), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec3_type), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type), + + _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type), + _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::vec2_type), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::vec2_type), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type), + + _texture(ir_txl, texture_cube_map_array, glsl_type::vec4_type, glsl_type::samplerCubeArray_type, 
glsl_type::vec4_type), + _texture(ir_txl, texture_cube_map_array, glsl_type::ivec4_type, glsl_type::isamplerCubeArray_type, glsl_type::vec4_type), + _texture(ir_txl, texture_cube_map_array, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type), + + _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type), + NULL); + + add_function("textureOffset", + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type, TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::float_type, TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::float_type, TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type, 
TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type, TEX_OFFSET), + _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::float_type, TEX_OFFSET), + _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::float_type, TEX_OFFSET), + + _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler3D_type, 
glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_txb, v130_fs_only, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txb, v130_fs_only, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_txb, v130_fs_only, glsl_type::float_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET), + NULL); + + add_function("textureProj", + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_tex, 
v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_txb, v130, glsl_type::vec4_type, 
glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_txb, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("texelFetch", + _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::int_type), + 
_texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::int_type), + _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::int_type), + + _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::ivec2_type), + _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::ivec2_type), + _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::ivec2_type), + + _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::ivec3_type), + _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::ivec3_type), + _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::ivec3_type), + + _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::ivec2_type), + _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::ivec2_type), + _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::ivec2_type), + + _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::ivec2_type), + _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::ivec2_type), + _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::ivec2_type), + + _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::ivec3_type), + _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::ivec3_type), + _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::ivec3_type), + + _texelFetch(v140, glsl_type::vec4_type, glsl_type::samplerBuffer_type, glsl_type::int_type), + _texelFetch(v140, glsl_type::ivec4_type, glsl_type::isamplerBuffer_type, glsl_type::int_type), + _texelFetch(v140, glsl_type::uvec4_type, glsl_type::usamplerBuffer_type, glsl_type::int_type), + + _texelFetch(texture_multisample, 
glsl_type::vec4_type, glsl_type::sampler2DMS_type, glsl_type::ivec2_type), + _texelFetch(texture_multisample, glsl_type::ivec4_type, glsl_type::isampler2DMS_type, glsl_type::ivec2_type), + _texelFetch(texture_multisample, glsl_type::uvec4_type, glsl_type::usampler2DMS_type, glsl_type::ivec2_type), + + _texelFetch(texture_multisample_array, glsl_type::vec4_type, glsl_type::sampler2DMSArray_type, glsl_type::ivec3_type), + _texelFetch(texture_multisample_array, glsl_type::ivec4_type, glsl_type::isampler2DMSArray_type, glsl_type::ivec3_type), + _texelFetch(texture_multisample_array, glsl_type::uvec4_type, glsl_type::usampler2DMSArray_type, glsl_type::ivec3_type), + NULL); + + add_function("texelFetchOffset", + _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::int_type, glsl_type::int_type), + _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::int_type, glsl_type::int_type), + _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::int_type, glsl_type::int_type), + + _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::ivec2_type, glsl_type::ivec2_type), + _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::ivec2_type, glsl_type::ivec2_type), + _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::ivec2_type, glsl_type::ivec2_type), + + _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::ivec3_type, glsl_type::ivec3_type), + _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::ivec3_type, glsl_type::ivec3_type), + _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::ivec3_type, glsl_type::ivec3_type), + + _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::ivec2_type, glsl_type::ivec2_type), + _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::ivec2_type, 
glsl_type::ivec2_type), + _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::ivec2_type, glsl_type::ivec2_type), + + _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::ivec2_type, glsl_type::int_type), + _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::ivec2_type, glsl_type::int_type), + _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::ivec2_type, glsl_type::int_type), + + _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::ivec3_type, glsl_type::ivec2_type), + _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::ivec3_type, glsl_type::ivec2_type), + _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::ivec3_type, glsl_type::ivec2_type), + + NULL); + + add_function("textureProjOffset", + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, 
glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_txb, v130, 
glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_txb, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, 
v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + NULL); + + add_function("textureLodOffset", + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type, TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::float_type, TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::float_type, TEX_OFFSET), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txl, v130, 
glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET), + NULL); + + add_function("textureProjLod", + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_txl, v130, glsl_type::float_type, 
glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("textureProjLodOffset", + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, 
v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + NULL); + + add_function("textureGrad", + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::float_type), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::float_type), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec3_type), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec3_type), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type), + + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DRectShadow_type, 
glsl_type::vec3_type), + + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type), + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type), + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::samplerCubeShadow_type, glsl_type::vec4_type), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::vec2_type), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::vec2_type), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type), + + _texture(ir_txd, texture_cube_map_array, glsl_type::vec4_type, glsl_type::samplerCubeArray_type, glsl_type::vec4_type), + _texture(ir_txd, texture_cube_map_array, glsl_type::ivec4_type, glsl_type::isamplerCubeArray_type, glsl_type::vec4_type), + _texture(ir_txd, texture_cube_map_array, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type), + + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type), + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec4_type), + NULL); + + add_function("textureGradOffset", + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::float_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::float_type, TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::vec4_type, 
glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, 
glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec4_type, TEX_OFFSET), + NULL); + + add_function("textureProjGrad", + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + 
+ _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("textureProjGradOffset", + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, 
glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT | 
TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + NULL); + + add_function("EmitVertex", _EmitVertex(), NULL); + add_function("EndPrimitive", _EndPrimitive(), NULL); + add_function("EmitStreamVertex", + _EmitStreamVertex(gs_streams, glsl_type::uint_type), + _EmitStreamVertex(gs_streams, glsl_type::int_type), + NULL); + add_function("EndStreamPrimitive", + _EndStreamPrimitive(gs_streams, glsl_type::uint_type), + _EndStreamPrimitive(gs_streams, glsl_type::int_type), + NULL); + add_function("barrier", _barrier(), NULL); + + add_function("textureQueryLOD", + _textureQueryLod(texture_query_lod, glsl_type::sampler1D_type, glsl_type::float_type), + _textureQueryLod(texture_query_lod, glsl_type::isampler1D_type, glsl_type::float_type), + _textureQueryLod(texture_query_lod, glsl_type::usampler1D_type, glsl_type::float_type), + + _textureQueryLod(texture_query_lod, glsl_type::sampler2D_type, glsl_type::vec2_type), + _textureQueryLod(texture_query_lod, glsl_type::isampler2D_type, glsl_type::vec2_type), + _textureQueryLod(texture_query_lod, glsl_type::usampler2D_type, glsl_type::vec2_type), + + _textureQueryLod(texture_query_lod, glsl_type::sampler3D_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::isampler3D_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::usampler3D_type, glsl_type::vec3_type), + + _textureQueryLod(texture_query_lod, glsl_type::samplerCube_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::isamplerCube_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::usamplerCube_type, glsl_type::vec3_type), + + _textureQueryLod(texture_query_lod, glsl_type::sampler1DArray_type, glsl_type::float_type), + _textureQueryLod(texture_query_lod, glsl_type::isampler1DArray_type, glsl_type::float_type), + _textureQueryLod(texture_query_lod, glsl_type::usampler1DArray_type, 
glsl_type::float_type), + + _textureQueryLod(texture_query_lod, glsl_type::sampler2DArray_type, glsl_type::vec2_type), + _textureQueryLod(texture_query_lod, glsl_type::isampler2DArray_type, glsl_type::vec2_type), + _textureQueryLod(texture_query_lod, glsl_type::usampler2DArray_type, glsl_type::vec2_type), + + _textureQueryLod(texture_query_lod, glsl_type::samplerCubeArray_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::isamplerCubeArray_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::usamplerCubeArray_type, glsl_type::vec3_type), + + _textureQueryLod(texture_query_lod, glsl_type::sampler1DShadow_type, glsl_type::float_type), + _textureQueryLod(texture_query_lod, glsl_type::sampler2DShadow_type, glsl_type::vec2_type), + _textureQueryLod(texture_query_lod, glsl_type::samplerCubeShadow_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::sampler1DArrayShadow_type, glsl_type::float_type), + _textureQueryLod(texture_query_lod, glsl_type::sampler2DArrayShadow_type, glsl_type::vec2_type), + _textureQueryLod(texture_query_lod, glsl_type::samplerCubeArrayShadow_type, glsl_type::vec3_type), + NULL); + + add_function("textureQueryLod", + _textureQueryLod(v400_fs_only, glsl_type::sampler1D_type, glsl_type::float_type), + _textureQueryLod(v400_fs_only, glsl_type::isampler1D_type, glsl_type::float_type), + _textureQueryLod(v400_fs_only, glsl_type::usampler1D_type, glsl_type::float_type), + + _textureQueryLod(v400_fs_only, glsl_type::sampler2D_type, glsl_type::vec2_type), + _textureQueryLod(v400_fs_only, glsl_type::isampler2D_type, glsl_type::vec2_type), + _textureQueryLod(v400_fs_only, glsl_type::usampler2D_type, glsl_type::vec2_type), + + _textureQueryLod(v400_fs_only, glsl_type::sampler3D_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::isampler3D_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::usampler3D_type, glsl_type::vec3_type), + + 
_textureQueryLod(v400_fs_only, glsl_type::samplerCube_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::isamplerCube_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::usamplerCube_type, glsl_type::vec3_type), + + _textureQueryLod(v400_fs_only, glsl_type::sampler1DArray_type, glsl_type::float_type), + _textureQueryLod(v400_fs_only, glsl_type::isampler1DArray_type, glsl_type::float_type), + _textureQueryLod(v400_fs_only, glsl_type::usampler1DArray_type, glsl_type::float_type), + + _textureQueryLod(v400_fs_only, glsl_type::sampler2DArray_type, glsl_type::vec2_type), + _textureQueryLod(v400_fs_only, glsl_type::isampler2DArray_type, glsl_type::vec2_type), + _textureQueryLod(v400_fs_only, glsl_type::usampler2DArray_type, glsl_type::vec2_type), + + _textureQueryLod(v400_fs_only, glsl_type::samplerCubeArray_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::isamplerCubeArray_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::usamplerCubeArray_type, glsl_type::vec3_type), + + _textureQueryLod(v400_fs_only, glsl_type::sampler1DShadow_type, glsl_type::float_type), + _textureQueryLod(v400_fs_only, glsl_type::sampler2DShadow_type, glsl_type::vec2_type), + _textureQueryLod(v400_fs_only, glsl_type::samplerCubeShadow_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::sampler1DArrayShadow_type, glsl_type::float_type), + _textureQueryLod(v400_fs_only, glsl_type::sampler2DArrayShadow_type, glsl_type::vec2_type), + _textureQueryLod(v400_fs_only, glsl_type::samplerCubeArrayShadow_type, glsl_type::vec3_type), + NULL); + + add_function("textureQueryLevels", + _textureQueryLevels(glsl_type::sampler1D_type), + _textureQueryLevels(glsl_type::sampler2D_type), + _textureQueryLevels(glsl_type::sampler3D_type), + _textureQueryLevels(glsl_type::samplerCube_type), + _textureQueryLevels(glsl_type::sampler1DArray_type), + _textureQueryLevels(glsl_type::sampler2DArray_type), + 
_textureQueryLevels(glsl_type::samplerCubeArray_type), + _textureQueryLevels(glsl_type::sampler1DShadow_type), + _textureQueryLevels(glsl_type::sampler2DShadow_type), + _textureQueryLevels(glsl_type::samplerCubeShadow_type), + _textureQueryLevels(glsl_type::sampler1DArrayShadow_type), + _textureQueryLevels(glsl_type::sampler2DArrayShadow_type), + _textureQueryLevels(glsl_type::samplerCubeArrayShadow_type), + + _textureQueryLevels(glsl_type::isampler1D_type), + _textureQueryLevels(glsl_type::isampler2D_type), + _textureQueryLevels(glsl_type::isampler3D_type), + _textureQueryLevels(glsl_type::isamplerCube_type), + _textureQueryLevels(glsl_type::isampler1DArray_type), + _textureQueryLevels(glsl_type::isampler2DArray_type), + _textureQueryLevels(glsl_type::isamplerCubeArray_type), + + _textureQueryLevels(glsl_type::usampler1D_type), + _textureQueryLevels(glsl_type::usampler2D_type), + _textureQueryLevels(glsl_type::usampler3D_type), + _textureQueryLevels(glsl_type::usamplerCube_type), + _textureQueryLevels(glsl_type::usampler1DArray_type), + _textureQueryLevels(glsl_type::usampler2DArray_type), + _textureQueryLevels(glsl_type::usamplerCubeArray_type), + + NULL); + + add_function("textureSamplesIdenticalEXT", + _textureSamplesIdentical(texture_samples_identical, glsl_type::sampler2DMS_type, glsl_type::ivec2_type), + _textureSamplesIdentical(texture_samples_identical, glsl_type::isampler2DMS_type, glsl_type::ivec2_type), + _textureSamplesIdentical(texture_samples_identical, glsl_type::usampler2DMS_type, glsl_type::ivec2_type), + + _textureSamplesIdentical(texture_samples_identical_array, glsl_type::sampler2DMSArray_type, glsl_type::ivec3_type), + _textureSamplesIdentical(texture_samples_identical_array, glsl_type::isampler2DMSArray_type, glsl_type::ivec3_type), + _textureSamplesIdentical(texture_samples_identical_array, glsl_type::usampler2DMSArray_type, glsl_type::ivec3_type), + NULL); + + add_function("texture1D", + _texture(ir_tex, v110, glsl_type::vec4_type, 
glsl_type::sampler1D_type, glsl_type::float_type), + _texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type), + NULL); + + add_function("texture1DArray", + _texture(ir_tex, texture_array, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type), + _texture(ir_txb, fs_texture_array, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type), + NULL); + + add_function("texture1DProj", + _texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("texture1DLod", + _texture(ir_txl, tex1d_lod, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type), + NULL); + + add_function("texture1DArrayLod", + _texture(ir_txl, texture_array_lod, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type), + NULL); + + add_function("texture1DProjLod", + _texture(ir_txl, tex1d_lod, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_txl, tex1d_lod, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("texture2D", + _texture(ir_tex, always_available, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), + _texture(ir_txb, fs_only, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), + _texture(ir_tex, texture_external, glsl_type::vec4_type, glsl_type::samplerExternalOES_type, glsl_type::vec2_type), + NULL); + + add_function("texture2DArray", + _texture(ir_tex, texture_array, glsl_type::vec4_type, 
glsl_type::sampler2DArray_type, glsl_type::vec3_type), + _texture(ir_txb, fs_texture_array, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type), + NULL); + + add_function("texture2DProj", + _texture(ir_tex, always_available, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_tex, always_available, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txb, fs_only, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txb, fs_only, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_tex, texture_external, glsl_type::vec4_type, glsl_type::samplerExternalOES_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_tex, texture_external, glsl_type::vec4_type, glsl_type::samplerExternalOES_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("texture2DLod", + _texture(ir_txl, lod_exists_in_stage, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), + NULL); + + add_function("texture2DArrayLod", + _texture(ir_txl, texture_array_lod, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type), + NULL); + + add_function("texture2DProjLod", + _texture(ir_txl, lod_exists_in_stage, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txl, lod_exists_in_stage, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("texture3D", + _texture(ir_tex, tex3d, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type), + _texture(ir_txb, fs_tex3d, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type), + NULL); + + add_function("texture3DProj", + _texture(ir_tex, tex3d, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txb, fs_tex3d, glsl_type::vec4_type, 
glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("texture3DLod", + _texture(ir_txl, tex3d_lod, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type), + NULL); + + add_function("texture3DProjLod", + _texture(ir_txl, tex3d_lod, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("textureCube", + _texture(ir_tex, always_available, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), + _texture(ir_txb, fs_only, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), + NULL); + + add_function("textureCubeLod", + _texture(ir_txl, lod_exists_in_stage, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), + NULL); + + add_function("texture2DRect", + _texture(ir_tex, texture_rectangle, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type), + NULL); + + add_function("texture2DRectProj", + _texture(ir_tex, texture_rectangle, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_tex, texture_rectangle, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("shadow1D", + _texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type), + _texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type), + NULL); + + add_function("shadow1DArray", + _texture(ir_tex, texture_array, glsl_type::vec4_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type), + _texture(ir_txb, fs_texture_array, glsl_type::vec4_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type), + NULL); + + add_function("shadow2D", + _texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type), + _texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, 
glsl_type::vec3_type), + NULL); + + add_function("shadow2DArray", + _texture(ir_tex, texture_array, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec4_type), + _texture(ir_txb, fs_texture_array, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec4_type), + NULL); + + add_function("shadow1DProj", + _texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("shadow2DProj", + _texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("shadow1DLod", + _texture(ir_txl, v110_lod, glsl_type::vec4_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type), + NULL); + + add_function("shadow2DLod", + _texture(ir_txl, v110_lod, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type), + NULL); + + add_function("shadow1DArrayLod", + _texture(ir_txl, texture_array_lod, glsl_type::vec4_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type), + NULL); + + add_function("shadow1DProjLod", + _texture(ir_txl, v110_lod, glsl_type::vec4_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("shadow2DProjLod", + _texture(ir_txl, v110_lod, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("shadow2DRect", + _texture(ir_tex, texture_rectangle, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec3_type), + NULL); + + add_function("shadow2DRectProj", + _texture(ir_tex, texture_rectangle, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec4_type, 
TEX_PROJECT), + NULL); + + add_function("texture1DGradARB", + _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type), + NULL); + + add_function("texture1DProjGradARB", + _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("texture2DGradARB", + _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), + NULL); + + add_function("texture2DProjGradARB", + _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("texture3DGradARB", + _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type), + NULL); + + add_function("texture3DProjGradARB", + _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("textureCubeGradARB", + _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), + NULL); + + add_function("shadow1DGradARB", + _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type), + NULL); + + add_function("shadow1DProjGradARB", + _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("shadow2DGradARB", + _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type), + NULL); + + add_function("shadow2DProjGradARB", + _texture(ir_txd, 
shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("texture2DRectGradARB", + _texture(ir_txd, shader_texture_lod_and_rect, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type), + NULL); + + add_function("texture2DRectProjGradARB", + _texture(ir_txd, shader_texture_lod_and_rect, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txd, shader_texture_lod_and_rect, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("shadow2DRectGradARB", + _texture(ir_txd, shader_texture_lod_and_rect, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec3_type), + NULL); + + add_function("shadow2DRectProjGradARB", + _texture(ir_txd, shader_texture_lod_and_rect, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("textureGather", + _texture(ir_tg4, texture_gather_or_es31, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type), + + _texture(ir_tg4, texture_gather_or_es31, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, 
glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type), + + _texture(ir_tg4, texture_gather_or_es31, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type), + + _texture(ir_tg4, texture_gather, glsl_type::vec4_type, glsl_type::samplerCubeArray_type, glsl_type::vec4_type), + _texture(ir_tg4, texture_gather, glsl_type::ivec4_type, glsl_type::isamplerCubeArray_type, glsl_type::vec4_type), + _texture(ir_tg4, texture_gather, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type), + + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5_or_es31, 
glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::samplerCubeArray_type, glsl_type::vec4_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isamplerCubeArray_type, glsl_type::vec4_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type, TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::samplerCubeShadow_type, glsl_type::vec3_type), + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::samplerCubeArrayShadow_type, glsl_type::vec4_type), + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec2_type), + NULL); + + add_function("textureGatherOffset", + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::ivec4_type, 
glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET | TEX_COMPONENT), + + _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET | TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, 
glsl_type::vec2_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + + _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type, 
TEX_OFFSET), + _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET), + NULL); + + add_function("textureGatherOffsets", + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, 
glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + NULL); + + F(dFdx) + F(dFdy) + F(fwidth) + F(dFdxCoarse) + F(dFdyCoarse) + F(fwidthCoarse) + F(dFdxFine) + F(dFdyFine) + F(fwidthFine) + F(noise1) + F(noise2) + F(noise3) + F(noise4) + + IU(bitfieldExtract) + IU(bitfieldInsert) + IU(bitfieldReverse) + IU(bitCount) + IU(findLSB) + IU(findMSB) + FDGS5(fma) + + add_function("ldexp", + _ldexp(glsl_type::float_type, glsl_type::int_type), + _ldexp(glsl_type::vec2_type, glsl_type::ivec2_type), + _ldexp(glsl_type::vec3_type, glsl_type::ivec3_type), + _ldexp(glsl_type::vec4_type, glsl_type::ivec4_type), + _ldexp(glsl_type::double_type, glsl_type::int_type), + _ldexp(glsl_type::dvec2_type, glsl_type::ivec2_type), + _ldexp(glsl_type::dvec3_type, glsl_type::ivec3_type), + _ldexp(glsl_type::dvec4_type, glsl_type::ivec4_type), + NULL); + + add_function("frexp", + _frexp(glsl_type::float_type, glsl_type::int_type), + _frexp(glsl_type::vec2_type, glsl_type::ivec2_type), + _frexp(glsl_type::vec3_type, 
glsl_type::ivec3_type), + _frexp(glsl_type::vec4_type, glsl_type::ivec4_type), + _dfrexp(glsl_type::double_type, glsl_type::int_type), + _dfrexp(glsl_type::dvec2_type, glsl_type::ivec2_type), + _dfrexp(glsl_type::dvec3_type, glsl_type::ivec3_type), + _dfrexp(glsl_type::dvec4_type, glsl_type::ivec4_type), + NULL); + add_function("uaddCarry", + _uaddCarry(glsl_type::uint_type), + _uaddCarry(glsl_type::uvec2_type), + _uaddCarry(glsl_type::uvec3_type), + _uaddCarry(glsl_type::uvec4_type), + NULL); + add_function("usubBorrow", + _usubBorrow(glsl_type::uint_type), + _usubBorrow(glsl_type::uvec2_type), + _usubBorrow(glsl_type::uvec3_type), + _usubBorrow(glsl_type::uvec4_type), + NULL); + add_function("imulExtended", + _mulExtended(glsl_type::int_type), + _mulExtended(glsl_type::ivec2_type), + _mulExtended(glsl_type::ivec3_type), + _mulExtended(glsl_type::ivec4_type), + NULL); + add_function("umulExtended", + _mulExtended(glsl_type::uint_type), + _mulExtended(glsl_type::uvec2_type), + _mulExtended(glsl_type::uvec3_type), + _mulExtended(glsl_type::uvec4_type), + NULL); + add_function("interpolateAtCentroid", + _interpolateAtCentroid(glsl_type::float_type), + _interpolateAtCentroid(glsl_type::vec2_type), + _interpolateAtCentroid(glsl_type::vec3_type), + _interpolateAtCentroid(glsl_type::vec4_type), + NULL); + add_function("interpolateAtOffset", + _interpolateAtOffset(glsl_type::float_type), + _interpolateAtOffset(glsl_type::vec2_type), + _interpolateAtOffset(glsl_type::vec3_type), + _interpolateAtOffset(glsl_type::vec4_type), + NULL); + add_function("interpolateAtSample", + _interpolateAtSample(glsl_type::float_type), + _interpolateAtSample(glsl_type::vec2_type), + _interpolateAtSample(glsl_type::vec3_type), + _interpolateAtSample(glsl_type::vec4_type), + NULL); + + add_function("atomicCounter", + _atomic_counter_op("__intrinsic_atomic_read", + shader_atomic_counters), + NULL); + add_function("atomicCounterIncrement", + _atomic_counter_op("__intrinsic_atomic_increment", + 
shader_atomic_counters), + NULL); + add_function("atomicCounterDecrement", + _atomic_counter_op("__intrinsic_atomic_predecrement", + shader_atomic_counters), + NULL); + + add_function("atomicAdd", + _atomic_op2("__intrinsic_atomic_add", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_add", + buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("atomicMin", + _atomic_op2("__intrinsic_atomic_min", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_min", + buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("atomicMax", + _atomic_op2("__intrinsic_atomic_max", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_max", + buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("atomicAnd", + _atomic_op2("__intrinsic_atomic_and", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_and", + buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("atomicOr", + _atomic_op2("__intrinsic_atomic_or", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_or", + buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("atomicXor", + _atomic_op2("__intrinsic_atomic_xor", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_xor", + buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("atomicExchange", + _atomic_op2("__intrinsic_atomic_exchange", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_exchange", + buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("atomicCompSwap", + _atomic_op3("__intrinsic_atomic_comp_swap", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op3("__intrinsic_atomic_comp_swap", + buffer_atomics_supported, + glsl_type::int_type), + NULL); + + add_function("min3", 
+ _min3(glsl_type::float_type), + _min3(glsl_type::vec2_type), + _min3(glsl_type::vec3_type), + _min3(glsl_type::vec4_type), + + _min3(glsl_type::int_type), + _min3(glsl_type::ivec2_type), + _min3(glsl_type::ivec3_type), + _min3(glsl_type::ivec4_type), + + _min3(glsl_type::uint_type), + _min3(glsl_type::uvec2_type), + _min3(glsl_type::uvec3_type), + _min3(glsl_type::uvec4_type), + NULL); + + add_function("max3", + _max3(glsl_type::float_type), + _max3(glsl_type::vec2_type), + _max3(glsl_type::vec3_type), + _max3(glsl_type::vec4_type), + + _max3(glsl_type::int_type), + _max3(glsl_type::ivec2_type), + _max3(glsl_type::ivec3_type), + _max3(glsl_type::ivec4_type), + + _max3(glsl_type::uint_type), + _max3(glsl_type::uvec2_type), + _max3(glsl_type::uvec3_type), + _max3(glsl_type::uvec4_type), + NULL); + + add_function("mid3", + _mid3(glsl_type::float_type), + _mid3(glsl_type::vec2_type), + _mid3(glsl_type::vec3_type), + _mid3(glsl_type::vec4_type), + + _mid3(glsl_type::int_type), + _mid3(glsl_type::ivec2_type), + _mid3(glsl_type::ivec3_type), + _mid3(glsl_type::ivec4_type), + + _mid3(glsl_type::uint_type), + _mid3(glsl_type::uvec2_type), + _mid3(glsl_type::uvec3_type), + _mid3(glsl_type::uvec4_type), + NULL); + + add_image_functions(true); + + add_function("memoryBarrier", + _memory_barrier("__intrinsic_memory_barrier", + shader_image_load_store), + NULL); + add_function("groupMemoryBarrier", + _memory_barrier("__intrinsic_group_memory_barrier", + compute_shader), + NULL); + add_function("memoryBarrierAtomicCounter", + _memory_barrier("__intrinsic_memory_barrier_atomic_counter", + compute_shader), + NULL); + add_function("memoryBarrierBuffer", + _memory_barrier("__intrinsic_memory_barrier_buffer", + compute_shader), + NULL); + add_function("memoryBarrierImage", + _memory_barrier("__intrinsic_memory_barrier_image", + compute_shader), + NULL); + add_function("memoryBarrierShared", + _memory_barrier("__intrinsic_memory_barrier_shared", + compute_shader), + NULL); + + 
add_function("clock2x32ARB", + _shader_clock(shader_clock, + glsl_type::uvec2_type), + NULL); + +#undef F +#undef FI +#undef FIUD +#undef FIUBD +#undef FIU2_MIXED +} +/** Create the ir_function named \c name, add every ir_function_signature passed in the NULL-terminated variadic list to it, and register the function in the shader's symbol table. */ +void +builtin_builder::add_function(const char *name, ...) +{ + va_list ap; + + ir_function *f = new(mem_ctx) ir_function(name); + + va_start(ap, name); + while (true) { + ir_function_signature *sig = va_arg(ap, ir_function_signature *); + if (sig == NULL) + break; +/* Validation of the signature is present in the code but never executed: it is guarded by if (false). */ + if (false) { + exec_list stuff; + stuff.push_tail(sig); + validate_ir_tree(&stuff); + } + + f->add_signature(sig); + } + va_end(ap); + + shader->symbols->add_function(f); +} +/** Create the ir_function named \c name with one _image() signature for each entry of the local image type table that passes the \c flags filters, then register it in the shader's symbol table. Types whose sampler_type is GLSL_TYPE_FLOAT are included only when IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE is set; when IMAGE_FUNCTION_MS_ONLY is set, only types whose dimensionality is GLSL_SAMPLER_DIM_MS are included. */ +void +builtin_builder::add_image_function(const char *name, + const char *intrinsic_name, + image_prototype_ctr prototype, + unsigned num_arguments, + unsigned flags) +{ + static const glsl_type *const types[] = { + glsl_type::image1D_type, + glsl_type::image2D_type, + glsl_type::image3D_type, + glsl_type::image2DRect_type, + glsl_type::imageCube_type, + glsl_type::imageBuffer_type, + glsl_type::image1DArray_type, + glsl_type::image2DArray_type, + glsl_type::imageCubeArray_type, + glsl_type::image2DMS_type, + glsl_type::image2DMSArray_type, + glsl_type::iimage1D_type, + glsl_type::iimage2D_type, + glsl_type::iimage3D_type, + glsl_type::iimage2DRect_type, + glsl_type::iimageCube_type, + glsl_type::iimageBuffer_type, + glsl_type::iimage1DArray_type, + glsl_type::iimage2DArray_type, + glsl_type::iimageCubeArray_type, + glsl_type::iimage2DMS_type, + glsl_type::iimage2DMSArray_type, + glsl_type::uimage1D_type, + glsl_type::uimage2D_type, + glsl_type::uimage3D_type, + glsl_type::uimage2DRect_type, + glsl_type::uimageCube_type, + glsl_type::uimageBuffer_type, + glsl_type::uimage1DArray_type, + glsl_type::uimage2DArray_type, + glsl_type::uimageCubeArray_type, + glsl_type::uimage2DMS_type, + glsl_type::uimage2DMSArray_type + }; + + ir_function *f = new(mem_ctx) ir_function(name); + + for (unsigned i = 0; i < ARRAY_SIZE(types); ++i) { + if ((types[i]->sampler_type != 
GLSL_TYPE_FLOAT || + (flags & IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE)) && + (types[i]->sampler_dimensionality == GLSL_SAMPLER_DIM_MS || + !(flags & IMAGE_FUNCTION_MS_ONLY))) + f->add_signature(_image(prototype, types[i], intrinsic_name, + num_arguments, flags)); + } + + shader->symbols->add_function(f); +} +/** Register the image functions (load, store, the atomic operations, size and samples) through add_image_function(). When \c glsl is true they are registered under their GLSL names with IMAGE_FUNCTION_EMIT_STUB set; otherwise they are registered under the "__intrinsic_image_*" names with no stub flag. */ +void +builtin_builder::add_image_functions(bool glsl) +{ + const unsigned flags = (glsl ? IMAGE_FUNCTION_EMIT_STUB : 0); + + add_image_function(glsl ? "imageLoad" : "__intrinsic_image_load", + "__intrinsic_image_load", + &builtin_builder::_image_prototype, 0, + (flags | IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE | + IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE | + IMAGE_FUNCTION_READ_ONLY)); + + add_image_function(glsl ? "imageStore" : "__intrinsic_image_store", + "__intrinsic_image_store", + &builtin_builder::_image_prototype, 1, + (flags | IMAGE_FUNCTION_RETURNS_VOID | + IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE | + IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE | + IMAGE_FUNCTION_WRITE_ONLY)); +/* Every imageAtomic* registration that follows uses this same flag set; none of them adds IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE. */ + const unsigned atom_flags = flags | IMAGE_FUNCTION_AVAIL_ATOMIC; + + add_image_function(glsl ? "imageAtomicAdd" : "__intrinsic_image_atomic_add", + "__intrinsic_image_atomic_add", + &builtin_builder::_image_prototype, 1, atom_flags); + + add_image_function(glsl ? "imageAtomicMin" : "__intrinsic_image_atomic_min", + "__intrinsic_image_atomic_min", + &builtin_builder::_image_prototype, 1, atom_flags); + + add_image_function(glsl ? "imageAtomicMax" : "__intrinsic_image_atomic_max", + "__intrinsic_image_atomic_max", + &builtin_builder::_image_prototype, 1, atom_flags); + + add_image_function(glsl ? "imageAtomicAnd" : "__intrinsic_image_atomic_and", + "__intrinsic_image_atomic_and", + &builtin_builder::_image_prototype, 1, atom_flags); + + add_image_function(glsl ? "imageAtomicOr" : "__intrinsic_image_atomic_or", + "__intrinsic_image_atomic_or", + &builtin_builder::_image_prototype, 1, atom_flags); + + add_image_function(glsl ? 
"imageAtomicXor" : "__intrinsic_image_atomic_xor", + "__intrinsic_image_atomic_xor", + &builtin_builder::_image_prototype, 1, atom_flags); + + add_image_function((glsl ? "imageAtomicExchange" : + "__intrinsic_image_atomic_exchange"), + "__intrinsic_image_atomic_exchange", + &builtin_builder::_image_prototype, 1, atom_flags); + + add_image_function((glsl ? "imageAtomicCompSwap" : + "__intrinsic_image_atomic_comp_swap"), + "__intrinsic_image_atomic_comp_swap", + &builtin_builder::_image_prototype, 2, atom_flags); + + add_image_function(glsl ? "imageSize" : "__intrinsic_image_size", + "__intrinsic_image_size", + &builtin_builder::_image_size_prototype, 1, + flags | IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE); + + add_image_function(glsl ? "imageSamples" : "__intrinsic_image_samples", + "__intrinsic_image_samples", + &builtin_builder::_image_samples_prototype, 1, + flags | IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE | + IMAGE_FUNCTION_MS_ONLY); +} +/** Allocate from mem_ctx an ir_variable of the given type and name in mode ir_var_function_in. */ +ir_variable * +builtin_builder::in_var(const glsl_type *type, const char *name) +{ + return new(mem_ctx) ir_variable(type, name, ir_var_function_in); +} +/** Allocate from mem_ctx an ir_variable of the given type and name in mode ir_var_function_out. */ +ir_variable * +builtin_builder::out_var(const glsl_type *type, const char *name) +{ + return new(mem_ctx) ir_variable(type, name, ir_var_function_out); +} +/** imm() overloads: each allocates an ir_constant from mem_ctx for the given immediate value; \c vector_elements is forwarded unchanged to the ir_constant constructor. */ +ir_constant * +builtin_builder::imm(bool b, unsigned vector_elements) +{ + return new(mem_ctx) ir_constant(b, vector_elements); +} + +ir_constant * +builtin_builder::imm(float f, unsigned vector_elements) +{ + return new(mem_ctx) ir_constant(f, vector_elements); +} + +ir_constant * +builtin_builder::imm(int i, unsigned vector_elements) +{ + return new(mem_ctx) ir_constant(i, vector_elements); +} + +ir_constant * +builtin_builder::imm(unsigned u, unsigned vector_elements) +{ + return new(mem_ctx) ir_constant(u, vector_elements); +} + +ir_constant * +builtin_builder::imm(double d, unsigned vector_elements) +{ + return new(mem_ctx) ir_constant(d, vector_elements); +} + +ir_constant * +builtin_builder::imm(const glsl_type 
*type, const ir_constant_data &data) +{ + return new(mem_ctx) ir_constant(type, &data); +} +/** Select the immediate overload by base type: imm(val) when \c type has base type GLSL_TYPE_DOUBLE, otherwise imm() of \c val cast to float. The arguments and the whole expansion are parenthesized so the macro can be used inside larger expressions and with compound arguments. */ +#define IMM_FP(type, val) (((type)->base_type == GLSL_TYPE_DOUBLE) ? imm(val) : imm((float)(val))) +/** Allocate from mem_ctx an ir_dereference_variable referring to \c var. */ +ir_dereference_variable * +builtin_builder::var_ref(ir_variable *var) +{ + return new(mem_ctx) ir_dereference_variable(var); +} +/** Allocate from mem_ctx an ir_dereference_array of \c var whose index is the immediate constant \c idx. */ +ir_dereference_array * +builtin_builder::array_ref(ir_variable *var, int idx) +{ + return new(mem_ctx) ir_dereference_array(var, imm(idx)); +} + +/** Return an element of a matrix */ +ir_swizzle * +builtin_builder::matrix_elt(ir_variable *var, int column, int row) +{ + return swizzle(array_ref(var, column), row, 1); +} + +/** + * Implementations of built-in functions: + * @{ + */ +ir_function_signature * +builtin_builder::new_sig(const glsl_type *return_type, + builtin_available_predicate avail, + int num_params, + ...) +{ + va_list ap; +/* Allocate the signature, then read exactly num_params ir_variable pointers from the variadic arguments and install them as its parameter list. */ + ir_function_signature *sig = + new(mem_ctx) ir_function_signature(return_type, avail); + + exec_list plist; + va_start(ap, num_params); + for (int i = 0; i < num_params; i++) { + plist.push_tail(va_arg(ap, ir_variable *)); + } + va_end(ap); + + sig->replace_parameters(&plist); + return sig; +} +/** Declare the locals \c sig (created by new_sig()) and \c body (an ir_factory that appends to sig->body), and mark the signature as defined. */ +#define MAKE_SIG(return_type, avail, ...) \ + ir_function_signature *sig = \ + new_sig(return_type, avail, __VA_ARGS__); \ + ir_factory body(&sig->body, mem_ctx); \ + sig->is_defined = true; +/** Declare only the local \c sig (created by new_sig()) and set its is_intrinsic flag; no body factory is declared. */ +#define MAKE_INTRINSIC(return_type, avail, ...) 
\ + ir_function_signature *sig = \ + new_sig(return_type, avail, __VA_ARGS__); \ + sig->is_intrinsic = true; +/** Build a defined signature with one input parameter "x" of \c param_type whose body returns expr(opcode, x). */ +ir_function_signature * +builtin_builder::unop(builtin_available_predicate avail, + ir_expression_operation opcode, + const glsl_type *return_type, + const glsl_type *param_type) +{ + ir_variable *x = in_var(param_type, "x"); + MAKE_SIG(return_type, avail, 1, x); + body.emit(ret(expr(opcode, x))); + return sig; +} +/** Define builtin_builder::_NAME(type) as a call to unop() with the fixed availability predicate AVAIL and the same type for parameter and return value. */ +#define UNOP(NAME, OPCODE, AVAIL) \ +ir_function_signature * \ +builtin_builder::_##NAME(const glsl_type *type) \ +{ \ + return unop(&AVAIL, OPCODE, type, type); \ +} +/** Variant of UNOP whose generated function takes the availability predicate as its first parameter. */ +#define UNOPA(NAME, OPCODE) \ +ir_function_signature * \ +builtin_builder::_##NAME(builtin_available_predicate avail, const glsl_type *type) \ +{ \ + return unop(avail, OPCODE, type, type); \ +} +/** Build a defined signature with two input parameters "x" and "y" whose body returns expr(opcode, x, y). */ +ir_function_signature * +builtin_builder::binop(builtin_available_predicate avail, + ir_expression_operation opcode, + const glsl_type *return_type, + const glsl_type *param0_type, + const glsl_type *param1_type) +{ + ir_variable *x = in_var(param0_type, "x"); + ir_variable *y = in_var(param1_type, "y"); + MAKE_SIG(return_type, avail, 2, x, y); + body.emit(ret(expr(opcode, x, y))); + return sig; +} +/** Define builtin_builder::_NAME(return_type, param0_type, param1_type) as a call to binop() with the fixed availability predicate AVAIL. */ +#define BINOP(NAME, OPCODE, AVAIL) \ +ir_function_signature * \ +builtin_builder::_##NAME(const glsl_type *return_type, \ + const glsl_type *param0_type, \ + const glsl_type *param1_type) \ +{ \ + return binop(&AVAIL, OPCODE, return_type, param0_type, param1_type); \ +} + +/** + * Angle and Trigonometry Functions @{ + */ +/** radians(): multiplies the input by 0.0174532925f, i.e. pi / 180 rounded to float precision. */ +ir_function_signature * +builtin_builder::_radians(const glsl_type *type) +{ + ir_variable *degrees = in_var(type, "degrees"); + MAKE_SIG(type, always_available, 1, degrees); + body.emit(ret(mul(degrees, imm(0.0174532925f)))); + return sig; +} +/** degrees(): multiplies the input by 57.29578f, i.e. 180 / pi rounded to float precision. */ +ir_function_signature * +builtin_builder::_degrees(const glsl_type *type) +{ + ir_variable *radians = in_var(type, "radians"); + MAKE_SIG(type, always_available, 1, radians); + body.emit(ret(mul(radians, imm(57.29578f)))); 
+ return sig; +} + +UNOP(sin, ir_unop_sin, always_available) +UNOP(cos, ir_unop_cos, always_available) +/** tan(theta), emitted as sin(theta) / cos(theta). */ +ir_function_signature * +builtin_builder::_tan(const glsl_type *type) +{ + ir_variable *theta = in_var(type, "theta"); + MAKE_SIG(type, always_available, 1, theta); + body.emit(ret(div(sin(theta), cos(theta)))); + return sig; +} +/** Build the expression shared by asin and acos: sign(x) * (M_PI_2f - sqrt(1 - |x|) * p(|x|)), where p(a) = M_PI_2f + a * ((M_PI_4f - 1) + a * (0.086566724 + a * -0.03102955)). */ +ir_expression * +builtin_builder::asin_expr(ir_variable *x) +{ + return mul(sign(x), + sub(imm(M_PI_2f), + mul(sqrt(sub(imm(1.0f), abs(x))), + add(imm(M_PI_2f), + mul(abs(x), + add(imm(M_PI_4f - 1.0f), + mul(abs(x), + add(imm(0.086566724f), + mul(abs(x), imm(-0.03102955f)))))))))); +} +/** Build an ir_call to the signature of \c f that exactly matches the variables in \c params (each is passed as a variable dereference). Returns NULL when \c f has no exactly matching signature. The result is stored through \c ret unless the matched signature returns void. */ +ir_call * +builtin_builder::call(ir_function *f, ir_variable *ret, exec_list params) +{ + exec_list actual_params; + + foreach_in_list(ir_variable, var, &params) { + actual_params.push_tail(var_ref(var)); + } + + ir_function_signature *sig = + f->exact_matching_signature(NULL, &actual_params); + if (!sig) + return NULL; + + ir_dereference_variable *deref = + (sig->return_type->is_void() ? 
NULL : var_ref(ret)); + + return new(mem_ctx) ir_call(sig, deref, &actual_params); +} + +ir_function_signature * +builtin_builder::_asin(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(type, always_available, 1, x); + + body.emit(ret(asin_expr(x))); + + return sig; +} + +ir_function_signature * +builtin_builder::_acos(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(type, always_available, 1, x); + + body.emit(ret(sub(imm(M_PI_2f), asin_expr(x)))); + + return sig; +} + +ir_function_signature * +builtin_builder::_atan2(const glsl_type *type) +{ + ir_variable *vec_y = in_var(type, "vec_y"); + ir_variable *vec_x = in_var(type, "vec_x"); + MAKE_SIG(type, always_available, 2, vec_y, vec_x); + + ir_variable *vec_result = body.make_temp(type, "vec_result"); + ir_variable *r = body.make_temp(glsl_type::float_type, "r"); + for (int i = 0; i < type->vector_elements; i++) { + ir_variable *y = body.make_temp(glsl_type::float_type, "y"); + ir_variable *x = body.make_temp(glsl_type::float_type, "x"); + body.emit(assign(y, swizzle(vec_y, i, 1))); + body.emit(assign(x, swizzle(vec_x, i, 1))); + + /* If |x| >= 1.0e-8 * |y|: */ + ir_if *outer_if = + new(mem_ctx) ir_if(greater(abs(x), mul(imm(1.0e-8f), abs(y)))); + + ir_factory outer_then(&outer_if->then_instructions, mem_ctx); + + /* Then...call atan(y/x) */ + do_atan(outer_then, glsl_type::float_type, r, div(y, x)); + + /* ...and fix it up: */ + ir_if *inner_if = new(mem_ctx) ir_if(less(x, imm(0.0f))); + inner_if->then_instructions.push_tail( + if_tree(gequal(y, imm(0.0f)), + assign(r, add(r, imm(M_PIf))), + assign(r, sub(r, imm(M_PIf))))); + outer_then.emit(inner_if); + + /* Else... 
*/ + outer_if->else_instructions.push_tail( + assign(r, mul(sign(y), imm(M_PI_2f)))); + + body.emit(outer_if); + + body.emit(assign(vec_result, r, 1 << i)); + } + body.emit(ret(vec_result)); + + return sig; +} + +void +builtin_builder::do_atan(ir_factory &body, const glsl_type *type, ir_variable *res, operand y_over_x) +{ + /* + * range-reduction, first step: + * + * / y_over_x if |y_over_x| <= 1.0; + * x = < + * \ 1.0 / y_over_x otherwise + */ + ir_variable *x = body.make_temp(type, "atan_x"); + body.emit(assign(x, div(min2(abs(y_over_x), + imm(1.0f)), + max2(abs(y_over_x), + imm(1.0f))))); + + /* + * approximate atan by evaluating polynomial: + * + * x * 0.9999793128310355 - x^3 * 0.3326756418091246 + + * x^5 * 0.1938924977115610 - x^7 * 0.1173503194786851 + + * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444 + */ + ir_variable *tmp = body.make_temp(type, "atan_tmp"); + body.emit(assign(tmp, mul(x, x))); + body.emit(assign(tmp, mul(add(mul(sub(mul(add(mul(sub(mul(add(mul(imm(-0.0121323213173444f), + tmp), + imm(0.0536813784310406f)), + tmp), + imm(0.1173503194786851f)), + tmp), + imm(0.1938924977115610f)), + tmp), + imm(0.3326756418091246f)), + tmp), + imm(0.9999793128310355f)), + x))); + + /* range-reduction fixup */ + body.emit(assign(tmp, add(tmp, + mul(b2f(greater(abs(y_over_x), + imm(1.0f, type->components()))), + add(mul(tmp, + imm(-2.0f)), + imm(M_PI_2f)))))); + + /* sign fixup */ + body.emit(assign(res, mul(tmp, sign(y_over_x)))); +} + +ir_function_signature * +builtin_builder::_atan(const glsl_type *type) +{ + ir_variable *y_over_x = in_var(type, "y_over_x"); + MAKE_SIG(type, always_available, 1, y_over_x); + + ir_variable *tmp = body.make_temp(type, "tmp"); + do_atan(body, type, tmp, y_over_x); + body.emit(ret(tmp)); + + return sig; +} + +ir_function_signature * +builtin_builder::_sinh(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(type, v130, 1, x); + + /* 0.5 * (e^x - e^(-x)) */ + body.emit(ret(mul(imm(0.5f), 
sub(exp(x), exp(neg(x)))))); + + return sig; +} + +ir_function_signature * +builtin_builder::_cosh(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(type, v130, 1, x); + + /* 0.5 * (e^x + e^(-x)) */ + body.emit(ret(mul(imm(0.5f), add(exp(x), exp(neg(x)))))); + + return sig; +} + +ir_function_signature * +builtin_builder::_tanh(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(type, v130, 1, x); + + /* (e^x - e^(-x)) / (e^x + e^(-x)) */ + body.emit(ret(div(sub(exp(x), exp(neg(x))), + add(exp(x), exp(neg(x)))))); + + return sig; +} + +ir_function_signature * +builtin_builder::_asinh(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(type, v130, 1, x); + + body.emit(ret(mul(sign(x), log(add(abs(x), sqrt(add(mul(x, x), + imm(1.0f)))))))); + return sig; +} + +ir_function_signature * +builtin_builder::_acosh(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(type, v130, 1, x); + + body.emit(ret(log(add(x, sqrt(sub(mul(x, x), imm(1.0f))))))); + return sig; +} + +ir_function_signature * +builtin_builder::_atanh(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(type, v130, 1, x); + + body.emit(ret(mul(imm(0.5f), log(div(add(imm(1.0f), x), + sub(imm(1.0f), x)))))); + return sig; +} +/** @} */ + +/** + * Exponential Functions @{ + */ + +ir_function_signature * +builtin_builder::_pow(const glsl_type *type) +{ + return binop(always_available, ir_binop_pow, type, type, type); +} + +UNOP(exp, ir_unop_exp, always_available) +UNOP(log, ir_unop_log, always_available) +UNOP(exp2, ir_unop_exp2, always_available) +UNOP(log2, ir_unop_log2, always_available) +UNOPA(sqrt, ir_unop_sqrt) +UNOPA(inversesqrt, ir_unop_rsq) + +/** @} */ + +UNOPA(abs, ir_unop_abs) +UNOPA(sign, ir_unop_sign) +UNOPA(floor, ir_unop_floor) +UNOPA(trunc, ir_unop_trunc) +UNOPA(round, ir_unop_round_even) +UNOPA(roundEven, ir_unop_round_even) +UNOPA(ceil, ir_unop_ceil) +UNOPA(fract, ir_unop_fract) + 
+ir_function_signature * +builtin_builder::_mod(const glsl_type *x_type, const glsl_type *y_type) +{ + return binop(always_available, ir_binop_mod, x_type, x_type, y_type); +} + +ir_function_signature * +builtin_builder::_modf(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + ir_variable *i = out_var(type, "i"); + MAKE_SIG(type, avail, 2, x, i); + + ir_variable *t = body.make_temp(type, "t"); + body.emit(assign(t, expr(ir_unop_trunc, x))); + body.emit(assign(i, t)); + body.emit(ret(sub(x, t))); + + return sig; +} + +ir_function_signature * +builtin_builder::_min(builtin_available_predicate avail, + const glsl_type *x_type, const glsl_type *y_type) +{ + return binop(avail, ir_binop_min, x_type, x_type, y_type); +} + +ir_function_signature * +builtin_builder::_max(builtin_available_predicate avail, + const glsl_type *x_type, const glsl_type *y_type) +{ + return binop(avail, ir_binop_max, x_type, x_type, y_type); +} + +ir_function_signature * +builtin_builder::_clamp(builtin_available_predicate avail, + const glsl_type *val_type, const glsl_type *bound_type) +{ + ir_variable *x = in_var(val_type, "x"); + ir_variable *minVal = in_var(bound_type, "minVal"); + ir_variable *maxVal = in_var(bound_type, "maxVal"); + MAKE_SIG(val_type, avail, 3, x, minVal, maxVal); + + body.emit(ret(clamp(x, minVal, maxVal))); + + return sig; +} + +ir_function_signature * +builtin_builder::_mix_lrp(builtin_available_predicate avail, const glsl_type *val_type, const glsl_type *blend_type) +{ + ir_variable *x = in_var(val_type, "x"); + ir_variable *y = in_var(val_type, "y"); + ir_variable *a = in_var(blend_type, "a"); + MAKE_SIG(val_type, avail, 3, x, y, a); + + body.emit(ret(lrp(x, y, a))); + + return sig; +} + +ir_function_signature * +builtin_builder::_mix_sel(builtin_available_predicate avail, + const glsl_type *val_type, + const glsl_type *blend_type) +{ + ir_variable *x = in_var(val_type, "x"); + ir_variable *y = in_var(val_type, "y"); + 
ir_variable *a = in_var(blend_type, "a"); + MAKE_SIG(val_type, avail, 3, x, y, a); + + /* csel matches the ternary operator in that a selector of true chooses the + * first argument. This differs from mix(x, y, false) which chooses the + * second argument (to remain consistent with the interpolating version of + * mix() which takes a blend factor from 0.0 to 1.0 where 0.0 is only x). + * + * To handle the behavior mismatch, reverse the x and y arguments. + */ + body.emit(ret(csel(a, y, x))); + + return sig; +} + +ir_function_signature * +builtin_builder::_step(builtin_available_predicate avail, const glsl_type *edge_type, const glsl_type *x_type) +{ + ir_variable *edge = in_var(edge_type, "edge"); + ir_variable *x = in_var(x_type, "x"); + MAKE_SIG(x_type, avail, 2, edge, x); + + ir_variable *t = body.make_temp(x_type, "t"); + if (x_type->vector_elements == 1) { + /* Both are floats */ + if (edge_type->base_type == GLSL_TYPE_DOUBLE) + body.emit(assign(t, f2d(b2f(gequal(x, edge))))); + else + body.emit(assign(t, b2f(gequal(x, edge)))); + } else if (edge_type->vector_elements == 1) { + /* x is a vector but edge is a float */ + for (int i = 0; i < x_type->vector_elements; i++) { + if (edge_type->base_type == GLSL_TYPE_DOUBLE) + body.emit(assign(t, f2d(b2f(gequal(swizzle(x, i, 1), edge))), 1 << i)); + else + body.emit(assign(t, b2f(gequal(swizzle(x, i, 1), edge)), 1 << i)); + } + } else { + /* Both are vectors */ + for (int i = 0; i < x_type->vector_elements; i++) { + if (edge_type->base_type == GLSL_TYPE_DOUBLE) + body.emit(assign(t, f2d(b2f(gequal(swizzle(x, i, 1), swizzle(edge, i, 1)))), + 1 << i)); + else + body.emit(assign(t, b2f(gequal(swizzle(x, i, 1), swizzle(edge, i, 1))), + 1 << i)); + + } + } + body.emit(ret(t)); + + return sig; +} + +ir_function_signature * +builtin_builder::_smoothstep(builtin_available_predicate avail, const glsl_type *edge_type, const glsl_type *x_type) +{ + ir_variable *edge0 = in_var(edge_type, "edge0"); + ir_variable *edge1 = 
in_var(edge_type, "edge1"); + ir_variable *x = in_var(x_type, "x"); + MAKE_SIG(x_type, avail, 3, edge0, edge1, x); + + /* From the GLSL 1.10 specification: + * + * genType t; + * t = clamp((x - edge0) / (edge1 - edge0), 0, 1); + * return t * t * (3 - 2 * t); + */ + + ir_variable *t = body.make_temp(x_type, "t"); + body.emit(assign(t, clamp(div(sub(x, edge0), sub(edge1, edge0)), + IMM_FP(x_type, 0.0), IMM_FP(x_type, 1.0)))); + + body.emit(ret(mul(t, mul(t, sub(IMM_FP(x_type, 3.0), mul(IMM_FP(x_type, 2.0), t)))))); + + return sig; +} + +ir_function_signature * +builtin_builder::_isnan(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(glsl_type::bvec(type->vector_elements), avail, 1, x); + + body.emit(ret(nequal(x, x))); + + return sig; +} + +ir_function_signature * +builtin_builder::_isinf(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(glsl_type::bvec(type->vector_elements), avail, 1, x); + + ir_constant_data infinities; + for (int i = 0; i < type->vector_elements; i++) { + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + infinities.f[i] = INFINITY; + break; + case GLSL_TYPE_DOUBLE: + infinities.d[i] = INFINITY; + break; + default: + unreachable("unknown type"); + } + } + + body.emit(ret(equal(abs(x), imm(type, infinities)))); + + return sig; +} + +ir_function_signature * +builtin_builder::_floatBitsToInt(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(glsl_type::ivec(type->vector_elements), shader_bit_encoding, 1, x); + body.emit(ret(bitcast_f2i(x))); + return sig; +} + +ir_function_signature * +builtin_builder::_floatBitsToUint(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(glsl_type::uvec(type->vector_elements), shader_bit_encoding, 1, x); + body.emit(ret(bitcast_f2u(x))); + return sig; +} + +ir_function_signature * +builtin_builder::_intBitsToFloat(const glsl_type *type) +{ + 
ir_variable *x = in_var(type, "x"); + MAKE_SIG(glsl_type::vec(type->vector_elements), shader_bit_encoding, 1, x); + body.emit(ret(bitcast_i2f(x))); + return sig; +} + +ir_function_signature * +builtin_builder::_uintBitsToFloat(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(glsl_type::vec(type->vector_elements), shader_bit_encoding, 1, x); + body.emit(ret(bitcast_u2f(x))); + return sig; +} + +ir_function_signature * +builtin_builder::_packUnorm2x16(builtin_available_predicate avail) +{ + ir_variable *v = in_var(glsl_type::vec2_type, "v"); + MAKE_SIG(glsl_type::uint_type, avail, 1, v); + body.emit(ret(expr(ir_unop_pack_unorm_2x16, v))); + return sig; +} + +ir_function_signature * +builtin_builder::_packSnorm2x16(builtin_available_predicate avail) +{ + ir_variable *v = in_var(glsl_type::vec2_type, "v"); + MAKE_SIG(glsl_type::uint_type, avail, 1, v); + body.emit(ret(expr(ir_unop_pack_snorm_2x16, v))); + return sig; +} + +ir_function_signature * +builtin_builder::_packUnorm4x8(builtin_available_predicate avail) +{ + ir_variable *v = in_var(glsl_type::vec4_type, "v"); + MAKE_SIG(glsl_type::uint_type, avail, 1, v); + body.emit(ret(expr(ir_unop_pack_unorm_4x8, v))); + return sig; +} + +ir_function_signature * +builtin_builder::_packSnorm4x8(builtin_available_predicate avail) +{ + ir_variable *v = in_var(glsl_type::vec4_type, "v"); + MAKE_SIG(glsl_type::uint_type, avail, 1, v); + body.emit(ret(expr(ir_unop_pack_snorm_4x8, v))); + return sig; +} + +ir_function_signature * +builtin_builder::_unpackUnorm2x16(builtin_available_predicate avail) +{ + ir_variable *p = in_var(glsl_type::uint_type, "p"); + MAKE_SIG(glsl_type::vec2_type, avail, 1, p); + body.emit(ret(expr(ir_unop_unpack_unorm_2x16, p))); + return sig; +} + +ir_function_signature * +builtin_builder::_unpackSnorm2x16(builtin_available_predicate avail) +{ + ir_variable *p = in_var(glsl_type::uint_type, "p"); + MAKE_SIG(glsl_type::vec2_type, avail, 1, p); + 
body.emit(ret(expr(ir_unop_unpack_snorm_2x16, p))); + return sig; +} + + +ir_function_signature * +builtin_builder::_unpackUnorm4x8(builtin_available_predicate avail) +{ + ir_variable *p = in_var(glsl_type::uint_type, "p"); + MAKE_SIG(glsl_type::vec4_type, avail, 1, p); + body.emit(ret(expr(ir_unop_unpack_unorm_4x8, p))); + return sig; +} + +ir_function_signature * +builtin_builder::_unpackSnorm4x8(builtin_available_predicate avail) +{ + ir_variable *p = in_var(glsl_type::uint_type, "p"); + MAKE_SIG(glsl_type::vec4_type, avail, 1, p); + body.emit(ret(expr(ir_unop_unpack_snorm_4x8, p))); + return sig; +} + +ir_function_signature * +builtin_builder::_packHalf2x16(builtin_available_predicate avail) +{ + ir_variable *v = in_var(glsl_type::vec2_type, "v"); + MAKE_SIG(glsl_type::uint_type, avail, 1, v); + body.emit(ret(expr(ir_unop_pack_half_2x16, v))); + return sig; +} + +ir_function_signature * +builtin_builder::_unpackHalf2x16(builtin_available_predicate avail) +{ + ir_variable *p = in_var(glsl_type::uint_type, "p"); + MAKE_SIG(glsl_type::vec2_type, avail, 1, p); + body.emit(ret(expr(ir_unop_unpack_half_2x16, p))); + return sig; +} + +ir_function_signature * +builtin_builder::_packDouble2x32(builtin_available_predicate avail) +{ + ir_variable *v = in_var(glsl_type::uvec2_type, "v"); + MAKE_SIG(glsl_type::double_type, avail, 1, v); + body.emit(ret(expr(ir_unop_pack_double_2x32, v))); + return sig; +} + +ir_function_signature * +builtin_builder::_unpackDouble2x32(builtin_available_predicate avail) +{ + ir_variable *p = in_var(glsl_type::double_type, "p"); + MAKE_SIG(glsl_type::uvec2_type, avail, 1, p); + body.emit(ret(expr(ir_unop_unpack_double_2x32, p))); + return sig; +} + +ir_function_signature * +builtin_builder::_length(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(type->get_base_type(), avail, 1, x); + + body.emit(ret(sqrt(dot(x, x)))); + + return sig; +} + +ir_function_signature * 
+builtin_builder::_distance(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *p0 = in_var(type, "p0"); + ir_variable *p1 = in_var(type, "p1"); + MAKE_SIG(type->get_base_type(), avail, 2, p0, p1); + + if (type->vector_elements == 1) { + body.emit(ret(abs(sub(p0, p1)))); + } else { + ir_variable *p = body.make_temp(type, "p"); + body.emit(assign(p, sub(p0, p1))); + body.emit(ret(sqrt(dot(p, p)))); + } + + return sig; +} + +ir_function_signature * +builtin_builder::_dot(builtin_available_predicate avail, const glsl_type *type) +{ + if (type->vector_elements == 1) + return binop(avail, ir_binop_mul, type, type, type); + + return binop(avail, ir_binop_dot, + type->get_base_type(), type, type); +} + +ir_function_signature * +builtin_builder::_cross(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *a = in_var(type, "a"); + ir_variable *b = in_var(type, "b"); + MAKE_SIG(type, avail, 2, a, b); + + int yzx = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, 0); + int zxy = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, 0); + + body.emit(ret(sub(mul(swizzle(a, yzx, 3), swizzle(b, zxy, 3)), + mul(swizzle(a, zxy, 3), swizzle(b, yzx, 3))))); + + return sig; +} + +ir_function_signature * +builtin_builder::_normalize(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(type, avail, 1, x); + + if (type->vector_elements == 1) { + body.emit(ret(sign(x))); + } else { + body.emit(ret(mul(x, rsq(dot(x, x))))); + } + + return sig; +} + +ir_function_signature * +builtin_builder::_ftransform() +{ + MAKE_SIG(glsl_type::vec4_type, compatibility_vs_only, 0); + + body.emit(ret(new(mem_ctx) ir_expression(ir_binop_mul, + glsl_type::vec4_type, + var_ref(gl_ModelViewProjectionMatrix), + var_ref(gl_Vertex)))); + + /* FINISHME: Once the ir_expression() constructor handles type inference + * for matrix operations, we can simplify this to: + * + * body.emit(ret(mul(gl_ModelViewProjectionMatrix, 
gl_Vertex))); + */ + return sig; +} + +ir_function_signature * +builtin_builder::_faceforward(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *N = in_var(type, "N"); + ir_variable *I = in_var(type, "I"); + ir_variable *Nref = in_var(type, "Nref"); + MAKE_SIG(type, avail, 3, N, I, Nref); + + body.emit(if_tree(less(dot(Nref, I), IMM_FP(type, 0.0)), + ret(N), ret(neg(N)))); + + return sig; +} + +ir_function_signature * +builtin_builder::_reflect(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *I = in_var(type, "I"); + ir_variable *N = in_var(type, "N"); + MAKE_SIG(type, avail, 2, I, N); + + /* I - 2 * dot(N, I) * N */ + body.emit(ret(sub(I, mul(IMM_FP(type, 2.0), mul(dot(N, I), N))))); + + return sig; +} + +ir_function_signature * +builtin_builder::_refract(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *I = in_var(type, "I"); + ir_variable *N = in_var(type, "N"); + ir_variable *eta = in_var(type->get_base_type(), "eta"); + MAKE_SIG(type, avail, 3, I, N, eta); + + ir_variable *n_dot_i = body.make_temp(type->get_base_type(), "n_dot_i"); + body.emit(assign(n_dot_i, dot(N, I))); + + /* From the GLSL 1.10 specification: + * k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) + * if (k < 0.0) + * return genType(0.0) + * else + * return eta * I - (eta * dot(N, I) + sqrt(k)) * N + */ + ir_variable *k = body.make_temp(type->get_base_type(), "k"); + body.emit(assign(k, sub(IMM_FP(type, 1.0), + mul(eta, mul(eta, sub(IMM_FP(type, 1.0), + mul(n_dot_i, n_dot_i))))))); + body.emit(if_tree(less(k, IMM_FP(type, 0.0)), + ret(ir_constant::zero(mem_ctx, type)), + ret(sub(mul(eta, I), + mul(add(mul(eta, n_dot_i), sqrt(k)), N))))); + + return sig; +} + +ir_function_signature * +builtin_builder::_matrixCompMult(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + ir_variable *y = in_var(type, "y"); + MAKE_SIG(type, avail, 2, x, y); + + ir_variable *z = 
body.make_temp(type, "z"); + for (int i = 0; i < type->matrix_columns; i++) { + body.emit(assign(array_ref(z, i), mul(array_ref(x, i), array_ref(y, i)))); + } + body.emit(ret(z)); + + return sig; +} + +ir_function_signature * +builtin_builder::_outerProduct(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *c; + ir_variable *r; + + if (type->base_type == GLSL_TYPE_DOUBLE) { + r = in_var(glsl_type::dvec(type->matrix_columns), "r"); + c = in_var(glsl_type::dvec(type->vector_elements), "c"); + } else { + r = in_var(glsl_type::vec(type->matrix_columns), "r"); + c = in_var(glsl_type::vec(type->vector_elements), "c"); + } + MAKE_SIG(type, avail, 2, c, r); + + ir_variable *m = body.make_temp(type, "m"); + for (int i = 0; i < type->matrix_columns; i++) { + body.emit(assign(array_ref(m, i), mul(c, swizzle(r, i, 1)))); + } + body.emit(ret(m)); + + return sig; +} + +ir_function_signature * +builtin_builder::_transpose(builtin_available_predicate avail, const glsl_type *orig_type) +{ + const glsl_type *transpose_type = + glsl_type::get_instance(orig_type->base_type, + orig_type->matrix_columns, + orig_type->vector_elements); + + ir_variable *m = in_var(orig_type, "m"); + MAKE_SIG(transpose_type, avail, 1, m); + + ir_variable *t = body.make_temp(transpose_type, "t"); + for (int i = 0; i < orig_type->matrix_columns; i++) { + for (int j = 0; j < orig_type->vector_elements; j++) { + body.emit(assign(array_ref(t, j), + matrix_elt(m, i, j), + 1 << i)); + } + } + body.emit(ret(t)); + + return sig; +} + +ir_function_signature * +builtin_builder::_determinant_mat2(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *m = in_var(type, "m"); + MAKE_SIG(type->get_base_type(), avail, 1, m); + + body.emit(ret(sub(mul(matrix_elt(m, 0, 0), matrix_elt(m, 1, 1)), + mul(matrix_elt(m, 1, 0), matrix_elt(m, 0, 1))))); + + return sig; +} + +ir_function_signature * +builtin_builder::_determinant_mat3(builtin_available_predicate avail, const glsl_type 
*type) +{ + ir_variable *m = in_var(type, "m"); + MAKE_SIG(type->get_base_type(), avail, 1, m); + + ir_expression *f1 = + sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 2)), + mul(matrix_elt(m, 1, 2), matrix_elt(m, 2, 1))); + + ir_expression *f2 = + sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 2)), + mul(matrix_elt(m, 1, 2), matrix_elt(m, 2, 0))); + + ir_expression *f3 = + sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 1)), + mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 0))); + + body.emit(ret(add(sub(mul(matrix_elt(m, 0, 0), f1), + mul(matrix_elt(m, 0, 1), f2)), + mul(matrix_elt(m, 0, 2), f3)))); + + return sig; +} + +ir_function_signature * +builtin_builder::_determinant_mat4(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *m = in_var(type, "m"); + const glsl_type *btype = type->get_base_type(); + MAKE_SIG(btype, avail, 1, m); + + ir_variable *SubFactor00 = body.make_temp(btype, "SubFactor00"); + ir_variable *SubFactor01 = body.make_temp(btype, "SubFactor01"); + ir_variable *SubFactor02 = body.make_temp(btype, "SubFactor02"); + ir_variable *SubFactor03 = body.make_temp(btype, "SubFactor03"); + ir_variable *SubFactor04 = body.make_temp(btype, "SubFactor04"); + ir_variable *SubFactor05 = body.make_temp(btype, "SubFactor05"); + ir_variable *SubFactor06 = body.make_temp(btype, "SubFactor06"); + ir_variable *SubFactor07 = body.make_temp(btype, "SubFactor07"); + ir_variable *SubFactor08 = body.make_temp(btype, "SubFactor08"); + ir_variable *SubFactor09 = body.make_temp(btype, "SubFactor09"); + ir_variable *SubFactor10 = body.make_temp(btype, "SubFactor10"); + ir_variable *SubFactor11 = body.make_temp(btype, "SubFactor11"); + ir_variable *SubFactor12 = body.make_temp(btype, "SubFactor12"); + ir_variable *SubFactor13 = body.make_temp(btype, "SubFactor13"); + ir_variable *SubFactor14 = body.make_temp(btype, "SubFactor14"); + ir_variable *SubFactor15 = body.make_temp(btype, "SubFactor15"); + ir_variable *SubFactor16 = body.make_temp(btype, 
"SubFactor16"); + ir_variable *SubFactor17 = body.make_temp(btype, "SubFactor17"); + ir_variable *SubFactor18 = body.make_temp(btype, "SubFactor18"); + + body.emit(assign(SubFactor00, sub(mul(matrix_elt(m, 2, 2), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 2), matrix_elt(m, 2, 3))))); + body.emit(assign(SubFactor01, sub(mul(matrix_elt(m, 2, 1), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 2, 3))))); + body.emit(assign(SubFactor02, sub(mul(matrix_elt(m, 2, 1), matrix_elt(m, 3, 2)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 2, 2))))); + body.emit(assign(SubFactor03, sub(mul(matrix_elt(m, 2, 0), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 2, 3))))); + body.emit(assign(SubFactor04, sub(mul(matrix_elt(m, 2, 0), matrix_elt(m, 3, 2)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 2, 2))))); + body.emit(assign(SubFactor05, sub(mul(matrix_elt(m, 2, 0), matrix_elt(m, 3, 1)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 2, 1))))); + body.emit(assign(SubFactor06, sub(mul(matrix_elt(m, 1, 2), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 2), matrix_elt(m, 1, 3))))); + body.emit(assign(SubFactor07, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 1, 3))))); + body.emit(assign(SubFactor08, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 3, 2)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 1, 2))))); + body.emit(assign(SubFactor09, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 1, 3))))); + body.emit(assign(SubFactor10, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 3, 2)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 1, 2))))); + body.emit(assign(SubFactor11, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 1, 3))))); + body.emit(assign(SubFactor12, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 3, 1)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 1, 1))))); + body.emit(assign(SubFactor13, sub(mul(matrix_elt(m, 1, 2), matrix_elt(m, 2, 3)), mul(matrix_elt(m, 2, 2), 
matrix_elt(m, 1, 3))))); + body.emit(assign(SubFactor14, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 3)), mul(matrix_elt(m, 2, 1), matrix_elt(m, 1, 3))))); + body.emit(assign(SubFactor15, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 2)), mul(matrix_elt(m, 2, 1), matrix_elt(m, 1, 2))))); + body.emit(assign(SubFactor16, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 3)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 3))))); + body.emit(assign(SubFactor17, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 2)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 2))))); + body.emit(assign(SubFactor18, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 1)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 1))))); + + ir_variable *adj_0 = body.make_temp(btype == glsl_type::float_type ? glsl_type::vec4_type : glsl_type::dvec4_type, "adj_0"); + + body.emit(assign(adj_0, + add(sub(mul(matrix_elt(m, 1, 1), SubFactor00), + mul(matrix_elt(m, 1, 2), SubFactor01)), + mul(matrix_elt(m, 1, 3), SubFactor02)), + WRITEMASK_X)); + body.emit(assign(adj_0, neg( + add(sub(mul(matrix_elt(m, 1, 0), SubFactor00), + mul(matrix_elt(m, 1, 2), SubFactor03)), + mul(matrix_elt(m, 1, 3), SubFactor04))), + WRITEMASK_Y)); + body.emit(assign(adj_0, + add(sub(mul(matrix_elt(m, 1, 0), SubFactor01), + mul(matrix_elt(m, 1, 1), SubFactor03)), + mul(matrix_elt(m, 1, 3), SubFactor05)), + WRITEMASK_Z)); + body.emit(assign(adj_0, neg( + add(sub(mul(matrix_elt(m, 1, 0), SubFactor02), + mul(matrix_elt(m, 1, 1), SubFactor04)), + mul(matrix_elt(m, 1, 2), SubFactor05))), + WRITEMASK_W)); + + body.emit(ret(dot(array_ref(m, 0), adj_0))); + + return sig; +} + +ir_function_signature * +builtin_builder::_inverse_mat2(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *m = in_var(type, "m"); + MAKE_SIG(type, avail, 1, m); + + ir_variable *adj = body.make_temp(type, "adj"); + body.emit(assign(array_ref(adj, 0), matrix_elt(m, 1, 1), 1 << 0)); + body.emit(assign(array_ref(adj, 0), neg(matrix_elt(m, 0, 1)), 1 << 1)); + 
body.emit(assign(array_ref(adj, 1), neg(matrix_elt(m, 1, 0)), 1 << 0)); + body.emit(assign(array_ref(adj, 1), matrix_elt(m, 0, 0), 1 << 1)); + + ir_expression *det = + sub(mul(matrix_elt(m, 0, 0), matrix_elt(m, 1, 1)), + mul(matrix_elt(m, 1, 0), matrix_elt(m, 0, 1))); + + body.emit(ret(div(adj, det))); + return sig; +} + +ir_function_signature * +builtin_builder::_inverse_mat3(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *m = in_var(type, "m"); + const glsl_type *btype = type->get_base_type(); + MAKE_SIG(type, avail, 1, m); + + ir_variable *f11_22_21_12 = body.make_temp(btype, "f11_22_21_12"); + ir_variable *f10_22_20_12 = body.make_temp(btype, "f10_22_20_12"); + ir_variable *f10_21_20_11 = body.make_temp(btype, "f10_21_20_11"); + + body.emit(assign(f11_22_21_12, + sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 2)), + mul(matrix_elt(m, 2, 1), matrix_elt(m, 1, 2))))); + body.emit(assign(f10_22_20_12, + sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 2)), + mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 2))))); + body.emit(assign(f10_21_20_11, + sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 1)), + mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 1))))); + + ir_variable *adj = body.make_temp(type, "adj"); + body.emit(assign(array_ref(adj, 0), f11_22_21_12, WRITEMASK_X)); + body.emit(assign(array_ref(adj, 1), neg(f10_22_20_12), WRITEMASK_X)); + body.emit(assign(array_ref(adj, 2), f10_21_20_11, WRITEMASK_X)); + + body.emit(assign(array_ref(adj, 0), neg( + sub(mul(matrix_elt(m, 0, 1), matrix_elt(m, 2, 2)), + mul(matrix_elt(m, 2, 1), matrix_elt(m, 0, 2)))), + WRITEMASK_Y)); + body.emit(assign(array_ref(adj, 1), + sub(mul(matrix_elt(m, 0, 0), matrix_elt(m, 2, 2)), + mul(matrix_elt(m, 2, 0), matrix_elt(m, 0, 2))), + WRITEMASK_Y)); + body.emit(assign(array_ref(adj, 2), neg( + sub(mul(matrix_elt(m, 0, 0), matrix_elt(m, 2, 1)), + mul(matrix_elt(m, 2, 0), matrix_elt(m, 0, 1)))), + WRITEMASK_Y)); + + body.emit(assign(array_ref(adj, 0), + sub(mul(matrix_elt(m, 0, 
1), matrix_elt(m, 1, 2)),
+                        mul(matrix_elt(m, 1, 1), matrix_elt(m, 0, 2))),
+                    WRITEMASK_Z));
+   body.emit(assign(array_ref(adj, 1), neg(
+                    sub(mul(matrix_elt(m, 0, 0), matrix_elt(m, 1, 2)),
+                        mul(matrix_elt(m, 1, 0), matrix_elt(m, 0, 2)))),
+                    WRITEMASK_Z));
+   body.emit(assign(array_ref(adj, 2),
+                    sub(mul(matrix_elt(m, 0, 0), matrix_elt(m, 1, 1)),
+                        mul(matrix_elt(m, 1, 0), matrix_elt(m, 0, 1))),
+                    WRITEMASK_Z));
+
+   ir_expression *det =
+      add(sub(mul(matrix_elt(m, 0, 0), f11_22_21_12),
+              mul(matrix_elt(m, 0, 1), f10_22_20_12)),
+          mul(matrix_elt(m, 0, 2), f10_21_20_11));
+
+   body.emit(ret(div(adj, det)));
+
+   return sig;
+}
+/**
+ * Build the signature for inverse() on a 4x4 matrix of \c type.
+ *
+ * The body first stores nineteen 2x2 sub-determinants of \c m in scalar
+ * temporaries (SubFactor00..SubFactor18), then fills the matrix temporary
+ * \c adj one component at a time (array_ref(adj, i) with a single-channel
+ * write mask), computes \c det from elements (0, k) of \c m and (k, 0) of
+ * \c adj, and finally returns adj / det.
+ *
+ * \param avail  availability predicate handed to MAKE_SIG
+ * \param type   matrix type; its base type selects mat4 vs. dmat4 for adj
+ */
+ir_function_signature *
+builtin_builder::_inverse_mat4(builtin_available_predicate avail, const glsl_type *type)
+{
+   ir_variable *m = in_var(type, "m");
+   const glsl_type *btype = type->get_base_type();
+   MAKE_SIG(type, avail, 1, m);
+
+   ir_variable *SubFactor00 = body.make_temp(btype, "SubFactor00");
+   ir_variable *SubFactor01 = body.make_temp(btype, "SubFactor01");
+   ir_variable *SubFactor02 = body.make_temp(btype, "SubFactor02");
+   ir_variable *SubFactor03 = body.make_temp(btype, "SubFactor03");
+   ir_variable *SubFactor04 = body.make_temp(btype, "SubFactor04");
+   ir_variable *SubFactor05 = body.make_temp(btype, "SubFactor05");
+   ir_variable *SubFactor06 = body.make_temp(btype, "SubFactor06");
+   ir_variable *SubFactor07 = body.make_temp(btype, "SubFactor07");
+   ir_variable *SubFactor08 = body.make_temp(btype, "SubFactor08");
+   ir_variable *SubFactor09 = body.make_temp(btype, "SubFactor09");
+   ir_variable *SubFactor10 = body.make_temp(btype, "SubFactor10");
+   ir_variable *SubFactor11 = body.make_temp(btype, "SubFactor11");
+   ir_variable *SubFactor12 = body.make_temp(btype, "SubFactor12");
+   ir_variable *SubFactor13 = body.make_temp(btype, "SubFactor13");
+   ir_variable *SubFactor14 = body.make_temp(btype, "SubFactor14");
+   ir_variable *SubFactor15 = body.make_temp(btype, "SubFactor15");
+   ir_variable *SubFactor16 = body.make_temp(btype, "SubFactor16");
+   ir_variable *SubFactor17 = body.make_temp(btype, "SubFactor17");
+   ir_variable *SubFactor18 = body.make_temp(btype, "SubFactor18");
+   /* Each SubFactor is a 2x2 determinant: a * d - b * c of four elements of m. */
+   body.emit(assign(SubFactor00, sub(mul(matrix_elt(m, 2, 2), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 2), matrix_elt(m, 2, 3)))));
+   body.emit(assign(SubFactor01, sub(mul(matrix_elt(m, 2, 1), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 2, 3)))));
+   body.emit(assign(SubFactor02, sub(mul(matrix_elt(m, 2, 1), matrix_elt(m, 3, 2)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 2, 2)))));
+   body.emit(assign(SubFactor03, sub(mul(matrix_elt(m, 2, 0), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 2, 3)))));
+   body.emit(assign(SubFactor04, sub(mul(matrix_elt(m, 2, 0), matrix_elt(m, 3, 2)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 2, 2)))));
+   body.emit(assign(SubFactor05, sub(mul(matrix_elt(m, 2, 0), matrix_elt(m, 3, 1)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 2, 1)))));
+   body.emit(assign(SubFactor06, sub(mul(matrix_elt(m, 1, 2), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 2), matrix_elt(m, 1, 3)))));
+   body.emit(assign(SubFactor07, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 1, 3)))));
+   body.emit(assign(SubFactor08, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 3, 2)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 1, 2)))));
+   body.emit(assign(SubFactor09, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 1, 3)))));
+   body.emit(assign(SubFactor10, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 3, 2)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 1, 2)))));
+   body.emit(assign(SubFactor11, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 1, 3))))); /* NOTE(review): same expression as SubFactor07 */
+   body.emit(assign(SubFactor12, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 3, 1)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 1, 1)))));
+   body.emit(assign(SubFactor13, sub(mul(matrix_elt(m, 1, 2), matrix_elt(m, 2, 3)), mul(matrix_elt(m, 2, 2), matrix_elt(m, 1, 3)))));
+   body.emit(assign(SubFactor14, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 3)), mul(matrix_elt(m, 2, 1), matrix_elt(m, 1, 3)))));
+   body.emit(assign(SubFactor15, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 2)), mul(matrix_elt(m, 2, 1), matrix_elt(m, 1, 2)))));
+   body.emit(assign(SubFactor16, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 3)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 3)))));
+   body.emit(assign(SubFactor17, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 2)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 2)))));
+   body.emit(assign(SubFactor18, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 1)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 1)))));
+   /* X channel of adj[0..3]; the sign alternates via neg() on adj[1] and adj[3]. */
+   ir_variable *adj = body.make_temp(btype == glsl_type::float_type ? glsl_type::mat4_type : glsl_type::dmat4_type, "adj");
+   body.emit(assign(array_ref(adj, 0),
+                    add(sub(mul(matrix_elt(m, 1, 1), SubFactor00),
+                            mul(matrix_elt(m, 1, 2), SubFactor01)),
+                        mul(matrix_elt(m, 1, 3), SubFactor02)),
+                    WRITEMASK_X));
+   body.emit(assign(array_ref(adj, 1), neg(
+                    add(sub(mul(matrix_elt(m, 1, 0), SubFactor00),
+                            mul(matrix_elt(m, 1, 2), SubFactor03)),
+                        mul(matrix_elt(m, 1, 3), SubFactor04))),
+                    WRITEMASK_X));
+   body.emit(assign(array_ref(adj, 2),
+                    add(sub(mul(matrix_elt(m, 1, 0), SubFactor01),
+                            mul(matrix_elt(m, 1, 1), SubFactor03)),
+                        mul(matrix_elt(m, 1, 3), SubFactor05)),
+                    WRITEMASK_X));
+   body.emit(assign(array_ref(adj, 3), neg(
+                    add(sub(mul(matrix_elt(m, 1, 0), SubFactor02),
+                            mul(matrix_elt(m, 1, 1), SubFactor04)),
+                        mul(matrix_elt(m, 1, 2), SubFactor05))),
+                    WRITEMASK_X));
+   /* Y channel of adj[0..3]; here adj[0] and adj[2] are the negated ones. */
+   body.emit(assign(array_ref(adj, 0), neg(
+                    add(sub(mul(matrix_elt(m, 0, 1), SubFactor00),
+                            mul(matrix_elt(m, 0, 2), SubFactor01)),
+                        mul(matrix_elt(m, 0, 3), SubFactor02))),
+                    WRITEMASK_Y));
+   body.emit(assign(array_ref(adj, 1),
+                    add(sub(mul(matrix_elt(m, 0, 0), SubFactor00),
+                            mul(matrix_elt(m, 0, 2), SubFactor03)),
+                        mul(matrix_elt(m, 0, 3), SubFactor04)),
+                    WRITEMASK_Y));
+   body.emit(assign(array_ref(adj, 2), neg(
+                    add(sub(mul(matrix_elt(m, 0, 0), SubFactor01),
+                            mul(matrix_elt(m, 0, 1), SubFactor03)),
+                        mul(matrix_elt(m, 0, 3), SubFactor05))),
+                    WRITEMASK_Y));
+   body.emit(assign(array_ref(adj, 3),
+                    add(sub(mul(matrix_elt(m, 0, 0), SubFactor02),
+                            mul(matrix_elt(m, 0, 1), SubFactor04)),
+                        mul(matrix_elt(m, 0, 2), SubFactor05)),
+                    WRITEMASK_Y));
+   /* Z channel of adj[0..3], built from SubFactor06..SubFactor12. */
+   body.emit(assign(array_ref(adj, 0),
+                    add(sub(mul(matrix_elt(m, 0, 1), SubFactor06),
+                            mul(matrix_elt(m, 0, 2), SubFactor07)),
+                        mul(matrix_elt(m, 0, 3), SubFactor08)),
+                    WRITEMASK_Z));
+   body.emit(assign(array_ref(adj, 1), neg(
+                    add(sub(mul(matrix_elt(m, 0, 0), SubFactor06),
+                            mul(matrix_elt(m, 0, 2), SubFactor09)),
+                        mul(matrix_elt(m, 0, 3), SubFactor10))),
+                    WRITEMASK_Z));
+   body.emit(assign(array_ref(adj, 2),
+                    add(sub(mul(matrix_elt(m, 0, 0), SubFactor11),
+                            mul(matrix_elt(m, 0, 1), SubFactor09)),
+                        mul(matrix_elt(m, 0, 3), SubFactor12)),
+                    WRITEMASK_Z));
+   body.emit(assign(array_ref(adj, 3), neg(
+                    add(sub(mul(matrix_elt(m, 0, 0), SubFactor08),
+                            mul(matrix_elt(m, 0, 1), SubFactor10)),
+                        mul(matrix_elt(m, 0, 2), SubFactor12))),
+                    WRITEMASK_Z));
+   /* W channel of adj[0..3], built from SubFactor13..SubFactor18. */
+   body.emit(assign(array_ref(adj, 0), neg(
+                    add(sub(mul(matrix_elt(m, 0, 1), SubFactor13),
+                            mul(matrix_elt(m, 0, 2), SubFactor14)),
+                        mul(matrix_elt(m, 0, 3), SubFactor15))),
+                    WRITEMASK_W));
+   body.emit(assign(array_ref(adj, 1),
+                    add(sub(mul(matrix_elt(m, 0, 0), SubFactor13),
+                            mul(matrix_elt(m, 0, 2), SubFactor16)),
+                        mul(matrix_elt(m, 0, 3), SubFactor17)),
+                    WRITEMASK_W));
+   body.emit(assign(array_ref(adj, 2), neg(
+                    add(sub(mul(matrix_elt(m, 0, 0), SubFactor14),
+                            mul(matrix_elt(m, 0, 1), SubFactor16)),
+                        mul(matrix_elt(m, 0, 3), SubFactor18))),
+                    WRITEMASK_W));
+   body.emit(assign(array_ref(adj, 3),
+                    add(sub(mul(matrix_elt(m, 0, 0), SubFactor15),
+                            mul(matrix_elt(m, 0, 1), SubFactor17)),
+                        mul(matrix_elt(m, 0, 2), SubFactor18)),
+                    WRITEMASK_W));
+   /* det = sum over k of matrix_elt(m, 0, k) * matrix_elt(adj, k, 0). */
+   ir_expression *det =
+      add(mul(matrix_elt(m, 0, 0), matrix_elt(adj, 0, 0)),
+          add(mul(matrix_elt(m, 0, 1), matrix_elt(adj, 1, 0)),
+              add(mul(matrix_elt(m, 0, 2), matrix_elt(adj, 2, 0)),
+                  mul(matrix_elt(m, 0, 3), matrix_elt(adj, 3, 0)))));
+
+   body.emit(ret(div(adj, det)));
+
+   return sig;
+}
+
+/**
+ * lessThan(): binop() wrapper around ir_binop_less taking two \c type
+ * operands and returning a bvec with type->vector_elements components.
+ */
+ir_function_signature *
+builtin_builder::_lessThan(builtin_available_predicate avail,
+                           const glsl_type *type)
+{
+   return binop(avail, ir_binop_less,
+                glsl_type::bvec(type->vector_elements), type, type);
+}
+/**
+ * lessThanEqual(): same shape as _lessThan, using ir_binop_lequal.
+ */
+ir_function_signature *
+builtin_builder::_lessThanEqual(builtin_available_predicate avail,
+                                const glsl_type *type)
+{
+   return binop(avail, ir_binop_lequal,
+                glsl_type::bvec(type->vector_elements), type, type);
+}
+/**
+ * greaterThan(): same shape as _lessThan, using ir_binop_greater.
+ */
+ir_function_signature *
+builtin_builder::_greaterThan(builtin_available_predicate avail,
+                              const glsl_type *type)
+{
+   return binop(avail, ir_binop_greater,
+                glsl_type::bvec(type->vector_elements), type, type);
+}
+/**
+ * greaterThanEqual(): same shape as _lessThan, using ir_binop_gequal.
+ */
+ir_function_signature *
+builtin_builder::_greaterThanEqual(builtin_available_predicate avail,
+                                   const glsl_type *type)
+{
+   return binop(avail, ir_binop_gequal,
+                glsl_type::bvec(type->vector_elements), type, type);
+}
+/**
+ * equal(): same shape as _lessThan, using ir_binop_equal.
+ */
+ir_function_signature *
+builtin_builder::_equal(builtin_available_predicate avail,
+                        const glsl_type *type)
+{
+   return binop(avail, ir_binop_equal,
+                glsl_type::bvec(type->vector_elements), type, type);
+}
+/**
+ * notEqual(): same shape as _lessThan, using ir_binop_nequal.
+ */
+ir_function_signature *
+builtin_builder::_notEqual(builtin_available_predicate avail,
+                           const glsl_type *type)
+{
+   return binop(avail, ir_binop_nequal,
+                glsl_type::bvec(type->vector_elements), type, type);
+}
+/**
+ * any(): returns bool; emitted as ir_binop_any_nequal of \c v against an
+ * all-false constant with the same number of components as \c v.
+ */
+ir_function_signature *
+builtin_builder::_any(const glsl_type *type)
+{
+   ir_variable *v = in_var(type, "v");
+   MAKE_SIG(glsl_type::bool_type, always_available, 1, v);
+
+   const unsigned vec_elem = v->type->vector_elements;
+   body.emit(ret(expr(ir_binop_any_nequal, v, imm(false, vec_elem))));
+
+   return sig;
+}
+/**
+ * all(): returns bool; emitted as ir_binop_all_equal of \c v against an
+ * all-true constant with the same number of components as \c v.
+ */
+ir_function_signature *
+builtin_builder::_all(const glsl_type *type)
+{
+   ir_variable *v = in_var(type, "v");
+   MAKE_SIG(glsl_type::bool_type, always_available, 1, v);
+
+   const unsigned vec_elem = v->type->vector_elements;
+   body.emit(ret(expr(ir_binop_all_equal, v, imm(true, vec_elem))));
+
+   return sig;
+}
+/* not(): produced by the UNOP macro (defined outside this chunk) from ir_unop_logic_not. */
+UNOP(not, ir_unop_logic_not, always_available)
+/**
+ * Whether built-ins taking \c sampler_type get an explicit LOD parameter.
+ *
+ * Returns false for the RECT, BUF and MS sampler dimensionalities and true
+ * for every other one.  Asserts that \c sampler_type is a sampler.
+ */
+static bool
+has_lod(const glsl_type *sampler_type)
+{
+   assert(sampler_type->is_sampler());
+
+   switch (sampler_type->sampler_dimensionality) {
+   case GLSL_SAMPLER_DIM_RECT:
+   case GLSL_SAMPLER_DIM_BUF:
+   case GLSL_SAMPLER_DIM_MS:
+      return false;
+   default:
+      return true;
+   }
+}
+/**
+ * textureSize(): an ir_txs texture query returning \c return_type.
+ *
+ * An int "lod" parameter is appended when has_lod(sampler_type) is true;
+ * otherwise the LOD operand is the constant 0u.
+ */
+ir_function_signature *
+builtin_builder::_textureSize(builtin_available_predicate avail,
+                              const glsl_type *return_type,
+                              const glsl_type *sampler_type)
+{
+   ir_variable *s = in_var(sampler_type, "sampler");
+   /* The sampler always exists; add optional lod later. */
+   MAKE_SIG(return_type, avail, 1, s);
+
+   ir_texture *tex = new(mem_ctx) ir_texture(ir_txs);
+   tex->set_sampler(new(mem_ctx) ir_dereference_variable(s), return_type);
+
+   if (has_lod(sampler_type)) {
+      ir_variable *lod = in_var(glsl_type::int_type, "lod");
+      sig->parameters.push_tail(lod);
+      tex->lod_info.lod = var_ref(lod);
+   } else {
+      tex->lod_info.lod = imm(0u);
+   }
+
+   body.emit(ret(tex));
+
+   return sig;
+}
+/**
+ * textureSamples(): an ir_texture_samples query returning int, gated by the
+ * shader_samples availability predicate.
+ */
+ir_function_signature *
+builtin_builder::_textureSamples(const glsl_type *sampler_type)
+{
+   ir_variable *s = in_var(sampler_type, "sampler");
+   MAKE_SIG(glsl_type::int_type, shader_samples, 1, s);
+
+   ir_texture *tex = new(mem_ctx) ir_texture(ir_texture_samples);
+   tex->set_sampler(new(mem_ctx) ir_dereference_variable(s), glsl_type::int_type);
+   body.emit(ret(tex));
+
+   return sig;
+}
+/**
+ * Generic builder for the texture sampling built-ins.
+ *
+ * The signature always starts with (sampler, P).  Further parameters are
+ * appended in this order, depending on \c opcode, \c flags and the sampler:
+ *   - "refz" (float) for ir_tg4 on shadow samplers;
+ *   - "lod" (float) for ir_txl, or "dPdx"/"dPdy" for ir_txd;
+ *   - "offset" for TEX_OFFSET (const) or TEX_OFFSET_NONCONST (non-const);
+ *   - "offsets" (ivec2[4], const) for TEX_OFFSET_ARRAY;
+ *   - "comp" (int, const) for ir_tg4 with TEX_COMPONENT;
+ *   - "bias" (float) for ir_txb.
+ * With TEX_PROJECT the projector is taken from the last component of P.
+ */
+ir_function_signature *
+builtin_builder::_texture(ir_texture_opcode opcode,
+                          builtin_available_predicate avail,
+                          const glsl_type *return_type,
+                          const glsl_type *sampler_type,
+                          const glsl_type *coord_type,
+                          int flags)
+{
+   ir_variable *s = in_var(sampler_type, "sampler");
+   ir_variable *P = in_var(coord_type, "P");
+   /* The sampler and coordinate always exist; add optional parameters later. */
+   MAKE_SIG(return_type, avail, 2, s, P);
+
+   ir_texture *tex = new(mem_ctx) ir_texture(opcode);
+   tex->set_sampler(var_ref(s), return_type);
+
+   const int coord_size = sampler_type->coordinate_components();
+
+   if (coord_size == coord_type->vector_elements) {
+      tex->coordinate = var_ref(P);
+   } else {
+      /* The incoming coordinate also has the projector or shadow comparitor,
+       * so we need to swizzle those away.
+       */
+      tex->coordinate = swizzle_for_size(P, coord_size);
+   }
+
+   /* The projector is always in the last component. */
+   if (flags & TEX_PROJECT)
+      tex->projector = swizzle(P, coord_type->vector_elements - 1, 1);
+
+   if (sampler_type->sampler_shadow) {
+      if (opcode == ir_tg4) {
+         /* gather has refz as a separate parameter, immediately after the
+          * coordinate
+          */
+         ir_variable *refz = in_var(glsl_type::float_type, "refz");
+         sig->parameters.push_tail(refz);
+         tex->shadow_comparitor = var_ref(refz);
+      } else {
+         /* The shadow comparitor is normally in the Z component, but a few types
+          * have sufficiently large coordinates that it's in W.
+          */
+         tex->shadow_comparitor = swizzle(P, MAX2(coord_size, SWIZZLE_Z), 1);
+      }
+   }
+
+   if (opcode == ir_txl) {
+      ir_variable *lod = in_var(glsl_type::float_type, "lod");
+      sig->parameters.push_tail(lod);
+      tex->lod_info.lod = var_ref(lod);
+   } else if (opcode == ir_txd) {
+      int grad_size = coord_size - (sampler_type->sampler_array ? 1 : 0);
+      ir_variable *dPdx = in_var(glsl_type::vec(grad_size), "dPdx");
+      ir_variable *dPdy = in_var(glsl_type::vec(grad_size), "dPdy");
+      sig->parameters.push_tail(dPdx);
+      sig->parameters.push_tail(dPdy);
+      tex->lod_info.grad.dPdx = var_ref(dPdx);
+      tex->lod_info.grad.dPdy = var_ref(dPdy);
+   }
+
+   if (flags & (TEX_OFFSET | TEX_OFFSET_NONCONST)) {
+      int offset_size = coord_size - (sampler_type->sampler_array ? 1 : 0);
+      ir_variable *offset =
+         new(mem_ctx) ir_variable(glsl_type::ivec(offset_size), "offset",
+                                  (flags & TEX_OFFSET) ? ir_var_const_in : ir_var_function_in);
+      sig->parameters.push_tail(offset);
+      tex->offset = var_ref(offset);
+   }
+
+   if (flags & TEX_OFFSET_ARRAY) {
+      ir_variable *offsets =
+         new(mem_ctx) ir_variable(glsl_type::get_array_instance(glsl_type::ivec2_type, 4),
+                                  "offsets", ir_var_const_in);
+      sig->parameters.push_tail(offsets);
+      tex->offset = var_ref(offsets);
+   }
+
+   if (opcode == ir_tg4) {
+      if (flags & TEX_COMPONENT) {
+         ir_variable *component =
+            new(mem_ctx) ir_variable(glsl_type::int_type, "comp", ir_var_const_in);
+         sig->parameters.push_tail(component);
+         tex->lod_info.component = var_ref(component);
+      }
+      else {
+         tex->lod_info.component = imm(0);
+      }
+   }
+
+   /* The "bias" parameter comes /after/ the "offset" parameter, which is
+    * inconsistent with both textureLodOffset and textureGradOffset.
+    */
+   if (opcode == ir_txb) {
+      ir_variable *bias = in_var(glsl_type::float_type, "bias");
+      sig->parameters.push_tail(bias);
+      tex->lod_info.bias = var_ref(bias);
+   }
+
+   body.emit(ret(tex));
+
+   return sig;
+}
+/**
+ * texture() for samplerCubeArrayShadow: an ir_tex returning float whose
+ * coordinate is the whole vec4 P and whose shadow comparitor is the
+ * separate float "compare" parameter.
+ */
+ir_function_signature *
+builtin_builder::_textureCubeArrayShadow()
+{
+   ir_variable *s = in_var(glsl_type::samplerCubeArrayShadow_type, "sampler");
+   ir_variable *P = in_var(glsl_type::vec4_type, "P");
+   ir_variable *compare = in_var(glsl_type::float_type, "compare");
+   MAKE_SIG(glsl_type::float_type, texture_cube_map_array, 3, s, P, compare);
+
+   ir_texture *tex = new(mem_ctx) ir_texture(ir_tex);
+   tex->set_sampler(var_ref(s), glsl_type::float_type);
+
+   tex->coordinate = var_ref(P);
+   tex->shadow_comparitor = var_ref(compare);
+
+   body.emit(ret(tex));
+
+   return sig;
+}
+/**
+ * texelFetch(): an ir_txf on (sampler, P).
+ *
+ * For GLSL_SAMPLER_DIM_MS samplers an int "sample" parameter is appended
+ * and the opcode is switched to ir_txf_ms.  Otherwise an int "lod"
+ * parameter is appended when has_lod() says so, else LOD is the constant
+ * 0u.  A non-NULL \c offset_type appends a constant "offset" parameter.
+ */
+ir_function_signature *
+builtin_builder::_texelFetch(builtin_available_predicate avail,
+                             const glsl_type *return_type,
+                             const glsl_type *sampler_type,
+                             const glsl_type *coord_type,
+                             const glsl_type *offset_type)
+{
+   ir_variable *s = in_var(sampler_type, "sampler");
+   ir_variable *P = in_var(coord_type, "P");
+   /* The sampler and coordinate always exist; add optional parameters later. */
+   MAKE_SIG(return_type, avail, 2, s, P);
+
+   ir_texture *tex = new(mem_ctx) ir_texture(ir_txf);
+   tex->coordinate = var_ref(P);
+   tex->set_sampler(var_ref(s), return_type);
+
+   if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) {
+      ir_variable *sample = in_var(glsl_type::int_type, "sample");
+      sig->parameters.push_tail(sample);
+      tex->lod_info.sample_index = var_ref(sample);
+      tex->op = ir_txf_ms;
+   } else if (has_lod(sampler_type)) {
+      ir_variable *lod = in_var(glsl_type::int_type, "lod");
+      sig->parameters.push_tail(lod);
+      tex->lod_info.lod = var_ref(lod);
+   } else {
+      tex->lod_info.lod = imm(0u);
+   }
+
+   if (offset_type != NULL) {
+      ir_variable *offset =
+         new(mem_ctx) ir_variable(offset_type, "offset", ir_var_const_in);
+      sig->parameters.push_tail(offset);
+      tex->offset = var_ref(offset);
+   }
+
+   body.emit(ret(tex));
+
+   return sig;
+}
+/**
+ * EmitVertex(): void, gs_only; emits an ir_emit_vertex whose stream
+ * operand is the constant built by ir_constant(0, 1).
+ */
+ir_function_signature *
+builtin_builder::_EmitVertex()
+{
+   MAKE_SIG(glsl_type::void_type, gs_only, 0);
+
+   ir_rvalue *stream = new(mem_ctx) ir_constant(0, 1);
+   body.emit(new(mem_ctx) ir_emit_vertex(stream));
+
+   return sig;
+}
+/**
+ * EmitStreamVertex(): void; takes a constant-qualified (ir_var_const_in)
+ * "stream" parameter of \c stream_type and forwards it to ir_emit_vertex.
+ */
+ir_function_signature *
+builtin_builder::_EmitStreamVertex(builtin_available_predicate avail,
+                                   const glsl_type *stream_type)
+{
+   /* Section 8.12 (Geometry Shader Functions) of the GLSL 4.0 spec says:
+    *
+    *     "Emit the current values of output variables to the current output
+    *     primitive on stream stream. The argument to stream must be a constant
+    *     integral expression."
+    */
+   ir_variable *stream =
+      new(mem_ctx) ir_variable(stream_type, "stream", ir_var_const_in);
+
+   MAKE_SIG(glsl_type::void_type, avail, 1, stream);
+
+   body.emit(new(mem_ctx) ir_emit_vertex(var_ref(stream)));
+
+   return sig;
+}
+/**
+ * EndPrimitive(): void, gs_only; emits an ir_end_primitive whose stream
+ * operand is the constant built by ir_constant(0, 1).
+ */
+ir_function_signature *
+builtin_builder::_EndPrimitive()
+{
+   MAKE_SIG(glsl_type::void_type, gs_only, 0);
+
+   ir_rvalue *stream = new(mem_ctx) ir_constant(0, 1);
+   body.emit(new(mem_ctx) ir_end_primitive(stream));
+
+   return sig;
+}
+/**
+ * EndStreamPrimitive(): void; takes a constant-qualified "stream"
+ * parameter of \c stream_type and forwards it to ir_end_primitive.
+ */
+ir_function_signature *
+builtin_builder::_EndStreamPrimitive(builtin_available_predicate avail,
+                                     const glsl_type *stream_type)
+{
+   /* Section 8.12 (Geometry Shader Functions) of the GLSL 4.0 spec says:
+    *
+    *     "Completes the current output primitive on stream stream and starts
+    *     a new one. The argument to stream must be a constant integral
+    *     expression."
+    */
+   ir_variable *stream =
+      new(mem_ctx) ir_variable(stream_type, "stream", ir_var_const_in);
+
+   MAKE_SIG(glsl_type::void_type, avail, 1, stream);
+
+   body.emit(new(mem_ctx) ir_end_primitive(var_ref(stream)));
+
+   return sig;
+}
+/**
+ * barrier(): void, no parameters, gated by barrier_supported; the body is
+ * a single ir_barrier instruction.
+ */
+ir_function_signature *
+builtin_builder::_barrier()
+{
+   MAKE_SIG(glsl_type::void_type, barrier_supported, 0);
+
+   body.emit(new(mem_ctx) ir_barrier());
+   return sig;
+}
+/**
+ * textureQueryLod(): an ir_lod texture op on (sampler, coord) returning
+ * vec2.  No optional parameters are appended by this function.
+ */
+ir_function_signature *
+builtin_builder::_textureQueryLod(builtin_available_predicate avail,
+                                  const glsl_type *sampler_type,
+                                  const glsl_type *coord_type)
+{
+   ir_variable *s = in_var(sampler_type, "sampler");
+   ir_variable *coord = in_var(coord_type, "coord");
+   /* The sampler and coordinate always exist; add optional parameters later. */
+   MAKE_SIG(glsl_type::vec2_type, avail, 2, s, coord);
+
+   ir_texture *tex = new(mem_ctx) ir_texture(ir_lod);
+   tex->coordinate = var_ref(coord);
+   tex->set_sampler(var_ref(s), glsl_type::vec2_type);
+
+   body.emit(ret(tex));
+
+   return sig;
+}
+/**
+ * textureQueryLevels(): an ir_query_levels texture op returning int,
+ * gated by the texture_query_levels availability predicate.
+ */
+ir_function_signature *
+builtin_builder::_textureQueryLevels(const glsl_type *sampler_type)
+{
+   ir_variable *s = in_var(sampler_type, "sampler");
+   const glsl_type *return_type = glsl_type::int_type;
+   MAKE_SIG(return_type, texture_query_levels, 1, s);
+
+   ir_texture *tex = new(mem_ctx) ir_texture(ir_query_levels);
+   tex->set_sampler(var_ref(s), return_type);
+
+   body.emit(ret(tex));
+
+   return sig;
+}
+/**
+ * textureSamplesIdentical(): an ir_samples_identical texture op on
+ * (sampler, P) returning bool.
+ */
+ir_function_signature *
+builtin_builder::_textureSamplesIdentical(builtin_available_predicate avail,
+                                          const glsl_type *sampler_type,
+                                          const glsl_type *coord_type)
+{
+   ir_variable *s = in_var(sampler_type, "sampler");
+   ir_variable *P = in_var(coord_type, "P");
+   const glsl_type *return_type = glsl_type::bool_type;
+   MAKE_SIG(return_type, avail, 2, s, P);
+
+   ir_texture *tex = new(mem_ctx) ir_texture(ir_samples_identical);
+   tex->coordinate = var_ref(P);
+   tex->set_sampler(var_ref(s), return_type);
+
+   body.emit(ret(tex));
+
+   return sig;
+}
+/* Derivative built-ins generated by UNOP: plain forms use fs_oes_derivatives, Coarse/Fine forms use fs_derivative_control. */
+UNOP(dFdx, ir_unop_dFdx, fs_oes_derivatives)
+UNOP(dFdxCoarse, ir_unop_dFdx_coarse, fs_derivative_control)
+UNOP(dFdxFine, ir_unop_dFdx_fine, fs_derivative_control)
+UNOP(dFdy, ir_unop_dFdy, fs_oes_derivatives)
+UNOP(dFdyCoarse, ir_unop_dFdy_coarse, fs_derivative_control)
+UNOP(dFdyFine, ir_unop_dFdy_fine, fs_derivative_control)
+/**
+ * fwidth(p): returns abs(dFdx(p)) + abs(dFdy(p)); gated by
+ * fs_oes_derivatives.
+ */
+ir_function_signature *
+builtin_builder::_fwidth(const glsl_type *type)
+{
+   ir_variable *p = in_var(type, "p");
+   MAKE_SIG(type, fs_oes_derivatives, 1, p);
+
+   body.emit(ret(add(abs(expr(ir_unop_dFdx, p)), abs(expr(ir_unop_dFdy, p)))));
+
+   return sig;
+}
+/**
+ * fwidthCoarse(p): like _fwidth but built from the coarse derivative
+ * opcodes; gated by fs_derivative_control.
+ */
+ir_function_signature *
+builtin_builder::_fwidthCoarse(const glsl_type *type)
+{
+   ir_variable *p = in_var(type, "p");
+   MAKE_SIG(type, fs_derivative_control, 1, p);
+
+   body.emit(ret(add(abs(expr(ir_unop_dFdx_coarse, p)),
+                     abs(expr(ir_unop_dFdy_coarse, p)))));
+
+   return sig;
+}
+/**
+ * fwidthFine(p): like _fwidth but built from the fine derivative opcodes;
+ * gated by fs_derivative_control.
+ */
+ir_function_signature *
+builtin_builder::_fwidthFine(const glsl_type *type)
+{
+   ir_variable *p = in_var(type, "p");
+   MAKE_SIG(type, fs_derivative_control, 1, p);
+
+   body.emit(ret(add(abs(expr(ir_unop_dFdx_fine, p)),
+                     abs(expr(ir_unop_dFdy_fine, p)))));
+
+   return sig;
+}
+/**
+ * noise1(): unop() wrapper around ir_unop_noise returning float; gated by
+ * v110.
+ */
+ir_function_signature *
+builtin_builder::_noise1(const glsl_type *type)
+{
+   return unop(v110, ir_unop_noise, glsl_type::float_type, type);
+}
+/**
+ * noise2(): returns vec2(noise(p), noise(p + b_offset)), where b_offset is
+ * a constant of \c type built from the four floats set below.
+ */
+ir_function_signature *
+builtin_builder::_noise2(const glsl_type *type)
+{
+   ir_variable *p = in_var(type, "p");
+   MAKE_SIG(glsl_type::vec2_type, v110, 1, p);
+
+   ir_constant_data b_offset;
+   b_offset.f[0] = 601.0f;
+   b_offset.f[1] = 313.0f;
+   b_offset.f[2] = 29.0f;
+   b_offset.f[3] = 277.0f;
+
+   ir_variable *a = body.make_temp(glsl_type::float_type, "a");
+   ir_variable *b = body.make_temp(glsl_type::float_type, "b");
+   ir_variable *t = body.make_temp(glsl_type::vec2_type, "t");
+   body.emit(assign(a, expr(ir_unop_noise, p)));
+   body.emit(assign(b, expr(ir_unop_noise, add(p, imm(type, b_offset)))));
+   body.emit(assign(t, a, WRITEMASK_X));
+   body.emit(assign(t, b, WRITEMASK_Y));
+   body.emit(ret(t));
+
+   return sig;
+}
+/**
+ * noise3(): returns vec3(noise(p), noise(p + b_offset), noise(p + c_offset))
+ * using two constant offsets of \c type.
+ */
+ir_function_signature *
+builtin_builder::_noise3(const glsl_type *type)
+{
+   ir_variable *p = in_var(type, "p");
+   MAKE_SIG(glsl_type::vec3_type, v110, 1, p);
+
+   ir_constant_data b_offset;
+   b_offset.f[0] = 601.0f;
+   b_offset.f[1] = 313.0f;
+   b_offset.f[2] = 29.0f;
+   b_offset.f[3] = 277.0f;
+
+   ir_constant_data c_offset;
+   c_offset.f[0] = 1559.0f;
+   c_offset.f[1] = 113.0f;
+   c_offset.f[2] = 1861.0f;
+   c_offset.f[3] = 797.0f;
+
+   ir_variable *a = body.make_temp(glsl_type::float_type, "a");
+   ir_variable *b = body.make_temp(glsl_type::float_type, "b");
+   ir_variable *c = body.make_temp(glsl_type::float_type, "c");
+   ir_variable *t = body.make_temp(glsl_type::vec3_type, "t");
+   body.emit(assign(a, expr(ir_unop_noise, p)));
+   body.emit(assign(b, expr(ir_unop_noise, add(p, imm(type, b_offset)))));
+   body.emit(assign(c, expr(ir_unop_noise, add(p, imm(type, c_offset)))));
+   body.emit(assign(t, a, WRITEMASK_X));
+   body.emit(assign(t, b, WRITEMASK_Y));
+   body.emit(assign(t, c, WRITEMASK_Z));
+   body.emit(ret(t));
+
+   return sig;
+}
+/**
+ * noise4(): with _p = p + p_offset, returns
+ * vec4(noise(p), noise(p + offset), noise(_p), noise(_p + offset)).
+ */
+ir_function_signature *
+builtin_builder::_noise4(const glsl_type *type)
+{
+   ir_variable *p = in_var(type, "p");
+   MAKE_SIG(glsl_type::vec4_type, v110, 1, p);
+
+   ir_variable *_p = body.make_temp(type, "_p");
+
+   ir_constant_data p_offset;
+   p_offset.f[0] = 1559.0f;
+   p_offset.f[1] = 113.0f;
+   p_offset.f[2] = 1861.0f;
+   p_offset.f[3] = 797.0f;
+
+   body.emit(assign(_p, add(p, imm(type, p_offset))));
+
+   ir_constant_data offset;
+   offset.f[0] = 601.0f;
+   offset.f[1] = 313.0f;
+   offset.f[2] = 29.0f;
+   offset.f[3] = 277.0f;
+
+   ir_variable *a = body.make_temp(glsl_type::float_type, "a");
+   ir_variable *b = body.make_temp(glsl_type::float_type, "b");
+   ir_variable *c = body.make_temp(glsl_type::float_type, "c");
+   ir_variable *d = body.make_temp(glsl_type::float_type, "d");
+   ir_variable *t = body.make_temp(glsl_type::vec4_type, "t");
+   body.emit(assign(a, expr(ir_unop_noise, p)));
+   body.emit(assign(b, expr(ir_unop_noise, add(p, imm(type, offset)))));
+   body.emit(assign(c, expr(ir_unop_noise, _p)));
+   body.emit(assign(d, expr(ir_unop_noise, add(_p, imm(type, offset)))));
+   body.emit(assign(t, a, WRITEMASK_X));
+   body.emit(assign(t, b, WRITEMASK_Y));
+   body.emit(assign(t, c, WRITEMASK_Z));
+   body.emit(assign(t, d, WRITEMASK_W));
+   body.emit(ret(t));
+
+   return sig;
+}
+/**
+ * bitfieldExtract(value, offset, bits).
+ *
+ * "offset" and "bits" are scalar ints.  When \c type has a uint base type
+ * they are converted with i2u(); either way they are broadcast with
+ * SWIZZLE_XXXX to type->vector_elements components before being handed to
+ * ir_triop_bitfield_extract.
+ */
+ir_function_signature *
+builtin_builder::_bitfieldExtract(const glsl_type *type)
+{
+   bool is_uint = type->base_type == GLSL_TYPE_UINT;
+   ir_variable *value = in_var(type, "value");
+   ir_variable *offset = in_var(glsl_type::int_type, "offset");
+   ir_variable *bits = in_var(glsl_type::int_type, "bits");
+   MAKE_SIG(type, gpu_shader5_or_es31, 3, value, offset, bits);
+
+   operand cast_offset = is_uint ? i2u(offset) : operand(offset);
+   operand cast_bits = is_uint ? i2u(bits) : operand(bits);
+
+   body.emit(ret(expr(ir_triop_bitfield_extract, value,
+      swizzle(cast_offset, SWIZZLE_XXXX, type->vector_elements),
+      swizzle(cast_bits, SWIZZLE_XXXX, type->vector_elements))));
+
+   return sig;
+}
+/**
+ * bitfieldInsert(base, insert, offset, bits): same operand preparation as
+ * _bitfieldExtract, with the result built by bitfield_insert().
+ */
+ir_function_signature *
+builtin_builder::_bitfieldInsert(const glsl_type *type)
+{
+   bool is_uint = type->base_type == GLSL_TYPE_UINT;
+   ir_variable *base = in_var(type, "base");
+   ir_variable *insert = in_var(type, "insert");
+   ir_variable *offset = in_var(glsl_type::int_type, "offset");
+   ir_variable *bits = in_var(glsl_type::int_type, "bits");
+   MAKE_SIG(type, gpu_shader5_or_es31, 4, base, insert, offset, bits);
+
+   operand cast_offset = is_uint ? i2u(offset) : operand(offset);
+   operand cast_bits = is_uint ? i2u(bits) : operand(bits);
+
+   body.emit(ret(bitfield_insert(base, insert,
+      swizzle(cast_offset, SWIZZLE_XXXX, type->vector_elements),
+      swizzle(cast_bits, SWIZZLE_XXXX, type->vector_elements))));
+
+   return sig;
+}
+/* bitfieldReverse(): produced by the UNOP macro from ir_unop_bitfield_reverse. */
+UNOP(bitfieldReverse, ir_unop_bitfield_reverse, gpu_shader5_or_es31)
+/**
+ * bitCount(): unop() wrapper around ir_unop_bit_count returning an ivec
+ * with type->vector_elements components.
+ */
+ir_function_signature *
+builtin_builder::_bitCount(const glsl_type *type)
+{
+   return unop(gpu_shader5_or_es31, ir_unop_bit_count,
+               glsl_type::ivec(type->vector_elements), type);
+}
+/**
+ * findLSB(): same shape as _bitCount, using ir_unop_find_lsb.
+ */
+ir_function_signature *
+builtin_builder::_findLSB(const glsl_type *type)
+{
+   return unop(gpu_shader5_or_es31, ir_unop_find_lsb,
+               glsl_type::ivec(type->vector_elements), type);
+}
+/**
+ * findMSB(): same shape as _bitCount, using ir_unop_find_msb.
+ */
+ir_function_signature *
+builtin_builder::_findMSB(const glsl_type *type)
+{
+   return unop(gpu_shader5_or_es31, ir_unop_find_msb,
+               glsl_type::ivec(type->vector_elements), type);
+}
+/**
+ * fma(a, b, c): three operands of \c type; the result comes from
+ * ir_builder::fma (qualified because this method is itself named _fma).
+ */
+ir_function_signature *
+builtin_builder::_fma(builtin_available_predicate avail, const glsl_type *type)
+{
+   ir_variable *a = in_var(type, "a");
+   ir_variable *b = in_var(type, "b");
+   ir_variable *c = in_var(type, "c");
+   MAKE_SIG(type, avail, 3, a, b, c);
+
+   body.emit(ret(ir_builder::fma(a, b, c)));
+
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_ldexp(const glsl_type *x_type, const glsl_type *exp_type)
+{ /* ldexp(): ir_binop_ldexp wrapper; gated by fp64 for double x_type, gpu_shader5_or_es31 otherwise. */
+   return binop(x_type->base_type == GLSL_TYPE_DOUBLE ? fp64 : gpu_shader5_or_es31,
+                ir_binop_ldexp, x_type, x_type, exp_type);
+}
+/**
+ * frexp() variant gated by fp64: writes ir_unop_frexp_exp(x) to the "exp"
+ * out parameter and returns ir_unop_frexp_sig(x).
+ */
+ir_function_signature *
+builtin_builder::_dfrexp(const glsl_type *x_type, const glsl_type *exp_type)
+{
+   ir_variable *x = in_var(x_type, "x");
+   ir_variable *exponent = out_var(exp_type, "exp");
+   MAKE_SIG(x_type, fp64, 2, x, exponent);
+
+   body.emit(assign(exponent, expr(ir_unop_frexp_exp, x)));
+
+   body.emit(ret(expr(ir_unop_frexp_sig, x)));
+   return sig;
+}
+/**
+ * frexp() gated by gpu_shader5_or_es31, implemented with bit manipulation.
+ *
+ * The "exp" out parameter receives the biased exponent field of abs(x)
+ * plus -126 (or plus 0 when x is zero).  The return value is x with its
+ * exponent field replaced by 0x3f000000 (or by 0 when x is zero), keeping
+ * the sign and mantissa bits selected by 0x807fffff.
+ */
+ir_function_signature *
+builtin_builder::_frexp(const glsl_type *x_type, const glsl_type *exp_type)
+{
+   ir_variable *x = in_var(x_type, "x");
+   ir_variable *exponent = out_var(exp_type, "exp");
+   MAKE_SIG(x_type, gpu_shader5_or_es31, 2, x, exponent);
+
+   const unsigned vec_elem = x_type->vector_elements;
+   const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
+   const glsl_type *uvec = glsl_type::get_instance(GLSL_TYPE_UINT, vec_elem, 1);
+
+   /* Single-precision floating-point values are stored as
+    *   1 sign bit;
+    *   8 exponent bits;
+    *   23 mantissa bits.
+    *
+    * An exponent shift of 23 will shift the mantissa out, leaving only the
+    * exponent and sign bit (which itself may be zero, if the absolute value
+    * was taken before the bitcast and shift).
+    */
+   ir_constant *exponent_shift = imm(23);
+   ir_constant *exponent_bias = imm(-126, vec_elem);
+
+   ir_constant *sign_mantissa_mask = imm(0x807fffffu, vec_elem);
+
+   /* Exponent of floating-point values in the range [0.5, 1.0). */
+   ir_constant *exponent_value = imm(0x3f000000u, vec_elem);
+
+   ir_variable *is_not_zero = body.make_temp(bvec, "is_not_zero");
+   body.emit(assign(is_not_zero, nequal(abs(x), imm(0.0f, vec_elem))));
+
+   /* Since abs(x) ensures that the sign bit is zero, we don't need to bitcast
+    * to unsigned integers to ensure that 1 bits aren't shifted in.
+    */
+   body.emit(assign(exponent, rshift(bitcast_f2i(abs(x)), exponent_shift)));
+   body.emit(assign(exponent, add(exponent, csel(is_not_zero, exponent_bias,
+                                                     imm(0, vec_elem)))));
+
+   ir_variable *bits = body.make_temp(uvec, "bits");
+   body.emit(assign(bits, bitcast_f2u(x)));
+   body.emit(assign(bits, bit_and(bits, sign_mantissa_mask)));
+   body.emit(assign(bits, bit_or(bits, csel(is_not_zero, exponent_value,
+                                                imm(0u, vec_elem)))));
+   body.emit(ret(bitcast_u2f(bits)));
+
+   return sig;
+}
+/**
+ * uaddCarry(x, y, out carry): writes ir_builder::carry(x, y) to "carry"
+ * and returns add(x, y).
+ */
+ir_function_signature *
+builtin_builder::_uaddCarry(const glsl_type *type)
+{
+   ir_variable *x = in_var(type, "x");
+   ir_variable *y = in_var(type, "y");
+   ir_variable *carry = out_var(type, "carry");
+   MAKE_SIG(type, gpu_shader5_or_es31, 3, x, y, carry);
+
+   body.emit(assign(carry, ir_builder::carry(x, y)));
+   body.emit(ret(add(x, y)));
+
+   return sig;
+}
+/**
+ * usubBorrow(x, y, out borrow): writes ir_builder::borrow(x, y) to
+ * "borrow" and returns sub(x, y).
+ */
+ir_function_signature *
+builtin_builder::_usubBorrow(const glsl_type *type)
+{
+   ir_variable *x = in_var(type, "x");
+   ir_variable *y = in_var(type, "y");
+   ir_variable *borrow = out_var(type, "borrow");
+   MAKE_SIG(type, gpu_shader5_or_es31, 3, x, y, borrow);
+
+   body.emit(assign(borrow, ir_builder::borrow(x, y)));
+   body.emit(ret(sub(x, y)));
+
+   return sig;
+}
+
+/**
+ * For both imulExtended() and umulExtended() built-ins.
+ *
+ * Returns void; writes imul_high(x, y) to the "msb" out parameter and
+ * mul(x, y) to the "lsb" out parameter.
+ */
+ir_function_signature *
+builtin_builder::_mulExtended(const glsl_type *type)
+{
+   ir_variable *x = in_var(type, "x");
+   ir_variable *y = in_var(type, "y");
+   ir_variable *msb = out_var(type, "msb");
+   ir_variable *lsb = out_var(type, "lsb");
+   MAKE_SIG(glsl_type::void_type, gpu_shader5_or_es31, 4, x, y, msb, lsb);
+
+   body.emit(assign(msb, imul_high(x, y)));
+   body.emit(assign(lsb, mul(x, y)));
+
+   return sig;
+}
+/**
+ * interpolateAtCentroid(interpolant): the parameter is flagged
+ * must_be_shader_input; the body returns interpolate_at_centroid().
+ */
+ir_function_signature *
+builtin_builder::_interpolateAtCentroid(const glsl_type *type)
+{
+   ir_variable *interpolant = in_var(type, "interpolant");
+   interpolant->data.must_be_shader_input = 1;
+   MAKE_SIG(type, fs_gpu_shader5, 1, interpolant);
+
+   body.emit(ret(interpolate_at_centroid(interpolant)));
+
+   return sig;
+}
+/**
+ * interpolateAtOffset(interpolant, offset): "offset" is a vec2; the
+ * interpolant is flagged must_be_shader_input.
+ */
+ir_function_signature *
+builtin_builder::_interpolateAtOffset(const glsl_type *type)
+{
+   ir_variable *interpolant = in_var(type, "interpolant");
+   interpolant->data.must_be_shader_input = 1;
+   ir_variable *offset = in_var(glsl_type::vec2_type, "offset");
+   MAKE_SIG(type, fs_gpu_shader5, 2, interpolant, offset);
+
+   body.emit(ret(interpolate_at_offset(interpolant, offset)));
+
+   return sig;
+}
+/**
+ * interpolateAtSample(interpolant, sample_num): "sample_num" is an int;
+ * the interpolant is flagged must_be_shader_input.
+ */
+ir_function_signature *
+builtin_builder::_interpolateAtSample(const glsl_type *type)
+{
+   ir_variable *interpolant = in_var(type, "interpolant");
+   interpolant->data.must_be_shader_input = 1;
+   ir_variable *sample_num = in_var(glsl_type::int_type, "sample_num");
+   MAKE_SIG(type, fs_gpu_shader5, 2, interpolant, sample_num);
+
+   body.emit(ret(interpolate_at_sample(interpolant, sample_num)));
+
+   return sig;
+}
+/**
+ * Intrinsic signature taking one atomic_uint "counter" and returning uint.
+ * Built with MAKE_INTRINSIC (defined outside this chunk); no body is
+ * emitted here.
+ */
+ir_function_signature *
+builtin_builder::_atomic_counter_intrinsic(builtin_available_predicate avail)
+{
+   ir_variable *counter = in_var(glsl_type::atomic_uint_type, "counter");
+   MAKE_INTRINSIC(glsl_type::uint_type, avail, 1, counter);
+   return sig;
+}
+/**
+ * Intrinsic signature (atomic, data) -> type, all of the same \c type.
+ */
+ir_function_signature *
+builtin_builder::_atomic_intrinsic2(builtin_available_predicate avail,
+                                    const glsl_type *type)
+{
+   ir_variable *atomic = in_var(type, "atomic");
+   ir_variable *data = in_var(type, "data");
+   MAKE_INTRINSIC(type, avail, 2, atomic, data);
+   return sig;
+}
+/**
+ * Intrinsic signature (atomic, data1, data2) -> type, all of the same
+ * \c type.
+ */
+ir_function_signature *
+builtin_builder::_atomic_intrinsic3(builtin_available_predicate avail,
+                                    const glsl_type *type)
+{
+   ir_variable *atomic = in_var(type, "atomic");
+   ir_variable *data1 = in_var(type, "data1");
+   ir_variable *data2 = in_var(type, "data2");
+   MAKE_INTRINSIC(type, avail, 3, atomic, data1, data2);
+   return sig;
+}
+/**
+ * Atomic-counter built-in that forwards to the function named
+ * \c intrinsic: looks it up in shader->symbols, calls it with this
+ * signature's parameters, and returns the uint result.
+ */
+ir_function_signature *
+builtin_builder::_atomic_counter_op(const char *intrinsic,
+                                    builtin_available_predicate avail)
+{
+   ir_variable *counter = in_var(glsl_type::atomic_uint_type, "atomic_counter");
+   MAKE_SIG(glsl_type::uint_type, avail, 1, counter);
+
+   ir_variable *retval = body.make_temp(glsl_type::uint_type, "atomic_retval");
+   body.emit(call(shader->symbols->get_function(intrinsic), retval,
+                  sig->parameters));
+   body.emit(ret(retval));
+   return sig;
+}
+/**
+ * Two-operand atomic built-in forwarding to the function named
+ * \c intrinsic; parameters and result all have \c type.
+ */
+ir_function_signature *
+builtin_builder::_atomic_op2(const char *intrinsic,
+                             builtin_available_predicate avail,
+                             const glsl_type *type)
+{
+   ir_variable *atomic = in_var(type, "atomic_var");
+   ir_variable *data = in_var(type, "atomic_data");
+   MAKE_SIG(type, avail, 2, atomic, data);
+
+   ir_variable *retval = body.make_temp(type, "atomic_retval");
+   body.emit(call(shader->symbols->get_function(intrinsic), retval,
+                  sig->parameters));
+   body.emit(ret(retval));
+   return sig;
+}
+/**
+ * Three-operand atomic built-in forwarding to the function named
+ * \c intrinsic; parameters and result all have \c type.
+ */
+ir_function_signature *
+builtin_builder::_atomic_op3(const char *intrinsic,
+                             builtin_available_predicate avail,
+                             const glsl_type *type)
+{
+   ir_variable *atomic = in_var(type, "atomic_var");
+   ir_variable *data1 = in_var(type, "atomic_data1");
+   ir_variable *data2 = in_var(type, "atomic_data2");
+   MAKE_SIG(type, avail, 3, atomic, data1, data2);
+
+   ir_variable *retval = body.make_temp(type, "atomic_retval");
+   body.emit(call(shader->symbols->get_function(intrinsic), retval,
+                  sig->parameters));
+   body.emit(ret(retval));
+   return sig;
+}
+/**
+ * min3(x, y, z): returns min2(x, min2(y, z)); gated by
+ * shader_trinary_minmax.
+ */
+ir_function_signature *
+builtin_builder::_min3(const glsl_type *type)
+{
+   ir_variable *x = in_var(type, "x");
+   ir_variable *y = in_var(type, "y");
+   ir_variable *z = in_var(type, "z");
+   MAKE_SIG(type, shader_trinary_minmax, 3, x, y, z);
+
+   ir_expression *min3 = min2(x, min2(y,z));
+   body.emit(ret(min3));
+
+   return sig;
+}
+/**
+ * max3(x, y, z): returns max2(x, max2(y, z)); gated by
+ * shader_trinary_minmax.
+ */
+ir_function_signature *
+builtin_builder::_max3(const glsl_type *type)
+{
+   ir_variable *x = in_var(type, "x");
+   ir_variable *y = in_var(type, "y");
+   ir_variable *z = in_var(type, "z");
+   MAKE_SIG(type, shader_trinary_minmax, 3, x, y, z);
+
+   ir_expression *max3 = max2(x, max2(y,z));
+   body.emit(ret(max3));
+
+   return sig;
+}
+/**
+ * mid3(x, y, z): returns the largest of the three pairwise minima,
+ * max2(min2(x, y), max2(min2(x, z), min2(y, z))).
+ */
+ir_function_signature *
+builtin_builder::_mid3(const glsl_type *type)
+{
+   ir_variable *x = in_var(type, "x");
+   ir_variable *y = in_var(type, "y");
+   ir_variable *z = in_var(type, "z");
+   MAKE_SIG(type, shader_trinary_minmax, 3, x, y, z);
+
+   ir_expression *mid3 = max2(min2(x, y), max2(min2(x, z), min2(y, z)));
+   body.emit(ret(mid3));
+
+   return sig;
+}
+/**
+ * Prototype (signature only, created with new_sig) for an image built-in.
+ *
+ * Parameters are (image, coord), then an int "sample" for multisample
+ * images, then \c num_arguments data arguments named "arg0", "arg1", ...
+ * The data type has 4 components with IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE
+ * and 1 otherwise; the return type is void with
+ * IMAGE_FUNCTION_RETURNS_VOID, else the data type.  Availability is
+ * shader_image_atomic with IMAGE_FUNCTION_AVAIL_ATOMIC, else
+ * shader_image_load_store.
+ */
+ir_function_signature *
+builtin_builder::_image_prototype(const glsl_type *image_type,
+                                  unsigned num_arguments,
+                                  unsigned flags)
+{
+   const glsl_type *data_type = glsl_type::get_instance(
+      image_type->sampler_type,
+      (flags & IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE ? 4 : 1),
+      1);
+   const glsl_type *ret_type = (flags & IMAGE_FUNCTION_RETURNS_VOID ?
+                                glsl_type::void_type : data_type);
+
+   /* Addressing arguments that are always present. */
+   ir_variable *image = in_var(image_type, "image");
+   ir_variable *coord = in_var(
+      glsl_type::ivec(image_type->coordinate_components()), "coord");
+
+   const builtin_available_predicate avail =
+      (flags & IMAGE_FUNCTION_AVAIL_ATOMIC ? shader_image_atomic :
+       shader_image_load_store);
+   ir_function_signature *sig = new_sig(ret_type, avail, 2, image, coord);
+
+   /* Sample index for multisample images. */
+   if (image_type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS)
+      sig->parameters.push_tail(in_var(glsl_type::int_type, "sample"));
+
+   /* Data arguments. */
+   for (unsigned i = 0; i < num_arguments; ++i) {
+      char *arg_name = ralloc_asprintf(NULL, "arg%d", i); /* NOTE(review): %d is used with an unsigned index */
+      sig->parameters.push_tail(in_var(data_type, arg_name));
+      ralloc_free(arg_name); /* presumably in_var() keeps its own copy of the name -- confirm */
+   }
+
+   /* Set the maximal set of qualifiers allowed for this image
+    * built-in.  Function calls with arguments having fewer
+    * qualifiers than present in the prototype are allowed by the
+    * spec, but not with more, i.e. this will make the compiler
+    * accept everything that needs to be accepted, and reject cases
+    * like loads from write-only or stores to read-only images.
+    */
+   image->data.image_read_only = (flags & IMAGE_FUNCTION_READ_ONLY) != 0;
+   image->data.image_write_only = (flags & IMAGE_FUNCTION_WRITE_ONLY) != 0;
+   image->data.image_coherent = true;
+   image->data.image_volatile = true;
+   image->data.image_restrict = true;
+
+   return sig;
+}
+/**
+ * Prototype for the image size query: takes only the image and returns an
+ * int vector with image_type->coordinate_components() components, except
+ * that non-array cube images return 2 components.  Both unnamed unsigned
+ * parameters are ignored.  All five image qualifiers are set on the
+ * parameter.
+ */
+ir_function_signature *
+builtin_builder::_image_size_prototype(const glsl_type *image_type,
+                                       unsigned /* num_arguments */,
+                                       unsigned /* flags */)
+{
+   const glsl_type *ret_type;
+   unsigned num_components = image_type->coordinate_components();
+
+   /* From the ARB_shader_image_size extension:
+    * "Cube images return the dimensions of one face."
+    */
+   if (image_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
+       !image_type->sampler_array) {
+      num_components = 2;
+   }
+
+   /* FIXME: Add the highp precision qualifier for GLES 3.10 when it is
+    * supported by mesa.
+    */
+   ret_type = glsl_type::get_instance(GLSL_TYPE_INT, num_components, 1);
+
+   ir_variable *image = in_var(image_type, "image");
+   ir_function_signature *sig = new_sig(ret_type, shader_image_size, 1, image);
+
+   /* Set the maximal set of qualifiers allowed for this image
+    * built-in.  Function calls with arguments having fewer
+    * qualifiers than present in the prototype are allowed by the
+    * spec, but not with more, i.e. this will make the compiler
+    * accept everything that needs to be accepted, and reject cases
+    * like loads from write-only or stores to read-only images.
+    */
+   image->data.image_read_only = true;
+   image->data.image_write_only = true;
+   image->data.image_coherent = true;
+   image->data.image_volatile = true;
+   image->data.image_restrict = true;
+
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_image_samples_prototype(const glsl_type *image_type,
+                                          unsigned /* num_arguments */,
+                                          unsigned /* flags */)
+{
+   ir_variable *image = in_var(image_type, "image");
+   ir_function_signature *sig =
+      new_sig(glsl_type::int_type, shader_samples, 1, image);
+
+   /* Set the maximal set of qualifiers allowed for this image
+    * built-in.  Function calls with arguments having fewer
+    * qualifiers than present in the prototype are allowed by the
+    * spec, but not with more, i.e. this will make the compiler
+    * accept everything that needs to be accepted, and reject cases
+    * like loads from write-only or stores to read-only images.
+ */ + image->data.image_read_only = true; + image->data.image_write_only = true; + image->data.image_coherent = true; + image->data.image_volatile = true; + image->data.image_restrict = true; + + return sig; +} + +ir_function_signature * +builtin_builder::_image(image_prototype_ctr prototype, + const glsl_type *image_type, + const char *intrinsic_name, + unsigned num_arguments, + unsigned flags) +{ + ir_function_signature *sig = (this->*prototype)(image_type, + num_arguments, flags); + + if (flags & IMAGE_FUNCTION_EMIT_STUB) { + ir_factory body(&sig->body, mem_ctx); + ir_function *f = shader->symbols->get_function(intrinsic_name); + + if (flags & IMAGE_FUNCTION_RETURNS_VOID) { + body.emit(call(f, NULL, sig->parameters)); + } else { + ir_variable *ret_val = + body.make_temp(sig->return_type, "_ret_val"); + body.emit(call(f, ret_val, sig->parameters)); + body.emit(ret(ret_val)); + } + + sig->is_defined = true; + + } else { + sig->is_intrinsic = true; + } + + return sig; +} + +ir_function_signature * +builtin_builder::_memory_barrier_intrinsic(builtin_available_predicate avail) +{ + MAKE_INTRINSIC(glsl_type::void_type, avail, 0); + return sig; +} + +ir_function_signature * +builtin_builder::_memory_barrier(const char *intrinsic_name, + builtin_available_predicate avail) +{ + MAKE_SIG(glsl_type::void_type, avail, 0); + body.emit(call(shader->symbols->get_function(intrinsic_name), + NULL, sig->parameters)); + return sig; +} + +ir_function_signature * +builtin_builder::_shader_clock_intrinsic(builtin_available_predicate avail, + const glsl_type *type) +{ + MAKE_INTRINSIC(type, avail, 0); + return sig; +} + +ir_function_signature * +builtin_builder::_shader_clock(builtin_available_predicate avail, + const glsl_type *type) +{ + MAKE_SIG(type, avail, 0); + + ir_variable *retval = body.make_temp(type, "clock_retval"); + + body.emit(call(shader->symbols->get_function("__intrinsic_shader_clock"), + retval, sig->parameters)); + body.emit(ret(retval)); + return sig; +} + +/** 
@} */ + +/******************************************************************************/ + +/* The singleton instance of builtin_builder. */ +static builtin_builder builtins; +static mtx_t builtins_lock = _MTX_INITIALIZER_NP; + +/** + * External API (exposing the built-in module to the rest of the compiler): + * @{ + */ +void +_mesa_glsl_initialize_builtin_functions() +{ + mtx_lock(&builtins_lock); + builtins.initialize(); + mtx_unlock(&builtins_lock); +} + +void +_mesa_glsl_release_builtin_functions() +{ + mtx_lock(&builtins_lock); + builtins.release(); + mtx_unlock(&builtins_lock); +} + +ir_function_signature * +_mesa_glsl_find_builtin_function(_mesa_glsl_parse_state *state, + const char *name, exec_list *actual_parameters) +{ + ir_function_signature * s; + mtx_lock(&builtins_lock); + s = builtins.find(state, name, actual_parameters); + mtx_unlock(&builtins_lock); + return s; +} + +ir_function * +_mesa_glsl_find_builtin_function_by_name(const char *name) +{ + ir_function *f; + mtx_lock(&builtins_lock); + f = builtins.shader->symbols->get_function(name); + mtx_unlock(&builtins_lock); + return f; +} + +gl_shader * +_mesa_glsl_get_builtin_function_shader() +{ + return builtins.shader; +} + + +/** + * Get the function signature for main from a shader + */ +ir_function_signature * +_mesa_get_main_function_signature(gl_shader *sh) +{ + ir_function *const f = sh->symbols->get_function("main"); + if (f != NULL) { + exec_list void_parameters; + + /* Look for the 'void main()' signature and ensure that it's defined. + * This keeps the linker from accidentally pick a shader that just + * contains a prototype for main. + * + * We don't have to check for multiple definitions of main (in multiple + * shaders) because that would have already been caught above. 
+ */ + ir_function_signature *sig = + f->matching_signature(NULL, &void_parameters, false); + if ((sig != NULL) && sig->is_defined) { + return sig; + } + } + + return NULL; +} + +/** @} */ diff --git a/src/compiler/glsl/builtin_types.cpp b/src/compiler/glsl/builtin_types.cpp new file mode 100644 index 0000000..ee24bd5 --- /dev/null +++ b/src/compiler/glsl/builtin_types.cpp @@ -0,0 +1,394 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file builtin_types.cpp + * + * The glsl_type class has static members to represent all the built-in types + * (such as the glsl_type::_float_type flyweight) as well as convenience pointer + * accessors (such as glsl_type::float_type). Those global variables are + * declared and initialized in this file. 
+ * + * This also contains _mesa_glsl_initialize_types(), a function which populates + * a symbol table with the available built-in types for a particular language + * version and set of enabled extensions. + */ + +#include "compiler/glsl_types.h" +#include "glsl_parser_extras.h" +#include "util/macros.h" + +/** + * Declarations of type flyweights (glsl_type::_foo_type) and + * convenience pointers (glsl_type::foo_type). + * @{ + */ +#define DECL_TYPE(NAME, ...) + +#define STRUCT_TYPE(NAME) \ + const glsl_type glsl_type::_struct_##NAME##_type = \ + glsl_type(NAME##_fields, ARRAY_SIZE(NAME##_fields), #NAME); \ + const glsl_type *const glsl_type::struct_##NAME##_type = \ + &glsl_type::_struct_##NAME##_type; + +static const struct glsl_struct_field gl_DepthRangeParameters_fields[] = { + glsl_struct_field(glsl_type::float_type, "near"), + glsl_struct_field(glsl_type::float_type, "far"), + glsl_struct_field(glsl_type::float_type, "diff"), +}; + +static const struct glsl_struct_field gl_PointParameters_fields[] = { + glsl_struct_field(glsl_type::float_type, "size"), + glsl_struct_field(glsl_type::float_type, "sizeMin"), + glsl_struct_field(glsl_type::float_type, "sizeMax"), + glsl_struct_field(glsl_type::float_type, "fadeThresholdSize"), + glsl_struct_field(glsl_type::float_type, "distanceConstantAttenuation"), + glsl_struct_field(glsl_type::float_type, "distanceLinearAttenuation"), + glsl_struct_field(glsl_type::float_type, "distanceQuadraticAttenuation"), +}; + +static const struct glsl_struct_field gl_MaterialParameters_fields[] = { + glsl_struct_field(glsl_type::vec4_type, "emission"), + glsl_struct_field(glsl_type::vec4_type, "ambient"), + glsl_struct_field(glsl_type::vec4_type, "diffuse"), + glsl_struct_field(glsl_type::vec4_type, "specular"), + glsl_struct_field(glsl_type::float_type, "shininess"), +}; + +static const struct glsl_struct_field gl_LightSourceParameters_fields[] = { + glsl_struct_field(glsl_type::vec4_type, "ambient"), + 
glsl_struct_field(glsl_type::vec4_type, "diffuse"), + glsl_struct_field(glsl_type::vec4_type, "specular"), + glsl_struct_field(glsl_type::vec4_type, "position"), + glsl_struct_field(glsl_type::vec4_type, "halfVector"), + glsl_struct_field(glsl_type::vec3_type, "spotDirection"), + glsl_struct_field(glsl_type::float_type, "spotExponent"), + glsl_struct_field(glsl_type::float_type, "spotCutoff"), + glsl_struct_field(glsl_type::float_type, "spotCosCutoff"), + glsl_struct_field(glsl_type::float_type, "constantAttenuation"), + glsl_struct_field(glsl_type::float_type, "linearAttenuation"), + glsl_struct_field(glsl_type::float_type, "quadraticAttenuation"), +}; + +static const struct glsl_struct_field gl_LightModelParameters_fields[] = { + glsl_struct_field(glsl_type::vec4_type, "ambient"), +}; + +static const struct glsl_struct_field gl_LightModelProducts_fields[] = { + glsl_struct_field(glsl_type::vec4_type, "sceneColor"), +}; + +static const struct glsl_struct_field gl_LightProducts_fields[] = { + glsl_struct_field(glsl_type::vec4_type, "ambient"), + glsl_struct_field(glsl_type::vec4_type, "diffuse"), + glsl_struct_field(glsl_type::vec4_type, "specular"), +}; + +static const struct glsl_struct_field gl_FogParameters_fields[] = { + glsl_struct_field(glsl_type::vec4_type, "color"), + glsl_struct_field(glsl_type::float_type, "density"), + glsl_struct_field(glsl_type::float_type, "start"), + glsl_struct_field(glsl_type::float_type, "end"), + glsl_struct_field(glsl_type::float_type, "scale"), +}; + +#include "compiler/builtin_type_macros.h" +/** @} */ + +/** + * Code to populate a symbol table with the built-in types available in a + * particular shading language version. The table below contains tags every + * type with the GLSL/GLSL ES versions where it was introduced. 
+ * + * @{ + */ +#define T(TYPE, MIN_GL, MIN_ES) \ + { glsl_type::TYPE##_type, MIN_GL, MIN_ES }, + +static const struct builtin_type_versions { + const glsl_type *const type; + int min_gl; + int min_es; +} builtin_type_versions[] = { + T(void, 110, 100) + T(bool, 110, 100) + T(bvec2, 110, 100) + T(bvec3, 110, 100) + T(bvec4, 110, 100) + T(int, 110, 100) + T(ivec2, 110, 100) + T(ivec3, 110, 100) + T(ivec4, 110, 100) + T(uint, 130, 300) + T(uvec2, 130, 300) + T(uvec3, 130, 300) + T(uvec4, 130, 300) + T(float, 110, 100) + T(vec2, 110, 100) + T(vec3, 110, 100) + T(vec4, 110, 100) + T(mat2, 110, 100) + T(mat3, 110, 100) + T(mat4, 110, 100) + T(mat2x3, 120, 300) + T(mat2x4, 120, 300) + T(mat3x2, 120, 300) + T(mat3x4, 120, 300) + T(mat4x2, 120, 300) + T(mat4x3, 120, 300) + + T(double, 400, 999) + T(dvec2, 400, 999) + T(dvec3, 400, 999) + T(dvec4, 400, 999) + T(dmat2, 400, 999) + T(dmat3, 400, 999) + T(dmat4, 400, 999) + T(dmat2x3, 400, 999) + T(dmat2x4, 400, 999) + T(dmat3x2, 400, 999) + T(dmat3x4, 400, 999) + T(dmat4x2, 400, 999) + T(dmat4x3, 400, 999) + + T(sampler1D, 110, 999) + T(sampler2D, 110, 100) + T(sampler3D, 110, 300) + T(samplerCube, 110, 100) + T(sampler1DArray, 130, 999) + T(sampler2DArray, 130, 300) + T(samplerCubeArray, 400, 999) + T(sampler2DRect, 140, 999) + T(samplerBuffer, 140, 999) + T(sampler2DMS, 150, 310) + T(sampler2DMSArray, 150, 999) + + T(isampler1D, 130, 999) + T(isampler2D, 130, 300) + T(isampler3D, 130, 300) + T(isamplerCube, 130, 300) + T(isampler1DArray, 130, 999) + T(isampler2DArray, 130, 300) + T(isamplerCubeArray, 400, 999) + T(isampler2DRect, 140, 999) + T(isamplerBuffer, 140, 999) + T(isampler2DMS, 150, 310) + T(isampler2DMSArray, 150, 999) + + T(usampler1D, 130, 999) + T(usampler2D, 130, 300) + T(usampler3D, 130, 300) + T(usamplerCube, 130, 300) + T(usampler1DArray, 130, 999) + T(usampler2DArray, 130, 300) + T(usamplerCubeArray, 400, 999) + T(usampler2DRect, 140, 999) + T(usamplerBuffer, 140, 999) + T(usampler2DMS, 150, 310) + 
T(usampler2DMSArray, 150, 999) + + T(sampler1DShadow, 110, 999) + T(sampler2DShadow, 110, 300) + T(samplerCubeShadow, 130, 300) + T(sampler1DArrayShadow, 130, 999) + T(sampler2DArrayShadow, 130, 300) + T(samplerCubeArrayShadow, 400, 999) + T(sampler2DRectShadow, 140, 999) + + T(struct_gl_DepthRangeParameters, 110, 100) + + T(image1D, 420, 999) + T(image2D, 420, 310) + T(image3D, 420, 310) + T(image2DRect, 420, 999) + T(imageCube, 420, 310) + T(imageBuffer, 420, 999) + T(image1DArray, 420, 999) + T(image2DArray, 420, 310) + T(imageCubeArray, 420, 999) + T(image2DMS, 420, 999) + T(image2DMSArray, 420, 999) + T(iimage1D, 420, 999) + T(iimage2D, 420, 310) + T(iimage3D, 420, 310) + T(iimage2DRect, 420, 999) + T(iimageCube, 420, 310) + T(iimageBuffer, 420, 999) + T(iimage1DArray, 420, 999) + T(iimage2DArray, 420, 310) + T(iimageCubeArray, 420, 999) + T(iimage2DMS, 420, 999) + T(iimage2DMSArray, 420, 999) + T(uimage1D, 420, 999) + T(uimage2D, 420, 310) + T(uimage3D, 420, 310) + T(uimage2DRect, 420, 999) + T(uimageCube, 420, 310) + T(uimageBuffer, 420, 999) + T(uimage1DArray, 420, 999) + T(uimage2DArray, 420, 310) + T(uimageCubeArray, 420, 999) + T(uimage2DMS, 420, 999) + T(uimage2DMSArray, 420, 999) + + T(atomic_uint, 420, 310) +}; + +static const glsl_type *const deprecated_types[] = { + glsl_type::struct_gl_PointParameters_type, + glsl_type::struct_gl_MaterialParameters_type, + glsl_type::struct_gl_LightSourceParameters_type, + glsl_type::struct_gl_LightModelParameters_type, + glsl_type::struct_gl_LightModelProducts_type, + glsl_type::struct_gl_LightProducts_type, + glsl_type::struct_gl_FogParameters_type, +}; + +static inline void +add_type(glsl_symbol_table *symbols, const glsl_type *const type) +{ + symbols->add_type(type->name, type); +} + +/** + * Populate the symbol table with available built-in types. 
+ */ +void +_mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state) +{ + struct glsl_symbol_table *symbols = state->symbols; + + for (unsigned i = 0; i < ARRAY_SIZE(builtin_type_versions); i++) { + const struct builtin_type_versions *const t = &builtin_type_versions[i]; + if (state->is_version(t->min_gl, t->min_es)) { + add_type(symbols, t->type); + } + } + + /* Add deprecated structure types. While these were deprecated in 1.30, + * they're still present. We've removed them in 1.40+ (OpenGL 3.1+). + */ + if (!state->es_shader && state->language_version < 140) { + for (unsigned i = 0; i < ARRAY_SIZE(deprecated_types); i++) { + add_type(symbols, deprecated_types[i]); + } + } + + /* Add types for enabled extensions. They may have already been added + * by the version-based loop, but attempting to add them a second time + * is harmless. + */ + if (state->ARB_texture_cube_map_array_enable) { + add_type(symbols, glsl_type::samplerCubeArray_type); + add_type(symbols, glsl_type::samplerCubeArrayShadow_type); + add_type(symbols, glsl_type::isamplerCubeArray_type); + add_type(symbols, glsl_type::usamplerCubeArray_type); + } + + if (state->ARB_texture_multisample_enable || + state->OES_texture_storage_multisample_2d_array_enable) { + add_type(symbols, glsl_type::sampler2DMS_type); + add_type(symbols, glsl_type::isampler2DMS_type); + add_type(symbols, glsl_type::usampler2DMS_type); + add_type(symbols, glsl_type::sampler2DMSArray_type); + add_type(symbols, glsl_type::isampler2DMSArray_type); + add_type(symbols, glsl_type::usampler2DMSArray_type); + } + + if (state->ARB_texture_rectangle_enable) { + add_type(symbols, glsl_type::sampler2DRect_type); + add_type(symbols, glsl_type::sampler2DRectShadow_type); + } + + if (state->EXT_texture_array_enable) { + add_type(symbols, glsl_type::sampler1DArray_type); + add_type(symbols, glsl_type::sampler2DArray_type); + add_type(symbols, glsl_type::sampler1DArrayShadow_type); + add_type(symbols, 
glsl_type::sampler2DArrayShadow_type); + } + + if (state->OES_EGL_image_external_enable) { + add_type(symbols, glsl_type::samplerExternalOES_type); + } + + if (state->OES_texture_3D_enable) { + add_type(symbols, glsl_type::sampler3D_type); + } + + if (state->ARB_shader_image_load_store_enable) { + add_type(symbols, glsl_type::image1D_type); + add_type(symbols, glsl_type::image2D_type); + add_type(symbols, glsl_type::image3D_type); + add_type(symbols, glsl_type::image2DRect_type); + add_type(symbols, glsl_type::imageCube_type); + add_type(symbols, glsl_type::imageBuffer_type); + add_type(symbols, glsl_type::image1DArray_type); + add_type(symbols, glsl_type::image2DArray_type); + add_type(symbols, glsl_type::imageCubeArray_type); + add_type(symbols, glsl_type::image2DMS_type); + add_type(symbols, glsl_type::image2DMSArray_type); + add_type(symbols, glsl_type::iimage1D_type); + add_type(symbols, glsl_type::iimage2D_type); + add_type(symbols, glsl_type::iimage3D_type); + add_type(symbols, glsl_type::iimage2DRect_type); + add_type(symbols, glsl_type::iimageCube_type); + add_type(symbols, glsl_type::iimageBuffer_type); + add_type(symbols, glsl_type::iimage1DArray_type); + add_type(symbols, glsl_type::iimage2DArray_type); + add_type(symbols, glsl_type::iimageCubeArray_type); + add_type(symbols, glsl_type::iimage2DMS_type); + add_type(symbols, glsl_type::iimage2DMSArray_type); + add_type(symbols, glsl_type::uimage1D_type); + add_type(symbols, glsl_type::uimage2D_type); + add_type(symbols, glsl_type::uimage3D_type); + add_type(symbols, glsl_type::uimage2DRect_type); + add_type(symbols, glsl_type::uimageCube_type); + add_type(symbols, glsl_type::uimageBuffer_type); + add_type(symbols, glsl_type::uimage1DArray_type); + add_type(symbols, glsl_type::uimage2DArray_type); + add_type(symbols, glsl_type::uimageCubeArray_type); + add_type(symbols, glsl_type::uimage2DMS_type); + add_type(symbols, glsl_type::uimage2DMSArray_type); + } + + if (state->has_atomic_counters()) { + 
add_type(symbols, glsl_type::atomic_uint_type); + } + + if (state->ARB_gpu_shader_fp64_enable) { + add_type(symbols, glsl_type::double_type); + add_type(symbols, glsl_type::dvec2_type); + add_type(symbols, glsl_type::dvec3_type); + add_type(symbols, glsl_type::dvec4_type); + add_type(symbols, glsl_type::dmat2_type); + add_type(symbols, glsl_type::dmat3_type); + add_type(symbols, glsl_type::dmat4_type); + add_type(symbols, glsl_type::dmat2x3_type); + add_type(symbols, glsl_type::dmat2x4_type); + add_type(symbols, glsl_type::dmat3x2_type); + add_type(symbols, glsl_type::dmat3x4_type); + add_type(symbols, glsl_type::dmat4x2_type); + add_type(symbols, glsl_type::dmat4x3_type); + } +} +/** @} */ diff --git a/src/compiler/glsl/builtin_variables.cpp b/src/compiler/glsl/builtin_variables.cpp new file mode 100644 index 0000000..ccc04c0 --- /dev/null +++ b/src/compiler/glsl/builtin_variables.cpp @@ -0,0 +1,1394 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "ir.h" +#include "ir_builder.h" +#include "linker.h" +#include "glsl_parser_extras.h" +#include "glsl_symbol_table.h" +#include "main/core.h" +#include "main/uniforms.h" +#include "program/prog_statevars.h" +#include "program/prog_instruction.h" + +using namespace ir_builder; + +static const struct gl_builtin_uniform_element gl_NumSamples_elements[] = { + {NULL, {STATE_NUM_SAMPLES, 0, 0}, SWIZZLE_XXXX} +}; + +static const struct gl_builtin_uniform_element gl_DepthRange_elements[] = { + {"near", {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_XXXX}, + {"far", {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_YYYY}, + {"diff", {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_ZZZZ}, +}; + +static const struct gl_builtin_uniform_element gl_ClipPlane_elements[] = { + {NULL, {STATE_CLIPPLANE, 0, 0}, SWIZZLE_XYZW} +}; + +static const struct gl_builtin_uniform_element gl_Point_elements[] = { + {"size", {STATE_POINT_SIZE}, SWIZZLE_XXXX}, + {"sizeMin", {STATE_POINT_SIZE}, SWIZZLE_YYYY}, + {"sizeMax", {STATE_POINT_SIZE}, SWIZZLE_ZZZZ}, + {"fadeThresholdSize", {STATE_POINT_SIZE}, SWIZZLE_WWWW}, + {"distanceConstantAttenuation", {STATE_POINT_ATTENUATION}, SWIZZLE_XXXX}, + {"distanceLinearAttenuation", {STATE_POINT_ATTENUATION}, SWIZZLE_YYYY}, + {"distanceQuadraticAttenuation", {STATE_POINT_ATTENUATION}, SWIZZLE_ZZZZ}, +}; + +static const struct gl_builtin_uniform_element gl_FrontMaterial_elements[] = { + {"emission", {STATE_MATERIAL, 0, STATE_EMISSION}, SWIZZLE_XYZW}, + {"ambient", {STATE_MATERIAL, 0, STATE_AMBIENT}, SWIZZLE_XYZW}, + {"diffuse", {STATE_MATERIAL, 0, STATE_DIFFUSE}, SWIZZLE_XYZW}, + {"specular", {STATE_MATERIAL, 0, STATE_SPECULAR}, SWIZZLE_XYZW}, + {"shininess", {STATE_MATERIAL, 0, STATE_SHININESS}, SWIZZLE_XXXX}, +}; 
+ +static const struct gl_builtin_uniform_element gl_BackMaterial_elements[] = { + {"emission", {STATE_MATERIAL, 1, STATE_EMISSION}, SWIZZLE_XYZW}, + {"ambient", {STATE_MATERIAL, 1, STATE_AMBIENT}, SWIZZLE_XYZW}, + {"diffuse", {STATE_MATERIAL, 1, STATE_DIFFUSE}, SWIZZLE_XYZW}, + {"specular", {STATE_MATERIAL, 1, STATE_SPECULAR}, SWIZZLE_XYZW}, + {"shininess", {STATE_MATERIAL, 1, STATE_SHININESS}, SWIZZLE_XXXX}, +}; + +static const struct gl_builtin_uniform_element gl_LightSource_elements[] = { + {"ambient", {STATE_LIGHT, 0, STATE_AMBIENT}, SWIZZLE_XYZW}, + {"diffuse", {STATE_LIGHT, 0, STATE_DIFFUSE}, SWIZZLE_XYZW}, + {"specular", {STATE_LIGHT, 0, STATE_SPECULAR}, SWIZZLE_XYZW}, + {"position", {STATE_LIGHT, 0, STATE_POSITION}, SWIZZLE_XYZW}, + {"halfVector", {STATE_LIGHT, 0, STATE_HALF_VECTOR}, SWIZZLE_XYZW}, + {"spotDirection", {STATE_LIGHT, 0, STATE_SPOT_DIRECTION}, + MAKE_SWIZZLE4(SWIZZLE_X, + SWIZZLE_Y, + SWIZZLE_Z, + SWIZZLE_Z)}, + {"spotCosCutoff", {STATE_LIGHT, 0, STATE_SPOT_DIRECTION}, SWIZZLE_WWWW}, + {"spotCutoff", {STATE_LIGHT, 0, STATE_SPOT_CUTOFF}, SWIZZLE_XXXX}, + {"spotExponent", {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_WWWW}, + {"constantAttenuation", {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_XXXX}, + {"linearAttenuation", {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_YYYY}, + {"quadraticAttenuation", {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_ZZZZ}, +}; + +static const struct gl_builtin_uniform_element gl_LightModel_elements[] = { + {"ambient", {STATE_LIGHTMODEL_AMBIENT, 0}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_FrontLightModelProduct_elements[] = { + {"sceneColor", {STATE_LIGHTMODEL_SCENECOLOR, 0}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_BackLightModelProduct_elements[] = { + {"sceneColor", {STATE_LIGHTMODEL_SCENECOLOR, 1}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_FrontLightProduct_elements[] = { + {"ambient", {STATE_LIGHTPROD, 0, 0, 
STATE_AMBIENT}, SWIZZLE_XYZW}, + {"diffuse", {STATE_LIGHTPROD, 0, 0, STATE_DIFFUSE}, SWIZZLE_XYZW}, + {"specular", {STATE_LIGHTPROD, 0, 0, STATE_SPECULAR}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_BackLightProduct_elements[] = { + {"ambient", {STATE_LIGHTPROD, 0, 1, STATE_AMBIENT}, SWIZZLE_XYZW}, + {"diffuse", {STATE_LIGHTPROD, 0, 1, STATE_DIFFUSE}, SWIZZLE_XYZW}, + {"specular", {STATE_LIGHTPROD, 0, 1, STATE_SPECULAR}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_TextureEnvColor_elements[] = { + {NULL, {STATE_TEXENV_COLOR, 0}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_EyePlaneS_elements[] = { + {NULL, {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_S}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_EyePlaneT_elements[] = { + {NULL, {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_T}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_EyePlaneR_elements[] = { + {NULL, {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_R}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_EyePlaneQ_elements[] = { + {NULL, {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_Q}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_ObjectPlaneS_elements[] = { + {NULL, {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_S}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_ObjectPlaneT_elements[] = { + {NULL, {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_T}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_ObjectPlaneR_elements[] = { + {NULL, {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_R}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_ObjectPlaneQ_elements[] = { + {NULL, {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_Q}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_Fog_elements[] = { + {"color", {STATE_FOG_COLOR}, SWIZZLE_XYZW}, + {"density", {STATE_FOG_PARAMS}, SWIZZLE_XXXX}, + {"start", {STATE_FOG_PARAMS}, 
SWIZZLE_YYYY}, + {"end", {STATE_FOG_PARAMS}, SWIZZLE_ZZZZ}, + {"scale", {STATE_FOG_PARAMS}, SWIZZLE_WWWW}, +}; + +static const struct gl_builtin_uniform_element gl_NormalScale_elements[] = { + {NULL, {STATE_NORMAL_SCALE}, SWIZZLE_XXXX}, +}; + +static const struct gl_builtin_uniform_element gl_FogParamsOptimizedMESA_elements[] = { + {NULL, {STATE_INTERNAL, STATE_FOG_PARAMS_OPTIMIZED}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_CurrentAttribVertMESA_elements[] = { + {NULL, {STATE_INTERNAL, STATE_CURRENT_ATTRIB, 0}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_CurrentAttribFragMESA_elements[] = { + {NULL, {STATE_INTERNAL, STATE_CURRENT_ATTRIB_MAYBE_VP_CLAMPED, 0}, SWIZZLE_XYZW}, +}; + +#define MATRIX(name, statevar, modifier) \ + static const struct gl_builtin_uniform_element name ## _elements[] = { \ + { NULL, { statevar, 0, 0, 0, modifier}, SWIZZLE_XYZW }, \ + { NULL, { statevar, 0, 1, 1, modifier}, SWIZZLE_XYZW }, \ + { NULL, { statevar, 0, 2, 2, modifier}, SWIZZLE_XYZW }, \ + { NULL, { statevar, 0, 3, 3, modifier}, SWIZZLE_XYZW }, \ + } + +MATRIX(gl_ModelViewMatrix, + STATE_MODELVIEW_MATRIX, STATE_MATRIX_TRANSPOSE); +MATRIX(gl_ModelViewMatrixInverse, + STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVTRANS); +MATRIX(gl_ModelViewMatrixTranspose, + STATE_MODELVIEW_MATRIX, 0); +MATRIX(gl_ModelViewMatrixInverseTranspose, + STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE); + +MATRIX(gl_ProjectionMatrix, + STATE_PROJECTION_MATRIX, STATE_MATRIX_TRANSPOSE); +MATRIX(gl_ProjectionMatrixInverse, + STATE_PROJECTION_MATRIX, STATE_MATRIX_INVTRANS); +MATRIX(gl_ProjectionMatrixTranspose, + STATE_PROJECTION_MATRIX, 0); +MATRIX(gl_ProjectionMatrixInverseTranspose, + STATE_PROJECTION_MATRIX, STATE_MATRIX_INVERSE); + +MATRIX(gl_ModelViewProjectionMatrix, + STATE_MVP_MATRIX, STATE_MATRIX_TRANSPOSE); +MATRIX(gl_ModelViewProjectionMatrixInverse, + STATE_MVP_MATRIX, STATE_MATRIX_INVTRANS); +MATRIX(gl_ModelViewProjectionMatrixTranspose, + 
STATE_MVP_MATRIX, 0); +MATRIX(gl_ModelViewProjectionMatrixInverseTranspose, + STATE_MVP_MATRIX, STATE_MATRIX_INVERSE); + +MATRIX(gl_TextureMatrix, + STATE_TEXTURE_MATRIX, STATE_MATRIX_TRANSPOSE); +MATRIX(gl_TextureMatrixInverse, + STATE_TEXTURE_MATRIX, STATE_MATRIX_INVTRANS); +MATRIX(gl_TextureMatrixTranspose, + STATE_TEXTURE_MATRIX, 0); +MATRIX(gl_TextureMatrixInverseTranspose, + STATE_TEXTURE_MATRIX, STATE_MATRIX_INVERSE); + +static const struct gl_builtin_uniform_element gl_NormalMatrix_elements[] = { + { NULL, { STATE_MODELVIEW_MATRIX, 0, 0, 0, STATE_MATRIX_INVERSE}, + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z) }, + { NULL, { STATE_MODELVIEW_MATRIX, 0, 1, 1, STATE_MATRIX_INVERSE}, + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z) }, + { NULL, { STATE_MODELVIEW_MATRIX, 0, 2, 2, STATE_MATRIX_INVERSE}, + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z) }, +}; + +#undef MATRIX + +#define STATEVAR(name) {#name, name ## _elements, ARRAY_SIZE(name ## _elements)} + +static const struct gl_builtin_uniform_desc _mesa_builtin_uniform_desc[] = { + STATEVAR(gl_NumSamples), + STATEVAR(gl_DepthRange), + STATEVAR(gl_ClipPlane), + STATEVAR(gl_Point), + STATEVAR(gl_FrontMaterial), + STATEVAR(gl_BackMaterial), + STATEVAR(gl_LightSource), + STATEVAR(gl_LightModel), + STATEVAR(gl_FrontLightModelProduct), + STATEVAR(gl_BackLightModelProduct), + STATEVAR(gl_FrontLightProduct), + STATEVAR(gl_BackLightProduct), + STATEVAR(gl_TextureEnvColor), + STATEVAR(gl_EyePlaneS), + STATEVAR(gl_EyePlaneT), + STATEVAR(gl_EyePlaneR), + STATEVAR(gl_EyePlaneQ), + STATEVAR(gl_ObjectPlaneS), + STATEVAR(gl_ObjectPlaneT), + STATEVAR(gl_ObjectPlaneR), + STATEVAR(gl_ObjectPlaneQ), + STATEVAR(gl_Fog), + + STATEVAR(gl_ModelViewMatrix), + STATEVAR(gl_ModelViewMatrixInverse), + STATEVAR(gl_ModelViewMatrixTranspose), + STATEVAR(gl_ModelViewMatrixInverseTranspose), + + STATEVAR(gl_ProjectionMatrix), + STATEVAR(gl_ProjectionMatrixInverse), + STATEVAR(gl_ProjectionMatrixTranspose), 
+ STATEVAR(gl_ProjectionMatrixInverseTranspose), + + STATEVAR(gl_ModelViewProjectionMatrix), + STATEVAR(gl_ModelViewProjectionMatrixInverse), + STATEVAR(gl_ModelViewProjectionMatrixTranspose), + STATEVAR(gl_ModelViewProjectionMatrixInverseTranspose), + + STATEVAR(gl_TextureMatrix), + STATEVAR(gl_TextureMatrixInverse), + STATEVAR(gl_TextureMatrixTranspose), + STATEVAR(gl_TextureMatrixInverseTranspose), + + STATEVAR(gl_NormalMatrix), + STATEVAR(gl_NormalScale), + + STATEVAR(gl_FogParamsOptimizedMESA), + STATEVAR(gl_CurrentAttribVertMESA), + STATEVAR(gl_CurrentAttribFragMESA), + + {NULL, NULL, 0} +}; + + +namespace { + +/** + * Data structure that accumulates fields for the gl_PerVertex interface + * block. + */ +class per_vertex_accumulator +{ +public: + per_vertex_accumulator(); + void add_field(int slot, const glsl_type *type, const char *name); + const glsl_type *construct_interface_instance() const; + +private: + glsl_struct_field fields[10]; + unsigned num_fields; +}; + + +per_vertex_accumulator::per_vertex_accumulator() + : fields(), + num_fields(0) +{ +} + + +void +per_vertex_accumulator::add_field(int slot, const glsl_type *type, + const char *name) +{ + assert(this->num_fields < ARRAY_SIZE(this->fields)); + this->fields[this->num_fields].type = type; + this->fields[this->num_fields].name = name; + this->fields[this->num_fields].matrix_layout = GLSL_MATRIX_LAYOUT_INHERITED; + this->fields[this->num_fields].location = slot; + this->fields[this->num_fields].interpolation = INTERP_QUALIFIER_NONE; + this->fields[this->num_fields].centroid = 0; + this->fields[this->num_fields].sample = 0; + this->fields[this->num_fields].patch = 0; + this->fields[this->num_fields].precision = GLSL_PRECISION_NONE; + this->num_fields++; +} + + +const glsl_type * +per_vertex_accumulator::construct_interface_instance() const +{ + return glsl_type::get_interface_instance(this->fields, this->num_fields, + GLSL_INTERFACE_PACKING_STD140, + "gl_PerVertex"); +} + + +class 
builtin_variable_generator +{ +public: + builtin_variable_generator(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + void generate_constants(); + void generate_uniforms(); + void generate_vs_special_vars(); + void generate_tcs_special_vars(); + void generate_tes_special_vars(); + void generate_gs_special_vars(); + void generate_fs_special_vars(); + void generate_cs_special_vars(); + void generate_varyings(); + +private: + const glsl_type *array(const glsl_type *base, unsigned elements) + { + return glsl_type::get_array_instance(base, elements); + } + + const glsl_type *type(const char *name) + { + return symtab->get_type(name); + } + + ir_variable *add_input(int slot, const glsl_type *type, const char *name) + { + return add_variable(name, type, ir_var_shader_in, slot); + } + + ir_variable *add_output(int slot, const glsl_type *type, const char *name) + { + return add_variable(name, type, ir_var_shader_out, slot); + } + + ir_variable *add_index_output(int slot, int index, const glsl_type *type, const char *name) + { + return add_index_variable(name, type, ir_var_shader_out, slot, index); + } + + ir_variable *add_system_value(int slot, const glsl_type *type, + const char *name) + { + return add_variable(name, type, ir_var_system_value, slot); + } + + ir_variable *add_variable(const char *name, const glsl_type *type, + enum ir_variable_mode mode, int slot); + ir_variable *add_index_variable(const char *name, const glsl_type *type, + enum ir_variable_mode mode, int slot, int index); + ir_variable *add_uniform(const glsl_type *type, const char *name); + ir_variable *add_const(const char *name, int value); + ir_variable *add_const_ivec3(const char *name, int x, int y, int z); + void add_varying(int slot, const glsl_type *type, const char *name); + + exec_list * const instructions; + struct _mesa_glsl_parse_state * const state; + glsl_symbol_table * const symtab; + + /** + * True if compatibility-profile-only variables should be included. 
(In + * desktop GL, these are always included when the GLSL version is 1.30 + * or below). + */ + const bool compatibility; + + const glsl_type * const bool_t; + const glsl_type * const int_t; + const glsl_type * const uint_t; + const glsl_type * const float_t; + const glsl_type * const vec2_t; + const glsl_type * const vec3_t; + const glsl_type * const vec4_t; + const glsl_type * const uvec3_t; + const glsl_type * const mat3_t; + const glsl_type * const mat4_t; + + per_vertex_accumulator per_vertex_in; + per_vertex_accumulator per_vertex_out; +}; + + +builtin_variable_generator::builtin_variable_generator( + exec_list *instructions, struct _mesa_glsl_parse_state *state) + : instructions(instructions), state(state), symtab(state->symbols), + compatibility(!state->is_version(140, 100)), + bool_t(glsl_type::bool_type), int_t(glsl_type::int_type), + uint_t(glsl_type::uint_type), + float_t(glsl_type::float_type), vec2_t(glsl_type::vec2_type), + vec3_t(glsl_type::vec3_type), vec4_t(glsl_type::vec4_type), + uvec3_t(glsl_type::uvec3_type), + mat3_t(glsl_type::mat3_type), mat4_t(glsl_type::mat4_type) +{ +} + +ir_variable * +builtin_variable_generator::add_index_variable(const char *name, + const glsl_type *type, + enum ir_variable_mode mode, int slot, int index) +{ + ir_variable *var = new(symtab) ir_variable(type, name, mode); + var->data.how_declared = ir_var_declared_implicitly; + + switch (var->data.mode) { + case ir_var_auto: + case ir_var_shader_in: + case ir_var_uniform: + case ir_var_system_value: + var->data.read_only = true; + break; + case ir_var_shader_out: + case ir_var_shader_storage: + break; + default: + /* The only variables that are added using this function should be + * uniforms, shader storage, shader inputs, and shader outputs, constants + * (which use ir_var_auto), and system values. 
+ */ + assert(0); + break; + } + + var->data.location = slot; + var->data.explicit_location = (slot >= 0); + var->data.explicit_index = 1; + var->data.index = index; + + /* Once the variable is created and initialized, add it to the symbol table + * and add the declaration to the IR stream. + */ + instructions->push_tail(var); + + symtab->add_variable(var); + return var; +} + +ir_variable * +builtin_variable_generator::add_variable(const char *name, + const glsl_type *type, + enum ir_variable_mode mode, int slot) +{ + ir_variable *var = new(symtab) ir_variable(type, name, mode); + var->data.how_declared = ir_var_declared_implicitly; + + switch (var->data.mode) { + case ir_var_auto: + case ir_var_shader_in: + case ir_var_uniform: + case ir_var_system_value: + var->data.read_only = true; + break; + case ir_var_shader_out: + case ir_var_shader_storage: + break; + default: + /* The only variables that are added using this function should be + * uniforms, shader storage, shader inputs, and shader outputs, constants + * (which use ir_var_auto), and system values. + */ + assert(0); + break; + } + + var->data.location = slot; + var->data.explicit_location = (slot >= 0); + var->data.explicit_index = 0; + + /* Once the variable is created and initialized, add it to the symbol table + * and add the declaration to the IR stream. + */ + instructions->push_tail(var); + + symtab->add_variable(var); + return var; +} + + +ir_variable * +builtin_variable_generator::add_uniform(const glsl_type *type, + const char *name) +{ + ir_variable *const uni = add_variable(name, type, ir_var_uniform, -1); + + unsigned i; + for (i = 0; _mesa_builtin_uniform_desc[i].name != NULL; i++) { + if (strcmp(_mesa_builtin_uniform_desc[i].name, name) == 0) { + break; + } + } + + assert(_mesa_builtin_uniform_desc[i].name != NULL); + const struct gl_builtin_uniform_desc* const statevar = + &_mesa_builtin_uniform_desc[i]; + + const unsigned array_count = type->is_array() ? 
type->length : 1; + + ir_state_slot *slots = + uni->allocate_state_slots(array_count * statevar->num_elements); + + for (unsigned a = 0; a < array_count; a++) { + for (unsigned j = 0; j < statevar->num_elements; j++) { + const struct gl_builtin_uniform_element *element = + &statevar->elements[j]; + + memcpy(slots->tokens, element->tokens, sizeof(element->tokens)); + if (type->is_array()) { + if (strcmp(name, "gl_CurrentAttribVertMESA") == 0 || + strcmp(name, "gl_CurrentAttribFragMESA") == 0) { + slots->tokens[2] = a; + } else { + slots->tokens[1] = a; + } + } + + slots->swizzle = element->swizzle; + slots++; + } + } + + return uni; +} + + +ir_variable * +builtin_variable_generator::add_const(const char *name, int value) +{ + ir_variable *const var = add_variable(name, glsl_type::int_type, + ir_var_auto, -1); + var->constant_value = new(var) ir_constant(value); + var->constant_initializer = new(var) ir_constant(value); + var->data.has_initializer = true; + return var; +} + + +ir_variable * +builtin_variable_generator::add_const_ivec3(const char *name, int x, int y, + int z) +{ + ir_variable *const var = add_variable(name, glsl_type::ivec3_type, + ir_var_auto, -1); + ir_constant_data data; + memset(&data, 0, sizeof(data)); + data.i[0] = x; + data.i[1] = y; + data.i[2] = z; + var->constant_value = new(var) ir_constant(glsl_type::ivec3_type, &data); + var->constant_initializer = + new(var) ir_constant(glsl_type::ivec3_type, &data); + var->data.has_initializer = true; + return var; +} + + +void +builtin_variable_generator::generate_constants() +{ + add_const("gl_MaxVertexAttribs", state->Const.MaxVertexAttribs); + add_const("gl_MaxVertexTextureImageUnits", + state->Const.MaxVertexTextureImageUnits); + add_const("gl_MaxCombinedTextureImageUnits", + state->Const.MaxCombinedTextureImageUnits); + add_const("gl_MaxTextureImageUnits", state->Const.MaxTextureImageUnits); + add_const("gl_MaxDrawBuffers", state->Const.MaxDrawBuffers); + + /* Max uniforms/varyings: GLSL ES counts 
these in units of vectors; desktop + * GL counts them in units of "components" or "floats". + */ + if (state->es_shader) { + add_const("gl_MaxVertexUniformVectors", + state->Const.MaxVertexUniformComponents / 4); + add_const("gl_MaxFragmentUniformVectors", + state->Const.MaxFragmentUniformComponents / 4); + + /* In GLSL ES 3.00, gl_MaxVaryingVectors was split out to separate + * vertex and fragment shader constants. + */ + if (state->is_version(0, 300)) { + add_const("gl_MaxVertexOutputVectors", + state->ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents / 4); + add_const("gl_MaxFragmentInputVectors", + state->ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents / 4); + } else { + add_const("gl_MaxVaryingVectors", + state->ctx->Const.MaxVarying); + } + + /* EXT_blend_func_extended brings a built in constant + * for determining number of dual source draw buffers + */ + if (state->EXT_blend_func_extended_enable) { + add_const("gl_MaxDualSourceDrawBuffersEXT", + state->Const.MaxDualSourceDrawBuffers); + } + } else { + add_const("gl_MaxVertexUniformComponents", + state->Const.MaxVertexUniformComponents); + + /* Note: gl_MaxVaryingFloats was deprecated in GLSL 1.30+, but not + * removed + */ + add_const("gl_MaxVaryingFloats", state->ctx->Const.MaxVarying * 4); + + add_const("gl_MaxFragmentUniformComponents", + state->Const.MaxFragmentUniformComponents); + } + + /* Texel offsets were introduced in ARB_shading_language_420pack (which + * requires desktop GLSL version 130), and adopted into desktop GLSL + * version 4.20 and GLSL ES version 3.00. 
+ */ + if ((state->is_version(130, 0) && + state->ARB_shading_language_420pack_enable) || + state->is_version(420, 300)) { + add_const("gl_MinProgramTexelOffset", + state->Const.MinProgramTexelOffset); + add_const("gl_MaxProgramTexelOffset", + state->Const.MaxProgramTexelOffset); + } + + if (state->is_version(130, 0)) { + add_const("gl_MaxClipDistances", state->Const.MaxClipPlanes); + add_const("gl_MaxVaryingComponents", state->ctx->Const.MaxVarying * 4); + } + + if (state->has_geometry_shader()) { + add_const("gl_MaxVertexOutputComponents", + state->Const.MaxVertexOutputComponents); + add_const("gl_MaxGeometryInputComponents", + state->Const.MaxGeometryInputComponents); + add_const("gl_MaxGeometryOutputComponents", + state->Const.MaxGeometryOutputComponents); + add_const("gl_MaxFragmentInputComponents", + state->Const.MaxFragmentInputComponents); + add_const("gl_MaxGeometryTextureImageUnits", + state->Const.MaxGeometryTextureImageUnits); + add_const("gl_MaxGeometryOutputVertices", + state->Const.MaxGeometryOutputVertices); + add_const("gl_MaxGeometryTotalOutputComponents", + state->Const.MaxGeometryTotalOutputComponents); + add_const("gl_MaxGeometryUniformComponents", + state->Const.MaxGeometryUniformComponents); + + /* Note: the GLSL 1.50-4.40 specs require + * gl_MaxGeometryVaryingComponents to be present, and to be at least 64. + * But they do not define what it means (and there does not appear to be + * any corresponding constant in the GL specs). However, + * ARB_geometry_shader4 defines MAX_GEOMETRY_VARYING_COMPONENTS_ARB to + * be the maximum number of components available for use as geometry + * outputs. So we assume this is a synonym for + * gl_MaxGeometryOutputComponents. 
+ */ + add_const("gl_MaxGeometryVaryingComponents", + state->Const.MaxGeometryOutputComponents); + } + + if (compatibility) { + /* Note: gl_MaxLights stopped being listed as an explicit constant in + * GLSL 1.30, however it continues to be referred to (as a minimum size + * for compatibility-mode uniforms) all the way up through GLSL 4.30, so + * this seems like it was probably an oversight. + */ + add_const("gl_MaxLights", state->Const.MaxLights); + + add_const("gl_MaxClipPlanes", state->Const.MaxClipPlanes); + + /* Note: gl_MaxTextureUnits wasn't made compatibility-only until GLSL + * 1.50, however this seems like it was probably an oversight. + */ + add_const("gl_MaxTextureUnits", state->Const.MaxTextureUnits); + + /* Note: gl_MaxTextureCoords was left out of GLSL 1.40, but it was + * re-introduced in GLSL 1.50, so this seems like it was probably an + * oversight. + */ + add_const("gl_MaxTextureCoords", state->Const.MaxTextureCoords); + } + + if (state->has_atomic_counters()) { + add_const("gl_MaxVertexAtomicCounters", + state->Const.MaxVertexAtomicCounters); + add_const("gl_MaxFragmentAtomicCounters", + state->Const.MaxFragmentAtomicCounters); + add_const("gl_MaxCombinedAtomicCounters", + state->Const.MaxCombinedAtomicCounters); + add_const("gl_MaxAtomicCounterBindings", + state->Const.MaxAtomicBufferBindings); + + if (state->has_geometry_shader()) { + add_const("gl_MaxGeometryAtomicCounters", + state->Const.MaxGeometryAtomicCounters); + } + if (!state->es_shader) { + add_const("gl_MaxTessControlAtomicCounters", + state->Const.MaxTessControlAtomicCounters); + add_const("gl_MaxTessEvaluationAtomicCounters", + state->Const.MaxTessEvaluationAtomicCounters); + } + } + + if (state->is_version(420, 310)) { + add_const("gl_MaxVertexAtomicCounterBuffers", + state->Const.MaxVertexAtomicCounterBuffers); + add_const("gl_MaxFragmentAtomicCounterBuffers", + state->Const.MaxFragmentAtomicCounterBuffers); + add_const("gl_MaxCombinedAtomicCounterBuffers", + 
state->Const.MaxCombinedAtomicCounterBuffers); + add_const("gl_MaxAtomicCounterBufferSize", + state->Const.MaxAtomicCounterBufferSize); + + if (state->has_geometry_shader()) { + add_const("gl_MaxGeometryAtomicCounterBuffers", + state->Const.MaxGeometryAtomicCounterBuffers); + } + if (!state->es_shader) { + add_const("gl_MaxTessControlAtomicCounterBuffers", + state->Const.MaxTessControlAtomicCounterBuffers); + add_const("gl_MaxTessEvaluationAtomicCounterBuffers", + state->Const.MaxTessEvaluationAtomicCounterBuffers); + } + } + + if (state->is_version(430, 310) || state->ARB_compute_shader_enable) { + add_const("gl_MaxComputeAtomicCounterBuffers", MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS); + add_const("gl_MaxComputeAtomicCounters", MAX_COMPUTE_ATOMIC_COUNTERS); + add_const("gl_MaxComputeImageUniforms", MAX_COMPUTE_IMAGE_UNIFORMS); + add_const("gl_MaxComputeTextureImageUnits", MAX_COMPUTE_TEXTURE_IMAGE_UNITS); + add_const("gl_MaxComputeUniformComponents", MAX_COMPUTE_UNIFORM_COMPONENTS); + + add_const_ivec3("gl_MaxComputeWorkGroupCount", + state->Const.MaxComputeWorkGroupCount[0], + state->Const.MaxComputeWorkGroupCount[1], + state->Const.MaxComputeWorkGroupCount[2]); + add_const_ivec3("gl_MaxComputeWorkGroupSize", + state->Const.MaxComputeWorkGroupSize[0], + state->Const.MaxComputeWorkGroupSize[1], + state->Const.MaxComputeWorkGroupSize[2]); + + /* From the GLSL 4.40 spec, section 7.1 (Built-In Language Variables): + * + * The built-in constant gl_WorkGroupSize is a compute-shader + * constant containing the local work-group size of the shader. The + * size of the work group in the X, Y, and Z dimensions is stored in + * the x, y, and z components. The constants values in + * gl_WorkGroupSize will match those specified in the required + * local_size_x, local_size_y, and local_size_z layout qualifiers + * for the current shader. This is a constant so that it can be + * used to size arrays of memory that can be shared within the local + * work group. 
It is a compile-time error to use gl_WorkGroupSize + * in a shader that does not declare a fixed local group size, or + * before that shader has declared a fixed local group size, using + * local_size_x, local_size_y, and local_size_z. + * + * To prevent the shader from trying to refer to gl_WorkGroupSize before + * the layout declaration, we don't define it here. Instead we define it + * in ast_cs_input_layout::hir(). + */ + } + + if (state->is_version(420, 310) || + state->ARB_shader_image_load_store_enable) { + add_const("gl_MaxImageUnits", + state->Const.MaxImageUnits); + add_const("gl_MaxVertexImageUniforms", + state->Const.MaxVertexImageUniforms); + add_const("gl_MaxFragmentImageUniforms", + state->Const.MaxFragmentImageUniforms); + add_const("gl_MaxCombinedImageUniforms", + state->Const.MaxCombinedImageUniforms); + + if (state->has_geometry_shader()) { + add_const("gl_MaxGeometryImageUniforms", + state->Const.MaxGeometryImageUniforms); + } + + if (!state->es_shader) { + add_const("gl_MaxCombinedImageUnitsAndFragmentOutputs", + state->Const.MaxCombinedShaderOutputResources); + add_const("gl_MaxImageSamples", + state->Const.MaxImageSamples); + } + + if (state->is_version(450, 310)) { + add_const("gl_MaxCombinedShaderOutputResources", + state->Const.MaxCombinedShaderOutputResources); + } + + if (state->is_version(400, 0) || + state->ARB_tessellation_shader_enable) { + add_const("gl_MaxTessControlImageUniforms", + state->Const.MaxTessControlImageUniforms); + add_const("gl_MaxTessEvaluationImageUniforms", + state->Const.MaxTessEvaluationImageUniforms); + } + } + + if (state->is_version(410, 0) || + state->ARB_viewport_array_enable) + add_const("gl_MaxViewports", state->Const.MaxViewports); + + if (state->is_version(400, 0) || + state->ARB_tessellation_shader_enable) { + add_const("gl_MaxPatchVertices", state->Const.MaxPatchVertices); + add_const("gl_MaxTessGenLevel", state->Const.MaxTessGenLevel); + add_const("gl_MaxTessControlInputComponents", 
state->Const.MaxTessControlInputComponents); + add_const("gl_MaxTessControlOutputComponents", state->Const.MaxTessControlOutputComponents); + add_const("gl_MaxTessControlTextureImageUnits", state->Const.MaxTessControlTextureImageUnits); + add_const("gl_MaxTessEvaluationInputComponents", state->Const.MaxTessEvaluationInputComponents); + add_const("gl_MaxTessEvaluationOutputComponents", state->Const.MaxTessEvaluationOutputComponents); + add_const("gl_MaxTessEvaluationTextureImageUnits", state->Const.MaxTessEvaluationTextureImageUnits); + add_const("gl_MaxTessPatchComponents", state->Const.MaxTessPatchComponents); + add_const("gl_MaxTessControlTotalOutputComponents", state->Const.MaxTessControlTotalOutputComponents); + add_const("gl_MaxTessControlUniformComponents", state->Const.MaxTessControlUniformComponents); + add_const("gl_MaxTessEvaluationUniformComponents", state->Const.MaxTessEvaluationUniformComponents); + } +} + + +/** + * Generate uniform variables (which exist in all types of shaders). 
+ */ +void +builtin_variable_generator::generate_uniforms() +{ + if (state->is_version(400, 0) || state->ARB_sample_shading_enable) + add_uniform(int_t, "gl_NumSamples"); + add_uniform(type("gl_DepthRangeParameters"), "gl_DepthRange"); + add_uniform(array(vec4_t, VERT_ATTRIB_MAX), "gl_CurrentAttribVertMESA"); + add_uniform(array(vec4_t, VARYING_SLOT_MAX), "gl_CurrentAttribFragMESA"); + + if (compatibility) { + add_uniform(mat4_t, "gl_ModelViewMatrix"); + add_uniform(mat4_t, "gl_ProjectionMatrix"); + add_uniform(mat4_t, "gl_ModelViewProjectionMatrix"); + add_uniform(mat3_t, "gl_NormalMatrix"); + add_uniform(mat4_t, "gl_ModelViewMatrixInverse"); + add_uniform(mat4_t, "gl_ProjectionMatrixInverse"); + add_uniform(mat4_t, "gl_ModelViewProjectionMatrixInverse"); + add_uniform(mat4_t, "gl_ModelViewMatrixTranspose"); + add_uniform(mat4_t, "gl_ProjectionMatrixTranspose"); + add_uniform(mat4_t, "gl_ModelViewProjectionMatrixTranspose"); + add_uniform(mat4_t, "gl_ModelViewMatrixInverseTranspose"); + add_uniform(mat4_t, "gl_ProjectionMatrixInverseTranspose"); + add_uniform(mat4_t, "gl_ModelViewProjectionMatrixInverseTranspose"); + add_uniform(float_t, "gl_NormalScale"); + add_uniform(type("gl_LightModelParameters"), "gl_LightModel"); + add_uniform(vec4_t, "gl_FogParamsOptimizedMESA"); + + const glsl_type *const mat4_array_type = + array(mat4_t, state->Const.MaxTextureCoords); + add_uniform(mat4_array_type, "gl_TextureMatrix"); + add_uniform(mat4_array_type, "gl_TextureMatrixInverse"); + add_uniform(mat4_array_type, "gl_TextureMatrixTranspose"); + add_uniform(mat4_array_type, "gl_TextureMatrixInverseTranspose"); + + add_uniform(array(vec4_t, state->Const.MaxClipPlanes), "gl_ClipPlane"); + add_uniform(type("gl_PointParameters"), "gl_Point"); + + const glsl_type *const material_parameters_type = + type("gl_MaterialParameters"); + add_uniform(material_parameters_type, "gl_FrontMaterial"); + add_uniform(material_parameters_type, "gl_BackMaterial"); + + 
add_uniform(array(type("gl_LightSourceParameters"), + state->Const.MaxLights), + "gl_LightSource"); + + const glsl_type *const light_model_products_type = + type("gl_LightModelProducts"); + add_uniform(light_model_products_type, "gl_FrontLightModelProduct"); + add_uniform(light_model_products_type, "gl_BackLightModelProduct"); + + const glsl_type *const light_products_type = + array(type("gl_LightProducts"), state->Const.MaxLights); + add_uniform(light_products_type, "gl_FrontLightProduct"); + add_uniform(light_products_type, "gl_BackLightProduct"); + + add_uniform(array(vec4_t, state->Const.MaxTextureUnits), + "gl_TextureEnvColor"); + + const glsl_type *const texcoords_vec4 = + array(vec4_t, state->Const.MaxTextureCoords); + add_uniform(texcoords_vec4, "gl_EyePlaneS"); + add_uniform(texcoords_vec4, "gl_EyePlaneT"); + add_uniform(texcoords_vec4, "gl_EyePlaneR"); + add_uniform(texcoords_vec4, "gl_EyePlaneQ"); + add_uniform(texcoords_vec4, "gl_ObjectPlaneS"); + add_uniform(texcoords_vec4, "gl_ObjectPlaneT"); + add_uniform(texcoords_vec4, "gl_ObjectPlaneR"); + add_uniform(texcoords_vec4, "gl_ObjectPlaneQ"); + + add_uniform(type("gl_FogParameters"), "gl_Fog"); + } +} + + +/** + * Generate variables which only exist in vertex shaders. 
+ */ +void +builtin_variable_generator::generate_vs_special_vars() +{ + ir_variable *var; + + if (state->is_version(130, 300)) + add_system_value(SYSTEM_VALUE_VERTEX_ID, int_t, "gl_VertexID"); + if (state->ARB_draw_instanced_enable) + add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, "gl_InstanceIDARB"); + if (state->ARB_draw_instanced_enable || state->is_version(140, 300)) + add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, "gl_InstanceID"); + if (state->ARB_shader_draw_parameters_enable) { + add_system_value(SYSTEM_VALUE_BASE_VERTEX, int_t, "gl_BaseVertexARB"); + add_system_value(SYSTEM_VALUE_BASE_INSTANCE, int_t, "gl_BaseInstanceARB"); + add_system_value(SYSTEM_VALUE_DRAW_ID, int_t, "gl_DrawIDARB"); + } + if (state->AMD_vertex_shader_layer_enable) { + var = add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; + } + if (state->AMD_vertex_shader_viewport_index_enable) { + var = add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; + } + if (compatibility) { + add_input(VERT_ATTRIB_POS, vec4_t, "gl_Vertex"); + add_input(VERT_ATTRIB_NORMAL, vec3_t, "gl_Normal"); + add_input(VERT_ATTRIB_COLOR0, vec4_t, "gl_Color"); + add_input(VERT_ATTRIB_COLOR1, vec4_t, "gl_SecondaryColor"); + add_input(VERT_ATTRIB_TEX0, vec4_t, "gl_MultiTexCoord0"); + add_input(VERT_ATTRIB_TEX1, vec4_t, "gl_MultiTexCoord1"); + add_input(VERT_ATTRIB_TEX2, vec4_t, "gl_MultiTexCoord2"); + add_input(VERT_ATTRIB_TEX3, vec4_t, "gl_MultiTexCoord3"); + add_input(VERT_ATTRIB_TEX4, vec4_t, "gl_MultiTexCoord4"); + add_input(VERT_ATTRIB_TEX5, vec4_t, "gl_MultiTexCoord5"); + add_input(VERT_ATTRIB_TEX6, vec4_t, "gl_MultiTexCoord6"); + add_input(VERT_ATTRIB_TEX7, vec4_t, "gl_MultiTexCoord7"); + add_input(VERT_ATTRIB_FOG, float_t, "gl_FogCoord"); + } +} + + +/** + * Generate variables which only exist in tessellation control shaders. 
+ */ +void +builtin_variable_generator::generate_tcs_special_vars() +{ + add_system_value(SYSTEM_VALUE_PRIMITIVE_ID, int_t, "gl_PrimitiveID"); + add_system_value(SYSTEM_VALUE_VERTICES_IN, int_t, "gl_PatchVerticesIn"); + add_system_value(SYSTEM_VALUE_INVOCATION_ID, int_t, "gl_InvocationID"); + + add_output(VARYING_SLOT_TESS_LEVEL_OUTER, array(float_t, 4), + "gl_TessLevelOuter")->data.patch = 1; + add_output(VARYING_SLOT_TESS_LEVEL_INNER, array(float_t, 2), + "gl_TessLevelInner")->data.patch = 1; +} + + +/** + * Generate variables which only exist in tessellation evaluation shaders. + */ +void +builtin_variable_generator::generate_tes_special_vars() +{ + add_system_value(SYSTEM_VALUE_PRIMITIVE_ID, int_t, "gl_PrimitiveID"); + add_system_value(SYSTEM_VALUE_VERTICES_IN, int_t, "gl_PatchVerticesIn"); + add_system_value(SYSTEM_VALUE_TESS_COORD, vec3_t, "gl_TessCoord"); + add_system_value(SYSTEM_VALUE_TESS_LEVEL_OUTER, array(float_t, 4), + "gl_TessLevelOuter"); + add_system_value(SYSTEM_VALUE_TESS_LEVEL_INNER, array(float_t, 2), + "gl_TessLevelInner"); +} + + +/** + * Generate variables which only exist in geometry shaders. + */ +void +builtin_variable_generator::generate_gs_special_vars() +{ + ir_variable *var; + + var = add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; + if (state->is_version(410, 0) || state->ARB_viewport_array_enable) { + var = add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; + } + if (state->is_version(400, 0) || state->ARB_gpu_shader5_enable) + add_system_value(SYSTEM_VALUE_INVOCATION_ID, int_t, "gl_InvocationID"); + + /* Although gl_PrimitiveID appears in tessellation control and tessellation + * evaluation shaders, it has a different function there than it has in + * geometry shaders, so we treat it (and its counterpart gl_PrimitiveIDIn) + * as special geometry shader variables. 
+ * + * Note that although the general convention of suffixing geometry shader + * input varyings with "In" was not adopted into GLSL 1.50, it is used in + * the specific case of gl_PrimitiveIDIn. So we don't need to treat + * gl_PrimitiveIDIn as an {ARB,EXT}_geometry_shader4-only variable. + */ + var = add_input(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveIDIn"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; + var = add_output(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveID"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; +} + + +/** + * Generate variables which only exist in fragment shaders. + */ +void +builtin_variable_generator::generate_fs_special_vars() +{ + ir_variable *var; + + if (this->state->ctx->Const.GLSLFragCoordIsSysVal) + add_system_value(SYSTEM_VALUE_FRAG_COORD, vec4_t, "gl_FragCoord"); + else + add_input(VARYING_SLOT_POS, vec4_t, "gl_FragCoord"); + + if (this->state->ctx->Const.GLSLFrontFacingIsSysVal) + add_system_value(SYSTEM_VALUE_FRONT_FACE, bool_t, "gl_FrontFacing"); + else + add_input(VARYING_SLOT_FACE, bool_t, "gl_FrontFacing"); + + if (state->is_version(120, 100)) + add_input(VARYING_SLOT_PNTC, vec2_t, "gl_PointCoord"); + + if (state->has_geometry_shader()) { + var = add_input(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveID"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; + } + + /* gl_FragColor and gl_FragData were deprecated starting in desktop GLSL + * 1.30, and were relegated to the compatibility profile in GLSL 4.20. + * They were removed from GLSL ES 3.00. 
+ */ + if (compatibility || !state->is_version(420, 300)) { + add_output(FRAG_RESULT_COLOR, vec4_t, "gl_FragColor"); + add_output(FRAG_RESULT_DATA0, + array(vec4_t, state->Const.MaxDrawBuffers), "gl_FragData"); + } + + if (state->es_shader && state->language_version == 100 && state->EXT_blend_func_extended_enable) { + /* We make an assumption here that there will only ever be one dual-source draw buffer + * In case this assumption is ever proven to be false, make sure to assert here + * since we don't handle this case. + * In practice, this issue will never arise since no hardware will support it. + */ + assert(state->Const.MaxDualSourceDrawBuffers <= 1); + add_index_output(FRAG_RESULT_DATA0, 1, vec4_t, "gl_SecondaryFragColorEXT"); + add_index_output(FRAG_RESULT_DATA0, 1, + array(vec4_t, state->Const.MaxDualSourceDrawBuffers), + "gl_SecondaryFragDataEXT"); + } + + /* gl_FragDepth has always been in desktop GLSL, but did not appear in GLSL + * ES 1.00. + */ + if (state->is_version(110, 300)) + add_output(FRAG_RESULT_DEPTH, float_t, "gl_FragDepth"); + + if (state->ARB_shader_stencil_export_enable) { + ir_variable *const var = + add_output(FRAG_RESULT_STENCIL, int_t, "gl_FragStencilRefARB"); + if (state->ARB_shader_stencil_export_warn) + var->enable_extension_warning("GL_ARB_shader_stencil_export"); + } + + if (state->AMD_shader_stencil_export_enable) { + ir_variable *const var = + add_output(FRAG_RESULT_STENCIL, int_t, "gl_FragStencilRefAMD"); + if (state->AMD_shader_stencil_export_warn) + var->enable_extension_warning("GL_AMD_shader_stencil_export"); + } + + if (state->is_version(400, 0) || state->ARB_sample_shading_enable) { + add_system_value(SYSTEM_VALUE_SAMPLE_ID, int_t, "gl_SampleID"); + add_system_value(SYSTEM_VALUE_SAMPLE_POS, vec2_t, "gl_SamplePosition"); + /* From the ARB_sample_shading specification: + * "The number of elements in the array is ceil(<s>/32), where + * <s> is the maximum number of color samples supported by the + * implementation." 
+ * Since no drivers expose more than 32x MSAA, we can simply set + * the array size to 1 rather than computing it. + */ + add_output(FRAG_RESULT_SAMPLE_MASK, array(int_t, 1), "gl_SampleMask"); + } + + if (state->is_version(400, 0) || state->ARB_gpu_shader5_enable) { + add_system_value(SYSTEM_VALUE_SAMPLE_MASK_IN, array(int_t, 1), "gl_SampleMaskIn"); + } + + if (state->is_version(430, 0) || state->ARB_fragment_layer_viewport_enable) { + var = add_input(VARYING_SLOT_LAYER, int_t, "gl_Layer"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; + var = add_input(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; + } + + if (state->is_version(450, 310)/* || state->ARB_ES3_1_compatibility_enable*/) + add_system_value(SYSTEM_VALUE_HELPER_INVOCATION, bool_t, "gl_HelperInvocation"); +} + + +/** + * Generate variables which only exist in compute shaders. + */ +void +builtin_variable_generator::generate_cs_special_vars() +{ + add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_ID, uvec3_t, + "gl_LocalInvocationID"); + add_system_value(SYSTEM_VALUE_WORK_GROUP_ID, uvec3_t, "gl_WorkGroupID"); + add_system_value(SYSTEM_VALUE_NUM_WORK_GROUPS, uvec3_t, "gl_NumWorkGroups"); + add_variable("gl_GlobalInvocationID", uvec3_t, ir_var_auto, 0); + add_variable("gl_LocalInvocationIndex", uint_t, ir_var_auto, 0); +} + + +/** + * Add a single "varying" variable. The variable's type and direction (input + * or output) are adjusted as appropriate for the type of shader being + * compiled. 
+ */ +void +builtin_variable_generator::add_varying(int slot, const glsl_type *type, + const char *name) +{ + switch (state->stage) { + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: + case MESA_SHADER_GEOMETRY: + this->per_vertex_in.add_field(slot, type, name); + /* FALLTHROUGH */ + case MESA_SHADER_VERTEX: + this->per_vertex_out.add_field(slot, type, name); + break; + case MESA_SHADER_FRAGMENT: + add_input(slot, type, name); + break; + case MESA_SHADER_COMPUTE: + /* Compute shaders don't have varyings. */ + break; + } +} + + +/** + * Generate variables that are used to communicate data from one shader stage + * to the next ("varyings"). + */ +void +builtin_variable_generator::generate_varyings() +{ + /* gl_Position and gl_PointSize are not visible from fragment shaders. */ + if (state->stage != MESA_SHADER_FRAGMENT) { + add_varying(VARYING_SLOT_POS, vec4_t, "gl_Position"); + add_varying(VARYING_SLOT_PSIZ, float_t, "gl_PointSize"); + } + + if (state->is_version(130, 0)) { + add_varying(VARYING_SLOT_CLIP_DIST0, array(float_t, 0), + "gl_ClipDistance"); + } + + if (compatibility) { + add_varying(VARYING_SLOT_TEX0, array(vec4_t, 0), "gl_TexCoord"); + add_varying(VARYING_SLOT_FOGC, float_t, "gl_FogFragCoord"); + if (state->stage == MESA_SHADER_FRAGMENT) { + add_varying(VARYING_SLOT_COL0, vec4_t, "gl_Color"); + add_varying(VARYING_SLOT_COL1, vec4_t, "gl_SecondaryColor"); + } else { + add_varying(VARYING_SLOT_CLIP_VERTEX, vec4_t, "gl_ClipVertex"); + add_varying(VARYING_SLOT_COL0, vec4_t, "gl_FrontColor"); + add_varying(VARYING_SLOT_BFC0, vec4_t, "gl_BackColor"); + add_varying(VARYING_SLOT_COL1, vec4_t, "gl_FrontSecondaryColor"); + add_varying(VARYING_SLOT_BFC1, vec4_t, "gl_BackSecondaryColor"); + } + } + + /* Section 7.1 (Built-In Language Variables) of the GLSL 4.00 spec + * says: + * + * "In the tessellation control language, built-in variables are + * intrinsically declared as: + * + * in gl_PerVertex { + * vec4 gl_Position; + * float gl_PointSize; + * 
float gl_ClipDistance[]; + * } gl_in[gl_MaxPatchVertices];" + */ + if (state->stage == MESA_SHADER_TESS_CTRL || + state->stage == MESA_SHADER_TESS_EVAL) { + const glsl_type *per_vertex_in_type = + this->per_vertex_in.construct_interface_instance(); + add_variable("gl_in", array(per_vertex_in_type, state->Const.MaxPatchVertices), + ir_var_shader_in, -1); + } + if (state->stage == MESA_SHADER_GEOMETRY) { + const glsl_type *per_vertex_in_type = + this->per_vertex_in.construct_interface_instance(); + add_variable("gl_in", array(per_vertex_in_type, 0), + ir_var_shader_in, -1); + } + if (state->stage == MESA_SHADER_TESS_CTRL) { + const glsl_type *per_vertex_out_type = + this->per_vertex_out.construct_interface_instance(); + add_variable("gl_out", array(per_vertex_out_type, 0), + ir_var_shader_out, -1); + } + if (state->stage == MESA_SHADER_VERTEX || + state->stage == MESA_SHADER_TESS_EVAL || + state->stage == MESA_SHADER_GEOMETRY) { + const glsl_type *per_vertex_out_type = + this->per_vertex_out.construct_interface_instance(); + const glsl_struct_field *fields = per_vertex_out_type->fields.structure; + for (unsigned i = 0; i < per_vertex_out_type->length; i++) { + ir_variable *var = + add_variable(fields[i].name, fields[i].type, ir_var_shader_out, + fields[i].location); + var->data.interpolation = fields[i].interpolation; + var->data.centroid = fields[i].centroid; + var->data.sample = fields[i].sample; + var->data.patch = fields[i].patch; + var->data.precision = fields[i].precision; + var->init_interface_type(per_vertex_out_type); + } + } +} + + +}; /* Anonymous namespace */ + + +void +_mesa_glsl_initialize_variables(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + builtin_variable_generator gen(instructions, state); + + gen.generate_constants(); + gen.generate_uniforms(); + + gen.generate_varyings(); + + switch (state->stage) { + case MESA_SHADER_VERTEX: + gen.generate_vs_special_vars(); + break; + case MESA_SHADER_TESS_CTRL: + 
gen.generate_tcs_special_vars(); + break; + case MESA_SHADER_TESS_EVAL: + gen.generate_tes_special_vars(); + break; + case MESA_SHADER_GEOMETRY: + gen.generate_gs_special_vars(); + break; + case MESA_SHADER_FRAGMENT: + gen.generate_fs_special_vars(); + break; + case MESA_SHADER_COMPUTE: + gen.generate_cs_special_vars(); + break; + } +} + + +/** + * Initialize compute shader variables with values that are derived from other + * compute shader variable. + */ +static void +initialize_cs_derived_variables(gl_shader *shader, + ir_function_signature *const main_sig) +{ + assert(shader->Stage == MESA_SHADER_COMPUTE); + + ir_variable *gl_GlobalInvocationID = + shader->symbols->get_variable("gl_GlobalInvocationID"); + assert(gl_GlobalInvocationID); + ir_variable *gl_WorkGroupID = + shader->symbols->get_variable("gl_WorkGroupID"); + assert(gl_WorkGroupID); + ir_variable *gl_WorkGroupSize = + shader->symbols->get_variable("gl_WorkGroupSize"); + if (gl_WorkGroupSize == NULL) { + void *const mem_ctx = ralloc_parent(shader->ir); + gl_WorkGroupSize = new(mem_ctx) ir_variable(glsl_type::uvec3_type, + "gl_WorkGroupSize", + ir_var_auto); + gl_WorkGroupSize->data.how_declared = ir_var_declared_implicitly; + gl_WorkGroupSize->data.read_only = true; + shader->ir->push_head(gl_WorkGroupSize); + } + ir_variable *gl_LocalInvocationID = + shader->symbols->get_variable("gl_LocalInvocationID"); + assert(gl_LocalInvocationID); + + /* gl_GlobalInvocationID = + * gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID + */ + ir_instruction *inst = + assign(gl_GlobalInvocationID, + add(mul(gl_WorkGroupID, gl_WorkGroupSize), + gl_LocalInvocationID)); + main_sig->body.push_head(inst); + + /* gl_LocalInvocationIndex = + * gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + + * gl_LocalInvocationID.y * gl_WorkGroupSize.x + + * gl_LocalInvocationID.x; + */ + ir_expression *index_z = + mul(mul(swizzle_z(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize)), + 
swizzle_y(gl_WorkGroupSize)); + ir_expression *index_y = + mul(swizzle_y(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize)); + ir_expression *index_y_plus_z = add(index_y, index_z); + operand index_x(swizzle_x(gl_LocalInvocationID)); + ir_expression *index_x_plus_y_plus_z = add(index_y_plus_z, index_x); + ir_variable *gl_LocalInvocationIndex = + shader->symbols->get_variable("gl_LocalInvocationIndex"); + assert(gl_LocalInvocationIndex); + inst = assign(gl_LocalInvocationIndex, index_x_plus_y_plus_z); + main_sig->body.push_head(inst); +} + + +/** + * Initialize builtin variables with values based on other builtin variables. + * These are initialized in the main function. + */ +void +_mesa_glsl_initialize_derived_variables(gl_shader *shader) +{ + /* We only need to set CS variables currently. */ + if (shader->Stage != MESA_SHADER_COMPUTE) + return; + + ir_function_signature *const main_sig = + _mesa_get_main_function_signature(shader); + if (main_sig == NULL) + return; + + initialize_cs_derived_variables(shader, main_sig); +} diff --git a/src/compiler/glsl/glcpp/.gitignore b/src/compiler/glsl/glcpp/.gitignore new file mode 100644 index 0000000..24a7119 --- /dev/null +++ b/src/compiler/glsl/glcpp/.gitignore @@ -0,0 +1,6 @@ +glcpp +glcpp-lex.c +glcpp-parse.output +glcpp-parse.c +glcpp-parse.h +tests/*.out diff --git a/src/compiler/glsl/glcpp/README b/src/compiler/glsl/glcpp/README new file mode 100644 index 0000000..0637935 --- /dev/null +++ b/src/compiler/glsl/glcpp/README @@ -0,0 +1,30 @@ +glcpp -- GLSL "C" preprocessor + +This is a simple preprocessor designed to provide the preprocessing +needs of the GLSL language. The requirements for this preprocessor are +specified in the GLSL 1.30 specification available from: + +http://www.opengl.org/registry/doc/GLSLangSpec.Full.1.30.10.pdf + +This specification is not precise on some semantics, (for example, +#define and #if), defining these merely "as is standard for C++ +preprocessors".
To fill in these details, I've been using a draft of +the C99 standard as available from: + +http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1256.pdf + +Any downstream compiler accepting output from glcpp should be prepared +to encounter and deal with the following preprocessor directives: + + #line + #pragma + #extension + +All other directives will be handled according to the GLSL specification +and will not appear in the output. + +Known limitations +----------------- +A file that ends with a function-like macro name as the last +non-whitespace token will result in a parse error, (where it should be +passed through as is).
\ No newline at end of file diff --git a/src/compiler/glsl/glcpp/glcpp-lex.l b/src/compiler/glsl/glcpp/glcpp-lex.l new file mode 100644 index 0000000..fa9aa50 --- /dev/null +++ b/src/compiler/glsl/glcpp/glcpp-lex.l @@ -0,0 +1,577 @@ +%{ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <stdio.h> +#include <string.h> +#include <ctype.h> + +#include "glcpp.h" +#include "glcpp-parse.h" + +/* Flex annoyingly generates some functions without making them + * static. Let's declare them here. 
*/ +int glcpp_get_column (yyscan_t yyscanner); +void glcpp_set_column (int column_no , yyscan_t yyscanner); + +#ifdef _MSC_VER +#define YY_NO_UNISTD_H +#endif + +#define YY_NO_INPUT + +#define YY_USER_ACTION \ + do { \ + if (parser->has_new_line_number) \ + yylineno = parser->new_line_number; \ + if (parser->has_new_source_number) \ + yylloc->source = parser->new_source_number; \ + yylloc->first_column = yycolumn + 1; \ + yylloc->first_line = yylloc->last_line = yylineno; \ + yycolumn += yyleng; \ + yylloc->last_column = yycolumn + 1; \ + parser->has_new_line_number = 0; \ + parser->has_new_source_number = 0; \ + } while(0); + +#define YY_USER_INIT \ + do { \ + yylineno = 1; \ + yycolumn = 0; \ + yylloc->source = 0; \ + } while(0) + +/* It's ugly to have macros that have return statements inside of + * them, but flex-based lexer generation is all built around the + * return statement. + * + * To mitigate the ugliness, we defer as much of the logic as possible + * to an actual function, not a macro (see + * glcpp_lex_update_state_per_token) and we make the word RETURN + * prominent in all of the macros which may return. + * + * The most-commonly-used macro is RETURN_TOKEN which will perform all + * necessary state updates based on the provided token, then + * conditionally return the token. It will not return a token if the + * parser is currently skipping tokens, (such as within #if + * 0...#else). + * + * The RETURN_TOKEN_NEVER_SKIP macro is a lower-level variant that + * makes the token returning unconditional. This is needed for things + * like #if and the tokens of its condition, (since these must be + * evaluated by the parser even when otherwise skipping). + * + * Finally, RETURN_STRING_TOKEN is a simple convenience wrapper on top + * of RETURN_TOKEN that performs a string copy of yytext before the + * return.
+ */ +#define RETURN_TOKEN_NEVER_SKIP(token) \ + do { \ + if (glcpp_lex_update_state_per_token (parser, token)) \ + return token; \ + } while (0) + +#define RETURN_TOKEN(token) \ + do { \ + if (! parser->skipping) { \ + RETURN_TOKEN_NEVER_SKIP(token); \ + } \ + } while(0) + +#define RETURN_STRING_TOKEN(token) \ + do { \ + if (! parser->skipping) { \ + yylval->str = ralloc_strdup (yyextra, yytext); \ + RETURN_TOKEN_NEVER_SKIP (token); \ + } \ + } while(0) + + +/* Update all state necessary for each token being returned. + * + * Here we'll be tracking newlines and spaces so that the lexer can + * alter its behavior as necessary, (for example, '#' has special + * significance if it is the first non-whitespace, non-comment token + * in a line, but does not otherwise). + * + * NOTE: If this function returns FALSE, then no token should be + * returned at all. This is used to suppress duplicate SPACE tokens. + */ +static int +glcpp_lex_update_state_per_token (glcpp_parser_t *parser, int token) +{ + /* After the first non-space token in a line, we won't + * allow any '#' to introduce a directive. */ + if (token == NEWLINE) { + parser->first_non_space_token_this_line = 1; + } else if (token != SPACE) { + parser->first_non_space_token_this_line = 0; + } + + /* Track newlines just to know whether a newline needs + * to be inserted if end-of-file comes early. */ + if (token == NEWLINE) { + parser->last_token_was_newline = 1; + } else { + parser->last_token_was_newline = 0; + } + + /* Track spaces to avoid emitting multiple SPACE + * tokens in a row. */ + if (token == SPACE) { + if (!
parser->last_token_was_space) { + parser->last_token_was_space = 1; + return 1; + } else { + parser->last_token_was_space = 1; + return 0; + } + } else { + parser->last_token_was_space = 0; + return 1; + } +} + + +%} + +%option bison-bridge bison-locations reentrant noyywrap +%option extra-type="glcpp_parser_t *" +%option prefix="glcpp_" +%option stack +%option never-interactive +%option warn nodefault + + /* Note: When adding any start conditions to this list, you must also + * update the "Internal compiler error" catch-all rule near the end of + * this file. */ + +%x COMMENT DEFINE DONE HASH NEWLINE_CATCHUP UNREACHABLE + +SPACE [[:space:]] +NONSPACE [^[:space:]] +HSPACE [ \t] +HASH # +NEWLINE (\r\n|\n\r|\r|\n) +IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* +PP_NUMBER [.]?[0-9]([._a-zA-Z0-9]|[eEpP][-+])* +PUNCTUATION [][(){}.&*~!/%<>^|;,=+-] + +/* The OTHER class is simply a catch-all for things that the CPP +parser just doesn't care about. Since flex regular expressions that +match longer strings take priority over those matching shorter +strings, we have to be careful to avoid OTHER matching and hiding +something that CPP does care about. So we simply exclude all +characters that appear in any other expressions. */ + +OTHER [^][_#[:space:]#a-zA-Z0-9(){}.&*~!/%<>^|;,=+-] + +DIGITS [0-9][0-9]* +DECIMAL_INTEGER [1-9][0-9]*[uU]? +OCTAL_INTEGER 0[0-7]*[uU]? +HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? + +%% + + glcpp_parser_t *parser = yyextra; + + /* When we lex a multi-line comment, we replace it (as + * specified) with a single space. But if the comment spanned + * multiple lines, then subsequent parsing stages will not + * count correct line numbers. To avoid this problem we keep + * track of all newlines that were commented out by a + * multi-line comment, and we emit a NEWLINE token for each at + * the next legal opportunity, (which is when the lexer would + * be emitting a NEWLINE token anyway). 
+ */ + if (YY_START == NEWLINE_CATCHUP) { + if (parser->commented_newlines) + parser->commented_newlines--; + if (parser->commented_newlines == 0) + BEGIN INITIAL; + RETURN_TOKEN_NEVER_SKIP (NEWLINE); + } + + /* Set up the parser->skipping bit here before doing any lexing. + * + * This bit controls whether tokens are skipped, (as implemented by + * RETURN_TOKEN), such as between "#if 0" and "#endif". + * + * The parser maintains a skip_stack indicating whether we should be + * skipping, (and nested levels of #if/#ifdef/#ifndef/#endif) will + * push and pop items from the stack. + * + * Here are the rules for determining whether we are skipping: + * + * 1. If the skip stack is NULL, we are outside of all #if blocks + * and we are not skipping. + * + * 2. If the skip stack is non-NULL, the type of the top node in + * the stack determines whether to skip. A type of + * SKIP_NO_SKIP is used for blocks where we are emitting + * tokens, (such as between #if 1 and #endif, or after the + * #else of an #if 0, etc.). + * + * 3. The lexing_directive bit overrides the skip stack. This bit + * is set when we are actively lexing the expression for a + * pre-processor condition, (such as #if, #elif, or #else). In + * this case, even if otherwise skipping, we need to emit the + * tokens for this condition so that the parser can evaluate + * the expression. (For #else, there's no expression, but we + * emit tokens so the parser can generate a nice error message + * if there are any tokens here). + */ + if (parser->skip_stack && + parser->skip_stack->type != SKIP_NO_SKIP && + !
parser->lexing_directive) + { + parser->skipping = 1; + } else { + parser->skipping = 0; + } + + /* Single-line comments */ +<INITIAL,DEFINE,HASH>"//"[^\r\n]* { +} + + /* Multi-line comments */ +<INITIAL,DEFINE,HASH>"/*" { yy_push_state(COMMENT, yyscanner); } +<COMMENT>[^*\r\n]* +<COMMENT>[^*\r\n]*{NEWLINE} { yylineno++; yycolumn = 0; parser->commented_newlines++; } +<COMMENT>"*"+[^*/\r\n]* +<COMMENT>"*"+[^*/\r\n]*{NEWLINE} { yylineno++; yycolumn = 0; parser->commented_newlines++; } +<COMMENT>"*"+"/" { + yy_pop_state(yyscanner); + /* In the <HASH> start condition, we don't want any SPACE token. */ + if (yyextra->space_tokens && YY_START != HASH) + RETURN_TOKEN (SPACE); +} + +{HASH} { + + /* If the '#' is the first non-whitespace, non-comment token on this + * line, then it introduces a directive, switch to the <HASH> start + * condition. + * + * Otherwise, this is just punctuation, so return the HASH_TOKEN + * token. */ + if (parser->first_non_space_token_this_line) { + BEGIN HASH; + } + + RETURN_TOKEN_NEVER_SKIP (HASH_TOKEN); +} + +<HASH>version{HSPACE}+ { + BEGIN INITIAL; + yyextra->space_tokens = 0; + RETURN_STRING_TOKEN (VERSION_TOKEN); +} + + /* Swallow empty #pragma directives, (to avoid confusing the + * downstream compiler). + * + * Note: We use a simple regular expression for the lookahead + * here. Specifically, we cannot use the complete {NEWLINE} expression + * since it uses alternation and we've found that there's a flex bug + * where using alternation in the lookahead portion of a pattern + * triggers a buffer overrun. */ +<HASH>pragma{HSPACE}*/[\r\n] { + BEGIN INITIAL; +} + + /* glcpp doesn't handle #extension, #version, or #pragma directives. + * Simply pass them through to the main compiler's lexer/parser. 
*/ +<HASH>(extension|pragma)[^\r\n]* { + BEGIN INITIAL; + RETURN_STRING_TOKEN (PRAGMA); +} + +<HASH>line{HSPACE}+ { + BEGIN INITIAL; + RETURN_TOKEN (LINE); +} + +<HASH>{NEWLINE} { + BEGIN INITIAL; + RETURN_TOKEN_NEVER_SKIP (NEWLINE); +} + + /* For the pre-processor directives, we return these tokens + * even when we are otherwise skipping. */ +<HASH>ifdef { + BEGIN INITIAL; + yyextra->lexing_directive = 1; + yyextra->space_tokens = 0; + RETURN_TOKEN_NEVER_SKIP (IFDEF); +} + +<HASH>ifndef { + BEGIN INITIAL; + yyextra->lexing_directive = 1; + yyextra->space_tokens = 0; + RETURN_TOKEN_NEVER_SKIP (IFNDEF); +} + +<HASH>if/[^_a-zA-Z0-9] { + BEGIN INITIAL; + yyextra->lexing_directive = 1; + yyextra->space_tokens = 0; + RETURN_TOKEN_NEVER_SKIP (IF); +} + +<HASH>elif/[^_a-zA-Z0-9] { + BEGIN INITIAL; + yyextra->lexing_directive = 1; + yyextra->space_tokens = 0; + RETURN_TOKEN_NEVER_SKIP (ELIF); +} + +<HASH>else { + BEGIN INITIAL; + yyextra->space_tokens = 0; + RETURN_TOKEN_NEVER_SKIP (ELSE); +} + +<HASH>endif { + BEGIN INITIAL; + yyextra->space_tokens = 0; + RETURN_TOKEN_NEVER_SKIP (ENDIF); +} + +<HASH>error[^\r\n]* { + BEGIN INITIAL; + RETURN_STRING_TOKEN (ERROR_TOKEN); +} + + /* After we see a "#define" we enter the <DEFINE> start state + * for the lexer. Within <DEFINE> we are looking for the first + * identifier and specifically checking whether the identifier + * is followed by a '(' or not, (to lex either a + * FUNC_IDENTIFIER or an OBJ_IDENTIFIER token). + * + * While in the <DEFINE> state we also need to explicitly + * handle a few other things that may appear before the + * identifier: + * + * * Comments, (handled above with the main support for + * comments). + * + * * Whitespace (simply ignored) + * + * * Anything else, (not an identifier, not a comment, + * and not whitespace). This will generate an error. + */ +<HASH>define{HSPACE}* { + if (!
parser->skipping) { + BEGIN DEFINE; + yyextra->space_tokens = 0; + RETURN_TOKEN (DEFINE_TOKEN); + } +} + +<HASH>undef { + BEGIN INITIAL; + yyextra->space_tokens = 0; + RETURN_TOKEN (UNDEF); +} + +<HASH>{HSPACE}+ { + /* Nothing to do here. Importantly, don't leave the <HASH> + * start condition, since it's legal to have space between the + * '#' and the directive.. */ +} + + /* This will catch any non-directive garbage after a HASH */ +<HASH>{NONSPACE} { + BEGIN INITIAL; + RETURN_TOKEN (GARBAGE); +} + + /* An identifier immediately followed by '(' */ +<DEFINE>{IDENTIFIER}/"(" { + BEGIN INITIAL; + RETURN_STRING_TOKEN (FUNC_IDENTIFIER); +} + + /* An identifier not immediately followed by '(' */ +<DEFINE>{IDENTIFIER} { + BEGIN INITIAL; + RETURN_STRING_TOKEN (OBJ_IDENTIFIER); +} + + /* Whitespace */ +<DEFINE>{HSPACE}+ { + /* Just ignore it. Nothing to do here. */ +} + + /* '/' not followed by '*', so not a comment. This is an error. */ +<DEFINE>[/][^*]{NONSPACE}* { + BEGIN INITIAL; + glcpp_error(yylloc, yyextra, "#define followed by a non-identifier: %s", yytext); + RETURN_STRING_TOKEN (INTEGER_STRING); +} + + /* A character that can't start an identifier, comment, or + * space. This is an error. 
*/ +<DEFINE>[^_a-zA-Z/[:space:]]{NONSPACE}* { + BEGIN INITIAL; + glcpp_error(yylloc, yyextra, "#define followed by a non-identifier: %s", yytext); + RETURN_STRING_TOKEN (INTEGER_STRING); +} + +{DECIMAL_INTEGER} { + RETURN_STRING_TOKEN (INTEGER_STRING); +} + +{OCTAL_INTEGER} { + RETURN_STRING_TOKEN (INTEGER_STRING); +} + +{HEXADECIMAL_INTEGER} { + RETURN_STRING_TOKEN (INTEGER_STRING); +} + +"<<" { + RETURN_TOKEN (LEFT_SHIFT); +} + +">>" { + RETURN_TOKEN (RIGHT_SHIFT); +} + +"<=" { + RETURN_TOKEN (LESS_OR_EQUAL); +} + +">=" { + RETURN_TOKEN (GREATER_OR_EQUAL); +} + +"==" { + RETURN_TOKEN (EQUAL); +} + +"!=" { + RETURN_TOKEN (NOT_EQUAL); +} + +"&&" { + RETURN_TOKEN (AND); +} + +"||" { + RETURN_TOKEN (OR); +} + +"++" { + RETURN_TOKEN (PLUS_PLUS); +} + +"--" { + RETURN_TOKEN (MINUS_MINUS); +} + +"##" { + if (! parser->skipping) { + if (parser->is_gles) + glcpp_error(yylloc, yyextra, "Token pasting (##) is illegal in GLES"); + RETURN_TOKEN (PASTE); + } +} + +"defined" { + RETURN_TOKEN (DEFINED); +} + +{IDENTIFIER} { + RETURN_STRING_TOKEN (IDENTIFIER); +} + +{PP_NUMBER} { + RETURN_STRING_TOKEN (OTHER); +} + +{PUNCTUATION} { + RETURN_TOKEN (yytext[0]); +} + +{OTHER}+ { + RETURN_STRING_TOKEN (OTHER); +} + +{HSPACE} { + if (yyextra->space_tokens) { + RETURN_TOKEN (SPACE); + } +} + + /* We preserve all newlines, even between #if 0..#endif, so no + skipping.. */ +<*>{NEWLINE} { + if (parser->commented_newlines) { + BEGIN NEWLINE_CATCHUP; + } else { + BEGIN INITIAL; + } + yyextra->space_tokens = 1; + yyextra->lexing_directive = 0; + yylineno++; + yycolumn = 0; + RETURN_TOKEN_NEVER_SKIP (NEWLINE); +} + +<INITIAL,COMMENT,DEFINE,HASH><<EOF>> { + if (YY_START == COMMENT) + glcpp_error(yylloc, yyextra, "Unterminated comment"); + BEGIN DONE; /* Don't keep matching this rule forever. */ + yyextra->lexing_directive = 0; + if (! 
parser->last_token_was_newline) + RETURN_TOKEN (NEWLINE); +} + + /* This is a catch-all to avoid the annoying default flex action which + * matches any character and prints it. If any input ever matches this + * rule, then we have made a mistake above and need to fix one or more + * of the preceding patterns to match that input. */ + +<*>. { + glcpp_error(yylloc, yyextra, "Internal compiler error: Unexpected character: %s", yytext); + + /* We don't actually use the UNREACHABLE start condition. We + only have this block here so that we can pretend to call some + generated functions, (to avoid "defined but not used" + warnings. */ + if (YY_START == UNREACHABLE) { + unput('.'); + yy_top_state(yyextra); + } +} + +%% + +void +glcpp_lex_set_source_string(glcpp_parser_t *parser, const char *shader) +{ + yy_scan_string(shader, parser->scanner); +} diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y new file mode 100644 index 0000000..ef1a657 --- /dev/null +++ b/src/compiler/glsl/glcpp/glcpp-parse.y @@ -0,0 +1,2557 @@ +%{ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <inttypes.h> + +#include "glcpp.h" +#include "main/core.h" /* for struct gl_extensions */ +#include "main/mtypes.h" /* for gl_api enum */ + +static void +yyerror (YYLTYPE *locp, glcpp_parser_t *parser, const char *error); + +static void +_define_object_macro (glcpp_parser_t *parser, + YYLTYPE *loc, + const char *macro, + token_list_t *replacements); + +static void +_define_function_macro (glcpp_parser_t *parser, + YYLTYPE *loc, + const char *macro, + string_list_t *parameters, + token_list_t *replacements); + +static string_list_t * +_string_list_create (void *ctx); + +static void +_string_list_append_item (string_list_t *list, const char *str); + +static int +_string_list_contains (string_list_t *list, const char *member, int *index); + +static const char * +_string_list_has_duplicate (string_list_t *list); + +static int +_string_list_length (string_list_t *list); + +static int +_string_list_equal (string_list_t *a, string_list_t *b); + +static argument_list_t * +_argument_list_create (void *ctx); + +static void +_argument_list_append (argument_list_t *list, token_list_t *argument); + +static int +_argument_list_length (argument_list_t *list); + +static token_list_t * +_argument_list_member_at (argument_list_t *list, int index); + +/* Note: This function ralloc_steal()s the str pointer. 
*/ +static token_t * +_token_create_str (void *ctx, int type, char *str); + +static token_t * +_token_create_ival (void *ctx, int type, int ival); + +static token_list_t * +_token_list_create (void *ctx); + +static void +_token_list_append (token_list_t *list, token_t *token); + +static void +_token_list_append_list (token_list_t *list, token_list_t *tail); + +static int +_token_list_equal_ignoring_space (token_list_t *a, token_list_t *b); + +static void +_parser_active_list_push (glcpp_parser_t *parser, + const char *identifier, + token_node_t *marker); + +static void +_parser_active_list_pop (glcpp_parser_t *parser); + +static int +_parser_active_list_contains (glcpp_parser_t *parser, const char *identifier); + +typedef enum { + EXPANSION_MODE_IGNORE_DEFINED, + EXPANSION_MODE_EVALUATE_DEFINED +} expansion_mode_t; + +/* Expand list, and begin lexing from the result (after first + * prefixing a token of type 'head_token_type'). + */ +static void +_glcpp_parser_expand_and_lex_from (glcpp_parser_t *parser, + int head_token_type, + token_list_t *list, + expansion_mode_t mode); + +/* Perform macro expansion in-place on the given list. 
*/ +static void +_glcpp_parser_expand_token_list (glcpp_parser_t *parser, + token_list_t *list, + expansion_mode_t mode); + +static void +_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, + token_list_t *list); + +static void +_glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, YYLTYPE *loc, + int condition); + +static void +_glcpp_parser_skip_stack_change_if (glcpp_parser_t *parser, YYLTYPE *loc, + const char *type, int condition); + +static void +_glcpp_parser_skip_stack_pop (glcpp_parser_t *parser, YYLTYPE *loc); + +static void +_glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t version, + const char *ident, bool explicitly_set); + +static int +glcpp_parser_lex (YYSTYPE *yylval, YYLTYPE *yylloc, glcpp_parser_t *parser); + +static void +glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list); + +static void +add_builtin_define(glcpp_parser_t *parser, const char *name, int value); + +%} + +%pure-parser +%error-verbose + +%locations +%initial-action { + @$.first_line = 1; + @$.first_column = 1; + @$.last_line = 1; + @$.last_column = 1; + @$.source = 0; +} + +%parse-param {glcpp_parser_t *parser} +%lex-param {glcpp_parser_t *parser} + +%expect 0 + + /* We use HASH_TOKEN, DEFINE_TOKEN and VERSION_TOKEN (as opposed to + * HASH, DEFINE, and VERSION) to avoid conflicts with other symbols, + * (such as the <HASH> and <DEFINE> start conditions in the lexer). 
*/
+%token DEFINED ELIF_EXPANDED HASH_TOKEN DEFINE_TOKEN FUNC_IDENTIFIER OBJ_IDENTIFIER ELIF ELSE ENDIF ERROR_TOKEN IF IFDEF IFNDEF LINE PRAGMA UNDEF VERSION_TOKEN GARBAGE IDENTIFIER IF_EXPANDED INTEGER INTEGER_STRING LINE_EXPANDED NEWLINE OTHER PLACEHOLDER SPACE PLUS_PLUS MINUS_MINUS
+%token PASTE
+%type <ival> INTEGER operator SPACE integer_constant
+%type <expression_value> expression
+%type <str> IDENTIFIER FUNC_IDENTIFIER OBJ_IDENTIFIER INTEGER_STRING OTHER ERROR_TOKEN PRAGMA
+%type <string_list> identifier_list
+%type <token> preprocessing_token
+%type <token_list> pp_tokens replacement_list text_line
+/* Operator precedence for #if/#elif expressions, lowest precedence first. */
+%left OR
+%left AND
+%left '|'
+%left '^'
+%left '&'
+%left EQUAL NOT_EQUAL
+%left '<' '>' LESS_OR_EQUAL GREATER_OR_EQUAL
+%left LEFT_SHIFT RIGHT_SHIFT
+%left '+' '-'
+%left '*' '/' '%'
+%right UNARY
+
+%debug
+
+%%
+
+/* The whole input is a (possibly empty) sequence of lines. */
+input:
+	/* empty */
+|	input line
+;
+
+/* A line is a directive, ordinary text (printed after expansion), or a
+ * line that was re-lexed from an already expanded token list.
+ */
+line:
+	control_line
+|	SPACE control_line
+|	text_line {
+		_glcpp_parser_print_expanded_token_list (parser, $1);
+		ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, "\n");
+		ralloc_free ($1);
+	}
+|	expanded_line
+;
+
+/* Lines whose first token is one of the *_EXPANDED head tokens that
+ * _glcpp_parser_expand_and_lex_from puts in front of an expanded list.
+ */
+expanded_line:
+	IF_EXPANDED expression NEWLINE {
+		if (parser->is_gles && $2.undefined_macro)
+			glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $2.undefined_macro);
+		_glcpp_parser_skip_stack_push_if (parser, & @1, $2.value);
+	}
+|	ELIF_EXPANDED expression NEWLINE {
+		if (parser->is_gles && $2.undefined_macro)
+			glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $2.undefined_macro);
+		_glcpp_parser_skip_stack_change_if (parser, & @1, "elif", $2.value);
+	}
+|	LINE_EXPANDED integer_constant NEWLINE {
+		parser->has_new_line_number = 1;
+		parser->new_line_number = $2;
+		ralloc_asprintf_rewrite_tail (&parser->output,
+					      &parser->output_length,
+					      "#line %" PRIiMAX "\n",
+					      $2);
+	}
+|	LINE_EXPANDED integer_constant integer_constant NEWLINE {
+		parser->has_new_line_number = 1;
+		parser->new_line_number = $2;
+		parser->has_new_source_number = 1;
+		parser->new_source_number = $3;
+		ralloc_asprintf_rewrite_tail (&parser->output,
+					      &parser->output_length,
+					      "#line %" PRIiMAX " %" PRIiMAX "\n",
+					      $2, $3);
+	}
+;
+
+/* Body of a #define: object-like, or function-like with an optional
+ * parameter list.
+ */
+define:
+	OBJ_IDENTIFIER replacement_list NEWLINE {
+		_define_object_macro (parser, & @1, $1, $2);
+	}
+|	FUNC_IDENTIFIER '(' ')' replacement_list NEWLINE {
+		_define_function_macro (parser, & @1, $1, NULL, $4);
+	}
+|	FUNC_IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE {
+		_define_function_macro (parser, & @1, $1, $3, $5);
+	}
+;
+
+/* A directive line.  Successful directives emit a newline to the output;
+ * #line is only expanded and re-lexed when we are not skipping.
+ */
+control_line:
+	control_line_success {
+		ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, "\n");
+	}
+|	control_line_error
+|	HASH_TOKEN LINE {
+		glcpp_parser_resolve_implicit_version(parser);
+	} pp_tokens NEWLINE {
+
+		if (parser->skip_stack == NULL ||
+		    parser->skip_stack->type == SKIP_NO_SKIP)
+		{
+			_glcpp_parser_expand_and_lex_from (parser,
+							   LINE_EXPANDED, $4,
+							   EXPANSION_MODE_IGNORE_DEFINED);
+		}
+	}
+;
+
+control_line_success:
+	HASH_TOKEN DEFINE_TOKEN {
+		glcpp_parser_resolve_implicit_version(parser);
+	} define
+|	HASH_TOKEN UNDEF {
+		glcpp_parser_resolve_implicit_version(parser);
+	} IDENTIFIER NEWLINE {
+		macro_t *macro;
+		/* Report an error for built-in names; the removal below
+		 * is still attempted afterwards.
+		 */
+		if (strcmp("__LINE__", $4) == 0
+		    || strcmp("__FILE__", $4) == 0
+		    || strcmp("__VERSION__", $4) == 0
+		    || strncmp("GL_", $4, 3) == 0)
+			glcpp_error(& @1, parser, "Built-in (pre-defined)"
+				    " macro names cannot be undefined.");
+
+		macro = hash_table_find (parser->defines, $4);
+		if (macro) {
+			hash_table_remove (parser->defines, $4);
+			ralloc_free (macro);
+		}
+		ralloc_free ($4);
+	}
+|	HASH_TOKEN IF {
+		glcpp_parser_resolve_implicit_version(parser);
+	} pp_tokens NEWLINE {
+		/* Be careful to only evaluate the 'if' expression if
+		 * we are not skipping. When we are skipping, we
+		 * simply push a new 0-valued 'if' onto the skip
+		 * stack.
+		 *
+		 * This avoids generating diagnostics for invalid
+		 * expressions that are being skipped. */
+		if (parser->skip_stack == NULL ||
+		    parser->skip_stack->type == SKIP_NO_SKIP)
+		{
+			_glcpp_parser_expand_and_lex_from (parser,
+							   IF_EXPANDED, $4,
+							   EXPANSION_MODE_EVALUATE_DEFINED);
+		}
+		else
+		{
+			_glcpp_parser_skip_stack_push_if (parser, & @1, 0);
+			parser->skip_stack->type = SKIP_TO_ENDIF;
+		}
+	}
+|	HASH_TOKEN IF NEWLINE {
+		/* #if without an expression is only an error if we
+		 *  are not skipping */
+		if (parser->skip_stack == NULL ||
+		    parser->skip_stack->type == SKIP_NO_SKIP)
+		{
+			glcpp_error(& @1, parser, "#if with no expression");
+		}
+		_glcpp_parser_skip_stack_push_if (parser, & @1, 0);
+	}
+|	HASH_TOKEN IFDEF {
+		glcpp_parser_resolve_implicit_version(parser);
+	} IDENTIFIER junk NEWLINE {
+		macro_t *macro = hash_table_find (parser->defines, $4);
+		ralloc_free ($4);
+		_glcpp_parser_skip_stack_push_if (parser, & @1, macro != NULL);
+	}
+|	HASH_TOKEN IFNDEF {
+		glcpp_parser_resolve_implicit_version(parser);
+	} IDENTIFIER junk NEWLINE {
+		macro_t *macro = hash_table_find (parser->defines, $4);
+		ralloc_free ($4);
+		/* Use the location of the '#' token, as the #ifdef rule
+		 * does, rather than that of the empty mid-rule action.
+		 */
+		_glcpp_parser_skip_stack_push_if (parser, & @1, macro == NULL);
+	}
+|	HASH_TOKEN ELIF pp_tokens NEWLINE {
+		/* Be careful to only evaluate the 'elif' expression
+		 * if we are not skipping. When we are skipping, we
+		 * simply change to a 0-valued 'elif' on the skip
+		 * stack.
+		 *
+		 * This avoids generating diagnostics for invalid
+		 * expressions that are being skipped. */
+		if (parser->skip_stack &&
+		    parser->skip_stack->type == SKIP_TO_ELSE)
+		{
+			_glcpp_parser_expand_and_lex_from (parser,
+							   ELIF_EXPANDED, $3,
+							   EXPANSION_MODE_EVALUATE_DEFINED);
+		}
+		else if (parser->skip_stack &&
+		    parser->skip_stack->has_else)
+		{
+			glcpp_error(& @1, parser, "#elif after #else");
+		}
+		else
+		{
+			_glcpp_parser_skip_stack_change_if (parser, & @1,
+							    "elif", 0);
+		}
+	}
+|	HASH_TOKEN ELIF NEWLINE {
+		/* #elif without an expression is an error unless we
+		 * are skipping. */
+		if (parser->skip_stack &&
+		    parser->skip_stack->type == SKIP_TO_ELSE)
+		{
+			glcpp_error(& @1, parser, "#elif with no expression");
+		}
+		else if (parser->skip_stack &&
+		    parser->skip_stack->has_else)
+		{
+			glcpp_error(& @1, parser, "#elif after #else");
+		}
+		else
+		{
+			_glcpp_parser_skip_stack_change_if (parser, & @1,
+							    "elif", 0);
+			glcpp_warning(& @1, parser, "ignoring illegal #elif without expression");
+		}
+	}
+|	HASH_TOKEN ELSE { parser->lexing_directive = 1; } NEWLINE {
+		if (parser->skip_stack &&
+		    parser->skip_stack->has_else)
+		{
+			glcpp_error(& @1, parser, "multiple #else");
+		}
+		else
+		{
+			_glcpp_parser_skip_stack_change_if (parser, & @1, "else", 1);
+			if (parser->skip_stack)
+				parser->skip_stack->has_else = true;
+		}
+	}
+|	HASH_TOKEN ENDIF {
+		_glcpp_parser_skip_stack_pop (parser, & @1);
+	} NEWLINE
+|	HASH_TOKEN VERSION_TOKEN integer_constant NEWLINE {
+		if (parser->version_resolved) {
+			glcpp_error(& @1, parser, "#version must appear on the first line");
+		}
+		_glcpp_parser_handle_version_declaration(parser, $3, NULL, true);
+	}
+|	HASH_TOKEN VERSION_TOKEN integer_constant IDENTIFIER NEWLINE {
+		if (parser->version_resolved) {
+			glcpp_error(& @1, parser, "#version must appear on the first line");
+		}
+		_glcpp_parser_handle_version_declaration(parser, $3, $4, true);
+	}
+|	HASH_TOKEN NEWLINE {
+		glcpp_parser_resolve_implicit_version(parser);
+	}
+|	HASH_TOKEN PRAGMA NEWLINE {
+		ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, "#%s", $2);
+	}
+;
+
+/* Directives that only produce a diagnostic. */
+control_line_error:
+	HASH_TOKEN ERROR_TOKEN NEWLINE {
+		glcpp_error(& @1, parser, "#%s", $2);
+	}
+|	HASH_TOKEN DEFINE_TOKEN NEWLINE  {
+		glcpp_error (& @1, parser, "#define without macro name");
+	}
+|	HASH_TOKEN GARBAGE pp_tokens NEWLINE  {
+		glcpp_error (& @1, parser, "Illegal non-directive after #");
+	}
+;
+
+integer_constant:
+	INTEGER_STRING {
+		/* Let strtoll detect the base from the prefix: "0x" or
+		 * "0X" is hexadecimal, a leading "0" is octal, anything
+		 * else is decimal.  Checking only for a lowercase "0x"
+		 * would send "0X..." literals down the octal path and
+		 * evaluate them to 0.
+		 */
+		$$ = strtoll ($1, NULL, 0);
+	}
+|	INTEGER {
+		$$ = $1;
+	}
+;
+
+/* Constant expression of #if/#elif.  Besides the numeric value, each
+ * result carries 'undefined_macro': the name of an undefined identifier
+ * that took part in the evaluation (recorded only when parser->is_gles),
+ * or NULL.  The left operand's name takes priority over the right's.
+ */
+expression:
+	integer_constant {
+		$$.value = $1;
+		$$.undefined_macro = NULL;
+	}
+|	IDENTIFIER {
+		/* An identifier that survived expansion evaluates to 0. */
+		$$.value = 0;
+		if (parser->is_gles)
+			$$.undefined_macro = ralloc_strdup (parser, $1);
+		else
+			$$.undefined_macro = NULL;
+	}
+|	expression OR expression {
+		$$.value = $1.value || $3.value;
+
+		/* Short-circuit: Only flag undefined from right side
+		 * if left side evaluates to false.
+		 */
+		if ($1.undefined_macro)
+			$$.undefined_macro = $1.undefined_macro;
+		else if (! $1.value)
+			$$.undefined_macro = $3.undefined_macro;
+	}
+|	expression AND expression {
+		$$.value = $1.value && $3.value;
+
+		/* Short-circuit: Only flag undefined from right-side
+		 * if left side evaluates to true.
+		 */
+		if ($1.undefined_macro)
+			$$.undefined_macro = $1.undefined_macro;
+		else if ($1.value)
+			$$.undefined_macro = $3.undefined_macro;
+	}
+|	expression '|' expression {
+		$$.value = $1.value | $3.value;
+		if ($1.undefined_macro)
+			$$.undefined_macro = $1.undefined_macro;
+		else
+			$$.undefined_macro = $3.undefined_macro;
+	}
+|	expression '^' expression {
+		$$.value = $1.value ^ $3.value;
+		if ($1.undefined_macro)
+			$$.undefined_macro = $1.undefined_macro;
+		else
+			$$.undefined_macro = $3.undefined_macro;
+	}
+|	expression '&' expression {
+		$$.value = $1.value & $3.value;
+		if ($1.undefined_macro)
+			$$.undefined_macro = $1.undefined_macro;
+		else
+			$$.undefined_macro = $3.undefined_macro;
+	}
+|	expression NOT_EQUAL expression {
+		$$.value = $1.value != $3.value;
+		if ($1.undefined_macro)
+			$$.undefined_macro = $1.undefined_macro;
+		else
+			$$.undefined_macro = $3.undefined_macro;
+	}
+|	expression EQUAL expression {
+		$$.value = $1.value == $3.value;
+		if ($1.undefined_macro)
+			$$.undefined_macro = $1.undefined_macro;
+		else
+			$$.undefined_macro = $3.undefined_macro;
+	}
+|	expression GREATER_OR_EQUAL expression {
+		$$.value = $1.value >= $3.value;
+		if ($1.undefined_macro)
+			$$.undefined_macro = $1.undefined_macro;
+		else
+			$$.undefined_macro = $3.undefined_macro;
+	}
+|	expression LESS_OR_EQUAL expression {
+		$$.value = $1.value <= $3.value;
+		if ($1.undefined_macro)
+			$$.undefined_macro = $1.undefined_macro;
+		else
+			$$.undefined_macro = $3.undefined_macro;
+	}
+|	expression '>' expression {
+		$$.value = $1.value > $3.value;
+		if ($1.undefined_macro)
+			$$.undefined_macro = $1.undefined_macro;
+		else
+			$$.undefined_macro = $3.undefined_macro;
+	}
+|	expression '<' expression {
+		$$.value = $1.value < $3.value;
+		if ($1.undefined_macro)
+			$$.undefined_macro = $1.undefined_macro;
+		else
+			$$.undefined_macro = $3.undefined_macro;
+	}
+|	expression RIGHT_SHIFT expression {
+		$$.value = $1.value >> $3.value;
+		if ($1.undefined_macro)
+			$$.undefined_macro = $1.undefined_macro;
+		else
+			$$.undefined_macro = $3.undefined_macro;
+	}
+|	expression LEFT_SHIFT expression {
+		$$.value = $1.value << $3.value;
+		if ($1.undefined_macro)
+			$$.undefined_macro = $1.undefined_macro;
+		else
+			$$.undefined_macro = $3.undefined_macro;
+	}
+|	expression '-' expression {
+		$$.value = $1.value - $3.value;
+		if ($1.undefined_macro)
+			$$.undefined_macro = $1.undefined_macro;
+		else
+			$$.undefined_macro = $3.undefined_macro;
+	}
+|	expression '+' expression {
+		$$.value = $1.value + $3.value;
+		if ($1.undefined_macro)
+			$$.undefined_macro = $1.undefined_macro;
+		else
+			$$.undefined_macro = $3.undefined_macro;
+	}
+|	expression '%' expression {
+		/* On a zero divisor only the error is reported; this
+		 * action does not assign $$.value in that case.
+		 */
+		if ($3.value == 0) {
+			yyerror (& @1, parser,
+				 "zero modulus in preprocessor directive");
+		} else {
+			$$.value = $1.value % $3.value;
+		}
+		if ($1.undefined_macro)
+			$$.undefined_macro = $1.undefined_macro;
+		else
+			$$.undefined_macro = $3.undefined_macro;
+	}
+|	expression '/' expression {
+		/* Same error handling as for '%' above. */
+		if ($3.value == 0) {
+			yyerror (& @1, parser,
+				 "division by 0 in preprocessor directive");
+		} else {
+			$$.value = $1.value / $3.value;
+		}
+		if ($1.undefined_macro)
+			$$.undefined_macro = $1.undefined_macro;
+		else
+			$$.undefined_macro = $3.undefined_macro;
+	}
+|	expression '*' expression {
+		$$.value = $1.value * $3.value;
+		if ($1.undefined_macro)
+			$$.undefined_macro = $1.undefined_macro;
+		else
+			$$.undefined_macro = $3.undefined_macro;
+	}
+|	'!' expression %prec UNARY {
+		$$.value = ! $2.value;
+		$$.undefined_macro = $2.undefined_macro;
+	}
+|	'~' expression %prec UNARY {
+		$$.value = ~ $2.value;
+		$$.undefined_macro = $2.undefined_macro;
+	}
+|	'-' expression %prec UNARY {
+		$$.value = - $2.value;
+		$$.undefined_macro = $2.undefined_macro;
+	}
+|	'+' expression %prec UNARY {
+		$$.value = + $2.value;
+		$$.undefined_macro = $2.undefined_macro;
+	}
+|	'(' expression ')' {
+		$$ = $2;
+	}
+;
+
+/* Parameter names of a function-like macro.  Each name is copied into
+ * the list by _string_list_append_item and the lexer's string is then
+ * handed over to the list with ralloc_steal.
+ */
+identifier_list:
+	IDENTIFIER {
+		$$ = _string_list_create (parser);
+		_string_list_append_item ($$, $1);
+		ralloc_steal ($$, $1);
+	}
+|	identifier_list ',' IDENTIFIER {
+		$$ = $1;
+		_string_list_append_item ($$, $3);
+		ralloc_steal ($$, $3);
+	}
+;
+
+text_line:
+	NEWLINE { $$ = NULL; }
+|	pp_tokens NEWLINE
+;
+
+replacement_list:
+	/* empty */ { $$ = NULL; }
+|	pp_tokens
+;
+
+/* Anything left over after the identifier of #ifdef/#ifndef. */
+junk:
+	/* empty */
+|	pp_tokens {
+		glcpp_error(&@1, parser, "extra tokens at end of directive");
+	}
+;
+
+pp_tokens:
+	preprocessing_token {
+		parser->space_tokens = 1;
+		$$ = _token_list_create (parser);
+		_token_list_append ($$, $1);
+	}
+|	pp_tokens preprocessing_token {
+		$$ = $1;
+		_token_list_append ($$, $2);
+	}
+;
+
+/* Wrap one lexer token in a token_t and record the current location. */
+preprocessing_token:
+	IDENTIFIER {
+		$$ = _token_create_str (parser, IDENTIFIER, $1);
+		$$->location = yylloc;
+	}
+|	INTEGER_STRING {
+		$$ = _token_create_str (parser, INTEGER_STRING, $1);
+		$$->location = yylloc;
+	}
+|	operator {
+		$$ = _token_create_ival (parser, $1, $1);
+		$$->location = yylloc;
+	}
+|	DEFINED {
+		$$ = _token_create_ival (parser, DEFINED, DEFINED);
+		$$->location = yylloc;
+	}
+|	OTHER {
+		$$ = _token_create_str (parser, OTHER, $1);
+		$$->location = yylloc;
+	}
+|	SPACE {
+		$$ = _token_create_ival (parser, SPACE, SPACE);
+		$$->location = yylloc;
+	}
+;
+
+/* Every punctuator that may appear among pp_tokens.  The semantic value
+ * of each alternative is the token's own code.
+ */
+operator:
+	'[' { $$ = '['; }
+|	']' { $$ = ']'; }
+|	'(' { $$ = '('; }
+|	')' { $$ = ')'; }
+|	'{' { $$ = '{'; }
+|	'}' { $$ = '}'; }
+|	'.' { $$ = '.'; }
+|	'&' { $$ = '&'; }
+|	'*' { $$ = '*'; }
+|	'+' { $$ = '+'; }
+|	'-' { $$ = '-'; }
+|	'~' { $$ = '~'; }
+|	'!' { $$ = '!'; }
+|	'/' { $$ = '/'; }
+|	'%' { $$ = '%'; }
+|	LEFT_SHIFT { $$ = LEFT_SHIFT; }
+|	RIGHT_SHIFT { $$ = RIGHT_SHIFT; }
+|	'<' { $$ = '<'; }
+|	'>' { $$ = '>'; }
+|	LESS_OR_EQUAL { $$ = LESS_OR_EQUAL; }
+|	GREATER_OR_EQUAL { $$ = GREATER_OR_EQUAL; }
+|	EQUAL { $$ = EQUAL; }
+|	NOT_EQUAL { $$ = NOT_EQUAL; }
+|	'^' { $$ = '^'; }
+|	'|' { $$ = '|'; }
+|	AND { $$ = AND; }
+|	OR { $$ = OR; }
+|	';' { $$ = ';'; }
+|	',' { $$ = ','; }
+|	'=' { $$ = '='; }
+|	PASTE { $$ = PASTE; }
+|	PLUS_PLUS { $$ = PLUS_PLUS; }
+|	MINUS_MINUS { $$ = MINUS_MINUS; }
+;
+
+%%
+
+/* Allocate an empty string list with ralloc, passing 'ctx' as the
+ * ralloc context.
+ */
+string_list_t *
+_string_list_create (void *ctx)
+{
+	string_list_t *list;
+
+	list = ralloc (ctx, string_list_t);
+	list->head = NULL;
+	list->tail = NULL;
+
+	return list;
+}
+
+/* Append a copy of 'str' to the end of 'list'.  The node is allocated
+ * against the list and the string copy against the node.
+ */
+void
+_string_list_append_item (string_list_t *list, const char *str)
+{
+	string_node_t *node;
+
+	node = ralloc (list, string_node_t);
+	node->str = ralloc_strdup (node, str);
+
+	node->next = NULL;
+
+	if (list->head == NULL) {
+		list->head = node;
+	} else {
+		list->tail->next = node;
+	}
+
+	list->tail = node;
+}
+
+/* Return 1 if 'member' occurs in 'list', 0 otherwise (including when
+ * 'list' is NULL).  On a match, the zero-based position of the first
+ * occurrence is stored in *index when 'index' is non-NULL.
+ */
+int
+_string_list_contains (string_list_t *list, const char *member, int *index)
+{
+	string_node_t *node;
+	int i;
+
+	if (list == NULL)
+		return 0;
+
+	for (i = 0, node = list->head; node; i++, node = node->next) {
+		if (strcmp (node->str, member) == 0) {
+			if (index)
+				*index = i;
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/* Return duplicate string in list (if any), NULL otherwise.
*/ +const char * +_string_list_has_duplicate (string_list_t *list) +{ + string_node_t *node, *dup; + + if (list == NULL) + return NULL; + + for (node = list->head; node; node = node->next) { + for (dup = node->next; dup; dup = dup->next) { + if (strcmp (node->str, dup->str) == 0) + return node->str; + } + } + + return NULL; +} + +int +_string_list_length (string_list_t *list) +{ + int length = 0; + string_node_t *node; + + if (list == NULL) + return 0; + + for (node = list->head; node; node = node->next) + length++; + + return length; +} + +int +_string_list_equal (string_list_t *a, string_list_t *b) +{ + string_node_t *node_a, *node_b; + + if (a == NULL && b == NULL) + return 1; + + if (a == NULL || b == NULL) + return 0; + + for (node_a = a->head, node_b = b->head; + node_a && node_b; + node_a = node_a->next, node_b = node_b->next) + { + if (strcmp (node_a->str, node_b->str)) + return 0; + } + + /* Catch the case of lists being different lengths, (which + * would cause the loop above to terminate after the shorter + * list). 
*/ + return node_a == node_b; +} + +argument_list_t * +_argument_list_create (void *ctx) +{ + argument_list_t *list; + + list = ralloc (ctx, argument_list_t); + list->head = NULL; + list->tail = NULL; + + return list; +} + +void +_argument_list_append (argument_list_t *list, token_list_t *argument) +{ + argument_node_t *node; + + node = ralloc (list, argument_node_t); + node->argument = argument; + + node->next = NULL; + + if (list->head == NULL) { + list->head = node; + } else { + list->tail->next = node; + } + + list->tail = node; +} + +int +_argument_list_length (argument_list_t *list) +{ + int length = 0; + argument_node_t *node; + + if (list == NULL) + return 0; + + for (node = list->head; node; node = node->next) + length++; + + return length; +} + +token_list_t * +_argument_list_member_at (argument_list_t *list, int index) +{ + argument_node_t *node; + int i; + + if (list == NULL) + return NULL; + + node = list->head; + for (i = 0; i < index; i++) { + node = node->next; + if (node == NULL) + break; + } + + if (node) + return node->argument; + + return NULL; +} + +/* Note: This function ralloc_steal()s the str pointer. 
*/ +token_t * +_token_create_str (void *ctx, int type, char *str) +{ + token_t *token; + + token = ralloc (ctx, token_t); + token->type = type; + token->value.str = str; + + ralloc_steal (token, str); + + return token; +} + +token_t * +_token_create_ival (void *ctx, int type, int ival) +{ + token_t *token; + + token = ralloc (ctx, token_t); + token->type = type; + token->value.ival = ival; + + return token; +} + +token_list_t * +_token_list_create (void *ctx) +{ + token_list_t *list; + + list = ralloc (ctx, token_list_t); + list->head = NULL; + list->tail = NULL; + list->non_space_tail = NULL; + + return list; +} + +void +_token_list_append (token_list_t *list, token_t *token) +{ + token_node_t *node; + + node = ralloc (list, token_node_t); + node->token = token; + node->next = NULL; + + if (list->head == NULL) { + list->head = node; + } else { + list->tail->next = node; + } + + list->tail = node; + if (token->type != SPACE) + list->non_space_tail = node; +} + +void +_token_list_append_list (token_list_t *list, token_list_t *tail) +{ + if (tail == NULL || tail->head == NULL) + return; + + if (list->head == NULL) { + list->head = tail->head; + } else { + list->tail->next = tail->head; + } + + list->tail = tail->tail; + list->non_space_tail = tail->non_space_tail; +} + +static token_list_t * +_token_list_copy (void *ctx, token_list_t *other) +{ + token_list_t *copy; + token_node_t *node; + + if (other == NULL) + return NULL; + + copy = _token_list_create (ctx); + for (node = other->head; node; node = node->next) { + token_t *new_token = ralloc (copy, token_t); + *new_token = *node->token; + _token_list_append (copy, new_token); + } + + return copy; +} + +static void +_token_list_trim_trailing_space (token_list_t *list) +{ + token_node_t *tail, *next; + + if (list->non_space_tail) { + tail = list->non_space_tail->next; + list->non_space_tail->next = NULL; + list->tail = list->non_space_tail; + + while (tail) { + next = tail->next; + ralloc_free (tail); + tail = next; 
+ } + } +} + +static int +_token_list_is_empty_ignoring_space (token_list_t *l) +{ + token_node_t *n; + + if (l == NULL) + return 1; + + n = l->head; + while (n != NULL && n->token->type == SPACE) + n = n->next; + + return n == NULL; +} + +int +_token_list_equal_ignoring_space (token_list_t *a, token_list_t *b) +{ + token_node_t *node_a, *node_b; + + if (a == NULL || b == NULL) { + int a_empty = _token_list_is_empty_ignoring_space(a); + int b_empty = _token_list_is_empty_ignoring_space(b); + return a_empty == b_empty; + } + + node_a = a->head; + node_b = b->head; + + while (1) + { + if (node_a == NULL && node_b == NULL) + break; + + if (node_a == NULL || node_b == NULL) + return 0; + /* Make sure whitespace appears in the same places in both. + * It need not be exactly the same amount of whitespace, + * though. + */ + if (node_a->token->type == SPACE + && node_b->token->type == SPACE) { + while (node_a && node_a->token->type == SPACE) + node_a = node_a->next; + while (node_b && node_b->token->type == SPACE) + node_b = node_b->next; + continue; + } + + if (node_a->token->type != node_b->token->type) + return 0; + + switch (node_a->token->type) { + case INTEGER: + if (node_a->token->value.ival != + node_b->token->value.ival) + { + return 0; + } + break; + case IDENTIFIER: + case INTEGER_STRING: + case OTHER: + if (strcmp (node_a->token->value.str, + node_b->token->value.str)) + { + return 0; + } + break; + } + + node_a = node_a->next; + node_b = node_b->next; + } + + return 1; +} + +static void +_token_print (char **out, size_t *len, token_t *token) +{ + if (token->type < 256) { + ralloc_asprintf_rewrite_tail (out, len, "%c", token->type); + return; + } + + switch (token->type) { + case INTEGER: + ralloc_asprintf_rewrite_tail (out, len, "%" PRIiMAX, token->value.ival); + break; + case IDENTIFIER: + case INTEGER_STRING: + case OTHER: + ralloc_asprintf_rewrite_tail (out, len, "%s", token->value.str); + break; + case SPACE: + ralloc_asprintf_rewrite_tail (out, len, " 
"); + break; + case LEFT_SHIFT: + ralloc_asprintf_rewrite_tail (out, len, "<<"); + break; + case RIGHT_SHIFT: + ralloc_asprintf_rewrite_tail (out, len, ">>"); + break; + case LESS_OR_EQUAL: + ralloc_asprintf_rewrite_tail (out, len, "<="); + break; + case GREATER_OR_EQUAL: + ralloc_asprintf_rewrite_tail (out, len, ">="); + break; + case EQUAL: + ralloc_asprintf_rewrite_tail (out, len, "=="); + break; + case NOT_EQUAL: + ralloc_asprintf_rewrite_tail (out, len, "!="); + break; + case AND: + ralloc_asprintf_rewrite_tail (out, len, "&&"); + break; + case OR: + ralloc_asprintf_rewrite_tail (out, len, "||"); + break; + case PASTE: + ralloc_asprintf_rewrite_tail (out, len, "##"); + break; + case PLUS_PLUS: + ralloc_asprintf_rewrite_tail (out, len, "++"); + break; + case MINUS_MINUS: + ralloc_asprintf_rewrite_tail (out, len, "--"); + break; + case DEFINED: + ralloc_asprintf_rewrite_tail (out, len, "defined"); + break; + case PLACEHOLDER: + /* Nothing to print. */ + break; + default: + assert(!"Error: Don't know how to print token."); + + break; + } +} + +/* Return a new token (ralloc()ed off of 'token') formed by pasting + * 'token' and 'other'. Note that this function may return 'token' or + * 'other' directly rather than allocating anything new. + * + * Caution: Only very cursory error-checking is performed to see if + * the final result is a valid single token. */ +static token_t * +_token_paste (glcpp_parser_t *parser, token_t *token, token_t *other) +{ + token_t *combined = NULL; + + /* Pasting a placeholder onto anything makes no change. */ + if (other->type == PLACEHOLDER) + return token; + + /* When 'token' is a placeholder, just return 'other'. */ + if (token->type == PLACEHOLDER) + return other; + + /* A very few single-character punctuators can be combined + * with another to form a multi-character punctuator. 
*/ + switch (token->type) { + case '<': + if (other->type == '<') + combined = _token_create_ival (token, LEFT_SHIFT, LEFT_SHIFT); + else if (other->type == '=') + combined = _token_create_ival (token, LESS_OR_EQUAL, LESS_OR_EQUAL); + break; + case '>': + if (other->type == '>') + combined = _token_create_ival (token, RIGHT_SHIFT, RIGHT_SHIFT); + else if (other->type == '=') + combined = _token_create_ival (token, GREATER_OR_EQUAL, GREATER_OR_EQUAL); + break; + case '=': + if (other->type == '=') + combined = _token_create_ival (token, EQUAL, EQUAL); + break; + case '!': + if (other->type == '=') + combined = _token_create_ival (token, NOT_EQUAL, NOT_EQUAL); + break; + case '&': + if (other->type == '&') + combined = _token_create_ival (token, AND, AND); + break; + case '|': + if (other->type == '|') + combined = _token_create_ival (token, OR, OR); + break; + } + + if (combined != NULL) { + /* Inherit the location from the first token */ + combined->location = token->location; + return combined; + } + + /* Two string-valued (or integer) tokens can usually just be + * mashed together. (We also handle a string followed by an + * integer here as well.) + * + * There are some exceptions here. Notably, if the first token + * is an integer (or a string representing an integer), then + * the second token must also be an integer or must be a + * string representing an integer that begins with a digit. + */ + if ((token->type == IDENTIFIER || token->type == OTHER || token->type == INTEGER_STRING || token->type == INTEGER) && + (other->type == IDENTIFIER || other->type == OTHER || other->type == INTEGER_STRING || other->type == INTEGER)) + { + char *str; + int combined_type; + + /* Check that pasting onto an integer doesn't create a + * non-integer, (that is, only digits can be + * pasted. 
*/ + if (token->type == INTEGER_STRING || token->type == INTEGER) + { + switch (other->type) { + case INTEGER_STRING: + if (other->value.str[0] < '0' || + other->value.str[0] > '9') + goto FAIL; + break; + case INTEGER: + if (other->value.ival < 0) + goto FAIL; + break; + default: + goto FAIL; + } + } + + if (token->type == INTEGER) + str = ralloc_asprintf (token, "%" PRIiMAX, + token->value.ival); + else + str = ralloc_strdup (token, token->value.str); + + + if (other->type == INTEGER) + ralloc_asprintf_append (&str, "%" PRIiMAX, + other->value.ival); + else + ralloc_strcat (&str, other->value.str); + + /* New token is same type as original token, unless we + * started with an integer, in which case we will be + * creating an integer-string. */ + combined_type = token->type; + if (combined_type == INTEGER) + combined_type = INTEGER_STRING; + + combined = _token_create_str (token, combined_type, str); + combined->location = token->location; + return combined; + } + + FAIL: + glcpp_error (&token->location, parser, ""); + ralloc_asprintf_rewrite_tail (&parser->info_log, &parser->info_log_length, "Pasting \""); + _token_print (&parser->info_log, &parser->info_log_length, token); + ralloc_asprintf_rewrite_tail (&parser->info_log, &parser->info_log_length, "\" and \""); + _token_print (&parser->info_log, &parser->info_log_length, other); + ralloc_asprintf_rewrite_tail (&parser->info_log, &parser->info_log_length, "\" does not give a valid preprocessing token.\n"); + + return token; +} + +static void +_token_list_print (glcpp_parser_t *parser, token_list_t *list) +{ + token_node_t *node; + + if (list == NULL) + return; + + for (node = list->head; node; node = node->next) + _token_print (&parser->output, &parser->output_length, node->token); +} + +void +yyerror (YYLTYPE *locp, glcpp_parser_t *parser, const char *error) +{ + glcpp_error(locp, parser, "%s", error); +} + +static void add_builtin_define(glcpp_parser_t *parser, + const char *name, int value) +{ + token_t *tok; 
+ token_list_t *list; + + tok = _token_create_ival (parser, INTEGER, value); + + list = _token_list_create(parser); + _token_list_append(list, tok); + _define_object_macro(parser, NULL, name, list); +} + +glcpp_parser_t * +glcpp_parser_create (const struct gl_extensions *extensions, gl_api api) +{ + glcpp_parser_t *parser; + + parser = ralloc (NULL, glcpp_parser_t); + + glcpp_lex_init_extra (parser, &parser->scanner); + parser->defines = hash_table_ctor (32, hash_table_string_hash, + hash_table_string_compare); + parser->active = NULL; + parser->lexing_directive = 0; + parser->space_tokens = 1; + parser->last_token_was_newline = 0; + parser->last_token_was_space = 0; + parser->first_non_space_token_this_line = 1; + parser->newline_as_space = 0; + parser->in_control_line = 0; + parser->paren_count = 0; + parser->commented_newlines = 0; + + parser->skip_stack = NULL; + parser->skipping = 0; + + parser->lex_from_list = NULL; + parser->lex_from_node = NULL; + + parser->output = ralloc_strdup(parser, ""); + parser->output_length = 0; + parser->info_log = ralloc_strdup(parser, ""); + parser->info_log_length = 0; + parser->error = 0; + + parser->extensions = extensions; + parser->api = api; + parser->version_resolved = false; + + parser->has_new_line_number = 0; + parser->new_line_number = 1; + parser->has_new_source_number = 0; + parser->new_source_number = 0; + + return parser; +} + +void +glcpp_parser_destroy (glcpp_parser_t *parser) +{ + glcpp_lex_destroy (parser->scanner); + hash_table_dtor (parser->defines); + ralloc_free (parser); +} + +typedef enum function_status +{ + FUNCTION_STATUS_SUCCESS, + FUNCTION_NOT_A_FUNCTION, + FUNCTION_UNBALANCED_PARENTHESES +} function_status_t; + +/* Find a set of function-like macro arguments by looking for a + * balanced set of parentheses. + * + * When called, 'node' should be the opening-parenthesis token, (or + * perhaps preceeding SPACE tokens). 
Upon successful return *last will
+ * be the last consumed node, (corresponding to the closing right
+ * parenthesis).
+ *
+ * Return values:
+ *
+ *   FUNCTION_STATUS_SUCCESS:
+ *
+ *	Successfully parsed a set of function arguments.
+ *
+ *   FUNCTION_NOT_A_FUNCTION:
+ *
+ *	Macro name not followed by a '('. This is not an error, but
+ *	simply that the macro name should be treated as a non-macro.
+ *
+ *   FUNCTION_UNBALANCED_PARENTHESES
+ *
+ *	Macro name is not followed by a balanced set of parentheses.
+ */
+static function_status_t
+_arguments_parse (argument_list_t *arguments,
+		  token_node_t *node,
+		  token_node_t **last)
+{
+	token_list_t *argument;
+	int paren_count;
+
+	/* Scanning starts at the node following 'node'. */
+	node = node->next;
+
+	/* Ignore whitespace before first parenthesis. */
+	while (node && node->token->type == SPACE)
+		node = node->next;
+
+	if (node == NULL || node->token->type != '(')
+		return FUNCTION_NOT_A_FUNCTION;
+
+	node = node->next;
+
+	/* There is always at least one (possibly empty) argument. */
+	argument = _token_list_create (arguments);
+	_argument_list_append (arguments, argument);
+
+	/* paren_count is the current nesting depth; the loop ends at the
+	 * ')' that brings it back to zero, or at the end of the list.
+	 */
+	for (paren_count = 1; node; node = node->next) {
+		if (node->token->type == '(')
+		{
+			paren_count++;
+		}
+		else if (node->token->type == ')')
+		{
+			paren_count--;
+			if (paren_count == 0)
+				break;
+		}
+
+		/* Only a comma at the outermost nesting level starts a
+		 * new argument; everything else is appended to the
+		 * current one.
+		 */
+		if (node->token->type == ',' &&
+			 paren_count == 1)
+		{
+			_token_list_trim_trailing_space (argument);
+			argument = _token_list_create (arguments);
+			_argument_list_append (arguments, argument);
+		}
+		else {
+			if (argument->head == NULL) {
+				/* Don't treat initial whitespace as
+				 * part of the argument. */
+				if (node->token->type == SPACE)
+					continue;
+			}
+			_token_list_append (argument, node->token);
+		}
+	}
+
+	/* Ran off the end of the list before the closing ')'. */
+	if (paren_count)
+		return FUNCTION_UNBALANCED_PARENTHESES;
+
+	*last = node;
+
+	return FUNCTION_STATUS_SUCCESS;
+}
+
+/* Create a list holding a single integer-valued token of type 'type'. */
+static token_list_t *
+_token_list_create_with_one_ival (void *ctx, int type, int ival)
+{
+	token_list_t *list;
+	token_t *node;
+
+	list = _token_list_create (ctx);
+	node = _token_create_ival (list, type, ival);
+	_token_list_append (list, node);
+
+	return list;
+}
+
+/* Create a list holding a single SPACE token. */
+static token_list_t *
+_token_list_create_with_one_space (void *ctx)
+{
+	return _token_list_create_with_one_ival (ctx, SPACE, SPACE);
+}
+
+/* Create a list holding a single INTEGER token with value 'ival'. */
+static token_list_t *
+_token_list_create_with_one_integer (void *ctx, int ival)
+{
+	return _token_list_create_with_one_ival (ctx, INTEGER, ival);
+}
+
+/* Evaluate a DEFINED token node (based on subsequent tokens in the list).
+ *
+ * Note: This function must only be called when "node" is a DEFINED token,
+ * (and will abort with an assertion failure otherwise).
+ *
+ * If "node" is followed, (ignoring any SPACE tokens), by an IDENTIFIER token
+ * (optionally preceded and followed by '(' and ')' tokens) then the following
+ * occurs:
+ *
+ *	If the identifier is a defined macro, this function returns 1.
+ *
+ *	If the identifier is not a defined macro, this function returns 0.
+ *
+ *	In either case, *last will be updated to the last node in the list
+ *	consumed by the evaluation, (either the token of the identifier or the
+ *	token of the closing parenthesis).
+ *
+ * In all other cases, (such as "node is the final node of the list", or
+ * "missing closing parenthesis", etc.), this function generates a
+ * preprocessor error, returns -1 and *last will not be set.
+ */
+static int
+_glcpp_parser_evaluate_defined (glcpp_parser_t *parser,
+				token_node_t *node,
+				token_node_t **last)
+{
+	/* 'defined' keeps the DEFINED node for the error location. */
+	token_node_t *argument, *defined = node;
+
+	assert (node->token->type == DEFINED);
+
+	node = node->next;
+
+	/* Ignore whitespace after DEFINED token. */
+	while (node && node->token->type == SPACE)
+		node = node->next;
+
+	if (node == NULL)
+		goto FAIL;
+
+	/* Both IDENTIFIER and OTHER tokens are accepted as the operand. */
+	if (node->token->type == IDENTIFIER || node->token->type == OTHER) {
+		argument = node;
+	} else if (node->token->type == '(') {
+		node = node->next;
+
+		/* Ignore whitespace after '(' token. */
+		while (node && node->token->type == SPACE)
+			node = node->next;
+
+		if (node == NULL || (node->token->type != IDENTIFIER &&
+				     node->token->type != OTHER))
+		{
+			goto FAIL;
+		}
+
+		argument = node;
+
+		node = node->next;
+
+		/* Ignore whitespace after identifier, before ')' token. */
+		while (node && node->token->type == SPACE)
+			node = node->next;
+
+		if (node == NULL || node->token->type != ')')
+			goto FAIL;
+	} else {
+		goto FAIL;
+	}
+
+	*last = node;
+
+	return hash_table_find (parser->defines,
+				argument->token->value.str) ? 1 : 0;
+
+FAIL:
+	glcpp_error (&defined->token->location, parser,
+		     "\"defined\" not followed by an identifier");
+	return -1;
+}
+
+/* Evaluate all DEFINED nodes in a given list, modifying the list in place.
+ *
+ * Each successfully evaluated "defined" construct is replaced by a single
+ * INTEGER token holding 0 or 1.  A construct that fails to evaluate is
+ * left in the list as is (the error has already been reported).
+ */
+static void
+_glcpp_parser_evaluate_defined_in_list (glcpp_parser_t *parser,
+					token_list_t *list)
+{
+	token_node_t *node, *node_prev, *replacement, *last = NULL;
+	int value;
+
+	if (list == NULL)
+		return;
+
+	node_prev = NULL;
+	node = list->head;
+
+	while (node) {
+
+		if (node->token->type != DEFINED)
+			goto NEXT;
+
+		/* On success 'last' is the final node of the construct. */
+		value = _glcpp_parser_evaluate_defined (parser, node, &last);
+		if (value == -1)
+			goto NEXT;
+
+		replacement = ralloc (list, token_node_t);
+		replacement->token = _token_create_ival (list, INTEGER, value);
+
+		/* Splice replacement node into list, replacing from "node"
+		 * through "last". */
+		if (node_prev)
+			node_prev->next = replacement;
+		else
+			list->head = replacement;
+		replacement->next = last->next;
+		if (last == list->tail)
+			list->tail = replacement;
+
+		/* Continue the walk from the replacement node. */
+		node = replacement;
+
+	NEXT:
+		node_prev = node;
+		node = node->next;
+	}
+}
+
+/* Perform macro expansion on 'list', placing the resulting tokens
+ * into a new list which is initialized with a first token of type
+ * 'head_token_type'. Then begin lexing from the resulting list,
+ * (return to the current lexing source when this list is exhausted).
+ *
+ * See the documentation of _glcpp_parser_expand_token_list for a description
+ * of the "mode" parameter.
+ */
+static void
+_glcpp_parser_expand_and_lex_from (glcpp_parser_t *parser,
+				   int head_token_type,
+				   token_list_t *list,
+				   expansion_mode_t mode)
+{
+	token_list_t *expanded;
+	token_t *token;
+
+	expanded = _token_list_create (parser);
+	token = _token_create_ival (parser, head_token_type, head_token_type);
+	_token_list_append (expanded, token);
+	/* 'list' is expanded first and then appended after the head
+	 * token.
+	 */
+	_glcpp_parser_expand_token_list (parser, list, mode);
+	_token_list_append_list (expanded, list);
+	glcpp_parser_lex_from (parser, expanded);
+}
+
+/* Apply every PASTE ("##") operator in 'list' in place: the token before
+ * the operator is replaced by the result of _token_paste and the nodes up
+ * to and including the right-hand operand are unlinked.  SPACE tokens
+ * around the operator are skipped.
+ */
+static void
+_glcpp_parser_apply_pastes (glcpp_parser_t *parser, token_list_t *list)
+{
+	token_node_t *node;
+
+	node = list->head;
+	while (node)
+	{
+		token_node_t *next_non_space;
+
+		/* Look ahead for a PASTE token, skipping space. */
+		next_non_space = node->next;
+		while (next_non_space && next_non_space->token->type == SPACE)
+			next_non_space = next_non_space->next;
+
+		if (next_non_space == NULL)
+			break;
+
+		if (next_non_space->token->type != PASTE) {
+			node = next_non_space;
+			continue;
+		}
+
+		/* Now find the next non-space token after the PASTE. */
+		next_non_space = next_non_space->next;
+		while (next_non_space && next_non_space->token->type == SPACE)
+			next_non_space = next_non_space->next;
+
+		if (next_non_space == NULL) {
+			yyerror (&node->token->location, parser, "'##' cannot appear at either end of a macro expansion\n");
+			return;
+		}
+
+		/* 'node' is not advanced here, so the pasted result is
+		 * examined again on the next iteration.
+		 */
+		node->token = _token_paste (parser, node->token, next_non_space->token);
+		node->next = next_non_space->next;
+		if (next_non_space == list->tail)
+			list->tail = node;
+	}
+
+	list->non_space_tail = list->tail;
+}
+
+/* This is a helper function that's essentially part of the
+ * implementation of _glcpp_parser_expand_node. It shouldn't be called
+ * except for by that function.
+ *
+ * Returns NULL if node is a simple token with no expansion, (that is,
+ * although 'node' corresponds to an identifier defined as a
+ * function-like macro, it is not followed with a parenthesized
+ * argument list).
+ *
+ * Compute the complete expansion of node (which is a function-like
+ * macro) and subsequent nodes which are arguments.
+ *
+ * Returns the token list that results from the expansion and sets
+ * *last to the last node in the list that was consumed by the
+ * expansion. Specifically, *last will be set as follows: as the
+ * token of the closing right parenthesis.
+ *
+ * See the documentation of _glcpp_parser_expand_token_list for a description
+ * of the "mode" parameter.
+ */ +static token_list_t * +_glcpp_parser_expand_function (glcpp_parser_t *parser, + token_node_t *node, + token_node_t **last, + expansion_mode_t mode) +{ + macro_t *macro; + const char *identifier; + argument_list_t *arguments; + function_status_t status; + token_list_t *substituted; + int parameter_index; + + identifier = node->token->value.str; + + macro = hash_table_find (parser->defines, identifier); + + assert (macro->is_function); + + arguments = _argument_list_create (parser); + status = _arguments_parse (arguments, node, last); + + switch (status) { + case FUNCTION_STATUS_SUCCESS: + break; + case FUNCTION_NOT_A_FUNCTION: + return NULL; + case FUNCTION_UNBALANCED_PARENTHESES: + glcpp_error (&node->token->location, parser, "Macro %s call has unbalanced parentheses\n", identifier); + return NULL; + } + + /* Replace a macro defined as empty with a SPACE token. */ + if (macro->replacements == NULL) { + ralloc_free (arguments); + return _token_list_create_with_one_space (parser); + } + + if (! ((_argument_list_length (arguments) == + _string_list_length (macro->parameters)) || + (_string_list_length (macro->parameters) == 0 && + _argument_list_length (arguments) == 1 && + arguments->head->argument->head == NULL))) + { + glcpp_error (&node->token->location, parser, + "Error: macro %s invoked with %d arguments (expected %d)\n", + identifier, + _argument_list_length (arguments), + _string_list_length (macro->parameters)); + return NULL; + } + + /* Perform argument substitution on the replacement list. */ + substituted = _token_list_create (arguments); + + for (node = macro->replacements->head; node; node = node->next) + { + if (node->token->type == IDENTIFIER && + _string_list_contains (macro->parameters, + node->token->value.str, + ¶meter_index)) + { + token_list_t *argument; + argument = _argument_list_member_at (arguments, + parameter_index); + /* Before substituting, we expand the argument + * tokens, or append a placeholder token for + * an empty argument. 
*/ + if (argument->head) { + token_list_t *expanded_argument; + expanded_argument = _token_list_copy (parser, + argument); + _glcpp_parser_expand_token_list (parser, + expanded_argument, + mode); + _token_list_append_list (substituted, + expanded_argument); + } else { + token_t *new_token; + + new_token = _token_create_ival (substituted, + PLACEHOLDER, + PLACEHOLDER); + _token_list_append (substituted, new_token); + } + } else { + _token_list_append (substituted, node->token); + } + } + + /* After argument substitution, and before further expansion + * below, implement token pasting. */ + + _token_list_trim_trailing_space (substituted); + + _glcpp_parser_apply_pastes (parser, substituted); + + return substituted; +} + +/* Compute the complete expansion of node, (and subsequent nodes after + * 'node' in the case that 'node' is a function-like macro and + * subsequent nodes are arguments). + * + * Returns NULL if node is a simple token with no expansion. + * + * Otherwise, returns the token list that results from the expansion + * and sets *last to the last node in the list that was consumed by + * the expansion. Specifically, *last will be set as follows: + * + * As 'node' in the case of object-like macro expansion. + * + * As the token of the closing right parenthesis in the case of + * function-like macro expansion. + * + * See the documentation of _glcpp_parser_expand_token_list for a description + * of the "mode" parameter. + */ +static token_list_t * +_glcpp_parser_expand_node (glcpp_parser_t *parser, + token_node_t *node, + token_node_t **last, + expansion_mode_t mode) +{ + token_t *token = node->token; + const char *identifier; + macro_t *macro; + + /* We only expand identifiers */ + if (token->type != IDENTIFIER) { + return NULL; + } + + *last = node; + identifier = token->value.str; + + /* Special handling for __LINE__ and __FILE__, (not through + * the hash table). 
*/ + if (strcmp(identifier, "__LINE__") == 0) + return _token_list_create_with_one_integer (parser, node->token->location.first_line); + + if (strcmp(identifier, "__FILE__") == 0) + return _token_list_create_with_one_integer (parser, node->token->location.source); + + /* Look up this identifier in the hash table. */ + macro = hash_table_find (parser->defines, identifier); + + /* Not a macro, so no expansion needed. */ + if (macro == NULL) + return NULL; + + /* Finally, don't expand this macro if we're already actively + * expanding it, (to avoid infinite recursion). */ + if (_parser_active_list_contains (parser, identifier)) { + /* We change the token type here from IDENTIFIER to + * OTHER to prevent any future expansion of this + * unexpanded token. */ + char *str; + token_list_t *expansion; + token_t *final; + + str = ralloc_strdup (parser, token->value.str); + final = _token_create_str (parser, OTHER, str); + expansion = _token_list_create (parser); + _token_list_append (expansion, final); + return expansion; + } + + if (! macro->is_function) + { + token_list_t *replacement; + + /* Replace a macro defined as empty with a SPACE token. */ + if (macro->replacements == NULL) + return _token_list_create_with_one_space (parser); + + replacement = _token_list_copy (parser, macro->replacements); + _glcpp_parser_apply_pastes (parser, replacement); + return replacement; + } + + return _glcpp_parser_expand_function (parser, node, last, mode); +} + +/* Push a new identifier onto the parser's active list. + * + * Here, 'marker' is the token node that appears in the list after the + * expansion of 'identifier'. That is, when the list iterator begins + * examining 'marker', then it is time to pop this node from the + * active stack. 
+ */
+static void
+_parser_active_list_push (glcpp_parser_t *parser,
+                          const char *identifier,
+                          token_node_t *marker)
+{
+   active_list_t *node;
+
+   node = ralloc (parser->active, active_list_t); /* the current head is passed as the ralloc context */
+   node->identifier = ralloc_strdup (node, identifier); /* private copy of the name */
+   node->marker = marker;
+   node->next = parser->active;
+
+   parser->active = node;
+}
+/* Pop the most recently pushed entry from the parser's active list and
+ * free it. Does nothing if the list is empty. */
+static void
+_parser_active_list_pop (glcpp_parser_t *parser)
+{
+   active_list_t *node = parser->active;
+
+   if (node == NULL) {
+      parser->active = NULL;
+      return;
+   }
+
+   node = parser->active->next; /* remember the next entry before freeing the head */
+   ralloc_free (parser->active);
+
+   parser->active = node;
+}
+/* Return 1 if 'identifier' compares equal (strcmp) to the identifier of
+ * any entry on the parser's active list, and 0 otherwise. */
+static int
+_parser_active_list_contains (glcpp_parser_t *parser, const char *identifier)
+{
+   active_list_t *node;
+
+   if (parser->active == NULL)
+      return 0;
+
+   for (node = parser->active; node; node = node->next)
+      if (strcmp (node->identifier, identifier) == 0)
+         return 1;
+
+   return 0;
+}
+
+/* Walk over the token list replacing nodes with their expansion.
+ * Whenever nodes are expanded the walking will walk over the new
+ * nodes, continuing to expand as necessary. The results are placed in
+ * 'list' itself.
+ *
+ * The "mode" argument controls the handling of any DEFINED tokens that
+ * result from expansion as follows:
+ *
+ *   EXPANSION_MODE_IGNORE_DEFINED: Any resulting DEFINED tokens will be
+ *      left in the final list, unevaluated. This is the correct mode
+ *      for expanding any list in any context other than a
+ *      preprocessor conditional, (#if or #elif).
+ *
+ *   EXPANSION_MODE_EVALUATE_DEFINED: Any resulting DEFINED tokens will be
+ *      evaluated to 0 or 1 tokens depending on whether the following
+ *      token is the name of a defined macro. If the DEFINED token is
+ *      not followed by an (optionally parenthesized) identifier, then
+ *      an error will be generated. This is the correct mode for
+ *      expanding any list in the context of a preprocessor
+ *      conditional, (#if or #elif).
+ */
+static void
+_glcpp_parser_expand_token_list (glcpp_parser_t *parser,
+                                 token_list_t *list,
+                                 expansion_mode_t mode)
+{
+   token_node_t *node_prev;
+   token_node_t *node, *last = NULL;
+   token_list_t *expansion;
+   active_list_t *active_initial = parser->active; /* restored before returning */
+
+   if (list == NULL)
+      return;
+
+   _token_list_trim_trailing_space (list);
+
+   node_prev = NULL;
+   node = list->head;
+
+   if (mode == EXPANSION_MODE_EVALUATE_DEFINED)
+      _glcpp_parser_evaluate_defined_in_list (parser, list);
+
+   while (node) {
+
+      while (parser->active && parser->active->marker == node)
+         _parser_active_list_pop (parser);
+
+      expansion = _glcpp_parser_expand_node (parser, node, &last, mode);
+      if (expansion) {
+         token_node_t *n;
+
+         if (mode == EXPANSION_MODE_EVALUATE_DEFINED) {
+            _glcpp_parser_evaluate_defined_in_list (parser,
+                                                    expansion);
+         }
+         /* Pop any active entries whose marker is among the nodes
+          * about to be replaced ('node' through 'last'). */
+         for (n = node; n != last->next; n = n->next)
+            while (parser->active &&
+                   parser->active->marker == n)
+            {
+               _parser_active_list_pop (parser);
+            }
+
+         _parser_active_list_push (parser,
+                                   node->token->value.str,
+                                   last->next);
+
+         /* Splice expansion into list, supporting a
+          * simple deletion if the expansion is
+          * empty. */
+         if (expansion->head) {
+            if (node_prev)
+               node_prev->next = expansion->head;
+            else
+               list->head = expansion->head;
+            expansion->tail->next = last->next;
+            if (last == list->tail)
+               list->tail = expansion->tail;
+         } else {
+            if (node_prev)
+               node_prev->next = last->next;
+            else
+               list->head = last->next;
+            if (last == list->tail)
+               list->tail = NULL;
+         }
+      } else {
+         node_prev = node;
+      }
+      node = node_prev ? node_prev->next : list->head; /* after a splice this revisits the newly inserted tokens */
+   }
+
+   /* Remove any lingering effects of this invocation on the
+    * active list. That is, pop until the list looks like it did
+    * at the beginning of this function. */
+   while (parser->active && parser->active != active_initial)
+      _parser_active_list_pop (parser);
+
+   list->non_space_tail = list->tail;
+}
+/* Expand 'list' in place (DEFINED tokens are left unevaluated), trim
+ * trailing space, and print it with _token_list_print. A NULL list is
+ * ignored. */
+void
+_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser,
+                                         token_list_t *list)
+{
+   if (list == NULL)
+      return;
+
+   _glcpp_parser_expand_token_list (parser, list, EXPANSION_MODE_IGNORE_DEFINED);
+
+   _token_list_trim_trailing_space (list);
+
+   _token_list_print (parser, list);
+}
+/* Issue a warning if 'identifier' contains two consecutive underscores
+ * and an error if it starts with the prefix GL_ (see below). */
+static void
+_check_for_reserved_macro_name (glcpp_parser_t *parser, YYLTYPE *loc,
+                                const char *identifier)
+{
+   /* Section 3.3 (Preprocessor) of the GLSL 1.30 spec (and later) and
+    * the GLSL ES spec (all versions) say:
+    *
+    *     "All macro names containing two consecutive underscores ( __ )
+    *     are reserved for future use as predefined macro names. All
+    *     macro names prefixed with "GL_" ("GL" followed by a single
+    *     underscore) are also reserved."
+    *
+    * The intention is that names containing __ are reserved for internal
+    * use by the implementation, and names prefixed with GL_ are reserved
+    * for use by Khronos. Since every extension adds a name prefixed
+    * with GL_ (i.e., the name of the extension), that should be an
+    * error. Names simply containing __ are dangerous to use, but should
+    * be allowed.
+    *
+    * A future version of the GLSL specification will clarify this.
+    */
+   if (strstr(identifier, "__")) {
+      glcpp_warning(loc, parser,
+                    "Macro names containing \"__\" are reserved "
+                    "for use by the implementation.\n");
+   }
+   if (strncmp(identifier, "GL_", 3) == 0) {
+      glcpp_error (loc, parser, "Macro names starting with \"GL_\" are reserved.\n");
+   }
+}
+/* Compare two macro definitions. Returns 0 if one is function-like and
+ * the other is not, or if both are function-like with differing
+ * parameter lists; otherwise returns the result of comparing the
+ * replacement lists with _token_list_equal_ignoring_space. */
+static int
+_macro_equal (macro_t *a, macro_t *b)
+{
+   if (a->is_function != b->is_function)
+      return 0;
+
+   if (a->is_function) {
+      if (! _string_list_equal (a->parameters, b->parameters))
+         return 0;
+   }
+
+   return _token_list_equal_ignoring_space (a->replacements,
+                                            b->replacements);
+}
+/* Define 'identifier' as an object-like macro with the given
+ * replacement list. 'loc' may be NULL (see below). A redefinition that
+ * is equal to the existing macro (per _macro_equal) is dropped
+ * silently; a differing redefinition reports an error, after which the
+ * new macro is still inserted into parser->defines. */
+void
+_define_object_macro (glcpp_parser_t *parser,
+                      YYLTYPE *loc,
+                      const char *identifier,
+                      token_list_t *replacements)
+{
+   macro_t *macro, *previous;
+
+   /* We define pre-defined macros before we've started parsing the
+    * actual file. So if there's no location defined yet, that's what
+    * we're doing and we don't want to generate an error for using the
+    * reserved names. */
+   if (loc != NULL)
+      _check_for_reserved_macro_name(parser, loc, identifier);
+
+   macro = ralloc (parser, macro_t);
+
+   macro->is_function = 0;
+   macro->parameters = NULL;
+   macro->identifier = ralloc_strdup (macro, identifier);
+   macro->replacements = replacements;
+   ralloc_steal (macro, replacements); /* reparent 'replacements' under the macro */
+
+   previous = hash_table_find (parser->defines, identifier);
+   if (previous) {
+      if (_macro_equal (macro, previous)) {
+         ralloc_free (macro);
+         return;
+      }
+      glcpp_error (loc, parser, "Redefinition of macro %s\n",
+                   identifier);
+   }
+
+   hash_table_insert (parser->defines, macro, identifier);
+}
+/* Define 'identifier' as a function-like macro with the given parameter
+ * and replacement lists. Duplicate parameter names are reported as an
+ * error (the definition still proceeds). Redefinitions are handled as
+ * in _define_object_macro. Unlike that function, 'loc' is used
+ * unconditionally here. */
+void
+_define_function_macro (glcpp_parser_t *parser,
+                        YYLTYPE *loc,
+                        const char *identifier,
+                        string_list_t *parameters,
+                        token_list_t *replacements)
+{
+   macro_t *macro, *previous;
+   const char *dup;
+
+   _check_for_reserved_macro_name(parser, loc, identifier);
+
+   /* Check for any duplicate parameter names. */
+   if ((dup = _string_list_has_duplicate (parameters)) != NULL) {
+      glcpp_error (loc, parser, "Duplicate macro parameter \"%s\"",
+                   dup);
+   }
+
+   macro = ralloc (parser, macro_t);
+   ralloc_steal (macro, parameters);
+   ralloc_steal (macro, replacements);
+
+   macro->is_function = 1;
+   macro->parameters = parameters;
+   macro->identifier = ralloc_strdup (macro, identifier);
+   macro->replacements = replacements;
+   previous = hash_table_find (parser->defines, identifier);
+   if (previous) {
+      if (_macro_equal (macro, previous)) {
+         ralloc_free (macro);
+         return;
+      }
+      glcpp_error (loc, parser, "Redefinition of macro %s\n",
+                   identifier);
+   }
+
+   hash_table_insert (parser->defines, macro, identifier);
+}
+/* Return the type of the next token and store its value in *yylval.
+ * Tokens come from the scanner (glcpp_lex) unless parser->lex_from_list
+ * is set, in which case they are taken from that list; once the list
+ * is exhausted it is freed and a NEWLINE is returned. */
+static int
+glcpp_parser_lex (YYSTYPE *yylval, YYLTYPE *yylloc, glcpp_parser_t *parser)
+{
+   token_node_t *node;
+   int ret;
+
+   if (parser->lex_from_list == NULL) {
+      ret = glcpp_lex (yylval, yylloc, parser->scanner);
+
+      /* XXX: This ugly block of code exists for the sole
+       * purpose of converting a NEWLINE token into a SPACE
+       * token, but only in the case where we have seen a
+       * function-like macro name, but have not yet seen its
+       * closing parenthesis.
+       *
+       * There's perhaps a more compact way to do this with
+       * mid-rule actions in the grammar.
+       *
+       * I'm definitely not pleased with the complexity of
+       * this code here.
+       */
+      if (parser->newline_as_space)
+      {
+         if (ret == '(') {
+            parser->paren_count++;
+         } else if (ret == ')') {
+            parser->paren_count--;
+            if (parser->paren_count == 0)
+               parser->newline_as_space = 0;
+         } else if (ret == NEWLINE) {
+            ret = SPACE;
+         } else if (ret != SPACE) {
+            if (parser->paren_count == 0) /* another token arrived before any '(' */
+               parser->newline_as_space = 0;
+         }
+      }
+      else if (parser->in_control_line)
+      {
+         if (ret == NEWLINE)
+            parser->in_control_line = 0;
+      }
+      else if (ret == DEFINE_TOKEN ||
+               ret == UNDEF || ret == IF ||
+               ret == IFDEF || ret == IFNDEF ||
+               ret == ELIF || ret == ELSE ||
+               ret == ENDIF || ret == HASH_TOKEN)
+      {
+         parser->in_control_line = 1;
+      }
+      else if (ret == IDENTIFIER)
+      {
+         macro_t *macro;
+         macro = hash_table_find (parser->defines,
+                                  yylval->str);
+         if (macro && macro->is_function) {
+            parser->newline_as_space = 1;
+            parser->paren_count = 0;
+         }
+      }
+
+      return ret;
+   }
+
+   node = parser->lex_from_node;
+
+   if (node == NULL) {
+      ralloc_free (parser->lex_from_list);
+      parser->lex_from_list = NULL;
+      return NEWLINE;
+   }
+
+   *yylval = node->token->value;
+   ret = node->token->type;
+
+   parser->lex_from_node = node->next;
+
+   return ret;
+}
+/* Arrange for glcpp_parser_lex to return the tokens of 'list', with
+ * SPACE tokens removed. 'list' is freed. It is asserted that no
+ * earlier list is still being lexed from. */
+static void
+glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list)
+{
+   token_node_t *node;
+
+   assert (parser->lex_from_list == NULL);
+
+   /* Copy list, eliminating any space tokens. */
+   parser->lex_from_list = _token_list_create (parser);
+
+   for (node = list->head; node; node = node->next) {
+      if (node->token->type == SPACE)
+         continue;
+      _token_list_append (parser->lex_from_list, node->token);
+   }
+
+   ralloc_free (list);
+
+   parser->lex_from_node = parser->lex_from_list->head;
+
+   /* It's possible the list consisted of nothing but whitespace. */
+   if (parser->lex_from_node == NULL) {
+      ralloc_free (parser->lex_from_list);
+      parser->lex_from_list = NULL;
+   }
+}
+/* Push a new entry, recorded at location 'loc', onto the skip stack.
+ * If the current top of the stack (if any) is SKIP_NO_SKIP, the new
+ * entry is SKIP_NO_SKIP when 'condition' is non-zero and SKIP_TO_ELSE
+ * otherwise. If the current top is in any other state, the new entry
+ * is SKIP_TO_ENDIF regardless of 'condition'. */
+static void
+_glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, YYLTYPE *loc,
+                                  int condition)
+{
+   skip_type_t current = SKIP_NO_SKIP;
+   skip_node_t *node;
+
+   if (parser->skip_stack)
+      current = parser->skip_stack->type;
+
+   node = ralloc (parser, skip_node_t);
+   node->loc = *loc;
+
+   if (current == SKIP_NO_SKIP) {
+      if (condition)
+         node->type = SKIP_NO_SKIP;
+      else
+         node->type = SKIP_TO_ELSE;
+   } else {
+      node->type = SKIP_TO_ENDIF;
+   }
+
+   node->has_else = false;
+   node->next = parser->skip_stack;
+   parser->skip_stack = node;
+}
+/* Update the top entry of the skip stack. An entry in state
+ * SKIP_TO_ELSE becomes SKIP_NO_SKIP if 'condition' is non-zero; an
+ * entry in any other state becomes SKIP_TO_ENDIF. 'type' is used only
+ * in the error message reported when the stack is empty. */
+static void
+_glcpp_parser_skip_stack_change_if (glcpp_parser_t *parser, YYLTYPE *loc,
+                                    const char *type, int condition)
+{
+   if (parser->skip_stack == NULL) {
+      glcpp_error (loc, parser, "#%s without #if\n", type);
+      return;
+   }
+
+   if (parser->skip_stack->type == SKIP_TO_ELSE) {
+      if (condition)
+         parser->skip_stack->type = SKIP_NO_SKIP;
+   } else {
+      parser->skip_stack->type = SKIP_TO_ENDIF;
+   }
+}
+/* Pop and free the top entry of the skip stack. An error is reported
+ * if the stack is empty. */
+static void
+_glcpp_parser_skip_stack_pop (glcpp_parser_t *parser, YYLTYPE *loc)
+{
+   skip_node_t *node;
+
+   if (parser->skip_stack == NULL) {
+      glcpp_error (loc, parser, "#endif without #if\n");
+      return;
+   }
+
+   node = parser->skip_stack;
+   parser->skip_stack = node->next;
+   ralloc_free (node);
+}
+/* Record the language version and add the matching built-in macro
+ * definitions. Only the first call has any effect (guarded by
+ * parser->version_resolved). The parser is treated as GLSL ES when
+ * 'version' is 100 or 'es_identifier' is "es". When 'explicitly_set'
+ * is true, a "#version" line is also written to parser->output. */
+static void
+_glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t version,
+                                         const char *es_identifier,
+                                         bool explicitly_set)
+{
+   const struct gl_extensions *extensions = parser->extensions; /* may be NULL; checked below */
+
+   if (parser->version_resolved)
+      return;
+
+   parser->version_resolved = true;
+
+   add_builtin_define (parser, "__VERSION__", version);
+
+   parser->is_gles = (version == 100) ||
+                     (es_identifier &&
+                      (strcmp(es_identifier, "es") == 0));
+
+   /* Add pre-defined macros. */
+   if (parser->is_gles) {
+      add_builtin_define(parser, "GL_ES", 1);
+      add_builtin_define(parser, "GL_EXT_separate_shader_objects", 1);
+      add_builtin_define(parser, "GL_EXT_draw_buffers", 1);
+
+      if (extensions != NULL) {
+         if (extensions->OES_EGL_image_external)
+            add_builtin_define(parser, "GL_OES_EGL_image_external", 1);
+         if (extensions->OES_standard_derivatives)
+            add_builtin_define(parser, "GL_OES_standard_derivatives", 1);
+         if (extensions->ARB_texture_multisample)
+            add_builtin_define(parser, "GL_OES_texture_storage_multisample_2d_array", 1);
+         if (extensions->ARB_blend_func_extended)
+            add_builtin_define(parser, "GL_EXT_blend_func_extended", 1);
+      }
+   } else {
+      add_builtin_define(parser, "GL_ARB_draw_buffers", 1);
+      add_builtin_define(parser, "GL_ARB_enhanced_layouts", 1);
+      add_builtin_define(parser, "GL_ARB_separate_shader_objects", 1);
+      add_builtin_define(parser, "GL_ARB_texture_rectangle", 1);
+      add_builtin_define(parser, "GL_AMD_shader_trinary_minmax", 1);
+
+
+      if (extensions != NULL) {
+         if (extensions->EXT_texture_array)
+            add_builtin_define(parser, "GL_EXT_texture_array", 1);
+
+         if (extensions->ARB_arrays_of_arrays)
+            add_builtin_define(parser, "GL_ARB_arrays_of_arrays", 1);
+
+         if (extensions->ARB_fragment_coord_conventions)
+            add_builtin_define(parser, "GL_ARB_fragment_coord_conventions",
+                               1);
+
+         if (extensions->ARB_fragment_layer_viewport)
+            add_builtin_define(parser, "GL_ARB_fragment_layer_viewport", 1);
+
+         if (extensions->ARB_explicit_attrib_location)
+            add_builtin_define(parser, "GL_ARB_explicit_attrib_location", 1);
+
+         if (extensions->ARB_explicit_uniform_location)
+            add_builtin_define(parser, "GL_ARB_explicit_uniform_location", 1);
+
+         if (extensions->ARB_shader_texture_lod)
+            add_builtin_define(parser, "GL_ARB_shader_texture_lod", 1);
+
+         if (extensions->ARB_draw_instanced)
+            add_builtin_define(parser, "GL_ARB_draw_instanced", 1);
+
+         if (extensions->ARB_conservative_depth) {
+            add_builtin_define(parser, "GL_AMD_conservative_depth", 1);
+            add_builtin_define(parser, "GL_ARB_conservative_depth", 1);
+         }
+
+         if (extensions->ARB_shader_bit_encoding)
+            add_builtin_define(parser, "GL_ARB_shader_bit_encoding", 1);
+
+         if (extensions->ARB_shader_clock)
+            add_builtin_define(parser, "GL_ARB_shader_clock", 1);
+
+         if (extensions->ARB_uniform_buffer_object)
+            add_builtin_define(parser, "GL_ARB_uniform_buffer_object", 1);
+
+         if (extensions->ARB_texture_cube_map_array)
+            add_builtin_define(parser, "GL_ARB_texture_cube_map_array", 1);
+
+         if (extensions->ARB_shading_language_packing)
+            add_builtin_define(parser, "GL_ARB_shading_language_packing", 1);
+
+         if (extensions->ARB_texture_multisample)
+            add_builtin_define(parser, "GL_ARB_texture_multisample", 1);
+
+         if (extensions->ARB_texture_query_levels)
+            add_builtin_define(parser, "GL_ARB_texture_query_levels", 1);
+
+         if (extensions->ARB_texture_query_lod)
+            add_builtin_define(parser, "GL_ARB_texture_query_lod", 1);
+
+         if (extensions->ARB_gpu_shader5)
+            add_builtin_define(parser, "GL_ARB_gpu_shader5", 1);
+
+         if (extensions->ARB_gpu_shader_fp64)
+            add_builtin_define(parser, "GL_ARB_gpu_shader_fp64", 1);
+
+         if (extensions->ARB_vertex_attrib_64bit)
+            add_builtin_define(parser, "GL_ARB_vertex_attrib_64bit", 1);
+
+         if (extensions->AMD_vertex_shader_layer)
+            add_builtin_define(parser, "GL_AMD_vertex_shader_layer", 1);
+
+         if (extensions->AMD_vertex_shader_viewport_index)
+            add_builtin_define(parser, "GL_AMD_vertex_shader_viewport_index", 1);
+
+         if (extensions->ARB_shading_language_420pack)
+            add_builtin_define(parser, "GL_ARB_shading_language_420pack", 1);
+
+         if (extensions->ARB_sample_shading)
+            add_builtin_define(parser, "GL_ARB_sample_shading", 1);
+
+         if (extensions->ARB_texture_gather)
+            add_builtin_define(parser, "GL_ARB_texture_gather", 1);
+
+         if (extensions->ARB_shader_atomic_counters)
+            add_builtin_define(parser, "GL_ARB_shader_atomic_counters", 1);
+
+         if (extensions->ARB_viewport_array)
+            add_builtin_define(parser, "GL_ARB_viewport_array", 1);
+
+         if (extensions->ARB_compute_shader)
+            add_builtin_define(parser, "GL_ARB_compute_shader", 1);
+
+         if (extensions->ARB_shader_image_load_store)
+            add_builtin_define(parser, "GL_ARB_shader_image_load_store", 1);
+
+         if (extensions->ARB_shader_image_size)
+            add_builtin_define(parser, "GL_ARB_shader_image_size", 1);
+
+         if (extensions->ARB_shader_texture_image_samples)
+            add_builtin_define(parser, "GL_ARB_shader_texture_image_samples", 1);
+
+         if (extensions->ARB_derivative_control)
+            add_builtin_define(parser, "GL_ARB_derivative_control", 1);
+
+         if (extensions->ARB_shader_precision)
+            add_builtin_define(parser, "GL_ARB_shader_precision", 1);
+
+         if (extensions->ARB_shader_storage_buffer_object)
+            add_builtin_define(parser, "GL_ARB_shader_storage_buffer_object", 1);
+
+         if (extensions->ARB_tessellation_shader)
+            add_builtin_define(parser, "GL_ARB_tessellation_shader", 1);
+
+         if (extensions->ARB_shader_subroutine)
+            add_builtin_define(parser, "GL_ARB_shader_subroutine", 1);
+
+         if (extensions->ARB_shader_draw_parameters)
+            add_builtin_define(parser, "GL_ARB_shader_draw_parameters", 1);
+      }
+   }
+   /* The remaining definitions apply to both branches above. */
+   if (extensions != NULL) {
+      if (extensions->EXT_shader_integer_mix)
+         add_builtin_define(parser, "GL_EXT_shader_integer_mix", 1);
+
+      if (extensions->EXT_shader_samples_identical)
+         add_builtin_define(parser, "GL_EXT_shader_samples_identical", 1);
+   }
+
+   if (version >= 150)
+      add_builtin_define(parser, "GL_core_profile", 1);
+
+   /* Currently, all ES2/ES3 implementations support highp in the
+    * fragment shader, so we always define this macro in ES2/ES3.
+    * If we ever get a driver that doesn't support highp, we'll
+    * need to add a flag to the gl_context and check that here.
+    */
+   if (version >= 130 || parser->is_gles)
+      add_builtin_define (parser, "GL_FRAGMENT_PRECISION_HIGH", 1);
+
+   if (explicitly_set) {
+      ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length,
+                                    "#version %" PRIiMAX "%s%s", version,
+                                    es_identifier ? " " : "",
+                                    es_identifier ? es_identifier : "");
+   }
+}
+
+/* GLSL version if no version is explicitly specified. */
+#define IMPLICIT_GLSL_VERSION 110
+
+/* GLSL ES version if no version is explicitly specified. */
+#define IMPLICIT_GLSL_ES_VERSION 100
+/* Apply the implicit version: IMPLICIT_GLSL_ES_VERSION when parser->api
+ * is API_OPENGLES2, IMPLICIT_GLSL_VERSION otherwise. Has no effect if a
+ * version has already been resolved. */
+void
+glcpp_parser_resolve_implicit_version(glcpp_parser_t *parser)
+{
+   int language_version = parser->api == API_OPENGLES2 ?
+                          IMPLICIT_GLSL_ES_VERSION :
+                          IMPLICIT_GLSL_VERSION;
+
+   _glcpp_parser_handle_version_declaration(parser, language_version,
+                                            NULL, false);
+}
diff --git a/src/compiler/glsl/glcpp/glcpp.c b/src/compiler/glsl/glcpp/glcpp.c
new file mode 100644
index 0000000..c62f4ef
--- /dev/null
+++ b/src/compiler/glsl/glcpp/glcpp.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <getopt.h>
+
+#include "glcpp.h"
+#include "main/mtypes.h"
+#include "main/shaderobj.h"
+#include "util/strtod.h"
+
+extern int glcpp_parser_debug;
+/* Store 'sh' in *ptr; 'ctx' is unused. NOTE(review): presumably a local stub so glcpp links on its own -- confirm. */
+void
+_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
+                       struct gl_shader *sh)
+{
+   (void) ctx;
+   *ptr = sh;
+}
+
+/* Read from fp until EOF and return a string of everything read.
+ * The NUL-terminated buffer is allocated from 'ctx'; NULL is returned if allocation fails. */
+static char *
+load_text_fp (void *ctx, FILE *fp)
+{
+#define CHUNK 4096
+   char *text = NULL;
+   size_t text_size = 0;
+   size_t total_read = 0;
+   size_t bytes;
+
+   while (1) {
+      if (total_read + CHUNK + 1 > text_size) { /* room for one more chunk plus the terminator */
+         text_size = text_size ? text_size * 2 : CHUNK + 1;
+         text = reralloc_size (ctx, text, text_size);
+         if (text == NULL) {
+            fprintf (stderr, "Out of memory\n");
+            return NULL;
+         }
+      }
+      bytes = fread (text + total_read, 1, CHUNK, fp);
+      total_read += bytes;
+
+      if (bytes < CHUNK) { /* a short read ends the loop; ferror() is not consulted */
+         break;
+      }
+   }
+
+   text[total_read] = '\0';
+
+   return text;
+}
+/* Return the contents of 'filename', or of stdin when 'filename' is NULL or "-"; NULL on failure. */
+static char *
+load_text_file(void *ctx, const char *filename)
+{
+   char *text;
+   FILE *fp;
+
+   if (filename == NULL || strcmp (filename, "-") == 0)
+      return load_text_fp (ctx, stdin);
+
+   fp = fopen (filename, "r");
+   if (fp == NULL) {
+      fprintf (stderr, "Failed to open file %s: %s\n",
+               filename, strerror (errno));
+      return NULL;
+   }
+
+   text = load_text_fp (ctx, fp);
+
+   fclose(fp);
+
+   return text;
+}
+
+/* Initialize only those things that glcpp cares about.
+ */
+static void
+init_fake_gl_context (struct gl_context *gl_ctx)
+{
+   gl_ctx->API = API_OPENGL_COMPAT;
+   gl_ctx->Const.DisableGLSLLineContinuations = false; /* main() sets this to true for --disable-line-continuations */
+}
+/* Print the command-line help text to stderr. */
+static void
+usage (void)
+{
+   fprintf (stderr,
+            "Usage: glcpp [OPTIONS] [--] [<filename>]\n"
+            "\n"
+            "Pre-process the given filename (stdin if no filename given).\n"
+            "The following options are supported:\n"
+            " --disable-line-continuations Do not interpret lines ending with a\n"
+            " backslash ('\\') as a line continuation.\n");
+}
+/* getopt_long value for the long-only option; CHAR_MAX + 1 lies above every single-character option code. */
+enum {
+   DISABLE_LINE_CONTINUATIONS_OPT = CHAR_MAX + 1
+};
+/* Long options for getopt_long; "--debug" maps to the short option 'd'. */
+static const struct option
+long_options[] = {
+   {"disable-line-continuations", no_argument, 0, DISABLE_LINE_CONTINUATIONS_OPT },
+   {"debug", no_argument, 0, 'd'},
+   {0, 0, 0, 0 }
+};
+/* Parse options, load the shader from the named file or stdin, preprocess it, and print the result. */
+int
+main (int argc, char *argv[])
+{
+   char *filename = NULL;
+   void *ctx = ralloc(NULL, void*); /* ralloc context for all allocations below; freed before returning */
+   char *info_log = ralloc_strdup(ctx, "");
+   const char *shader;
+   int ret;
+   struct gl_context gl_ctx;
+   int c;
+
+   init_fake_gl_context (&gl_ctx);
+
+   while ((c = getopt_long(argc, argv, "d", long_options, NULL)) != -1) {
+      switch (c) {
+      case DISABLE_LINE_CONTINUATIONS_OPT:
+         gl_ctx.Const.DisableGLSLLineContinuations = true;
+         break;
+      case 'd':
+         glcpp_parser_debug = 1;
+         break;
+      default:
+         usage ();
+         exit (1);
+      }
+   }
+
+   if (optind + 1 < argc) { /* more than one non-option argument */
+      printf ("Unexpected argument: %s\n", argv[optind+1]);
+      usage ();
+      exit (1);
+   }
+   if (optind < argc) {
+      filename = argv[optind];
+   }
+
+   shader = load_text_file (ctx, filename);
+   if (shader == NULL)
+      return 1;
+
+   _mesa_locale_init();
+
+   ret = glcpp_preprocess(ctx, &shader, &info_log, NULL, &gl_ctx);
+   /* Preprocessed text goes to stdout, the info log to stderr. */
+   printf("%s", shader);
+   fprintf(stderr, "%s", info_log);
+
+   ralloc_free(ctx);
+
+   return ret;
+}
diff --git a/src/compiler/glsl/glcpp/glcpp.h b/src/compiler/glsl/glcpp/glcpp.h
new file mode 100644
index 0000000..70aa14b
--- /dev/null
+++ b/src/compiler/glsl/glcpp/glcpp.h
@@ -0,0 +1,251 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted,
free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef GLCPP_H +#define GLCPP_H + +#include <stdint.h> +#include <stdbool.h> + +#include "main/mtypes.h" + +#include "util/ralloc.h" + +#include "program/hash_table.h" + +#define yyscan_t void* + +/* Some data types used for parser values. 
 */
+
+typedef struct expression_value {
+   intmax_t value;
+   char *undefined_macro;
+} expression_value_t;
+
+
+typedef struct string_node {
+   const char *str;
+   struct string_node *next;
+} string_node_t;
+
+typedef struct string_list {
+   string_node_t *head;
+   string_node_t *tail;
+} string_list_t;
+
+typedef struct token token_t;
+typedef struct token_list token_list_t;
+/* Semantic value type shared by the lexer and the parser. */
+typedef union YYSTYPE
+{
+   intmax_t ival;
+   expression_value_t expression_value;
+   char *str;
+   string_list_t *string_list;
+   token_t *token;
+   token_list_t *token_list;
+} YYSTYPE;
+
+# define YYSTYPE_IS_TRIVIAL 1
+# define YYSTYPE_IS_DECLARED 1
+/* Location type: the usual line/column span plus a source number. */
+typedef struct YYLTYPE {
+   int first_line;
+   int first_column;
+   int last_line;
+   int last_column;
+   unsigned source;
+} YYLTYPE;
+# define YYLTYPE_IS_DECLARED 1
+# define YYLTYPE_IS_TRIVIAL 1
+/* Location of a grouping: spans the first through the N-th right-hand-side symbol; 'source' is reset to 0. */
+# define YYLLOC_DEFAULT(Current, Rhs, N) \
+do { \
+   if (N) \
+   { \
+      (Current).first_line = YYRHSLOC(Rhs, 1).first_line; \
+      (Current).first_column = YYRHSLOC(Rhs, 1).first_column; \
+      (Current).last_line = YYRHSLOC(Rhs, N).last_line; \
+      (Current).last_column = YYRHSLOC(Rhs, N).last_column; \
+   } \
+   else \
+   { \
+      (Current).first_line = (Current).last_line = \
+         YYRHSLOC(Rhs, 0).last_line; \
+      (Current).first_column = (Current).last_column = \
+         YYRHSLOC(Rhs, 0).last_column; \
+   } \
+   (Current).source = 0; \
+} while (0)
+
+struct token {
+   int type;
+   YYSTYPE value;
+   YYLTYPE location;
+};
+
+typedef struct token_node {
+   token_t *token;
+   struct token_node *next;
+} token_node_t;
+/* Singly linked list of tokens. */
+struct token_list {
+   token_node_t *head;
+   token_node_t *tail;
+   token_node_t *non_space_tail;
+};
+
+typedef struct argument_node {
+   token_list_t *argument;
+   struct argument_node *next;
+} argument_node_t;
+
+typedef struct argument_list {
+   argument_node_t *head;
+   argument_node_t *tail;
+} argument_list_t;
+
+typedef struct glcpp_parser glcpp_parser_t;
+
+typedef enum {
+   TOKEN_CLASS_IDENTIFIER,
+   TOKEN_CLASS_IDENTIFIER_FINALIZED,
+   TOKEN_CLASS_FUNC_MACRO,
+   TOKEN_CLASS_OBJ_MACRO
+} token_class_t;
+
+token_class_t
+glcpp_parser_classify_token (glcpp_parser_t *parser,
+                             const char *identifier,
+                             int *parameter_index);
+/* A macro definition as stored in the parser's 'defines' table. */
+typedef struct {
+   int is_function; /* 1 for a function-like macro, 0 for an object-like one */
+   string_list_t *parameters; /* NULL for object-like macros */
+   const char *identifier;
+   token_list_t *replacements; /* NULL when the macro is defined as empty */
+} macro_t;
+
+typedef struct expansion_node {
+   macro_t *macro;
+   token_node_t *replacements;
+   struct expansion_node *next;
+} expansion_node_t;
+
+typedef enum skip_type {
+   SKIP_NO_SKIP,
+   SKIP_TO_ELSE,
+   SKIP_TO_ENDIF
+} skip_type_t;
+/* One entry of the conditional (skip) stack. */
+typedef struct skip_node {
+   skip_type_t type;
+   bool has_else;
+   YYLTYPE loc; /* location of the initial #if/#elif/... */
+   struct skip_node *next;
+} skip_node_t;
+/* One entry of the stack of macros currently being expanded. */
+typedef struct active_list {
+   const char *identifier;
+   token_node_t *marker; /* the entry is popped when expansion reaches this node */
+   struct active_list *next;
+} active_list_t;
+
+struct glcpp_parser {
+   yyscan_t scanner;
+   struct hash_table *defines; /* macro definitions, keyed by identifier */
+   active_list_t *active; /* top of the active-expansion stack */
+   int lexing_directive;
+   int space_tokens;
+   int last_token_was_newline;
+   int last_token_was_space;
+   int first_non_space_token_this_line;
+   int newline_as_space; /* while set, glcpp_parser_lex turns NEWLINE into SPACE */
+   int in_control_line; /* set from a directive token until the next NEWLINE */
+   int paren_count; /* parenthesis depth tracked while newline_as_space is set */
+   int commented_newlines;
+   skip_node_t *skip_stack;
+   int skipping;
+   token_list_t *lex_from_list; /* when non-NULL, tokens are lexed from this list */
+   token_node_t *lex_from_node; /* next node of lex_from_list to return */
+   char *output;
+   char *info_log;
+   size_t output_length;
+   size_t info_log_length;
+   int error;
+   const struct gl_extensions *extensions; /* may be NULL */
+   gl_api api;
+   bool version_resolved; /* set once a version has been handled */
+   bool has_new_line_number;
+   int new_line_number;
+   bool has_new_source_number;
+   int new_source_number;
+   bool is_gles; /* true for version 100 or an "es" version identifier */
+};
+
+struct gl_extensions;
+
+glcpp_parser_t *
+glcpp_parser_create (const struct gl_extensions *extensions, gl_api api);
+
+int
+glcpp_parser_parse (glcpp_parser_t *parser);
+
+void
+glcpp_parser_destroy (glcpp_parser_t *parser);
+
+void
+glcpp_parser_resolve_implicit_version(glcpp_parser_t *parser);
+
+int
+glcpp_preprocess(void *ralloc_ctx, const char **shader, char **info_log,
+                 const struct gl_extensions *extensions, struct gl_context *g_ctx);
+
+/* Functions for writing to the info log */
+
+void
+glcpp_error (YYLTYPE *locp, glcpp_parser_t *parser, const char *fmt, ...);
+
+void
+glcpp_warning (YYLTYPE *locp, glcpp_parser_t *parser, const char *fmt, ...);
+
+/* Generated by glcpp-lex.l to glcpp-lex.c */
+
+int
+glcpp_lex_init_extra (glcpp_parser_t *parser, yyscan_t* scanner);
+
+void
+glcpp_lex_set_source_string(glcpp_parser_t *parser, const char *shader);
+
+int
+glcpp_lex (YYSTYPE *lvalp, YYLTYPE *llocp, yyscan_t scanner);
+
+int
+glcpp_lex_destroy (yyscan_t scanner);
+
+/* Generated by glcpp-parse.y to glcpp-parse.c */
+
+int
+yyparse (glcpp_parser_t *parser);
+
+#endif
diff --git a/src/compiler/glsl/glcpp/pp.c b/src/compiler/glsl/glcpp/pp.c
new file mode 100644
index 0000000..160c666
--- /dev/null
+++ b/src/compiler/glsl/glcpp/pp.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <assert.h> +#include <string.h> +#include <ctype.h> +#include "glcpp.h" + +void +glcpp_error (YYLTYPE *locp, glcpp_parser_t *parser, const char *fmt, ...) +{ + va_list ap; + + parser->error = 1; + ralloc_asprintf_rewrite_tail(&parser->info_log, + &parser->info_log_length, + "%u:%u(%u): " + "preprocessor error: ", + locp->source, + locp->first_line, + locp->first_column); + va_start(ap, fmt); + ralloc_vasprintf_rewrite_tail(&parser->info_log, + &parser->info_log_length, + fmt, ap); + va_end(ap); + ralloc_asprintf_rewrite_tail(&parser->info_log, + &parser->info_log_length, "\n"); +} + +void +glcpp_warning (YYLTYPE *locp, glcpp_parser_t *parser, const char *fmt, ...) +{ + va_list ap; + + ralloc_asprintf_rewrite_tail(&parser->info_log, + &parser->info_log_length, + "%u:%u(%u): " + "preprocessor warning: ", + locp->source, + locp->first_line, + locp->first_column); + va_start(ap, fmt); + ralloc_vasprintf_rewrite_tail(&parser->info_log, + &parser->info_log_length, + fmt, ap); + va_end(ap); + ralloc_asprintf_rewrite_tail(&parser->info_log, + &parser->info_log_length, "\n"); +} + +/* Given str, (that's expected to start with a newline terminator of some + * sort), return a pointer to the first character in str after the newline. + * + * A newline terminator can be any of the following sequences: + * + * "\r\n" + * "\n\r" + * "\n" + * "\r" + * + * And the longest such sequence will be skipped. 
+ */ +static const char * +skip_newline (const char *str) +{ + const char *ret = str; + + if (ret == NULL) + return ret; + + if (*ret == '\0') + return ret; + + if (*ret == '\r') { + ret++; + if (*ret && *ret == '\n') + ret++; + } else if (*ret == '\n') { + ret++; + if (*ret && *ret == '\r') + ret++; + } + + return ret; +} + +/* Remove any line continuation characters in the shader, (whether in + * preprocessing directives or in GLSL code). + */ +static char * +remove_line_continuations(glcpp_parser_t *ctx, const char *shader) +{ + char *clean = ralloc_strdup(ctx, ""); + const char *backslash, *newline, *search_start; + const char *cr, *lf; + char newline_separator[3]; + int collapsed_newlines = 0; + + search_start = shader; + + /* Determine what flavor of newlines this shader is using. GLSL + * provides for 4 different possible ways to separate lines, (using + * one or two characters): + * + * "\n" (line-feed, like Linux, Unix, and new Mac OS) + * "\r" (carriage-return, like old Mac files) + * "\r\n" (carriage-return + line-feed, like DOS files) + * "\n\r" (line-feed + carriage-return, like nothing, really) + * + * This code explicitly supports a shader that uses a mixture of + * newline terminators and will properly handle line continuation + * backslashes followed by any of the above. + * + * But, since we must also insert additional newlines in the output + * (for any collapsed lines) we attempt to maintain consistency by + * examining the first encountered newline terminator, and using the + * same terminator for any newlines we insert. + */ + cr = strchr(search_start, '\r'); + lf = strchr(search_start, '\n'); + + newline_separator[0] = '\n'; + newline_separator[1] = '\0'; + newline_separator[2] = '\0'; + + if (cr == NULL) { + /* Nothing to do. 
*/ + } else if (lf == NULL) { + newline_separator[0] = '\r'; + } else if (lf == cr + 1) { + newline_separator[0] = '\r'; + newline_separator[1] = '\n'; + } else if (cr == lf + 1) { + newline_separator[0] = '\n'; + newline_separator[1] = '\r'; + } + + while (true) { + backslash = strchr(search_start, '\\'); + + /* If we have previously collapsed any line-continuations, + * then we want to insert additional newlines at the next + * occurrence of a newline character to avoid changing any + * line numbers. + */ + if (collapsed_newlines) { + cr = strchr (search_start, '\r'); + lf = strchr (search_start, '\n'); + if (cr && lf) + newline = cr < lf ? cr : lf; + else if (cr) + newline = cr; + else + newline = lf; + if (newline && + (backslash == NULL || newline < backslash)) + { + ralloc_strncat(&clean, shader, + newline - shader + 1); + while (collapsed_newlines) { + ralloc_strcat(&clean, newline_separator); + collapsed_newlines--; + } + shader = skip_newline (newline); + search_start = shader; + } + } + + search_start = backslash + 1; + + if (backslash == NULL) + break; + + /* At each line continuation, (backslash followed by a + * newline), copy all preceding text to the output, then + * advance the shader pointer to the character after the + * newline. + */ + if (backslash[1] == '\r' || backslash[1] == '\n') + { + collapsed_newlines++; + ralloc_strncat(&clean, shader, backslash - shader); + shader = skip_newline (backslash + 1); + search_start = shader; + } + } + + ralloc_strcat(&clean, shader); + + return clean; +} + +int +glcpp_preprocess(void *ralloc_ctx, const char **shader, char **info_log, + const struct gl_extensions *extensions, struct gl_context *gl_ctx) +{ + int errors; + glcpp_parser_t *parser = glcpp_parser_create (extensions, gl_ctx->API); + + if (! 
gl_ctx->Const.DisableGLSLLineContinuations) + *shader = remove_line_continuations(parser, *shader); + + glcpp_lex_set_source_string (parser, *shader); + + glcpp_parser_parse (parser); + + if (parser->skip_stack) + glcpp_error (&parser->skip_stack->loc, parser, "Unterminated #if\n"); + + glcpp_parser_resolve_implicit_version(parser); + + ralloc_strcat(info_log, parser->info_log); + + ralloc_steal(ralloc_ctx, parser->output); + *shader = parser->output; + + errors = parser->error; + glcpp_parser_destroy (parser); + return errors; +} diff --git a/src/compiler/glsl/glcpp/tests/.gitignore b/src/compiler/glsl/glcpp/tests/.gitignore new file mode 100644 index 0000000..3802c85 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/.gitignore @@ -0,0 +1,4 @@ +subtest-cr/ +subtest-lf/ +subtest-cr-lf/ +subtest-lf-cr/ diff --git a/src/compiler/glsl/glcpp/tests/000-content-with-spaces.c b/src/compiler/glsl/glcpp/tests/000-content-with-spaces.c new file mode 100644 index 0000000..1f2320e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/000-content-with-spaces.c @@ -0,0 +1 @@ + this is four tokens with spaces diff --git a/src/compiler/glsl/glcpp/tests/000-content-with-spaces.c.expected b/src/compiler/glsl/glcpp/tests/000-content-with-spaces.c.expected new file mode 100644 index 0000000..0079191 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/000-content-with-spaces.c.expected @@ -0,0 +1 @@ + this is four tokens with spaces diff --git a/src/compiler/glsl/glcpp/tests/001-define.c b/src/compiler/glsl/glcpp/tests/001-define.c new file mode 100644 index 0000000..cbf2fee --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/001-define.c @@ -0,0 +1,2 @@ +#define foo 1 +foo diff --git a/src/compiler/glsl/glcpp/tests/001-define.c.expected b/src/compiler/glsl/glcpp/tests/001-define.c.expected new file mode 100644 index 0000000..a464d9d --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/001-define.c.expected @@ -0,0 +1,2 @@ + +1 diff --git a/src/compiler/glsl/glcpp/tests/002-define-chain.c 
b/src/compiler/glsl/glcpp/tests/002-define-chain.c new file mode 100644 index 0000000..87d75c6 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/002-define-chain.c @@ -0,0 +1,3 @@ +#define foo 1 +#define bar foo +bar diff --git a/src/compiler/glsl/glcpp/tests/002-define-chain.c.expected b/src/compiler/glsl/glcpp/tests/002-define-chain.c.expected new file mode 100644 index 0000000..c6c9ee3 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/002-define-chain.c.expected @@ -0,0 +1,3 @@ + + +1 diff --git a/src/compiler/glsl/glcpp/tests/003-define-chain-reverse.c b/src/compiler/glsl/glcpp/tests/003-define-chain-reverse.c new file mode 100644 index 0000000..a18b724 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/003-define-chain-reverse.c @@ -0,0 +1,3 @@ +#define bar foo +#define foo 1 +bar diff --git a/src/compiler/glsl/glcpp/tests/003-define-chain-reverse.c.expected b/src/compiler/glsl/glcpp/tests/003-define-chain-reverse.c.expected new file mode 100644 index 0000000..c6c9ee3 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/003-define-chain-reverse.c.expected @@ -0,0 +1,3 @@ + + +1 diff --git a/src/compiler/glsl/glcpp/tests/004-define-recursive.c b/src/compiler/glsl/glcpp/tests/004-define-recursive.c new file mode 100644 index 0000000..2ac56ea --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/004-define-recursive.c @@ -0,0 +1,6 @@ +#define foo bar +#define bar baz +#define baz foo +foo +bar +baz diff --git a/src/compiler/glsl/glcpp/tests/004-define-recursive.c.expected b/src/compiler/glsl/glcpp/tests/004-define-recursive.c.expected new file mode 100644 index 0000000..2d07687 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/004-define-recursive.c.expected @@ -0,0 +1,6 @@ + + + +foo +bar +baz diff --git a/src/compiler/glsl/glcpp/tests/005-define-composite-chain.c b/src/compiler/glsl/glcpp/tests/005-define-composite-chain.c new file mode 100644 index 0000000..f5521df --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/005-define-composite-chain.c @@ -0,0 +1,3 @@ +#define 
foo 1 +#define bar a foo +bar diff --git a/src/compiler/glsl/glcpp/tests/005-define-composite-chain.c.expected b/src/compiler/glsl/glcpp/tests/005-define-composite-chain.c.expected new file mode 100644 index 0000000..892975c --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/005-define-composite-chain.c.expected @@ -0,0 +1,3 @@ + + +a 1 diff --git a/src/compiler/glsl/glcpp/tests/006-define-composite-chain-reverse.c b/src/compiler/glsl/glcpp/tests/006-define-composite-chain-reverse.c new file mode 100644 index 0000000..4bb91a1 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/006-define-composite-chain-reverse.c @@ -0,0 +1,3 @@ +#define bar a foo +#define foo 1 +bar diff --git a/src/compiler/glsl/glcpp/tests/006-define-composite-chain-reverse.c.expected b/src/compiler/glsl/glcpp/tests/006-define-composite-chain-reverse.c.expected new file mode 100644 index 0000000..892975c --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/006-define-composite-chain-reverse.c.expected @@ -0,0 +1,3 @@ + + +a 1 diff --git a/src/compiler/glsl/glcpp/tests/007-define-composite-recursive.c b/src/compiler/glsl/glcpp/tests/007-define-composite-recursive.c new file mode 100644 index 0000000..5784565 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/007-define-composite-recursive.c @@ -0,0 +1,6 @@ +#define foo a bar +#define bar b baz +#define baz c foo +foo +bar +baz diff --git a/src/compiler/glsl/glcpp/tests/007-define-composite-recursive.c.expected b/src/compiler/glsl/glcpp/tests/007-define-composite-recursive.c.expected new file mode 100644 index 0000000..0b0b477 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/007-define-composite-recursive.c.expected @@ -0,0 +1,6 @@ + + + +a b c foo +b c a bar +c a b baz diff --git a/src/compiler/glsl/glcpp/tests/008-define-empty.c b/src/compiler/glsl/glcpp/tests/008-define-empty.c new file mode 100644 index 0000000..b1bd17e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/008-define-empty.c @@ -0,0 +1,2 @@ +#define foo +foo diff --git 
a/src/compiler/glsl/glcpp/tests/008-define-empty.c.expected b/src/compiler/glsl/glcpp/tests/008-define-empty.c.expected new file mode 100644 index 0000000..d148bc8 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/008-define-empty.c.expected @@ -0,0 +1,2 @@ + + diff --git a/src/compiler/glsl/glcpp/tests/009-undef.c b/src/compiler/glsl/glcpp/tests/009-undef.c new file mode 100644 index 0000000..3fc1fb4 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/009-undef.c @@ -0,0 +1,4 @@ +#define foo 1 +foo +#undef foo +foo diff --git a/src/compiler/glsl/glcpp/tests/009-undef.c.expected b/src/compiler/glsl/glcpp/tests/009-undef.c.expected new file mode 100644 index 0000000..9c0b35a --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/009-undef.c.expected @@ -0,0 +1,4 @@ + +1 + +foo diff --git a/src/compiler/glsl/glcpp/tests/010-undef-re-define.c b/src/compiler/glsl/glcpp/tests/010-undef-re-define.c new file mode 100644 index 0000000..32ff737 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/010-undef-re-define.c @@ -0,0 +1,6 @@ +#define foo 1 +foo +#undef foo +foo +#define foo 2 +foo diff --git a/src/compiler/glsl/glcpp/tests/010-undef-re-define.c.expected b/src/compiler/glsl/glcpp/tests/010-undef-re-define.c.expected new file mode 100644 index 0000000..5970f49 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/010-undef-re-define.c.expected @@ -0,0 +1,6 @@ + +1 + +foo + +2 diff --git a/src/compiler/glsl/glcpp/tests/011-define-func-empty.c b/src/compiler/glsl/glcpp/tests/011-define-func-empty.c new file mode 100644 index 0000000..d9ce13c --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/011-define-func-empty.c @@ -0,0 +1,2 @@ +#define foo() +foo() diff --git a/src/compiler/glsl/glcpp/tests/011-define-func-empty.c.expected b/src/compiler/glsl/glcpp/tests/011-define-func-empty.c.expected new file mode 100644 index 0000000..d148bc8 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/011-define-func-empty.c.expected @@ -0,0 +1,2 @@ + + diff --git 
a/src/compiler/glsl/glcpp/tests/012-define-func-no-args.c b/src/compiler/glsl/glcpp/tests/012-define-func-no-args.c new file mode 100644 index 0000000..c2bb730 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/012-define-func-no-args.c @@ -0,0 +1,2 @@ +#define foo() bar +foo() diff --git a/src/compiler/glsl/glcpp/tests/012-define-func-no-args.c.expected b/src/compiler/glsl/glcpp/tests/012-define-func-no-args.c.expected new file mode 100644 index 0000000..9f075f2 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/012-define-func-no-args.c.expected @@ -0,0 +1,2 @@ + +bar diff --git a/src/compiler/glsl/glcpp/tests/013-define-func-1-arg-unused.c b/src/compiler/glsl/glcpp/tests/013-define-func-1-arg-unused.c new file mode 100644 index 0000000..f78fb8b --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/013-define-func-1-arg-unused.c @@ -0,0 +1,2 @@ +#define foo(x) 1 +foo(bar) diff --git a/src/compiler/glsl/glcpp/tests/013-define-func-1-arg-unused.c.expected b/src/compiler/glsl/glcpp/tests/013-define-func-1-arg-unused.c.expected new file mode 100644 index 0000000..a464d9d --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/013-define-func-1-arg-unused.c.expected @@ -0,0 +1,2 @@ + +1 diff --git a/src/compiler/glsl/glcpp/tests/014-define-func-2-arg-unused.c b/src/compiler/glsl/glcpp/tests/014-define-func-2-arg-unused.c new file mode 100644 index 0000000..11feb26 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/014-define-func-2-arg-unused.c @@ -0,0 +1,2 @@ +#define foo(x,y) 1 +foo(bar,baz) diff --git a/src/compiler/glsl/glcpp/tests/014-define-func-2-arg-unused.c.expected b/src/compiler/glsl/glcpp/tests/014-define-func-2-arg-unused.c.expected new file mode 100644 index 0000000..a464d9d --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/014-define-func-2-arg-unused.c.expected @@ -0,0 +1,2 @@ + +1 diff --git a/src/compiler/glsl/glcpp/tests/015-define-object-with-parens.c b/src/compiler/glsl/glcpp/tests/015-define-object-with-parens.c new file mode 100644 index 0000000..558da9c --- 
/dev/null +++ b/src/compiler/glsl/glcpp/tests/015-define-object-with-parens.c @@ -0,0 +1,4 @@ +#define foo ()1 +foo() +#define bar ()2 +bar() diff --git a/src/compiler/glsl/glcpp/tests/015-define-object-with-parens.c.expected b/src/compiler/glsl/glcpp/tests/015-define-object-with-parens.c.expected new file mode 100644 index 0000000..a70321a --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/015-define-object-with-parens.c.expected @@ -0,0 +1,4 @@ + +()1() + +()2() diff --git a/src/compiler/glsl/glcpp/tests/016-define-func-1-arg.c b/src/compiler/glsl/glcpp/tests/016-define-func-1-arg.c new file mode 100644 index 0000000..a2e2404 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/016-define-func-1-arg.c @@ -0,0 +1,2 @@ +#define foo(x) ((x)+1) +foo(bar) diff --git a/src/compiler/glsl/glcpp/tests/016-define-func-1-arg.c.expected b/src/compiler/glsl/glcpp/tests/016-define-func-1-arg.c.expected new file mode 100644 index 0000000..6bfe04f --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/016-define-func-1-arg.c.expected @@ -0,0 +1,2 @@ + +((bar)+1) diff --git a/src/compiler/glsl/glcpp/tests/017-define-func-2-args.c b/src/compiler/glsl/glcpp/tests/017-define-func-2-args.c new file mode 100644 index 0000000..c725383 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/017-define-func-2-args.c @@ -0,0 +1,2 @@ +#define foo(x,y) ((x)*(y)) +foo(bar,baz) diff --git a/src/compiler/glsl/glcpp/tests/017-define-func-2-args.c.expected b/src/compiler/glsl/glcpp/tests/017-define-func-2-args.c.expected new file mode 100644 index 0000000..f7a2b8c --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/017-define-func-2-args.c.expected @@ -0,0 +1,2 @@ + +((bar)*(baz)) diff --git a/src/compiler/glsl/glcpp/tests/018-define-func-macro-as-parameter.c b/src/compiler/glsl/glcpp/tests/018-define-func-macro-as-parameter.c new file mode 100644 index 0000000..668130b --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/018-define-func-macro-as-parameter.c @@ -0,0 +1,3 @@ +#define x 0 +#define foo(x) x +foo(1) 
diff --git a/src/compiler/glsl/glcpp/tests/018-define-func-macro-as-parameter.c.expected b/src/compiler/glsl/glcpp/tests/018-define-func-macro-as-parameter.c.expected new file mode 100644 index 0000000..c6c9ee3 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/018-define-func-macro-as-parameter.c.expected @@ -0,0 +1,3 @@ + + +1 diff --git a/src/compiler/glsl/glcpp/tests/019-define-func-1-arg-multi.c b/src/compiler/glsl/glcpp/tests/019-define-func-1-arg-multi.c new file mode 100644 index 0000000..c4e62b2 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/019-define-func-1-arg-multi.c @@ -0,0 +1,2 @@ +#define foo(x) (x) +foo(this is more than one word) diff --git a/src/compiler/glsl/glcpp/tests/019-define-func-1-arg-multi.c.expected b/src/compiler/glsl/glcpp/tests/019-define-func-1-arg-multi.c.expected new file mode 100644 index 0000000..1e89b8c --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/019-define-func-1-arg-multi.c.expected @@ -0,0 +1,2 @@ + +(this is more than one word) diff --git a/src/compiler/glsl/glcpp/tests/020-define-func-2-arg-multi.c b/src/compiler/glsl/glcpp/tests/020-define-func-2-arg-multi.c new file mode 100644 index 0000000..3049ad1 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/020-define-func-2-arg-multi.c @@ -0,0 +1,2 @@ +#define foo(x,y) x,two fish,red fish,y +foo(one fish, blue fish) diff --git a/src/compiler/glsl/glcpp/tests/020-define-func-2-arg-multi.c.expected b/src/compiler/glsl/glcpp/tests/020-define-func-2-arg-multi.c.expected new file mode 100644 index 0000000..19f59f5 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/020-define-func-2-arg-multi.c.expected @@ -0,0 +1,2 @@ + +one fish,two fish,red fish,blue fish diff --git a/src/compiler/glsl/glcpp/tests/021-define-func-compose.c b/src/compiler/glsl/glcpp/tests/021-define-func-compose.c new file mode 100644 index 0000000..21ddd0e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/021-define-func-compose.c @@ -0,0 +1,3 @@ +#define bar(x) (1+(x)) +#define foo(y) (2*(y)) +foo(bar(3)) 
diff --git a/src/compiler/glsl/glcpp/tests/021-define-func-compose.c.expected b/src/compiler/glsl/glcpp/tests/021-define-func-compose.c.expected new file mode 100644 index 0000000..87f51f0 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/021-define-func-compose.c.expected @@ -0,0 +1,3 @@ + + +(2*((1+(3)))) diff --git a/src/compiler/glsl/glcpp/tests/022-define-func-arg-with-parens.c b/src/compiler/glsl/glcpp/tests/022-define-func-arg-with-parens.c new file mode 100644 index 0000000..c20d73a --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/022-define-func-arg-with-parens.c @@ -0,0 +1,2 @@ +#define foo(x) (x) +foo(argument(including parens)for the win) diff --git a/src/compiler/glsl/glcpp/tests/022-define-func-arg-with-parens.c.expected b/src/compiler/glsl/glcpp/tests/022-define-func-arg-with-parens.c.expected new file mode 100644 index 0000000..1dfc669 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/022-define-func-arg-with-parens.c.expected @@ -0,0 +1,2 @@ + +(argument(including parens)for the win) diff --git a/src/compiler/glsl/glcpp/tests/023-define-extra-whitespace.c b/src/compiler/glsl/glcpp/tests/023-define-extra-whitespace.c new file mode 100644 index 0000000..7ebfed6 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/023-define-extra-whitespace.c @@ -0,0 +1,8 @@ +#define noargs() 1 +# define onearg(foo) foo + # define twoargs( x , y ) x y + # define threeargs( a , b , c ) a b c +noargs ( ) +onearg ( 2 ) +twoargs ( 3 , 4 ) +threeargs ( 5 , 6 , 7 ) diff --git a/src/compiler/glsl/glcpp/tests/023-define-extra-whitespace.c.expected b/src/compiler/glsl/glcpp/tests/023-define-extra-whitespace.c.expected new file mode 100644 index 0000000..9c58275 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/023-define-extra-whitespace.c.expected @@ -0,0 +1,8 @@ + + + + +1 +2 +3 4 +5 6 7 diff --git a/src/compiler/glsl/glcpp/tests/024-define-chain-to-self-recursion.c b/src/compiler/glsl/glcpp/tests/024-define-chain-to-self-recursion.c new file mode 100644 index 
0000000..e788adc --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/024-define-chain-to-self-recursion.c @@ -0,0 +1,3 @@ +#define foo foo +#define bar foo +bar diff --git a/src/compiler/glsl/glcpp/tests/024-define-chain-to-self-recursion.c.expected b/src/compiler/glsl/glcpp/tests/024-define-chain-to-self-recursion.c.expected new file mode 100644 index 0000000..15600af --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/024-define-chain-to-self-recursion.c.expected @@ -0,0 +1,3 @@ + + +foo diff --git a/src/compiler/glsl/glcpp/tests/025-func-macro-as-non-macro.c b/src/compiler/glsl/glcpp/tests/025-func-macro-as-non-macro.c new file mode 100644 index 0000000..b433671 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/025-func-macro-as-non-macro.c @@ -0,0 +1,2 @@ +#define foo(bar) bar +foo bar diff --git a/src/compiler/glsl/glcpp/tests/025-func-macro-as-non-macro.c.expected b/src/compiler/glsl/glcpp/tests/025-func-macro-as-non-macro.c.expected new file mode 100644 index 0000000..4a59f05 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/025-func-macro-as-non-macro.c.expected @@ -0,0 +1,2 @@ + +foo bar diff --git a/src/compiler/glsl/glcpp/tests/026-define-func-extra-newlines.c b/src/compiler/glsl/glcpp/tests/026-define-func-extra-newlines.c new file mode 100644 index 0000000..0d83740 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/026-define-func-extra-newlines.c @@ -0,0 +1,6 @@ +#define foo(a) bar + +foo +( +1 +) diff --git a/src/compiler/glsl/glcpp/tests/026-define-func-extra-newlines.c.expected b/src/compiler/glsl/glcpp/tests/026-define-func-extra-newlines.c.expected new file mode 100644 index 0000000..5e3c70f --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/026-define-func-extra-newlines.c.expected @@ -0,0 +1,3 @@ + + +bar diff --git a/src/compiler/glsl/glcpp/tests/027-define-chain-obj-to-func.c b/src/compiler/glsl/glcpp/tests/027-define-chain-obj-to-func.c new file mode 100644 index 0000000..5ccb52c --- /dev/null +++ 
b/src/compiler/glsl/glcpp/tests/027-define-chain-obj-to-func.c @@ -0,0 +1,3 @@ +#define failure() success +#define foo failure() +foo diff --git a/src/compiler/glsl/glcpp/tests/027-define-chain-obj-to-func.c.expected b/src/compiler/glsl/glcpp/tests/027-define-chain-obj-to-func.c.expected new file mode 100644 index 0000000..94c15f9 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/027-define-chain-obj-to-func.c.expected @@ -0,0 +1,3 @@ + + +success diff --git a/src/compiler/glsl/glcpp/tests/028-define-chain-obj-to-non-func.c b/src/compiler/glsl/glcpp/tests/028-define-chain-obj-to-non-func.c new file mode 100644 index 0000000..44962a7 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/028-define-chain-obj-to-non-func.c @@ -0,0 +1,3 @@ +#define success() failure +#define foo success +foo diff --git a/src/compiler/glsl/glcpp/tests/028-define-chain-obj-to-non-func.c.expected b/src/compiler/glsl/glcpp/tests/028-define-chain-obj-to-non-func.c.expected new file mode 100644 index 0000000..94c15f9 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/028-define-chain-obj-to-non-func.c.expected @@ -0,0 +1,3 @@ + + +success diff --git a/src/compiler/glsl/glcpp/tests/029-define-chain-obj-to-func-with-args.c b/src/compiler/glsl/glcpp/tests/029-define-chain-obj-to-func-with-args.c new file mode 100644 index 0000000..261f7d2 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/029-define-chain-obj-to-func-with-args.c @@ -0,0 +1,3 @@ +#define bar(failure) failure +#define foo bar(success) +foo diff --git a/src/compiler/glsl/glcpp/tests/029-define-chain-obj-to-func-with-args.c.expected b/src/compiler/glsl/glcpp/tests/029-define-chain-obj-to-func-with-args.c.expected new file mode 100644 index 0000000..94c15f9 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/029-define-chain-obj-to-func-with-args.c.expected @@ -0,0 +1,3 @@ + + +success diff --git a/src/compiler/glsl/glcpp/tests/030-define-chain-obj-to-func-compose.c b/src/compiler/glsl/glcpp/tests/030-define-chain-obj-to-func-compose.c new 
file mode 100644 index 0000000..e56fbef --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/030-define-chain-obj-to-func-compose.c @@ -0,0 +1,4 @@ +#define baz(failure) failure +#define bar(failure) failure +#define foo bar(baz(success)) +foo diff --git a/src/compiler/glsl/glcpp/tests/030-define-chain-obj-to-func-compose.c.expected b/src/compiler/glsl/glcpp/tests/030-define-chain-obj-to-func-compose.c.expected new file mode 100644 index 0000000..bed826e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/030-define-chain-obj-to-func-compose.c.expected @@ -0,0 +1,4 @@ + + + +success diff --git a/src/compiler/glsl/glcpp/tests/031-define-chain-func-to-func-compose.c b/src/compiler/glsl/glcpp/tests/031-define-chain-func-to-func-compose.c new file mode 100644 index 0000000..3f4c874 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/031-define-chain-func-to-func-compose.c @@ -0,0 +1,4 @@ +#define baz(failure) failure +#define bar(failure) failure +#define foo() bar(baz(success)) +foo() diff --git a/src/compiler/glsl/glcpp/tests/031-define-chain-func-to-func-compose.c.expected b/src/compiler/glsl/glcpp/tests/031-define-chain-func-to-func-compose.c.expected new file mode 100644 index 0000000..bed826e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/031-define-chain-func-to-func-compose.c.expected @@ -0,0 +1,4 @@ + + + +success diff --git a/src/compiler/glsl/glcpp/tests/032-define-func-self-recurse.c b/src/compiler/glsl/glcpp/tests/032-define-func-self-recurse.c new file mode 100644 index 0000000..b3ac70f --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/032-define-func-self-recurse.c @@ -0,0 +1,2 @@ +#define foo(a) foo(2*(a)) +foo(3) diff --git a/src/compiler/glsl/glcpp/tests/032-define-func-self-recurse.c.expected b/src/compiler/glsl/glcpp/tests/032-define-func-self-recurse.c.expected new file mode 100644 index 0000000..983f941 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/032-define-func-self-recurse.c.expected @@ -0,0 +1,2 @@ + +foo(2*(3)) diff --git 
a/src/compiler/glsl/glcpp/tests/033-define-func-self-compose.c b/src/compiler/glsl/glcpp/tests/033-define-func-self-compose.c new file mode 100644 index 0000000..f65e482 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/033-define-func-self-compose.c @@ -0,0 +1,2 @@ +#define foo(a) foo(2*(a)) +foo(foo(3)) diff --git a/src/compiler/glsl/glcpp/tests/033-define-func-self-compose.c.expected b/src/compiler/glsl/glcpp/tests/033-define-func-self-compose.c.expected new file mode 100644 index 0000000..0818362 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/033-define-func-self-compose.c.expected @@ -0,0 +1,2 @@ + +foo(2*(foo(2*(3)))) diff --git a/src/compiler/glsl/glcpp/tests/034-define-func-self-compose-non-func.c b/src/compiler/glsl/glcpp/tests/034-define-func-self-compose-non-func.c new file mode 100644 index 0000000..209a5f7 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/034-define-func-self-compose-non-func.c @@ -0,0 +1,2 @@ +#define foo(bar) bar +foo(foo) diff --git a/src/compiler/glsl/glcpp/tests/034-define-func-self-compose-non-func.c.expected b/src/compiler/glsl/glcpp/tests/034-define-func-self-compose-non-func.c.expected new file mode 100644 index 0000000..3f808fe --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/034-define-func-self-compose-non-func.c.expected @@ -0,0 +1,2 @@ + +foo diff --git a/src/compiler/glsl/glcpp/tests/035-define-func-self-compose-non-func-multi-token-argument.c b/src/compiler/glsl/glcpp/tests/035-define-func-self-compose-non-func-multi-token-argument.c new file mode 100644 index 0000000..c307fbe --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/035-define-func-self-compose-non-func-multi-token-argument.c @@ -0,0 +1,2 @@ +#define foo(bar) bar +foo(1+foo) diff --git a/src/compiler/glsl/glcpp/tests/035-define-func-self-compose-non-func-multi-token-argument.c.expected b/src/compiler/glsl/glcpp/tests/035-define-func-self-compose-non-func-multi-token-argument.c.expected new file mode 100644 index 0000000..09dfdd6 --- /dev/null +++ 
b/src/compiler/glsl/glcpp/tests/035-define-func-self-compose-non-func-multi-token-argument.c.expected @@ -0,0 +1,2 @@ + +1+foo diff --git a/src/compiler/glsl/glcpp/tests/036-define-func-non-macro-multi-token-argument.c b/src/compiler/glsl/glcpp/tests/036-define-func-non-macro-multi-token-argument.c new file mode 100644 index 0000000..b21ff33 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/036-define-func-non-macro-multi-token-argument.c @@ -0,0 +1,3 @@ +#define bar success +#define foo(x) x +foo(more bar) diff --git a/src/compiler/glsl/glcpp/tests/036-define-func-non-macro-multi-token-argument.c.expected b/src/compiler/glsl/glcpp/tests/036-define-func-non-macro-multi-token-argument.c.expected new file mode 100644 index 0000000..580ed95 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/036-define-func-non-macro-multi-token-argument.c.expected @@ -0,0 +1,3 @@ + + +more success diff --git a/src/compiler/glsl/glcpp/tests/037-finalize-unexpanded-macro.c b/src/compiler/glsl/glcpp/tests/037-finalize-unexpanded-macro.c new file mode 100644 index 0000000..b3a2f37 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/037-finalize-unexpanded-macro.c @@ -0,0 +1,3 @@ +#define expand(x) expand(x once) +#define foo(x) x +foo(expand(just)) diff --git a/src/compiler/glsl/glcpp/tests/037-finalize-unexpanded-macro.c.expected b/src/compiler/glsl/glcpp/tests/037-finalize-unexpanded-macro.c.expected new file mode 100644 index 0000000..e804d7e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/037-finalize-unexpanded-macro.c.expected @@ -0,0 +1,3 @@ + + +expand(just once) diff --git a/src/compiler/glsl/glcpp/tests/038-func-arg-with-commas.c b/src/compiler/glsl/glcpp/tests/038-func-arg-with-commas.c new file mode 100644 index 0000000..1407c7d --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/038-func-arg-with-commas.c @@ -0,0 +1,2 @@ +#define foo(x) success +foo(argument (with,embedded , commas) -- tricky) diff --git a/src/compiler/glsl/glcpp/tests/038-func-arg-with-commas.c.expected 
b/src/compiler/glsl/glcpp/tests/038-func-arg-with-commas.c.expected new file mode 100644 index 0000000..6544adb --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/038-func-arg-with-commas.c.expected @@ -0,0 +1,2 @@ + +success diff --git a/src/compiler/glsl/glcpp/tests/039-func-arg-obj-macro-with-comma.c b/src/compiler/glsl/glcpp/tests/039-func-arg-obj-macro-with-comma.c new file mode 100644 index 0000000..a7c053b --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/039-func-arg-obj-macro-with-comma.c @@ -0,0 +1,24 @@ +/* This works. */ +#define foo(a) (a) +#define bar two,words +foo(bar) + +/* So does this. */ +#define foo2(a,b) (a separate b) +#define foo2_wrap(a) foo2(a) +foo2_wrap(bar) + +/* But this generates an error. */ +#define foo_wrap(a) foo(a) +foo_wrap(bar) + +/* Adding parentheses to foo_wrap fixes it. */ +#define foo_wrap_parens(a) foo((a)) +foo_wrap_parens(bar) + +/* As does adding parentheses to bar */ +#define bar_parens (two,words) +foo_wrap(bar_parens) +foo_wrap_parens(bar_parens) + + diff --git a/src/compiler/glsl/glcpp/tests/039-func-arg-obj-macro-with-comma.c.expected b/src/compiler/glsl/glcpp/tests/039-func-arg-obj-macro-with-comma.c.expected new file mode 100644 index 0000000..4cc7953 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/039-func-arg-obj-macro-with-comma.c.expected @@ -0,0 +1,26 @@ +0:12(21): preprocessor error: Error: macro foo invoked with 2 arguments (expected 1) + + + + +(two,words) + + + + +(two separate words) + + + +foo(two,words) + + + +((two,words)) + + + +((two,words)) +(((two,words))) + + diff --git a/src/compiler/glsl/glcpp/tests/040-token-pasting.c b/src/compiler/glsl/glcpp/tests/040-token-pasting.c new file mode 100644 index 0000000..caab3ba --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/040-token-pasting.c @@ -0,0 +1,2 @@ +#define paste(a,b) a ## b +paste(one , token) diff --git a/src/compiler/glsl/glcpp/tests/040-token-pasting.c.expected b/src/compiler/glsl/glcpp/tests/040-token-pasting.c.expected new file mode 
100644 index 0000000..48e836e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/040-token-pasting.c.expected @@ -0,0 +1,2 @@ + +onetoken diff --git a/src/compiler/glsl/glcpp/tests/041-if-0.c b/src/compiler/glsl/glcpp/tests/041-if-0.c new file mode 100644 index 0000000..2cab677 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/041-if-0.c @@ -0,0 +1,5 @@ +success_1 +#if 0 +failure +#endif +success_2 diff --git a/src/compiler/glsl/glcpp/tests/041-if-0.c.expected b/src/compiler/glsl/glcpp/tests/041-if-0.c.expected new file mode 100644 index 0000000..8b506b3 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/041-if-0.c.expected @@ -0,0 +1,5 @@ +success_1 + + + +success_2 diff --git a/src/compiler/glsl/glcpp/tests/042-if-1.c b/src/compiler/glsl/glcpp/tests/042-if-1.c new file mode 100644 index 0000000..874a25c --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/042-if-1.c @@ -0,0 +1,5 @@ +success_1 +#if 1 +success_2 +#endif +success_3 diff --git a/src/compiler/glsl/glcpp/tests/042-if-1.c.expected b/src/compiler/glsl/glcpp/tests/042-if-1.c.expected new file mode 100644 index 0000000..a6ae946 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/042-if-1.c.expected @@ -0,0 +1,5 @@ +success_1 + +success_2 + +success_3 diff --git a/src/compiler/glsl/glcpp/tests/043-if-0-else.c b/src/compiler/glsl/glcpp/tests/043-if-0-else.c new file mode 100644 index 0000000..323351f --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/043-if-0-else.c @@ -0,0 +1,7 @@ +success_1 +#if 0 +failure +#else +success_2 +#endif +success_3 diff --git a/src/compiler/glsl/glcpp/tests/043-if-0-else.c.expected b/src/compiler/glsl/glcpp/tests/043-if-0-else.c.expected new file mode 100644 index 0000000..3d7e6be --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/043-if-0-else.c.expected @@ -0,0 +1,7 @@ +success_1 + + + +success_2 + +success_3 diff --git a/src/compiler/glsl/glcpp/tests/044-if-1-else.c b/src/compiler/glsl/glcpp/tests/044-if-1-else.c new file mode 100644 index 0000000..28dfc25 --- /dev/null +++ 
b/src/compiler/glsl/glcpp/tests/044-if-1-else.c @@ -0,0 +1,7 @@ +success_1 +#if 1 +success_2 +#else +failure +#endif +success_3 diff --git a/src/compiler/glsl/glcpp/tests/044-if-1-else.c.expected b/src/compiler/glsl/glcpp/tests/044-if-1-else.c.expected new file mode 100644 index 0000000..4a31e1c --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/044-if-1-else.c.expected @@ -0,0 +1,7 @@ +success_1 + +success_2 + + + +success_3 diff --git a/src/compiler/glsl/glcpp/tests/045-if-0-elif.c b/src/compiler/glsl/glcpp/tests/045-if-0-elif.c new file mode 100644 index 0000000..e50f686 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/045-if-0-elif.c @@ -0,0 +1,11 @@ +success_1 +#if 0 +failure_1 +#elif 0 +failure_2 +#elif 1 +success_3 +#elif 1 +failure_3 +#endif +success_4 diff --git a/src/compiler/glsl/glcpp/tests/045-if-0-elif.c.expected b/src/compiler/glsl/glcpp/tests/045-if-0-elif.c.expected new file mode 100644 index 0000000..a9bb158 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/045-if-0-elif.c.expected @@ -0,0 +1,11 @@ +success_1 + + + + + +success_3 + + + +success_4 diff --git a/src/compiler/glsl/glcpp/tests/046-if-1-elsif.c b/src/compiler/glsl/glcpp/tests/046-if-1-elsif.c new file mode 100644 index 0000000..130515a --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/046-if-1-elsif.c @@ -0,0 +1,11 @@ +success_1 +#if 1 +success_2 +#elif 0 +failure_1 +#elif 1 +failure_2 +#elif 0 +failure_3 +#endif +success_3 diff --git a/src/compiler/glsl/glcpp/tests/046-if-1-elsif.c.expected b/src/compiler/glsl/glcpp/tests/046-if-1-elsif.c.expected new file mode 100644 index 0000000..a499571 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/046-if-1-elsif.c.expected @@ -0,0 +1,11 @@ +success_1 + +success_2 + + + + + + + +success_3 diff --git a/src/compiler/glsl/glcpp/tests/047-if-elif-else.c b/src/compiler/glsl/glcpp/tests/047-if-elif-else.c new file mode 100644 index 0000000..e8f0838 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/047-if-elif-else.c @@ -0,0 +1,11 @@ +success_1 +#if 0 
+failure_1 +#elif 0 +failure_2 +#elif 0 +failure_3 +#else +success_2 +#endif +success_3 diff --git a/src/compiler/glsl/glcpp/tests/047-if-elif-else.c.expected b/src/compiler/glsl/glcpp/tests/047-if-elif-else.c.expected new file mode 100644 index 0000000..54d3086 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/047-if-elif-else.c.expected @@ -0,0 +1,11 @@ +success_1 + + + + + + + +success_2 + +success_3 diff --git a/src/compiler/glsl/glcpp/tests/048-if-nested.c b/src/compiler/glsl/glcpp/tests/048-if-nested.c new file mode 100644 index 0000000..fc4679c --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/048-if-nested.c @@ -0,0 +1,11 @@ +success_1 +#if 0 +failure_1 +#if 1 +failure_2 +#else +failure_3 +#endif +failure_4 +#endif +success_2 diff --git a/src/compiler/glsl/glcpp/tests/048-if-nested.c.expected b/src/compiler/glsl/glcpp/tests/048-if-nested.c.expected new file mode 100644 index 0000000..8beb9c3 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/048-if-nested.c.expected @@ -0,0 +1,11 @@ +success_1 + + + + + + + + + +success_2 diff --git a/src/compiler/glsl/glcpp/tests/049-if-expression-precedence.c b/src/compiler/glsl/glcpp/tests/049-if-expression-precedence.c new file mode 100644 index 0000000..833ea03 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/049-if-expression-precedence.c @@ -0,0 +1,5 @@ +#if 1 + 2 * 3 + - (25 % 17 - + 1) +failure with operator precedence +#else +success +#endif diff --git a/src/compiler/glsl/glcpp/tests/049-if-expression-precedence.c.expected b/src/compiler/glsl/glcpp/tests/049-if-expression-precedence.c.expected new file mode 100644 index 0000000..729bdd1 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/049-if-expression-precedence.c.expected @@ -0,0 +1,5 @@ + + + +success + diff --git a/src/compiler/glsl/glcpp/tests/050-if-defined.c b/src/compiler/glsl/glcpp/tests/050-if-defined.c new file mode 100644 index 0000000..34f0f95 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/050-if-defined.c @@ -0,0 +1,17 @@ +#if defined foo 
+failure_1 +#else +success_1 +#endif +#define foo +#if defined foo +success_2 +#else +failure_2 +#endif +#undef foo +#if defined foo +failure_3 +#else +success_3 +#endif diff --git a/src/compiler/glsl/glcpp/tests/050-if-defined.c.expected b/src/compiler/glsl/glcpp/tests/050-if-defined.c.expected new file mode 100644 index 0000000..737eb8d --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/050-if-defined.c.expected @@ -0,0 +1,17 @@ + + + +success_1 + + + +success_2 + + + + + + + +success_3 + diff --git a/src/compiler/glsl/glcpp/tests/051-if-relational.c b/src/compiler/glsl/glcpp/tests/051-if-relational.c new file mode 100644 index 0000000..c3db488 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/051-if-relational.c @@ -0,0 +1,35 @@ +#if 3 < 2 +failure_1 +#else +success_1 +#endif + +#if 3 >= 2 +success_2 +#else +failure_2 +#endif + +#if 2 + 3 <= 5 +success_3 +#else +failure_3 +#endif + +#if 3 - 2 == 1 +success_3 +#else +failure_3 +#endif + +#if 1 > 3 +failure_4 +#else +success_4 +#endif + +#if 1 != 5 +success_5 +#else +failure_5 +#endif diff --git a/src/compiler/glsl/glcpp/tests/051-if-relational.c.expected b/src/compiler/glsl/glcpp/tests/051-if-relational.c.expected new file mode 100644 index 0000000..652fefd --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/051-if-relational.c.expected @@ -0,0 +1,35 @@ + + + +success_1 + + + +success_2 + + + + + +success_3 + + + + + +success_3 + + + + + + + +success_4 + + + +success_5 + + + diff --git a/src/compiler/glsl/glcpp/tests/052-if-bitwise.c b/src/compiler/glsl/glcpp/tests/052-if-bitwise.c new file mode 100644 index 0000000..2d8e45e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/052-if-bitwise.c @@ -0,0 +1,20 @@ +#if (0xaaaaaaaa | 0x55555555) != 4294967295 +failure_1 +#else +success_1 +#endif +#if (0x12345678 ^ 0xfdecba98) == 4023971040 +success_2 +#else +failure_2 +#endif +#if (~ 0xdeadbeef) != -3735928560 +failure_3 +#else +success_3 +#endif +#if (0667 & 0733) == 403 +success_4 +#else +failure_4 +#endif diff --git 
a/src/compiler/glsl/glcpp/tests/052-if-bitwise.c.expected b/src/compiler/glsl/glcpp/tests/052-if-bitwise.c.expected new file mode 100644 index 0000000..44e52b2 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/052-if-bitwise.c.expected @@ -0,0 +1,20 @@ + + + +success_1 + + +success_2 + + + + + + +success_3 + + +success_4 + + + diff --git a/src/compiler/glsl/glcpp/tests/053-if-divide-and-shift.c b/src/compiler/glsl/glcpp/tests/053-if-divide-and-shift.c new file mode 100644 index 0000000..d24c54a --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/053-if-divide-and-shift.c @@ -0,0 +1,15 @@ +#if (15 / 2) != 7 +failure_1 +#else +success_1 +#endif +#if (1 << 12) == 4096 +success_2 +#else +failure_2 +#endif +#if (31762 >> 8) != 124 +failure_3 +#else +success_3 +#endif diff --git a/src/compiler/glsl/glcpp/tests/053-if-divide-and-shift.c.expected b/src/compiler/glsl/glcpp/tests/053-if-divide-and-shift.c.expected new file mode 100644 index 0000000..7e78e04 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/053-if-divide-and-shift.c.expected @@ -0,0 +1,15 @@ + + + +success_1 + + +success_2 + + + + + + +success_3 + diff --git a/src/compiler/glsl/glcpp/tests/054-if-with-macros.c b/src/compiler/glsl/glcpp/tests/054-if-with-macros.c new file mode 100644 index 0000000..3da79a0 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/054-if-with-macros.c @@ -0,0 +1,34 @@ +#define one 1 +#define two 2 +#define three 3 +#define five 5 +#if five < two +failure_1 +#else +success_1 +#endif +#if three >= two +success_2 +#else +failure_2 +#endif +#if two + three <= five +success_3 +#else +failure_3 +#endif +#if five - two == three +success_4 +#else +failure_4 +#endif +#if one > three +failure_5 +#else +success_5 +#endif +#if one != five +success_6 +#else +failure_6 +#endif diff --git a/src/compiler/glsl/glcpp/tests/054-if-with-macros.c.expected b/src/compiler/glsl/glcpp/tests/054-if-with-macros.c.expected new file mode 100644 index 0000000..70f737c --- /dev/null +++ 
b/src/compiler/glsl/glcpp/tests/054-if-with-macros.c.expected @@ -0,0 +1,34 @@ + + + + + + + +success_1 + + +success_2 + + + + +success_3 + + + + +success_4 + + + + + + +success_5 + + +success_6 + + + diff --git a/src/compiler/glsl/glcpp/tests/055-define-chain-obj-to-func-parens-in-text.c b/src/compiler/glsl/glcpp/tests/055-define-chain-obj-to-func-parens-in-text.c new file mode 100644 index 0000000..00f2c23 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/055-define-chain-obj-to-func-parens-in-text.c @@ -0,0 +1,3 @@ +#define failure() success +#define foo failure +foo() diff --git a/src/compiler/glsl/glcpp/tests/055-define-chain-obj-to-func-parens-in-text.c.expected b/src/compiler/glsl/glcpp/tests/055-define-chain-obj-to-func-parens-in-text.c.expected new file mode 100644 index 0000000..94c15f9 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/055-define-chain-obj-to-func-parens-in-text.c.expected @@ -0,0 +1,3 @@ + + +success diff --git a/src/compiler/glsl/glcpp/tests/056-macro-argument-with-comma.c b/src/compiler/glsl/glcpp/tests/056-macro-argument-with-comma.c new file mode 100644 index 0000000..58701d1 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/056-macro-argument-with-comma.c @@ -0,0 +1,4 @@ +#define bar with,embedded,commas +#define function(x) success +#define foo function +foo(bar) diff --git a/src/compiler/glsl/glcpp/tests/056-macro-argument-with-comma.c.expected b/src/compiler/glsl/glcpp/tests/056-macro-argument-with-comma.c.expected new file mode 100644 index 0000000..bed826e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/056-macro-argument-with-comma.c.expected @@ -0,0 +1,4 @@ + + + +success diff --git a/src/compiler/glsl/glcpp/tests/057-empty-arguments.c b/src/compiler/glsl/glcpp/tests/057-empty-arguments.c new file mode 100644 index 0000000..6140232 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/057-empty-arguments.c @@ -0,0 +1,6 @@ +#define zero() success +zero() +#define one(x) success +one() +#define two(x,y) success +two(,) diff --git 
a/src/compiler/glsl/glcpp/tests/057-empty-arguments.c.expected b/src/compiler/glsl/glcpp/tests/057-empty-arguments.c.expected new file mode 100644 index 0000000..7d97e15 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/057-empty-arguments.c.expected @@ -0,0 +1,6 @@ + +success + +success + +success diff --git a/src/compiler/glsl/glcpp/tests/058-token-pasting-empty-arguments.c b/src/compiler/glsl/glcpp/tests/058-token-pasting-empty-arguments.c new file mode 100644 index 0000000..8ac260c --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/058-token-pasting-empty-arguments.c @@ -0,0 +1,5 @@ +#define paste(x,y) x ## y +paste(a,b) +paste(a,) +paste(,b) +paste(,) diff --git a/src/compiler/glsl/glcpp/tests/058-token-pasting-empty-arguments.c.expected b/src/compiler/glsl/glcpp/tests/058-token-pasting-empty-arguments.c.expected new file mode 100644 index 0000000..e0967a1 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/058-token-pasting-empty-arguments.c.expected @@ -0,0 +1,5 @@ + +ab +a +b + diff --git a/src/compiler/glsl/glcpp/tests/059-token-pasting-integer.c b/src/compiler/glsl/glcpp/tests/059-token-pasting-integer.c new file mode 100644 index 0000000..37b895a --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/059-token-pasting-integer.c @@ -0,0 +1,4 @@ +#define paste(x,y) x ## y +paste(1,2) +paste(1,000) +paste(identifier,2) diff --git a/src/compiler/glsl/glcpp/tests/059-token-pasting-integer.c.expected b/src/compiler/glsl/glcpp/tests/059-token-pasting-integer.c.expected new file mode 100644 index 0000000..f1288aa --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/059-token-pasting-integer.c.expected @@ -0,0 +1,4 @@ + +12 +1000 +identifier2 diff --git a/src/compiler/glsl/glcpp/tests/060-left-paren-in-macro-right-paren-in-text.c b/src/compiler/glsl/glcpp/tests/060-left-paren-in-macro-right-paren-in-text.c new file mode 100644 index 0000000..ed80ea8 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/060-left-paren-in-macro-right-paren-in-text.c @@ -0,0 +1,3 @@ +#define 
double(a) a*2 +#define foo double( +foo 5) diff --git a/src/compiler/glsl/glcpp/tests/060-left-paren-in-macro-right-paren-in-text.c.expected b/src/compiler/glsl/glcpp/tests/060-left-paren-in-macro-right-paren-in-text.c.expected new file mode 100644 index 0000000..3e5501a --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/060-left-paren-in-macro-right-paren-in-text.c.expected @@ -0,0 +1,3 @@ + + +5*2 diff --git a/src/compiler/glsl/glcpp/tests/061-define-chain-obj-to-func-multi.c b/src/compiler/glsl/glcpp/tests/061-define-chain-obj-to-func-multi.c new file mode 100644 index 0000000..6dbfd1f --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/061-define-chain-obj-to-func-multi.c @@ -0,0 +1,5 @@ +#define foo(x) success +#define bar foo +#define baz bar +#define joe baz +joe (failure) diff --git a/src/compiler/glsl/glcpp/tests/061-define-chain-obj-to-func-multi.c.expected b/src/compiler/glsl/glcpp/tests/061-define-chain-obj-to-func-multi.c.expected new file mode 100644 index 0000000..15eb64b --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/061-define-chain-obj-to-func-multi.c.expected @@ -0,0 +1,5 @@ + + + + +success diff --git a/src/compiler/glsl/glcpp/tests/062-if-0-skips-garbage.c b/src/compiler/glsl/glcpp/tests/062-if-0-skips-garbage.c new file mode 100644 index 0000000..d9e439b --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/062-if-0-skips-garbage.c @@ -0,0 +1,5 @@ +#define foo(a,b) +#if 0 +foo(bar) +foo( +#endif diff --git a/src/compiler/glsl/glcpp/tests/062-if-0-skips-garbage.c.expected b/src/compiler/glsl/glcpp/tests/062-if-0-skips-garbage.c.expected new file mode 100644 index 0000000..3f2ff2d --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/062-if-0-skips-garbage.c.expected @@ -0,0 +1,5 @@ + + + + + diff --git a/src/compiler/glsl/glcpp/tests/063-comments.c b/src/compiler/glsl/glcpp/tests/063-comments.c new file mode 100644 index 0000000..e641d2f --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/063-comments.c @@ -0,0 +1,20 @@ +/* this is a comment */ +// so is 
this +// */ +f = g/**//h; +/*//*/l(); +m = n//**/o ++ p; +/* this +comment spans +multiple lines and +contains *** stars +and slashes / *** / +and other stuff. +****/ +more code here +/* Test that /* nested + comments */ +are not treated like comments. +/*/ this is a comment */ +/*/*/ diff --git a/src/compiler/glsl/glcpp/tests/063-comments.c.expected b/src/compiler/glsl/glcpp/tests/063-comments.c.expected new file mode 100644 index 0000000..f6e10ce --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/063-comments.c.expected @@ -0,0 +1,20 @@ + + + +f = g /h; + l(); +m = n ++ p; + + + + + + + +more code here + + +are not treated like comments. + + diff --git a/src/compiler/glsl/glcpp/tests/064-version.c b/src/compiler/glsl/glcpp/tests/064-version.c new file mode 100644 index 0000000..2132648 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/064-version.c @@ -0,0 +1,2 @@ +#version 130 +#define FOO diff --git a/src/compiler/glsl/glcpp/tests/064-version.c.expected b/src/compiler/glsl/glcpp/tests/064-version.c.expected new file mode 100644 index 0000000..4036b1e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/064-version.c.expected @@ -0,0 +1,2 @@ +#version 130 + diff --git a/src/compiler/glsl/glcpp/tests/065-if-defined-parens.c b/src/compiler/glsl/glcpp/tests/065-if-defined-parens.c new file mode 100644 index 0000000..48aa0f8 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/065-if-defined-parens.c @@ -0,0 +1,17 @@ +#if defined(foo) +failure_1 +#else +success_1 +#endif +#define foo +#if defined ( foo ) +success_2 +#else +failure_2 +#endif +#undef foo +#if defined (foo) +failure_3 +#else +success_3 +#endif diff --git a/src/compiler/glsl/glcpp/tests/065-if-defined-parens.c.expected b/src/compiler/glsl/glcpp/tests/065-if-defined-parens.c.expected new file mode 100644 index 0000000..737eb8d --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/065-if-defined-parens.c.expected @@ -0,0 +1,17 @@ + + + +success_1 + + + +success_2 + + + + + + + +success_3 + diff --git 
a/src/compiler/glsl/glcpp/tests/066-if-nospace-expression.c b/src/compiler/glsl/glcpp/tests/066-if-nospace-expression.c new file mode 100644 index 0000000..3b0b473 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/066-if-nospace-expression.c @@ -0,0 +1,3 @@ +#if(1) +success +#endif diff --git a/src/compiler/glsl/glcpp/tests/066-if-nospace-expression.c.expected b/src/compiler/glsl/glcpp/tests/066-if-nospace-expression.c.expected new file mode 100644 index 0000000..5a28fb3 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/066-if-nospace-expression.c.expected @@ -0,0 +1,3 @@ + +success + diff --git a/src/compiler/glsl/glcpp/tests/067-nested-ifdef-ifndef.c b/src/compiler/glsl/glcpp/tests/067-nested-ifdef-ifndef.c new file mode 100644 index 0000000..f46cce4 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/067-nested-ifdef-ifndef.c @@ -0,0 +1,40 @@ +#define D1 +#define D2 + +#define result success + +#ifdef U1 +#ifdef U2 +#undef result +#define result failure +#endif +#endif +result + +#ifndef D1 +#ifndef D2 +#undef result +#define result failure +#endif +#endif +result + +#undef result +#define result failure +#ifdef D1 +#ifdef D2 +#undef result +#define result success +#endif +#endif +result + +#undef result +#define result failure +#ifndef U1 +#ifndef U2 +#undef result +#define result success +#endif +#endif +result diff --git a/src/compiler/glsl/glcpp/tests/067-nested-ifdef-ifndef.c.expected b/src/compiler/glsl/glcpp/tests/067-nested-ifdef-ifndef.c.expected new file mode 100644 index 0000000..9a5ed2e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/067-nested-ifdef-ifndef.c.expected @@ -0,0 +1,40 @@ + + + + + + + + + + + +success + + + + + + + +success + + + + + + + + + +success + + + + + + + + + +success diff --git a/src/compiler/glsl/glcpp/tests/068-accidental-pasting.c b/src/compiler/glsl/glcpp/tests/068-accidental-pasting.c new file mode 100644 index 0000000..699ac51 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/068-accidental-pasting.c @@ -0,0 +1,11 @@ 
+#define empty +<empty< +<empty= +>empty> +>empty= +=empty= +!empty= +&empty& +|empty| ++empty+ +-empty- diff --git a/src/compiler/glsl/glcpp/tests/068-accidental-pasting.c.expected b/src/compiler/glsl/glcpp/tests/068-accidental-pasting.c.expected new file mode 100644 index 0000000..27582cd --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/068-accidental-pasting.c.expected @@ -0,0 +1,11 @@ + +< < +< = +> > +> = += = +! = +& & +| | ++ + +- - diff --git a/src/compiler/glsl/glcpp/tests/069-repeated-argument.c b/src/compiler/glsl/glcpp/tests/069-repeated-argument.c new file mode 100644 index 0000000..2b46ead --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/069-repeated-argument.c @@ -0,0 +1,2 @@ +#define double(x) x x +double(1) diff --git a/src/compiler/glsl/glcpp/tests/069-repeated-argument.c.expected b/src/compiler/glsl/glcpp/tests/069-repeated-argument.c.expected new file mode 100644 index 0000000..8b4b095 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/069-repeated-argument.c.expected @@ -0,0 +1,2 @@ + +1 1 diff --git a/src/compiler/glsl/glcpp/tests/070-undefined-macro-in-expression.c b/src/compiler/glsl/glcpp/tests/070-undefined-macro-in-expression.c new file mode 100644 index 0000000..d15a484 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/070-undefined-macro-in-expression.c @@ -0,0 +1,5 @@ +#if UNDEFINED_MACRO +Failure +#else +Success +#endif diff --git a/src/compiler/glsl/glcpp/tests/070-undefined-macro-in-expression.c.expected b/src/compiler/glsl/glcpp/tests/070-undefined-macro-in-expression.c.expected new file mode 100644 index 0000000..44b93a4 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/070-undefined-macro-in-expression.c.expected @@ -0,0 +1,5 @@ + + + +Success + diff --git a/src/compiler/glsl/glcpp/tests/071-punctuator.c b/src/compiler/glsl/glcpp/tests/071-punctuator.c new file mode 100644 index 0000000..959d682 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/071-punctuator.c @@ -0,0 +1 @@ +a = b diff --git 
a/src/compiler/glsl/glcpp/tests/071-punctuator.c.expected b/src/compiler/glsl/glcpp/tests/071-punctuator.c.expected new file mode 100644 index 0000000..959d682 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/071-punctuator.c.expected @@ -0,0 +1 @@ +a = b diff --git a/src/compiler/glsl/glcpp/tests/072-token-pasting-same-line.c b/src/compiler/glsl/glcpp/tests/072-token-pasting-same-line.c new file mode 100644 index 0000000..e421e9d --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/072-token-pasting-same-line.c @@ -0,0 +1,2 @@ +#define paste(x) success_ ## x +paste(1) paste(2) paste(3) diff --git a/src/compiler/glsl/glcpp/tests/072-token-pasting-same-line.c.expected b/src/compiler/glsl/glcpp/tests/072-token-pasting-same-line.c.expected new file mode 100644 index 0000000..7b80af7 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/072-token-pasting-same-line.c.expected @@ -0,0 +1,2 @@ + +success_1 success_2 success_3 diff --git a/src/compiler/glsl/glcpp/tests/073-if-in-ifdef.c b/src/compiler/glsl/glcpp/tests/073-if-in-ifdef.c new file mode 100644 index 0000000..61a4809 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/073-if-in-ifdef.c @@ -0,0 +1,4 @@ +#ifdef UNDEF +#if UNDEF > 1 +#endif +#endif diff --git a/src/compiler/glsl/glcpp/tests/073-if-in-ifdef.c.expected b/src/compiler/glsl/glcpp/tests/073-if-in-ifdef.c.expected new file mode 100644 index 0000000..fd40910 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/073-if-in-ifdef.c.expected @@ -0,0 +1,4 @@ + + + + diff --git a/src/compiler/glsl/glcpp/tests/074-elif-undef.c b/src/compiler/glsl/glcpp/tests/074-elif-undef.c new file mode 100644 index 0000000..67aac89 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/074-elif-undef.c @@ -0,0 +1,3 @@ +#ifndef UNDEF +#elif UNDEF < 0 +#endif diff --git a/src/compiler/glsl/glcpp/tests/074-elif-undef.c.expected b/src/compiler/glsl/glcpp/tests/074-elif-undef.c.expected new file mode 100644 index 0000000..b28b04f --- /dev/null +++ 
b/src/compiler/glsl/glcpp/tests/074-elif-undef.c.expected @@ -0,0 +1,3 @@ + + + diff --git a/src/compiler/glsl/glcpp/tests/075-elif-elif-undef.c b/src/compiler/glsl/glcpp/tests/075-elif-elif-undef.c new file mode 100644 index 0000000..264bc4f --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/075-elif-elif-undef.c @@ -0,0 +1,4 @@ +#ifndef UNDEF +#elif UNDEF < 0 +#elif UNDEF == 3 +#endif diff --git a/src/compiler/glsl/glcpp/tests/075-elif-elif-undef.c.expected b/src/compiler/glsl/glcpp/tests/075-elif-elif-undef.c.expected new file mode 100644 index 0000000..fd40910 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/075-elif-elif-undef.c.expected @@ -0,0 +1,4 @@ + + + + diff --git a/src/compiler/glsl/glcpp/tests/076-elif-undef-nested.c b/src/compiler/glsl/glcpp/tests/076-elif-undef-nested.c new file mode 100644 index 0000000..ebd550e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/076-elif-undef-nested.c @@ -0,0 +1,5 @@ +#ifdef UNDEF +#if UNDEF == 4 +#elif UNDEF == 5 +#endif +#endif diff --git a/src/compiler/glsl/glcpp/tests/076-elif-undef-nested.c.expected b/src/compiler/glsl/glcpp/tests/076-elif-undef-nested.c.expected new file mode 100644 index 0000000..3f2ff2d --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/076-elif-undef-nested.c.expected @@ -0,0 +1,5 @@ + + + + + diff --git a/src/compiler/glsl/glcpp/tests/077-else-without-if.c b/src/compiler/glsl/glcpp/tests/077-else-without-if.c new file mode 100644 index 0000000..81f00bf --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/077-else-without-if.c @@ -0,0 +1 @@ +#else diff --git a/src/compiler/glsl/glcpp/tests/077-else-without-if.c.expected b/src/compiler/glsl/glcpp/tests/077-else-without-if.c.expected new file mode 100644 index 0000000..69f3404 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/077-else-without-if.c.expected @@ -0,0 +1,3 @@ +0:1(1): preprocessor error: #else without #if + + diff --git a/src/compiler/glsl/glcpp/tests/078-elif-without-if.c b/src/compiler/glsl/glcpp/tests/078-elif-without-if.c new 
file mode 100644 index 0000000..60466b3 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/078-elif-without-if.c @@ -0,0 +1 @@ +#elif defined FOO diff --git a/src/compiler/glsl/glcpp/tests/078-elif-without-if.c.expected b/src/compiler/glsl/glcpp/tests/078-elif-without-if.c.expected new file mode 100644 index 0000000..b8e40ec --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/078-elif-without-if.c.expected @@ -0,0 +1,3 @@ +0:1(1): preprocessor error: #elif without #if + + diff --git a/src/compiler/glsl/glcpp/tests/079-endif-without-if.c b/src/compiler/glsl/glcpp/tests/079-endif-without-if.c new file mode 100644 index 0000000..69331c3 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/079-endif-without-if.c @@ -0,0 +1 @@ +#endif diff --git a/src/compiler/glsl/glcpp/tests/079-endif-without-if.c.expected b/src/compiler/glsl/glcpp/tests/079-endif-without-if.c.expected new file mode 100644 index 0000000..7ae579d --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/079-endif-without-if.c.expected @@ -0,0 +1,3 @@ +0:1(1): preprocessor error: #endif without #if + + diff --git a/src/compiler/glsl/glcpp/tests/080-if-without-expression.c b/src/compiler/glsl/glcpp/tests/080-if-without-expression.c new file mode 100644 index 0000000..a27ba36 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/080-if-without-expression.c @@ -0,0 +1,4 @@ +/* Error message for unskipped #if with no expression. 
*/ +#if +#endif + diff --git a/src/compiler/glsl/glcpp/tests/080-if-without-expression.c.expected b/src/compiler/glsl/glcpp/tests/080-if-without-expression.c.expected new file mode 100644 index 0000000..2e4cd73 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/080-if-without-expression.c.expected @@ -0,0 +1,5 @@ +0:2(1): preprocessor error: #if with no expression + + + + diff --git a/src/compiler/glsl/glcpp/tests/081-elif-without-expression.c b/src/compiler/glsl/glcpp/tests/081-elif-without-expression.c new file mode 100644 index 0000000..79c7866 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/081-elif-without-expression.c @@ -0,0 +1,3 @@ +#if 0 +#elif +#endif diff --git a/src/compiler/glsl/glcpp/tests/081-elif-without-expression.c.expected b/src/compiler/glsl/glcpp/tests/081-elif-without-expression.c.expected new file mode 100644 index 0000000..b607b84 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/081-elif-without-expression.c.expected @@ -0,0 +1,4 @@ +0:2(1): preprocessor error: #elif with no expression + + + diff --git a/src/compiler/glsl/glcpp/tests/082-invalid-paste.c b/src/compiler/glsl/glcpp/tests/082-invalid-paste.c new file mode 100644 index 0000000..8b84d50 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/082-invalid-paste.c @@ -0,0 +1,7 @@ +#define PASTE(x,y) x ## y +PASTE(<,>) +PASTE(0,abc) +PASTE(1,=) +PASTE(2,@) +PASTE(3,-4) +PASTE(4,+5.2) diff --git a/src/compiler/glsl/glcpp/tests/082-invalid-paste.c.expected b/src/compiler/glsl/glcpp/tests/082-invalid-paste.c.expected new file mode 100644 index 0000000..b48a2d6 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/082-invalid-paste.c.expected @@ -0,0 +1,19 @@ +0:2(7): preprocessor error: +Pasting "<" and ">" does not give a valid preprocessing token. +0:3(7): preprocessor error: +Pasting "0" and "abc" does not give a valid preprocessing token. +0:4(7): preprocessor error: +Pasting "1" and "=" does not give a valid preprocessing token. 
+0:5(7): preprocessor error: +Pasting "2" and "@" does not give a valid preprocessing token. +0:6(7): preprocessor error: +Pasting "3" and "-" does not give a valid preprocessing token. +0:7(7): preprocessor error: +Pasting "4" and "+" does not give a valid preprocessing token. + +< +0 +1 +2 +34 +45.2 diff --git a/src/compiler/glsl/glcpp/tests/083-unterminated-if.c b/src/compiler/glsl/glcpp/tests/083-unterminated-if.c new file mode 100644 index 0000000..9180635 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/083-unterminated-if.c @@ -0,0 +1,2 @@ +#if 1 + diff --git a/src/compiler/glsl/glcpp/tests/083-unterminated-if.c.expected b/src/compiler/glsl/glcpp/tests/083-unterminated-if.c.expected new file mode 100644 index 0000000..4659ab6 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/083-unterminated-if.c.expected @@ -0,0 +1,4 @@ +0:1(6): preprocessor error: Unterminated #if + + + diff --git a/src/compiler/glsl/glcpp/tests/084-unbalanced-parentheses.c b/src/compiler/glsl/glcpp/tests/084-unbalanced-parentheses.c new file mode 100644 index 0000000..0789ba5 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/084-unbalanced-parentheses.c @@ -0,0 +1,2 @@ +#define FUNC(x) (2*(x)) +FUNC(23 diff --git a/src/compiler/glsl/glcpp/tests/084-unbalanced-parentheses.c.expected b/src/compiler/glsl/glcpp/tests/084-unbalanced-parentheses.c.expected new file mode 100644 index 0000000..af49a37 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/084-unbalanced-parentheses.c.expected @@ -0,0 +1,2 @@ +0:2(8): preprocessor error: syntax error, unexpected $end + diff --git a/src/compiler/glsl/glcpp/tests/085-incorrect-argument-count.c b/src/compiler/glsl/glcpp/tests/085-incorrect-argument-count.c new file mode 100644 index 0000000..91bea60 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/085-incorrect-argument-count.c @@ -0,0 +1,5 @@ +#define MULT(x,y) ((x)*(y)) +MULT() +MULT(1) +MULT(1,2,3) + diff --git a/src/compiler/glsl/glcpp/tests/085-incorrect-argument-count.c.expected 
b/src/compiler/glsl/glcpp/tests/085-incorrect-argument-count.c.expected new file mode 100644 index 0000000..d23845b --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/085-incorrect-argument-count.c.expected @@ -0,0 +1,11 @@ +0:2(1): preprocessor error: Error: macro MULT invoked with 1 arguments (expected 2) + +0:3(1): preprocessor error: Error: macro MULT invoked with 1 arguments (expected 2) + +0:4(1): preprocessor error: Error: macro MULT invoked with 3 arguments (expected 2) + + +MULT() +MULT(1) +MULT(1,2,3) + diff --git a/src/compiler/glsl/glcpp/tests/086-reserved-macro-names.c b/src/compiler/glsl/glcpp/tests/086-reserved-macro-names.c new file mode 100644 index 0000000..a6b7201 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/086-reserved-macro-names.c @@ -0,0 +1,3 @@ +#define __BAD reserved +#define GL_ALSO_BAD() also reserved +#define THIS__TOO__IS__BAD reserved diff --git a/src/compiler/glsl/glcpp/tests/086-reserved-macro-names.c.expected b/src/compiler/glsl/glcpp/tests/086-reserved-macro-names.c.expected new file mode 100644 index 0000000..38b089d --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/086-reserved-macro-names.c.expected @@ -0,0 +1,9 @@ +0:1(9): preprocessor warning: Macro names containing "__" are reserved for use by the implementation. + +0:2(9): preprocessor error: Macro names starting with "GL_" are reserved. + +0:3(9): preprocessor warning: Macro names containing "__" are reserved for use by the implementation. 
+ + + + diff --git a/src/compiler/glsl/glcpp/tests/087-if-comments.c b/src/compiler/glsl/glcpp/tests/087-if-comments.c new file mode 100644 index 0000000..ce8dc43 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/087-if-comments.c @@ -0,0 +1,5 @@ +#if (1 == 0) // dangerous comment +fail +#else +win +#endif diff --git a/src/compiler/glsl/glcpp/tests/087-if-comments.c.expected b/src/compiler/glsl/glcpp/tests/087-if-comments.c.expected new file mode 100644 index 0000000..2783a9c --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/087-if-comments.c.expected @@ -0,0 +1,5 @@ + + + +win + diff --git a/src/compiler/glsl/glcpp/tests/088-redefine-macro-legitimate.c b/src/compiler/glsl/glcpp/tests/088-redefine-macro-legitimate.c new file mode 100644 index 0000000..422c654 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/088-redefine-macro-legitimate.c @@ -0,0 +1,5 @@ +#define abc 123 +#define abc 123 + +#define foo(x) ( x ) + 23 +#define foo(x) ( x ) + 23 diff --git a/src/compiler/glsl/glcpp/tests/088-redefine-macro-legitimate.c.expected b/src/compiler/glsl/glcpp/tests/088-redefine-macro-legitimate.c.expected new file mode 100644 index 0000000..3f2ff2d --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/088-redefine-macro-legitimate.c.expected @@ -0,0 +1,5 @@ + + + + + diff --git a/src/compiler/glsl/glcpp/tests/089-redefine-macro-error.c b/src/compiler/glsl/glcpp/tests/089-redefine-macro-error.c new file mode 100644 index 0000000..b3d1391 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/089-redefine-macro-error.c @@ -0,0 +1,17 @@ +#define x y +#define x z + +#define abc 123 +#define abc() 123 + +#define foo() bar +#define foo(x) bar + +#define bar() baz +#define bar baz + +#define biff(a,b) a+b +#define biff(a,b,c) a+b + +#define oper(a,b) a+b +#define oper(a,b) a*b diff --git a/src/compiler/glsl/glcpp/tests/089-redefine-macro-error.c.expected b/src/compiler/glsl/glcpp/tests/089-redefine-macro-error.c.expected new file mode 100644 index 0000000..a945161 --- /dev/null +++ 
b/src/compiler/glsl/glcpp/tests/089-redefine-macro-error.c.expected @@ -0,0 +1,29 @@ +0:2(9): preprocessor error: Redefinition of macro x + +0:5(9): preprocessor error: Redefinition of macro abc + +0:8(9): preprocessor error: Redefinition of macro foo + +0:11(9): preprocessor error: Redefinition of macro bar + +0:14(9): preprocessor error: Redefinition of macro biff + +0:17(9): preprocessor error: Redefinition of macro oper + + + + + + + + + + + + + + + + + + diff --git a/src/compiler/glsl/glcpp/tests/090-hash-error.c b/src/compiler/glsl/glcpp/tests/090-hash-error.c new file mode 100644 index 0000000..d19bb7f --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/090-hash-error.c @@ -0,0 +1 @@ +#error human error diff --git a/src/compiler/glsl/glcpp/tests/090-hash-error.c.expected b/src/compiler/glsl/glcpp/tests/090-hash-error.c.expected new file mode 100644 index 0000000..876a6ea --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/090-hash-error.c.expected @@ -0,0 +1 @@ +0:1(1): preprocessor error: #error human error diff --git a/src/compiler/glsl/glcpp/tests/091-hash-line.c b/src/compiler/glsl/glcpp/tests/091-hash-line.c new file mode 100644 index 0000000..26d7038 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/091-hash-line.c @@ -0,0 +1,14 @@ +#line 0 +#error line 0 error +#line 25 +#error line 25 error +#line 0 1 +#error source 1, line 0 error +#line 30 2 +#error source 2, line 30 error +#line 45 2 /* A line with a comment */ +#define NINETY 90 +#define TWO 2 +#line NINETY TWO /* A #line line with macro expansion */ +#define FUNCTION_LIKE_MACRO(source, line) source line +#line FUNCTION_LIKE_MACRO(180,2) diff --git a/src/compiler/glsl/glcpp/tests/091-hash-line.c.expected b/src/compiler/glsl/glcpp/tests/091-hash-line.c.expected new file mode 100644 index 0000000..ac9ab25 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/091-hash-line.c.expected @@ -0,0 +1,14 @@ +0:0(1): preprocessor error: #error line 0 error +0:25(1): preprocessor error: #error line 25 error +1:0(1): 
preprocessor error: #error source 1, line 0 error +2:30(1): preprocessor error: #error source 2, line 30 error +#line 0 +#line 25 +#line 0 1 +#line 30 2 +#line 45 2 + + +#line 90 2 + +#line 180 2 diff --git a/src/compiler/glsl/glcpp/tests/092-redefine-macro-error-2.c b/src/compiler/glsl/glcpp/tests/092-redefine-macro-error-2.c new file mode 100644 index 0000000..3c161a5 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/092-redefine-macro-error-2.c @@ -0,0 +1,5 @@ +#define A +#define A 1 + +#define B 1 +#define B diff --git a/src/compiler/glsl/glcpp/tests/092-redefine-macro-error-2.c.expected b/src/compiler/glsl/glcpp/tests/092-redefine-macro-error-2.c.expected new file mode 100644 index 0000000..698294d --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/092-redefine-macro-error-2.c.expected @@ -0,0 +1,9 @@ +0:2(9): preprocessor error: Redefinition of macro A + +0:5(9): preprocessor error: Redefinition of macro B + + + + + + diff --git a/src/compiler/glsl/glcpp/tests/093-divide-by-zero.c b/src/compiler/glsl/glcpp/tests/093-divide-by-zero.c new file mode 100644 index 0000000..bf65d4f --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/093-divide-by-zero.c @@ -0,0 +1,2 @@ +#if (1 / 0) +#endif diff --git a/src/compiler/glsl/glcpp/tests/093-divide-by-zero.c.expected b/src/compiler/glsl/glcpp/tests/093-divide-by-zero.c.expected new file mode 100644 index 0000000..a858870 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/093-divide-by-zero.c.expected @@ -0,0 +1,3 @@ +0:1(12): preprocessor error: division by 0 in preprocessor directive + + diff --git a/src/compiler/glsl/glcpp/tests/094-divide-by-zero-short-circuit.c b/src/compiler/glsl/glcpp/tests/094-divide-by-zero-short-circuit.c new file mode 100644 index 0000000..04497b1 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/094-divide-by-zero-short-circuit.c @@ -0,0 +1,13 @@ +/* glcpp is generating a division-by-zero error for this case. 
It's + * easy to argue that it should be short-circuiting the evaluation and + * not generating the diagnostic (which happens to be what gcc does). + * But it doesn't seem like we should force this behavior on our + * pre-processor, (and, as always, the GLSL specification of the + * pre-processor is too vague on this point). + * + * If a short-circuit evaluation optimization does get added to the + * pre-processor then it would legitimate to update the expected file + * for this test. +*/ +#if 1 || (1 / 0) +#endif diff --git a/src/compiler/glsl/glcpp/tests/094-divide-by-zero-short-circuit.c.expected b/src/compiler/glsl/glcpp/tests/094-divide-by-zero-short-circuit.c.expected new file mode 100644 index 0000000..570952b --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/094-divide-by-zero-short-circuit.c.expected @@ -0,0 +1,14 @@ +0:12(17): preprocessor error: division by 0 in preprocessor directive + + + + + + + + + + + + + diff --git a/src/compiler/glsl/glcpp/tests/095-recursive-define.c b/src/compiler/glsl/glcpp/tests/095-recursive-define.c new file mode 100644 index 0000000..801d90c --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/095-recursive-define.c @@ -0,0 +1,3 @@ +#define A(a, b) B(a, b) +#define C A(0, C) +C diff --git a/src/compiler/glsl/glcpp/tests/095-recursive-define.c.expected b/src/compiler/glsl/glcpp/tests/095-recursive-define.c.expected new file mode 100644 index 0000000..493ab09 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/095-recursive-define.c.expected @@ -0,0 +1,3 @@ + + +B(0, C) diff --git a/src/compiler/glsl/glcpp/tests/096-paste-twice.c b/src/compiler/glsl/glcpp/tests/096-paste-twice.c new file mode 100644 index 0000000..8da756f --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/096-paste-twice.c @@ -0,0 +1,3 @@ +#define paste_twice(a,b,c) a ## b ## c +paste_twice(just, one, token) + diff --git a/src/compiler/glsl/glcpp/tests/096-paste-twice.c.expected b/src/compiler/glsl/glcpp/tests/096-paste-twice.c.expected new file mode 100644 index 
0000000..96c57d2 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/096-paste-twice.c.expected @@ -0,0 +1,3 @@ + +justonetoken + diff --git a/src/compiler/glsl/glcpp/tests/097-paste-with-non-function-macro.c b/src/compiler/glsl/glcpp/tests/097-paste-with-non-function-macro.c new file mode 100644 index 0000000..0f46835 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/097-paste-with-non-function-macro.c @@ -0,0 +1,3 @@ +#define PASTE_MACRO one ## token +PASTE_MACRO + diff --git a/src/compiler/glsl/glcpp/tests/097-paste-with-non-function-macro.c.expected b/src/compiler/glsl/glcpp/tests/097-paste-with-non-function-macro.c.expected new file mode 100644 index 0000000..36f6699 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/097-paste-with-non-function-macro.c.expected @@ -0,0 +1,3 @@ + +onetoken + diff --git a/src/compiler/glsl/glcpp/tests/098-elif-undefined.c b/src/compiler/glsl/glcpp/tests/098-elif-undefined.c new file mode 100644 index 0000000..1f520d4 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/098-elif-undefined.c @@ -0,0 +1,7 @@ +#if 0 +Not this +#elif UNDEFINED_MACRO +Nor this +#else +Yes, this. +#endif diff --git a/src/compiler/glsl/glcpp/tests/098-elif-undefined.c.expected b/src/compiler/glsl/glcpp/tests/098-elif-undefined.c.expected new file mode 100644 index 0000000..c6ef689 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/098-elif-undefined.c.expected @@ -0,0 +1,7 @@ + + + + + +Yes, this. 
+ diff --git a/src/compiler/glsl/glcpp/tests/099-c99-example.c b/src/compiler/glsl/glcpp/tests/099-c99-example.c new file mode 100644 index 0000000..d1976b1 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/099-c99-example.c @@ -0,0 +1,17 @@ +#define x 3 +#define f(a) f(x * (a)) +#undef x +#define x 2 +#define g f +#define z z[0] +#define h g(~ +#define m(a) a(w) +#define w 0,1 +#define t(a) a +#define p() int +#define q(x) x +#define r(x,y) x ## y +f(y+1) + f(f(z)) % t(t(g)(0) + t)(1); +g(x +(3,4)-w) | h 5) & m + (f)^m(m); +p() i[q()] = { q(1), r(2,3), r(4,), r(,5), r(,)}; diff --git a/src/compiler/glsl/glcpp/tests/099-c99-example.c.expected b/src/compiler/glsl/glcpp/tests/099-c99-example.c.expected new file mode 100644 index 0000000..352bbff --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/099-c99-example.c.expected @@ -0,0 +1,16 @@ + + + + + + + + + + + + + +f(2 * (y+1)) + f(2 * (f(2 * (z[0])))) % f(2 * (0)) + t(1); +f(2 * (2 +(3,4)-0,1)) | f(2 * (~ 5)) & f(2 * (0,1))^m(0,1); +int i[] = { 1, 23, 4, 5, }; diff --git a/src/compiler/glsl/glcpp/tests/100-macro-with-colon.c b/src/compiler/glsl/glcpp/tests/100-macro-with-colon.c new file mode 100644 index 0000000..31dbb9a --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/100-macro-with-colon.c @@ -0,0 +1,7 @@ +#define one 1 +#define two 2 + +switch (1) { + case one + two: + break; +} diff --git a/src/compiler/glsl/glcpp/tests/100-macro-with-colon.c.expected b/src/compiler/glsl/glcpp/tests/100-macro-with-colon.c.expected new file mode 100644 index 0000000..09f1f41 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/100-macro-with-colon.c.expected @@ -0,0 +1,7 @@ + + + +switch (1) { + case 1 + 2: + break; +} diff --git a/src/compiler/glsl/glcpp/tests/101-macros-used-twice.c b/src/compiler/glsl/glcpp/tests/101-macros-used-twice.c new file mode 100644 index 0000000..e169380 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/101-macros-used-twice.c @@ -0,0 +1,16 @@ +#define object 1 +#define function(x) 1 + +#if object +once 
+#endif +#if object +twice +#endif + +#if function(0) +once +#endif +#if function(0) +once again +#endif diff --git a/src/compiler/glsl/glcpp/tests/101-macros-used-twice.c.expected b/src/compiler/glsl/glcpp/tests/101-macros-used-twice.c.expected new file mode 100644 index 0000000..1e0b306 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/101-macros-used-twice.c.expected @@ -0,0 +1,16 @@ + + + + +once + + +twice + + + +once + + +once again + diff --git a/src/compiler/glsl/glcpp/tests/102-garbage-after-endif.c b/src/compiler/glsl/glcpp/tests/102-garbage-after-endif.c new file mode 100644 index 0000000..301779e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/102-garbage-after-endif.c @@ -0,0 +1,2 @@ +#if 0 +#endif garbage diff --git a/src/compiler/glsl/glcpp/tests/102-garbage-after-endif.c.expected b/src/compiler/glsl/glcpp/tests/102-garbage-after-endif.c.expected new file mode 100644 index 0000000..d9f3bdc --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/102-garbage-after-endif.c.expected @@ -0,0 +1,2 @@ +0:2(8): preprocessor error: syntax error, unexpected IDENTIFIER, expecting NEWLINE + diff --git a/src/compiler/glsl/glcpp/tests/103-garbage-after-else-0.c b/src/compiler/glsl/glcpp/tests/103-garbage-after-else-0.c new file mode 100644 index 0000000..c460fea --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/103-garbage-after-else-0.c @@ -0,0 +1,3 @@ +#if 0 +#else garbage +#endif diff --git a/src/compiler/glsl/glcpp/tests/103-garbage-after-else-0.c.expected b/src/compiler/glsl/glcpp/tests/103-garbage-after-else-0.c.expected new file mode 100644 index 0000000..b053b39 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/103-garbage-after-else-0.c.expected @@ -0,0 +1,4 @@ +0:2(7): preprocessor error: syntax error, unexpected IDENTIFIER, expecting NEWLINE +0:1(6): preprocessor error: Unterminated #if + + diff --git a/src/compiler/glsl/glcpp/tests/104-hash-line-followed-by-code.c b/src/compiler/glsl/glcpp/tests/104-hash-line-followed-by-code.c new file mode 100644 index 
0000000..3fbeec4 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/104-hash-line-followed-by-code.c @@ -0,0 +1,2 @@ +#line 2 +int foo(); diff --git a/src/compiler/glsl/glcpp/tests/104-hash-line-followed-by-code.c.expected b/src/compiler/glsl/glcpp/tests/104-hash-line-followed-by-code.c.expected new file mode 100644 index 0000000..3fbeec4 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/104-hash-line-followed-by-code.c.expected @@ -0,0 +1,2 @@ +#line 2 +int foo(); diff --git a/src/compiler/glsl/glcpp/tests/105-multiline-hash-line.c b/src/compiler/glsl/glcpp/tests/105-multiline-hash-line.c new file mode 100644 index 0000000..da156c6 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/105-multiline-hash-line.c @@ -0,0 +1,5 @@ +#define X(x) x +#line X( \ + 1 \ + ) +#line 2 diff --git a/src/compiler/glsl/glcpp/tests/105-multiline-hash-line.c.expected b/src/compiler/glsl/glcpp/tests/105-multiline-hash-line.c.expected new file mode 100644 index 0000000..814cef1 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/105-multiline-hash-line.c.expected @@ -0,0 +1,5 @@ + +#line 1 + + +#line 2 diff --git a/src/compiler/glsl/glcpp/tests/106-multiline-hash-if.c b/src/compiler/glsl/glcpp/tests/106-multiline-hash-if.c new file mode 100644 index 0000000..929e93e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/106-multiline-hash-if.c @@ -0,0 +1,6 @@ +#define X(x) x +#if X( \ + 1 \ + ) +int foo(); +#endif diff --git a/src/compiler/glsl/glcpp/tests/106-multiline-hash-if.c.expected b/src/compiler/glsl/glcpp/tests/106-multiline-hash-if.c.expected new file mode 100644 index 0000000..1c0cbc9 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/106-multiline-hash-if.c.expected @@ -0,0 +1,6 @@ + + + + +int foo(); + diff --git a/src/compiler/glsl/glcpp/tests/107-multiline-hash-elif.c b/src/compiler/glsl/glcpp/tests/107-multiline-hash-elif.c new file mode 100644 index 0000000..8c1c67a --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/107-multiline-hash-elif.c @@ -0,0 +1,7 @@ +#define X(x) x +#if 0 
+#elif X( \ + 1 \ + ) +int foo(); +#endif diff --git a/src/compiler/glsl/glcpp/tests/107-multiline-hash-elif.c.expected b/src/compiler/glsl/glcpp/tests/107-multiline-hash-elif.c.expected new file mode 100644 index 0000000..b0601d7 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/107-multiline-hash-elif.c.expected @@ -0,0 +1,7 @@ + + + + + +int foo(); + diff --git a/src/compiler/glsl/glcpp/tests/108-no-space-after-hash-version.c b/src/compiler/glsl/glcpp/tests/108-no-space-after-hash-version.c new file mode 100644 index 0000000..0ce36f2 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/108-no-space-after-hash-version.c @@ -0,0 +1 @@ +#version110 diff --git a/src/compiler/glsl/glcpp/tests/108-no-space-after-hash-version.c.expected b/src/compiler/glsl/glcpp/tests/108-no-space-after-hash-version.c.expected new file mode 100644 index 0000000..4f4243f --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/108-no-space-after-hash-version.c.expected @@ -0,0 +1 @@ +0:1(1): preprocessor error: Illegal non-directive after # diff --git a/src/compiler/glsl/glcpp/tests/109-no-space-after-hash-line.c b/src/compiler/glsl/glcpp/tests/109-no-space-after-hash-line.c new file mode 100644 index 0000000..f52966a --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/109-no-space-after-hash-line.c @@ -0,0 +1 @@ +#line2 diff --git a/src/compiler/glsl/glcpp/tests/109-no-space-after-hash-line.c.expected b/src/compiler/glsl/glcpp/tests/109-no-space-after-hash-line.c.expected new file mode 100644 index 0000000..4f4243f --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/109-no-space-after-hash-line.c.expected @@ -0,0 +1 @@ +0:1(1): preprocessor error: Illegal non-directive after # diff --git a/src/compiler/glsl/glcpp/tests/110-no-space-digits-after-hash-elif.c b/src/compiler/glsl/glcpp/tests/110-no-space-digits-after-hash-elif.c new file mode 100644 index 0000000..6d7d0f3 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/110-no-space-digits-after-hash-elif.c @@ -0,0 +1,3 @@ +#if 1 +#elif110 +#endif diff 
--git a/src/compiler/glsl/glcpp/tests/110-no-space-digits-after-hash-elif.c.expected b/src/compiler/glsl/glcpp/tests/110-no-space-digits-after-hash-elif.c.expected new file mode 100644 index 0000000..4d93de4 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/110-no-space-digits-after-hash-elif.c.expected @@ -0,0 +1,3 @@ +0:2(1): preprocessor error: Illegal non-directive after # + + diff --git a/src/compiler/glsl/glcpp/tests/111-no-space-operator-after-hash-if.c b/src/compiler/glsl/glcpp/tests/111-no-space-operator-after-hash-if.c new file mode 100644 index 0000000..b341337 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/111-no-space-operator-after-hash-if.c @@ -0,0 +1,19 @@ +#if(1) +success +#endif + +#if+1 +success +#endif + +#if-1 +success +#endif + +#if!1 +success +#endif + +#if~1 +success +#endif diff --git a/src/compiler/glsl/glcpp/tests/111-no-space-operator-after-hash-if.c.expected b/src/compiler/glsl/glcpp/tests/111-no-space-operator-after-hash-if.c.expected new file mode 100644 index 0000000..5c005c3 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/111-no-space-operator-after-hash-if.c.expected @@ -0,0 +1,19 @@ + +success + + + +success + + + +success + + + + + + + +success + diff --git a/src/compiler/glsl/glcpp/tests/112-no-space-operator-after-hash-elif.c b/src/compiler/glsl/glcpp/tests/112-no-space-operator-after-hash-elif.c new file mode 100644 index 0000000..e8221bc --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/112-no-space-operator-after-hash-elif.c @@ -0,0 +1,24 @@ +#if 0 +#elif(1) +success +#endif + +#if 0 +#elif+1 +success +#endif + +#if 0 +#elif-1 +success +#endif + +#if 0 +#elif!1 +success +#endif + +#if 0 +#elif~1 +success +#endif diff --git a/src/compiler/glsl/glcpp/tests/112-no-space-operator-after-hash-elif.c.expected b/src/compiler/glsl/glcpp/tests/112-no-space-operator-after-hash-elif.c.expected new file mode 100644 index 0000000..86b3703 --- /dev/null +++ 
b/src/compiler/glsl/glcpp/tests/112-no-space-operator-after-hash-elif.c.expected @@ -0,0 +1,24 @@ + + +success + + + + +success + + + + +success + + + + + + + + + +success + diff --git a/src/compiler/glsl/glcpp/tests/113-line-and-file-macros.c b/src/compiler/glsl/glcpp/tests/113-line-and-file-macros.c new file mode 100644 index 0000000..369c487 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/113-line-and-file-macros.c @@ -0,0 +1,7 @@ +1. Number of dalmations: __LINE__ __FILE__ __LINE__ +2. Nominal visual acuity: __LINE__ __FILE__ / __LINE__ __FILE__ +3. Battle of Thermopylae, as film: __LINE__ __FILE__ __FILE__ +4. HTTP code for "Not Found": __LINE__ __FILE__ __LINE__ +5. Hexadecimal for 20560: __LINE__ __FILE__ __LINE__ __FILE__ +6: Zip code for Nortonville, KS: __LINE__ __LINE__ __FILE__ __LINE__ __FILE__ +7. James Bond, as a number: __FILE__ __FILE__ __LINE__ diff --git a/src/compiler/glsl/glcpp/tests/113-line-and-file-macros.c.expected b/src/compiler/glsl/glcpp/tests/113-line-and-file-macros.c.expected new file mode 100644 index 0000000..55bc788 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/113-line-and-file-macros.c.expected @@ -0,0 +1,7 @@ +1. Number of dalmations: 1 0 1 +2. Nominal visual acuity: 2 0 / 2 0 +3. Battle of Thermopylae, as film: 3 0 0 +4. HTTP code for "Not Found": 4 0 4 +5. Hexadecimal for 20560: 5 0 5 0 +6: Zip code for Nortonville, KS: 6 6 0 6 0 +7. James Bond, as a number: 0 0 7 diff --git a/src/compiler/glsl/glcpp/tests/114-paste-integer-tokens.c b/src/compiler/glsl/glcpp/tests/114-paste-integer-tokens.c new file mode 100644 index 0000000..d80d9c7 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/114-paste-integer-tokens.c @@ -0,0 +1,7 @@ +#define PASTE3(a,b,c) a ## b ## c +#define PASTE4(a,b,c,d) a ## b ## c ## d +#define PASTE5(a,b,c,d,e) a ## b ## c ## d ## e +4. HTTP code for "Not Found": PASTE3(__LINE__, __FILE__ , __LINE__) +5. 
Hexadecimal for 20560: PASTE4(__LINE__, __FILE__, __LINE__, __FILE__) +6: Zip code for Nortonville, KS: PASTE5(__LINE__, __LINE__, __FILE__, __LINE__, __FILE__) +7. James Bond, as a number: PASTE3(__FILE__, __FILE__, __LINE__) diff --git a/src/compiler/glsl/glcpp/tests/114-paste-integer-tokens.c.expected b/src/compiler/glsl/glcpp/tests/114-paste-integer-tokens.c.expected new file mode 100644 index 0000000..aa97110 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/114-paste-integer-tokens.c.expected @@ -0,0 +1,7 @@ + + + +4. HTTP code for "Not Found": 404 +5. Hexadecimal for 20560: 5050 +6: Zip code for Nortonville, KS: 66060 +7. James Bond, as a number: 007 diff --git a/src/compiler/glsl/glcpp/tests/115-line-continuations.c b/src/compiler/glsl/glcpp/tests/115-line-continuations.c new file mode 100644 index 0000000..105590d --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/115-line-continuations.c @@ -0,0 +1,9 @@ +// This comment continues to the next line, hiding the define \ +#define CONTINUATION_UNSUPPORTED + +#ifdef CONTINUATION_UNSUPPORTED +failure +#else +success +#endif + diff --git a/src/compiler/glsl/glcpp/tests/115-line-continuations.c.expected b/src/compiler/glsl/glcpp/tests/115-line-continuations.c.expected new file mode 100644 index 0000000..428b5e8 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/115-line-continuations.c.expected @@ -0,0 +1,9 @@ + + + + + + +success + + diff --git a/src/compiler/glsl/glcpp/tests/116-disable-line-continuations.c b/src/compiler/glsl/glcpp/tests/116-disable-line-continuations.c new file mode 100644 index 0000000..83d5ddf --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/116-disable-line-continuations.c @@ -0,0 +1,13 @@ +// glcpp-args: --disable-line-continuations + +// This comments ends with a backslash \\ +#define NO_CONTINUATION + +#ifdef NO_CONTINUATION +success +#else +failure +#endif + + + diff --git a/src/compiler/glsl/glcpp/tests/116-disable-line-continuations.c.expected 
b/src/compiler/glsl/glcpp/tests/116-disable-line-continuations.c.expected new file mode 100644 index 0000000..5ca7892 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/116-disable-line-continuations.c.expected @@ -0,0 +1,13 @@ + + + + + + +success + + + + + + diff --git a/src/compiler/glsl/glcpp/tests/117-line-continuation-and-non-continuation-backslash.c b/src/compiler/glsl/glcpp/tests/117-line-continuation-and-non-continuation-backslash.c new file mode 100644 index 0000000..6a6f282 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/117-line-continuation-and-non-continuation-backslash.c @@ -0,0 +1,12 @@ +/* This test case is the minimal case to replicate the bug reported here: + * + * https://bugs.freedesktop.org/show_bug.cgi?id=65112 + * + * To trigger the bug, there must be a line-continuation sequence + * (backslash newline), then an additional newline character, and + * finally another backslash that is not part of a line-continuation + * sequence. + */ +\ + +/* \ */ diff --git a/src/compiler/glsl/glcpp/tests/117-line-continuation-and-non-continuation-backslash.c.expected b/src/compiler/glsl/glcpp/tests/117-line-continuation-and-non-continuation-backslash.c.expected new file mode 100644 index 0000000..8aaa04d --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/117-line-continuation-and-non-continuation-backslash.c.expected @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/src/compiler/glsl/glcpp/tests/118-comment-becomes-space.c b/src/compiler/glsl/glcpp/tests/118-comment-becomes-space.c new file mode 100644 index 0000000..53e8039 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/118-comment-becomes-space.c @@ -0,0 +1,4 @@ +#define FOO first/* +*/second + +FOO diff --git a/src/compiler/glsl/glcpp/tests/118-comment-becomes-space.c.expected b/src/compiler/glsl/glcpp/tests/118-comment-becomes-space.c.expected new file mode 100644 index 0000000..1fa8135 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/118-comment-becomes-space.c.expected @@ -0,0 +1,4 @@ + + + 
+first second diff --git a/src/compiler/glsl/glcpp/tests/119-elif-after-else.c b/src/compiler/glsl/glcpp/tests/119-elif-after-else.c new file mode 100644 index 0000000..9b9e923 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/119-elif-after-else.c @@ -0,0 +1,6 @@ +#if 0 +#else +int foo; +#elif 0 +int bar; +#endif diff --git a/src/compiler/glsl/glcpp/tests/119-elif-after-else.c.expected b/src/compiler/glsl/glcpp/tests/119-elif-after-else.c.expected new file mode 100644 index 0000000..6369567 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/119-elif-after-else.c.expected @@ -0,0 +1,7 @@ +0:4(1): preprocessor error: #elif after #else + + +int foo; + +int bar; + diff --git a/src/compiler/glsl/glcpp/tests/120-undef-builtin.c b/src/compiler/glsl/glcpp/tests/120-undef-builtin.c new file mode 100644 index 0000000..49e7696 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/120-undef-builtin.c @@ -0,0 +1,3 @@ +#undef __LINE__ +#undef __FILE__ +#undef __VERSION__ diff --git a/src/compiler/glsl/glcpp/tests/120-undef-builtin.c.expected b/src/compiler/glsl/glcpp/tests/120-undef-builtin.c.expected new file mode 100644 index 0000000..3b736df --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/120-undef-builtin.c.expected @@ -0,0 +1,6 @@ +0:1(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined. +0:2(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined. +0:3(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined. 
+ + + diff --git a/src/compiler/glsl/glcpp/tests/121-comment-bug-72686.c b/src/compiler/glsl/glcpp/tests/121-comment-bug-72686.c new file mode 100644 index 0000000..67ebe73 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/121-comment-bug-72686.c @@ -0,0 +1,2 @@ +/* + */ // diff --git a/src/compiler/glsl/glcpp/tests/121-comment-bug-72686.c.expected b/src/compiler/glsl/glcpp/tests/121-comment-bug-72686.c.expected new file mode 100644 index 0000000..8cb7cb9 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/121-comment-bug-72686.c.expected @@ -0,0 +1,2 @@ + + diff --git a/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c b/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c new file mode 100644 index 0000000..ae7ea09 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c @@ -0,0 +1,16 @@ +/* Original definitions. */ +#define TWO ( 1+1 ) +#define FOUR (2 + 2) +#define SIX (3 + 3) + +/* Redefinitions with whitespace in same places, but different amounts, (so no + * error). */ +#define TWO ( 1+1 ) +#define FOUR (2 + 2) +#define SIX (3/*comment is whitespace*/+ /* collapsed */ /* to */ /* one */ /* space */ 3) + +/* Redefinitions with whitespace in different places. Each of these should + * trigger an error. 
*/ +#define TWO (1 + 1) +#define FOUR ( 2+2 ) +#define SIX (/*not*/3 + 3/*expected*/) diff --git a/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c.expected b/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c.expected new file mode 100644 index 0000000..602bdef --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c.expected @@ -0,0 +1,22 @@ +0:14(9): preprocessor error: Redefinition of macro TWO + +0:15(9): preprocessor error: Redefinition of macro FOUR + +0:16(9): preprocessor error: Redefinition of macro SIX + + + + + + + + + + + + + + + + + diff --git a/src/compiler/glsl/glcpp/tests/123-garbage-after-else-1.c b/src/compiler/glsl/glcpp/tests/123-garbage-after-else-1.c new file mode 100644 index 0000000..0b341a3 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/123-garbage-after-else-1.c @@ -0,0 +1,3 @@ +#if 1 +#else garbage +#endif diff --git a/src/compiler/glsl/glcpp/tests/123-garbage-after-else-1.c.expected b/src/compiler/glsl/glcpp/tests/123-garbage-after-else-1.c.expected new file mode 100644 index 0000000..b053b39 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/123-garbage-after-else-1.c.expected @@ -0,0 +1,4 @@ +0:2(7): preprocessor error: syntax error, unexpected IDENTIFIER, expecting NEWLINE +0:1(6): preprocessor error: Unterminated #if + + diff --git a/src/compiler/glsl/glcpp/tests/124-preprocessing-numbers.c b/src/compiler/glsl/glcpp/tests/124-preprocessing-numbers.c new file mode 100644 index 0000000..947ba18 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/124-preprocessing-numbers.c @@ -0,0 +1,37 @@ +#define e THIS_SHOULD_NOT_BE_EXPANDED +#define E NOR_THIS +#define p NOT_THIS_EITHER +#define P AND_SURELY_NOT_THIS +#define OK CRAZY_BUT_TRUE_THIS_NEITHER + +/* This one is actually meant to be expanded */ +#define MUST_EXPAND GO + +/* The following are "preprocessing numbers" and should not trigger macro + * expansion. 
*/ +1e +1OK + +/* These are also "preprocessing numbers", so no expansion */ +123e+OK +.23E+OK +1.3e-OK +12.E-OK +123p+OK +.23P+OK +1.3p-OK +12.P-OK +123..OK +.23.OK.OK + +/* Importantly, just before the MUST_EXPAND in each of these, the preceding + * "preprocessing number" ends and we have an actual expression. So the + * MUST_EXPAND macro must be expanded (who would have thought?) in each case. */ +123ef+MUST_EXPAND +.23E3-MUST_EXPAND +1.3e--MUST_EXPAND +12.E-&MUST_EXPAND +123p+OK+MUST_EXPAND +.23P+OK;MUST_EXPAND +1.3p-OK-MUST_EXPAND +12.P-OK&MUST_EXPAND diff --git a/src/compiler/glsl/glcpp/tests/124-preprocessing-numbers.c.expected b/src/compiler/glsl/glcpp/tests/124-preprocessing-numbers.c.expected new file mode 100644 index 0000000..6ec5888 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/124-preprocessing-numbers.c.expected @@ -0,0 +1,37 @@ + + + + + + + + + + + +1e +1OK + + +123e+OK +.23E+OK +1.3e-OK +12.E-OK +123p+OK +.23P+OK +1.3p-OK +12.P-OK +123..OK +.23.OK.OK + + + + +123ef+GO +.23E3-GO +1.3e--GO +12.E-&GO +123p+OK+GO +.23P+OK;GO +1.3p-OK-GO +12.P-OK&GO diff --git a/src/compiler/glsl/glcpp/tests/125-es-short-circuit-undefined.c b/src/compiler/glsl/glcpp/tests/125-es-short-circuit-undefined.c new file mode 100644 index 0000000..4ee29f6 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/125-es-short-circuit-undefined.c @@ -0,0 +1,27 @@ +/* For GLSL in OpenGL ES, an undefined macro appearing in an #if or #elif + * expression, (other than as an argument to defined) is an error. + * + * Except in the case of a short-circuiting && or || operator, where the + * specification explicitly mandates that there be no error.
+ */ +#version 300 es + +/* These yield errors */ +#if NOT_DEFINED +#endif + +#if 0 +#elif ALSO_NOT_DEFINED +#endif + +/* But these yield no errors */ +#if 1 || STILL_NOT_DEFINED +Success +#endif + +#if 0 +#elif 0 && WILL_ANYONE_DEFINE_ANYTHING +#else +More success +#endif + diff --git a/src/compiler/glsl/glcpp/tests/125-es-short-circuit-undefined.c.expected b/src/compiler/glsl/glcpp/tests/125-es-short-circuit-undefined.c.expected new file mode 100644 index 0000000..616aa91 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/125-es-short-circuit-undefined.c.expected @@ -0,0 +1,29 @@ +0:10(16): preprocessor error: undefined macro NOT_DEFINED in expression (illegal in GLES) +0:14(23): preprocessor error: undefined macro ALSO_NOT_DEFINED in expression (illegal in GLES) + + + + + + +#version 300 es + + + + + + + + + + + +Success + + + + + +More success + + diff --git a/src/compiler/glsl/glcpp/tests/126-garbage-after-directive.c b/src/compiler/glsl/glcpp/tests/126-garbage-after-directive.c new file mode 100644 index 0000000..4c0d290 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/126-garbage-after-directive.c @@ -0,0 +1,5 @@ +#ifdef MACRO garbage +#endif + +#ifndef MORE garbage +#endif diff --git a/src/compiler/glsl/glcpp/tests/126-garbage-after-directive.c.expected b/src/compiler/glsl/glcpp/tests/126-garbage-after-directive.c.expected new file mode 100644 index 0000000..82a06f8 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/126-garbage-after-directive.c.expected @@ -0,0 +1,7 @@ +0:1(14): preprocessor error: extra tokens at end of directive +0:4(14): preprocessor error: extra tokens at end of directive + + + + + diff --git a/src/compiler/glsl/glcpp/tests/127-pragma-empty.c b/src/compiler/glsl/glcpp/tests/127-pragma-empty.c new file mode 100644 index 0000000..0f9b0b3 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/127-pragma-empty.c @@ -0,0 +1,3 @@ +/* It seems an odd (and particularly useless) thing to have an empty pragma, + * but we probably shouldn't trigger an 
error in this case. */ +#pragma diff --git a/src/compiler/glsl/glcpp/tests/127-pragma-empty.c.expected b/src/compiler/glsl/glcpp/tests/127-pragma-empty.c.expected new file mode 100644 index 0000000..92371a0 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/127-pragma-empty.c.expected @@ -0,0 +1,3 @@ + + + diff --git a/src/compiler/glsl/glcpp/tests/128-space-before-hash.c b/src/compiler/glsl/glcpp/tests/128-space-before-hash.c new file mode 100644 index 0000000..fba9596 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/128-space-before-hash.c @@ -0,0 +1,21 @@ + /* Any directive can be preceded by a space. */ + #version 300 + #pragma Testing spaces before hash + # + #line 3 + #define FOO + #ifdef FOO + yes + #endif + #if 0 + #elif defined FOO + yes again + #endif + #if 0 + #else + for the third time, yes! + #endif + #undef FOO + #ifndef FOO + yes, of course + #endif diff --git a/src/compiler/glsl/glcpp/tests/128-space-before-hash.c.expected b/src/compiler/glsl/glcpp/tests/128-space-before-hash.c.expected new file mode 100644 index 0000000..9babb6f --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/128-space-before-hash.c.expected @@ -0,0 +1,21 @@ + +#version 300 +#pragma Testing spaces before hash + +#line 3 + + + yes + + + + yes again + + + + for the third time, yes! 
+ + + + yes, of course + diff --git a/src/compiler/glsl/glcpp/tests/129-define-non-identifier.c b/src/compiler/glsl/glcpp/tests/129-define-non-identifier.c new file mode 100644 index 0000000..a229179 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/129-define-non-identifier.c @@ -0,0 +1 @@ +#define 123 456 diff --git a/src/compiler/glsl/glcpp/tests/129-define-non-identifier.c.expected b/src/compiler/glsl/glcpp/tests/129-define-non-identifier.c.expected new file mode 100644 index 0000000..fd0b413 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/129-define-non-identifier.c.expected @@ -0,0 +1,2 @@ +0:1(9): preprocessor error: #define followed by a non-identifier: 123 +0:1(9): preprocessor error: syntax error, unexpected INTEGER_STRING, expecting FUNC_IDENTIFIER or OBJ_IDENTIFIER diff --git a/src/compiler/glsl/glcpp/tests/130-define-comment.c b/src/compiler/glsl/glcpp/tests/130-define-comment.c new file mode 100644 index 0000000..3331236 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/130-define-comment.c @@ -0,0 +1,2 @@ +#define /*...*/ FUNC( /*...*/ x /*...*/ ) /*...*/ FOO( /*...*/ x /*...*/ ) +FUNC(bar) diff --git a/src/compiler/glsl/glcpp/tests/130-define-comment.c.expected b/src/compiler/glsl/glcpp/tests/130-define-comment.c.expected new file mode 100644 index 0000000..d789e29 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/130-define-comment.c.expected @@ -0,0 +1,2 @@ + +FOO( bar ) diff --git a/src/compiler/glsl/glcpp/tests/131-eof-without-newline.c b/src/compiler/glsl/glcpp/tests/131-eof-without-newline.c new file mode 100644 index 0000000..240292d --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/131-eof-without-newline.c @@ -0,0 +1 @@ +this file ends with no newline
\ No newline at end of file diff --git a/src/compiler/glsl/glcpp/tests/131-eof-without-newline.c.expected b/src/compiler/glsl/glcpp/tests/131-eof-without-newline.c.expected new file mode 100644 index 0000000..5780030 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/131-eof-without-newline.c.expected @@ -0,0 +1 @@ +this file ends with no newline diff --git a/src/compiler/glsl/glcpp/tests/132-eof-without-newline-define.c b/src/compiler/glsl/glcpp/tests/132-eof-without-newline-define.c new file mode 100644 index 0000000..6795e35 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/132-eof-without-newline-define.c @@ -0,0 +1 @@ +#define
\ No newline at end of file diff --git a/src/compiler/glsl/glcpp/tests/132-eof-without-newline-define.c.expected b/src/compiler/glsl/glcpp/tests/132-eof-without-newline-define.c.expected new file mode 100644 index 0000000..341e5e2 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/132-eof-without-newline-define.c.expected @@ -0,0 +1 @@ +0:1(1): preprocessor error: #define without macro name diff --git a/src/compiler/glsl/glcpp/tests/133-eof-without-newline-comment.c b/src/compiler/glsl/glcpp/tests/133-eof-without-newline-comment.c new file mode 100644 index 0000000..56ec5f7 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/133-eof-without-newline-comment.c @@ -0,0 +1 @@ +This file ends with no newline within a comment /*
\ No newline at end of file diff --git a/src/compiler/glsl/glcpp/tests/133-eof-without-newline-comment.c.expected b/src/compiler/glsl/glcpp/tests/133-eof-without-newline-comment.c.expected new file mode 100644 index 0000000..d186f48 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/133-eof-without-newline-comment.c.expected @@ -0,0 +1,2 @@ +0:1(51): preprocessor error: Unterminated comment +This file ends with no newline within a comment diff --git a/src/compiler/glsl/glcpp/tests/134-hash-comment-directive.c b/src/compiler/glsl/glcpp/tests/134-hash-comment-directive.c new file mode 100644 index 0000000..3015f0e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/134-hash-comment-directive.c @@ -0,0 +1,22 @@ +/*...*/ # /*...*/ version 300 + /*...*/#/*...*/ extension whatever + /*..*/ # /*..*/ pragma ignored +/**/ # /**/ line 4 + /*...*/# /*...*/ ifdef NOT_DEFINED + /*...*/# /*...*/ else + /*..*/ #/*..*/ endif + /*...*/# /*...*/ ifndef ALSO_NOT_DEFINED + /*...*/# /*...*/ else + /*..*/ #/*..*/ endif +/*...*/ # /*...*/ if 0 + /*...*/#/*...*/ elif 1 + /*..*/ # /*..*/ else + /**/ # /**/ endif + /*...*/# /*...*/ define FOO bar + /*..*/ #/*..*/ define FUNC() baz + /*..*/ # /*..*/ define FUNC2(a,b) b a +FOO +FUNC() +FUNC2(x,y) + + diff --git a/src/compiler/glsl/glcpp/tests/134-hash-comment-directive.c.expected b/src/compiler/glsl/glcpp/tests/134-hash-comment-directive.c.expected new file mode 100644 index 0000000..760c960 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/134-hash-comment-directive.c.expected @@ -0,0 +1,22 @@ +#version 300 +#extension whatever +#pragma ignored +#line 4 + + + + + + + + + + + + + +bar +baz +y x + + diff --git a/src/compiler/glsl/glcpp/tests/135-duplicate-parameter.c b/src/compiler/glsl/glcpp/tests/135-duplicate-parameter.c new file mode 100644 index 0000000..fd96bd6 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/135-duplicate-parameter.c @@ -0,0 +1,2 @@ +#define FOO(a,a) which a? 
+#define BAR(x,y,z,x) so very x diff --git a/src/compiler/glsl/glcpp/tests/135-duplicate-parameter.c.expected b/src/compiler/glsl/glcpp/tests/135-duplicate-parameter.c.expected new file mode 100644 index 0000000..bc1a334 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/135-duplicate-parameter.c.expected @@ -0,0 +1,4 @@ +0:1(9): preprocessor error: Duplicate macro parameter "a" +0:2(9): preprocessor error: Duplicate macro parameter "x" + + diff --git a/src/compiler/glsl/glcpp/tests/136-plus-plus-and-minus-minus.c b/src/compiler/glsl/glcpp/tests/136-plus-plus-and-minus-minus.c new file mode 100644 index 0000000..167d3c8 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/136-plus-plus-and-minus-minus.c @@ -0,0 +1,8 @@ +/* The body can include C expressions with ++ and -- */ +a = x++; +b = ++x; +c = x--; +d = --x; +/* But these are not legal in preprocessor expressions. */ +#if x++ > 4 +#endif diff --git a/src/compiler/glsl/glcpp/tests/136-plus-plus-and-minus-minus.c.expected b/src/compiler/glsl/glcpp/tests/136-plus-plus-and-minus-minus.c.expected new file mode 100644 index 0000000..137921b --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/136-plus-plus-and-minus-minus.c.expected @@ -0,0 +1,8 @@ +0:7(12): preprocessor error: syntax error, unexpected PLUS_PLUS + +a = x++; +b = ++x; +c = x--; +d = --x; + + diff --git a/src/compiler/glsl/glcpp/tests/137-expand-macro-after-period.c b/src/compiler/glsl/glcpp/tests/137-expand-macro-after-period.c new file mode 100644 index 0000000..c8cd47f --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/137-expand-macro-after-period.c @@ -0,0 +1,4 @@ +#define FIELD(x) foo.x +#define FIELD_OF(s, x) s.x +FIELD(bar) +FIELD_OF(foo, bar) diff --git a/src/compiler/glsl/glcpp/tests/137-expand-macro-after-period.c.expected b/src/compiler/glsl/glcpp/tests/137-expand-macro-after-period.c.expected new file mode 100644 index 0000000..f9f5be1 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/137-expand-macro-after-period.c.expected @@ -0,0 +1,4 @@ + + 
+foo.bar +foo.bar diff --git a/src/compiler/glsl/glcpp/tests/138-multi-line-comment-in-if-0.c b/src/compiler/glsl/glcpp/tests/138-multi-line-comment-in-if-0.c new file mode 100644 index 0000000..38967dc --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/138-multi-line-comment-in-if-0.c @@ -0,0 +1,7 @@ +#if 0 +/* + * This multi-line comment needs to be 3 lines to test what's intended. + */ +#else +SUCCESS +#endif diff --git a/src/compiler/glsl/glcpp/tests/138-multi-line-comment-in-if-0.c.expected b/src/compiler/glsl/glcpp/tests/138-multi-line-comment-in-if-0.c.expected new file mode 100644 index 0000000..0d6ef4d --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/138-multi-line-comment-in-if-0.c.expected @@ -0,0 +1,7 @@ + + + + + +SUCCESS + diff --git a/src/compiler/glsl/glcpp/tests/139-define-without-macro-name.c b/src/compiler/glsl/glcpp/tests/139-define-without-macro-name.c new file mode 100644 index 0000000..30e128d --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/139-define-without-macro-name.c @@ -0,0 +1,5 @@ +#define +#define +#define /*...*/ +#define //... +Errors expected because no macro name is ever given! diff --git a/src/compiler/glsl/glcpp/tests/139-define-without-macro-name.c.expected b/src/compiler/glsl/glcpp/tests/139-define-without-macro-name.c.expected new file mode 100644 index 0000000..42b02d1 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/139-define-without-macro-name.c.expected @@ -0,0 +1,5 @@ +0:1(1): preprocessor error: #define without macro name +0:2(1): preprocessor error: #define without macro name +0:3(1): preprocessor error: #define without macro name +0:4(1): preprocessor error: #define without macro name +Errors expected because no macro name is ever given! 
diff --git a/src/compiler/glsl/glcpp/tests/140-null-directive.c b/src/compiler/glsl/glcpp/tests/140-null-directive.c new file mode 100644 index 0000000..1dcb26e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/140-null-directive.c @@ -0,0 +1,9 @@ +/* GLSL accepts a null directive. Let's test that in several variations: */ +# + # +/*....*/#/*....*/ + /*..*/ # /*..*/ +#//... + # //... +/*....*/#/**///.. + /*..*/ # /**/ // diff --git a/src/compiler/glsl/glcpp/tests/140-null-directive.c.expected b/src/compiler/glsl/glcpp/tests/140-null-directive.c.expected new file mode 100644 index 0000000..fa103f6 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/140-null-directive.c.expected @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/src/compiler/glsl/glcpp/tests/141-pragma-and-__LINE__.c b/src/compiler/glsl/glcpp/tests/141-pragma-and-__LINE__.c new file mode 100644 index 0000000..a93f3ce --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/141-pragma-and-__LINE__.c @@ -0,0 +1,6 @@ +Line 1 /* Test for a bug where #pragma was throwing off the __LINE__ count. 
*/ +Line __LINE__ /* Line 2 */ +#pragma Line 3 +Line __LINE__ /* Line 4 */ +#pragma Line 5 +Line __LINE__ /* Line 6 */ diff --git a/src/compiler/glsl/glcpp/tests/141-pragma-and-__LINE__.c.expected b/src/compiler/glsl/glcpp/tests/141-pragma-and-__LINE__.c.expected new file mode 100644 index 0000000..330731d --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/141-pragma-and-__LINE__.c.expected @@ -0,0 +1,6 @@ +Line 1 +Line 2 +#pragma Line 3 +Line 4 +#pragma Line 5 +Line 6 diff --git a/src/compiler/glsl/glcpp/tests/142-defined-within-macro.c b/src/compiler/glsl/glcpp/tests/142-defined-within-macro.c new file mode 100644 index 0000000..b60c042 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/142-defined-within-macro.c @@ -0,0 +1,94 @@ +/* Macro using defined with a hard-coded identifier (no parentheses) */ +#define is_foo_defined defined /*...*/ foo +#undef foo +#if is_foo_defined +failure +#else +success +#endif +#define foo +#if is_foo_defined +success +#else +failure +#endif + +/* Macro using defined with a hard-coded identifier within parentheses */ +#define is_foo_defined_parens defined /*...*/ ( /*...*/ foo /*...*/ ) // +#define foo +#if is_foo_defined_parens +success +#else +failure +#endif +#undef foo +#if is_foo_defined_parens +failure +#else +success +#endif + +/* Macro using defined with an argument identifier (no parentheses) */ +#define is_defined(arg) defined /*...*/ arg +#define foo bar +#undef bar +#if is_defined(foo) +failure +#else +success +#endif +#define bar bar +#if is_defined(foo) +success +#else +failure +#endif + +/* Macro using defined with an argument identifier within parentheses */ +#define is_defined_parens(arg) defined /*...*/ ( /*...*/ arg /*...*/ ) // +#define foo bar +#define bar bar +#if is_defined_parens(foo) +success +#else +failure +#endif +#undef bar +#if is_defined_parens(foo) +failure +#else +success +#endif + +/* Multiple levels of macro resulting in defined */ +#define X defined A && Y +#define Y defined B && Z +#define Z 
defined C +#define A +#define B +#define C +#if X +success +#else +failure +#endif +#undef A +#if X +failure +#else +success +#endif +#define A +#undef B +#if X +failure +#else +success +#endif +#define B +#undef C +#if X +failure +#else +success +#endif diff --git a/src/compiler/glsl/glcpp/tests/142-defined-within-macro.c.expected b/src/compiler/glsl/glcpp/tests/142-defined-within-macro.c.expected new file mode 100644 index 0000000..4eca90b --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/142-defined-within-macro.c.expected @@ -0,0 +1,94 @@ + + + + + + +success + + + +success + + + + + + + + +success + + + + + + + +success + + + + + + + + + +success + + + +success + + + + + + + + + +success + + + + + + + +success + + + + + + + + + + +success + + + + + + + +success + + + + + + +success + + + + + + +success + diff --git a/src/compiler/glsl/glcpp/tests/143-multiple-else.c b/src/compiler/glsl/glcpp/tests/143-multiple-else.c new file mode 100644 index 0000000..62ad49c --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/143-multiple-else.c @@ -0,0 +1,6 @@ +#if 0 +#else +int foo; +#else +int bar; +#endif diff --git a/src/compiler/glsl/glcpp/tests/143-multiple-else.c.expected b/src/compiler/glsl/glcpp/tests/143-multiple-else.c.expected new file mode 100644 index 0000000..00b3328 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/143-multiple-else.c.expected @@ -0,0 +1,7 @@ +0:4(1): preprocessor error: multiple #else + + +int foo; + +int bar; + diff --git a/src/compiler/glsl/glcpp/tests/glcpp-test b/src/compiler/glsl/glcpp/tests/glcpp-test new file mode 100755 index 0000000..3945ee4 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/glcpp-test @@ -0,0 +1,110 @@ +#!/bin/sh + +if [ ! -z "$srcdir" ]; then + testdir=$srcdir/glsl/glcpp/tests + outdir=`pwd`/glsl/glcpp/tests + glcpp=`pwd`/glsl/glcpp/glcpp +else + testdir=. + outdir=. + glcpp=../glcpp +fi + +trap 'rm $test.valgrind-errors; exit 1' INT QUIT + +usage () +{ + cat <<EOF +Usage: glcpp [options...] 
+ +Run the test suite for mesa's GLSL pre-processor. + +Valid options include: + + --testdir=<DIR> Use tests in the given <DIR> (default is ".") + --valgrind Run the test suite a second time under valgrind +EOF +} + +test_specific_args () +{ + test="$1" + + tr "\r" "\n" < "$test" | grep 'glcpp-args:' | sed -e 's,^.*glcpp-args: *,,' +} + +# Parse command-line options +for option; do + case "${option}" in + "--help") + usage + exit 0 + ;; + "--valgrind") + do_valgrind=yes + ;; + "--testdir="*) + testdir="${option#--testdir=}" + outdir="${outdir}/${option#--testdir=}" + ;; + *) + echo "Unrecognized option: $option" >&2 + echo >&2 + usage + exit 1 + ;; + esac +done + +total=0 +pass=0 +clean=0 + +mkdir -p $outdir + +echo "====== Testing for correctness ======" +for test in $testdir/*.c; do + out=$outdir/${test##*/}.out + + printf "Testing $test... > $out ($test.expected) " + $glcpp $(test_specific_args $test) < $test > $out 2>&1 + total=$((total+1)) + if cmp $test.expected $out >/dev/null 2>&1; then + echo "PASS" + pass=$((pass+1)) + else + echo "FAIL" + diff -u $test.expected $out + fi +done + +echo "" +echo "$pass/$total tests returned correct results" +echo "" + +if [ "$do_valgrind" = "yes" ]; then + echo "====== Testing for valgrind cleanliness ======" + for test in $testdir/*.c; do + printf "Testing $test with valgrind..." + valgrind --error-exitcode=31 --log-file=$test.valgrind-errors $glcpp $(test_specific_args $test) < $test >/dev/null 2>&1 + if [ "$?" 
= "31" ]; then + echo "ERRORS" + cat $test.valgrind-errors + else + echo "CLEAN" + clean=$((clean+1)) + rm $test.valgrind-errors + fi + done + + echo "" + echo "$pass/$total tests returned correct results" + echo "$clean/$total tests are valgrind-clean" +fi + +if [ "$pass" = "$total" ] && { [ "$do_valgrind" != "yes" ] || [ "$clean" = "$total" ]; }; then # succeed only if every test passed and, when --valgrind was given, every test was also valgrind-clean + exit 0 +else + exit 1 +fi + diff --git a/src/compiler/glsl/glcpp/tests/glcpp-test-cr-lf b/src/compiler/glsl/glcpp/tests/glcpp-test-cr-lf new file mode 100755 index 0000000..c75370f --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/glcpp-test-cr-lf @@ -0,0 +1,141 @@ +#!/bin/sh + +# The build system runs this test from a different working directory, and may +# be in a build directory entirely separate from the source. So if the +# "srcdir" variable is set, we must use it to locate the test files and the +# glcpp-test script. + +if [ ! -z "$srcdir" ]; then + testdir="$srcdir/glsl/glcpp/tests" + glcpp_test="$srcdir/glsl/glcpp/tests/glcpp-test" +else + testdir=. + glcpp_test=./glcpp-test +fi + +total=0 +pass=0 + +# This supports a pipe that doesn't destroy the exit status of first command +# +# http://unix.stackexchange.com/questions/14270/get-exit-status-of-process-thats-piped-to-another +stdintoexitstatus() { + read exitstatus + return $exitstatus +} + +run_test () +{ + cmd="$1" + + total=$((total+1)) + + if [ "$VERBOSE" = "yes" ]; then + if $cmd; then + echo "PASS" + pass=$((pass+1)) + else + echo "FAIL" + fi + else + # This is "$cmd | tail -2" but with the exit status of "$cmd" not "tail -2" + if (((($cmd; echo $? >&3) | tail -2 | head -1 >&4) 3>&1) | stdintoexitstatus) 4>&1; then + echo "PASS" + pass=$((pass+1)) + else + echo "FAIL" + fi + fi +} + +usage () +{ + cat <<EOF +Usage: glcpp-cr-lf [options...] + +Run the entire glcpp-test suite several times, each time with each source +file transformed to use a non-standard line-termination character.
Each +entire run with a different line-termination character is considered a +single test. + +Valid options include: + + -v|--verbose Print all output from the various sub-tests +EOF +} + +# Parse command-line options +for option; do + case "${option}" in + -v|--verbose) + VERBOSE=yes; + ;; + *) + echo "Unrecognized option: $option" >&2 + echo >&2 + usage + exit 1 + ;; + esac +done + +# All tests depend on the .out files being present. So first do a +# normal run of the test suite, (silently) just to create the .out +# files as a side effect. +rm -rf ./subtest-lf +mkdir subtest-lf +for file in "$testdir"/*.c; do + base=$(basename "$file") + cp "$file" subtest-lf +done + +${glcpp_test} --testdir=subtest-lf >/dev/null 2>&1 + +echo "===== Testing with \\\\r line terminators (old Mac format) =====" + +# Prepare test files with '\r' instead of '\n' +rm -rf ./subtest-cr +mkdir subtest-cr +for file in "$testdir"/*.c; do + base=$(basename "$file") + tr "\n" "\r" < "$file" > subtest-cr/"$base" + cp `pwd`/glsl/glcpp/tests/subtest-lf/"$base".out subtest-cr/"$base".expected +done + +run_test "${glcpp_test} --testdir=subtest-cr" + +echo "===== Testing with \\\\r\\\\n line terminators (DOS format) =====" + +# Prepare test files with '\r\n' instead of '\n' +rm -rf ./subtest-cr-lf +mkdir subtest-cr-lf +for file in "$testdir"/*.c; do + base=$(basename "$file") + sed -e 's/$/
/' < "$file" > subtest-cr-lf/"$base" + cp `pwd`/glsl/glcpp/tests/subtest-lf/"$base".out subtest-cr-lf/"$base".expected +done + +run_test "${glcpp_test} --testdir=subtest-cr-lf" + +echo "===== Testing with \\\\n\\\\r (bizarre, but allowed by GLSL spec.) =====" + +# Prepare test files with '\n\r' instead of '\n' +rm -rf ./subtest-lf-cr +mkdir subtest-lf-cr +for file in "$testdir"/*.c; do + base=$(basename "$file") + sed -e 's/$/
/' < "$file" | tr "\n\r" "\r\n" > subtest-lf-cr/"$base" + cp `pwd`/glsl/glcpp/tests/subtest-lf/"$base".out subtest-lf-cr/"$base".expected +done + +run_test "${glcpp_test} --testdir=subtest-lf-cr" + +echo "" +echo "$pass/$total tests returned correct results" +echo "" + +if [ "$pass" = "$total" ]; then + exit 0 +else + exit 1 +fi diff --git a/src/compiler/glsl/glsl_lexer.ll b/src/compiler/glsl/glsl_lexer.ll new file mode 100644 index 0000000..e59f93e --- /dev/null +++ b/src/compiler/glsl/glsl_lexer.ll @@ -0,0 +1,635 @@ +%{ +/* + * Copyright © 2008, 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#include <ctype.h> +#include <limits.h> +#include "util/strtod.h" +#include "ast.h" +#include "glsl_parser_extras.h" +#include "glsl_parser.h" + +static int classify_identifier(struct _mesa_glsl_parse_state *, const char *); + +#ifdef _MSC_VER +#define YY_NO_UNISTD_H +#endif + +#define YY_USER_ACTION \ + do { \ + yylloc->first_column = yycolumn + 1; \ + yylloc->first_line = yylloc->last_line = yylineno + 1; \ + yycolumn += yyleng; \ + yylloc->last_column = yycolumn + 1; \ + } while(0); + +#define YY_USER_INIT yylineno = 0; yycolumn = 0; yylloc->source = 0; + +/* A macro for handling reserved words and keywords across language versions. + * + * Certain words start out as identifiers, become reserved words in + * later language revisions, and finally become language keywords. + * This may happen at different times in desktop GLSL and GLSL ES. + * + * For example, consider the following lexer rule: + * samplerBuffer KEYWORD(130, 0, 140, 0, SAMPLERBUFFER) + * + * This means that "samplerBuffer" will be treated as: + * - a keyword (SAMPLERBUFFER token) ...in GLSL >= 1.40 + * - a reserved word - error ...in GLSL >= 1.30 + * - an identifier ...in GLSL < 1.30 or GLSL ES + */ +#define KEYWORD(reserved_glsl, reserved_glsl_es, \ + allowed_glsl, allowed_glsl_es, token) \ + KEYWORD_WITH_ALT(reserved_glsl, reserved_glsl_es, \ + allowed_glsl, allowed_glsl_es, false, token) + +/** + * Like the KEYWORD macro, but the word is also treated as a keyword + * if the given boolean expression is true. 
+ */ +#define KEYWORD_WITH_ALT(reserved_glsl, reserved_glsl_es, \ + allowed_glsl, allowed_glsl_es, \ + alt_expr, token) \ + do { \ + if (yyextra->is_version(allowed_glsl, allowed_glsl_es) \ + || (alt_expr)) { \ + return token; \ + } else if (yyextra->is_version(reserved_glsl, \ + reserved_glsl_es)) { \ + _mesa_glsl_error(yylloc, yyextra, \ + "illegal use of reserved word `%s'", yytext); \ + return ERROR_TOK; \ + } else { \ + void *mem_ctx = yyextra; \ + yylval->identifier = ralloc_strdup(mem_ctx, yytext); \ + return classify_identifier(yyextra, yytext); \ + } \ + } while (0) + +/** + * A macro for handling keywords that have been present in GLSL since + * its origin, but were changed into reserved words in GLSL 3.00 ES. + */ +#define DEPRECATED_ES_KEYWORD(token) \ + do { \ + if (yyextra->is_version(0, 300)) { \ + _mesa_glsl_error(yylloc, yyextra, \ + "illegal use of reserved word `%s'", yytext); \ + return ERROR_TOK; \ + } else { \ + return token; \ + } \ + } while (0) + +static int +literal_integer(char *text, int len, struct _mesa_glsl_parse_state *state, + YYSTYPE *lval, YYLTYPE *lloc, int base) +{ + bool is_uint = (text[len - 1] == 'u' || + text[len - 1] == 'U'); + const char *digits = text; + + /* Skip "0x" */ + if (base == 16) + digits += 2; + +#ifdef _MSC_VER + unsigned __int64 value = _strtoui64(digits, NULL, base); +#else + unsigned long long value = strtoull(digits, NULL, base); +#endif + + lval->n = (int)value; + + if (value > UINT_MAX) { + /* Note that signed 0xffffffff is valid, not out of range! */ + if (state->is_version(130, 300)) { + _mesa_glsl_error(lloc, state, + "literal value `%s' out of range", text); + } else { + _mesa_glsl_warning(lloc, state, + "literal value `%s' out of range", text); + } + } else if (base == 10 && !is_uint && (unsigned)value > (unsigned)INT_MAX + 1) { + /* Tries to catch unintentionally providing a negative value. + * Note that -2147483648 is parsed as -(2147483648), so we don't + * want to warn for INT_MAX. 
+ */ + _mesa_glsl_warning(lloc, state, + "signed literal value `%s' is interpreted as %d", + text, lval->n); + } + return is_uint ? UINTCONSTANT : INTCONSTANT; +} + +#define LITERAL_INTEGER(base) \ + literal_integer(yytext, yyleng, yyextra, yylval, yylloc, base) + +%} + +%option bison-bridge bison-locations reentrant noyywrap +%option nounput noyy_top_state +%option never-interactive +%option prefix="_mesa_glsl_lexer_" +%option extra-type="struct _mesa_glsl_parse_state *" +%option warn nodefault + + /* Note: When adding any start conditions to this list, you must also + * update the "Internal compiler error" catch-all rule near the end of + * this file. */ +%x PP PRAGMA + +DEC_INT [1-9][0-9]* +HEX_INT 0[xX][0-9a-fA-F]+ +OCT_INT 0[0-7]* +INT ({DEC_INT}|{HEX_INT}|{OCT_INT}) +SPC [ \t]* +SPCP [ \t]+ +HASH ^{SPC}#{SPC} +%% + +[ \r\t]+ ; + + /* Preprocessor tokens. */ +^[ \t]*#[ \t]*$ ; +^[ \t]*#[ \t]*version { BEGIN PP; return VERSION_TOK; } +^[ \t]*#[ \t]*extension { BEGIN PP; return EXTENSION; } +{HASH}line{SPCP}{INT}{SPCP}{INT}{SPC}$ { + /* Eat characters until the first digit is + * encountered + */ + char *ptr = yytext; + while (!isdigit(*ptr)) + ptr++; + + /* Subtract one from the line number because + * yylineno is zero-based instead of + * one-based. + */ + yylineno = strtol(ptr, &ptr, 0) - 1; + + /* From GLSL 3.30 and GLSL ES on, after processing the + * line directive (including its new-line), the implementation + * will behave as if it is compiling at the line number passed + * as argument. It was line number + 1 in older specifications. + */ + if (yyextra->is_version(330, 100)) + yylineno--; + + yylloc->source = strtol(ptr, NULL, 0); + } +{HASH}line{SPCP}{INT}{SPC}$ { + /* Eat characters until the first digit is + * encountered + */ + char *ptr = yytext; + while (!isdigit(*ptr)) + ptr++; + + /* Subtract one from the line number because + * yylineno is zero-based instead of + * one-based. 
+ */ + yylineno = strtol(ptr, &ptr, 0) - 1; + + /* From GLSL 3.30 and GLSL ES on, after processing the + * line directive (including its new-line), the implementation + * will behave as if it is compiling at the line number passed + * as argument. It was line number + 1 in older specifications. + */ + if (yyextra->is_version(330, 100)) + yylineno--; + } +^{SPC}#{SPC}pragma{SPCP}debug{SPC}\({SPC}on{SPC}\) { + BEGIN PP; + return PRAGMA_DEBUG_ON; + } +^{SPC}#{SPC}pragma{SPCP}debug{SPC}\({SPC}off{SPC}\) { + BEGIN PP; + return PRAGMA_DEBUG_OFF; + } +^{SPC}#{SPC}pragma{SPCP}optimize{SPC}\({SPC}on{SPC}\) { + BEGIN PP; + return PRAGMA_OPTIMIZE_ON; + } +^{SPC}#{SPC}pragma{SPCP}optimize{SPC}\({SPC}off{SPC}\) { + BEGIN PP; + return PRAGMA_OPTIMIZE_OFF; + } +^{SPC}#{SPC}pragma{SPCP}STDGL{SPCP}invariant{SPC}\({SPC}all{SPC}\) { + BEGIN PP; + return PRAGMA_INVARIANT_ALL; + } +^{SPC}#{SPC}pragma{SPCP} { BEGIN PRAGMA; } + +<PRAGMA>\n { BEGIN 0; yylineno++; yycolumn = 0; } +<PRAGMA>. { } + +<PP>\/\/[^\n]* { } +<PP>[ \t\r]* { } +<PP>: return COLON; +<PP>[_a-zA-Z][_a-zA-Z0-9]* { + void *mem_ctx = yyextra; + yylval->identifier = ralloc_strdup(mem_ctx, yytext); + return IDENTIFIER; + } +<PP>[1-9][0-9]* { + yylval->n = strtol(yytext, NULL, 10); + return INTCONSTANT; + } +<PP>\n { BEGIN 0; yylineno++; yycolumn = 0; return EOL; } +<PP>. 
{ return yytext[0]; } + +\n { yylineno++; yycolumn = 0; } + +attribute DEPRECATED_ES_KEYWORD(ATTRIBUTE); +const return CONST_TOK; +bool return BOOL_TOK; +float return FLOAT_TOK; +int return INT_TOK; +uint KEYWORD(130, 300, 130, 300, UINT_TOK); + +break return BREAK; +continue return CONTINUE; +do return DO; +while return WHILE; +else return ELSE; +for return FOR; +if return IF; +discard return DISCARD; +return return RETURN; + +bvec2 return BVEC2; +bvec3 return BVEC3; +bvec4 return BVEC4; +ivec2 return IVEC2; +ivec3 return IVEC3; +ivec4 return IVEC4; +uvec2 KEYWORD(130, 300, 130, 300, UVEC2); +uvec3 KEYWORD(130, 300, 130, 300, UVEC3); +uvec4 KEYWORD(130, 300, 130, 300, UVEC4); +vec2 return VEC2; +vec3 return VEC3; +vec4 return VEC4; +mat2 return MAT2X2; +mat3 return MAT3X3; +mat4 return MAT4X4; +mat2x2 KEYWORD(120, 300, 120, 300, MAT2X2); +mat2x3 KEYWORD(120, 300, 120, 300, MAT2X3); +mat2x4 KEYWORD(120, 300, 120, 300, MAT2X4); +mat3x2 KEYWORD(120, 300, 120, 300, MAT3X2); +mat3x3 KEYWORD(120, 300, 120, 300, MAT3X3); +mat3x4 KEYWORD(120, 300, 120, 300, MAT3X4); +mat4x2 KEYWORD(120, 300, 120, 300, MAT4X2); +mat4x3 KEYWORD(120, 300, 120, 300, MAT4X3); +mat4x4 KEYWORD(120, 300, 120, 300, MAT4X4); + +in return IN_TOK; +out return OUT_TOK; +inout return INOUT_TOK; +uniform return UNIFORM; +buffer return BUFFER; +varying DEPRECATED_ES_KEYWORD(VARYING); +centroid KEYWORD(120, 300, 120, 300, CENTROID); +invariant KEYWORD(120, 100, 120, 100, INVARIANT); +flat KEYWORD(130, 100, 130, 300, FLAT); +smooth KEYWORD(130, 300, 130, 300, SMOOTH); +noperspective KEYWORD(130, 300, 130, 0, NOPERSPECTIVE); +patch KEYWORD_WITH_ALT(0, 300, 400, 0, yyextra->ARB_tessellation_shader_enable, PATCH); + +sampler1D DEPRECATED_ES_KEYWORD(SAMPLER1D); +sampler2D return SAMPLER2D; +sampler3D return SAMPLER3D; +samplerCube return SAMPLERCUBE; +sampler1DArray KEYWORD(130, 300, 130, 0, SAMPLER1DARRAY); +sampler2DArray KEYWORD(130, 300, 130, 300, SAMPLER2DARRAY); +sampler1DShadow 
DEPRECATED_ES_KEYWORD(SAMPLER1DSHADOW); +sampler2DShadow return SAMPLER2DSHADOW; +samplerCubeShadow KEYWORD(130, 300, 130, 300, SAMPLERCUBESHADOW); +sampler1DArrayShadow KEYWORD(130, 300, 130, 0, SAMPLER1DARRAYSHADOW); +sampler2DArrayShadow KEYWORD(130, 300, 130, 300, SAMPLER2DARRAYSHADOW); +isampler1D KEYWORD(130, 300, 130, 0, ISAMPLER1D); +isampler2D KEYWORD(130, 300, 130, 300, ISAMPLER2D); +isampler3D KEYWORD(130, 300, 130, 300, ISAMPLER3D); +isamplerCube KEYWORD(130, 300, 130, 300, ISAMPLERCUBE); +isampler1DArray KEYWORD(130, 300, 130, 0, ISAMPLER1DARRAY); +isampler2DArray KEYWORD(130, 300, 130, 300, ISAMPLER2DARRAY); +usampler1D KEYWORD(130, 300, 130, 0, USAMPLER1D); +usampler2D KEYWORD(130, 300, 130, 300, USAMPLER2D); +usampler3D KEYWORD(130, 300, 130, 300, USAMPLER3D); +usamplerCube KEYWORD(130, 300, 130, 300, USAMPLERCUBE); +usampler1DArray KEYWORD(130, 300, 130, 0, USAMPLER1DARRAY); +usampler2DArray KEYWORD(130, 300, 130, 300, USAMPLER2DARRAY); + + /* additional keywords in ARB_texture_multisample, included in GLSL 1.50 */ + /* these are reserved but not defined in GLSL 3.00 */ + /* [iu]sampler2DMS are defined in GLSL ES 3.10 */ +sampler2DMS KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, SAMPLER2DMS); +isampler2DMS KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, ISAMPLER2DMS); +usampler2DMS KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, USAMPLER2DMS); +sampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || yyextra->OES_texture_storage_multisample_2d_array_enable, SAMPLER2DMSARRAY); +isampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || yyextra->OES_texture_storage_multisample_2d_array_enable, ISAMPLER2DMSARRAY); +usampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || yyextra->OES_texture_storage_multisample_2d_array_enable, 
USAMPLER2DMSARRAY); + + /* keywords available with ARB_texture_cube_map_array_enable extension on desktop GLSL */ +samplerCubeArray KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAY); +isamplerCubeArray KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, ISAMPLERCUBEARRAY); +usamplerCubeArray KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, USAMPLERCUBEARRAY); +samplerCubeArrayShadow KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAYSHADOW); + +samplerExternalOES { + if (yyextra->OES_EGL_image_external_enable) + return SAMPLEREXTERNALOES; + else + return IDENTIFIER; + } + + /* keywords available with ARB_gpu_shader5 */ +precise KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_gpu_shader5_enable, PRECISE); + + /* keywords available with ARB_shader_image_load_store */ +image1D KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE1D); +image2D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGE2D); +image3D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGE3D); +image2DRect KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE2DRECT); +imageCube KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGECUBE); +imageBuffer KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGEBUFFER); +image1DArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE1DARRAY); +image2DArray KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGE2DARRAY); +imageCubeArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGECUBEARRAY); +image2DMS KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE2DMS); +image2DMSArray 
KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE2DMSARRAY); +iimage1D KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE1D); +iimage2D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGE2D); +iimage3D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGE3D); +iimage2DRect KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DRECT); +iimageCube KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGECUBE); +iimageBuffer KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGEBUFFER); +iimage1DArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE1DARRAY); +iimage2DArray KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DARRAY); +iimageCubeArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGECUBEARRAY); +iimage2DMS KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DMS); +iimage2DMSArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DMSARRAY); +uimage1D KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE1D); +uimage2D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGE2D); +uimage3D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGE3D); +uimage2DRect KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DRECT); +uimageCube KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGECUBE); +uimageBuffer KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGEBUFFER); +uimage1DArray KEYWORD_WITH_ALT(130, 300, 420, 0, 
yyextra->ARB_shader_image_load_store_enable, UIMAGE1DARRAY); +uimage2DArray KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DARRAY); +uimageCubeArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGECUBEARRAY); +uimage2DMS KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DMS); +uimage2DMSArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DMSARRAY); +image1DShadow KEYWORD(130, 300, 0, 0, IMAGE1DSHADOW); +image2DShadow KEYWORD(130, 300, 0, 0, IMAGE2DSHADOW); +image1DArrayShadow KEYWORD(130, 300, 0, 0, IMAGE1DARRAYSHADOW); +image2DArrayShadow KEYWORD(130, 300, 0, 0, IMAGE2DARRAYSHADOW); + +coherent KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, COHERENT); +volatile KEYWORD_WITH_ALT(110, 100, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, VOLATILE); +restrict KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, RESTRICT); +readonly KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, READONLY); +writeonly KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, WRITEONLY); + +atomic_uint KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_atomic_counters_enable, ATOMIC_UINT); + +shared KEYWORD_WITH_ALT(430, 310, 430, 310, yyextra->ARB_compute_shader_enable, SHARED); + +struct return STRUCT; +void return VOID_TOK; + +layout { + if ((yyextra->is_version(140, 300)) + || yyextra->AMD_conservative_depth_enable + || yyextra->ARB_conservative_depth_enable + || yyextra->ARB_explicit_attrib_location_enable + || 
yyextra->ARB_explicit_uniform_location_enable + || yyextra->has_separate_shader_objects() + || yyextra->ARB_uniform_buffer_object_enable + || yyextra->ARB_fragment_coord_conventions_enable + || yyextra->ARB_shading_language_420pack_enable + || yyextra->ARB_compute_shader_enable + || yyextra->ARB_tessellation_shader_enable) { + return LAYOUT_TOK; + } else { + void *mem_ctx = yyextra; + yylval->identifier = ralloc_strdup(mem_ctx, yytext); + return classify_identifier(yyextra, yytext); + } + } + +\+\+ return INC_OP; +-- return DEC_OP; +\<= return LE_OP; +>= return GE_OP; +== return EQ_OP; +!= return NE_OP; +&& return AND_OP; +\|\| return OR_OP; +"^^" return XOR_OP; +"<<" return LEFT_OP; +">>" return RIGHT_OP; + +\*= return MUL_ASSIGN; +\/= return DIV_ASSIGN; +\+= return ADD_ASSIGN; +\%= return MOD_ASSIGN; +\<\<= return LEFT_ASSIGN; +>>= return RIGHT_ASSIGN; +&= return AND_ASSIGN; +"^=" return XOR_ASSIGN; +\|= return OR_ASSIGN; +-= return SUB_ASSIGN; + +[1-9][0-9]*[uU]? { + return LITERAL_INTEGER(10); + } +0[xX][0-9a-fA-F]+[uU]? { + return LITERAL_INTEGER(16); + } +0[0-7]*[uU]? { + return LITERAL_INTEGER(8); + } + +[0-9]+\.[0-9]+([eE][+-]?[0-9]+)?[fF]? | +\.[0-9]+([eE][+-]?[0-9]+)?[fF]? | +[0-9]+\.([eE][+-]?[0-9]+)?[fF]? | +[0-9]+[eE][+-]?[0-9]+[fF]? { + yylval->real = _mesa_strtof(yytext, NULL); + return FLOATCONSTANT; + } + +[0-9]+\.[0-9]+([eE][+-]?[0-9]+)?(lf|LF) | +\.[0-9]+([eE][+-]?[0-9]+)?(lf|LF) | +[0-9]+\.([eE][+-]?[0-9]+)?(lf|LF) | +[0-9]+[eE][+-]?[0-9]+(lf|LF) { + if (!yyextra->is_version(400, 0) && + !yyextra->ARB_gpu_shader_fp64_enable) + return ERROR_TOK; + yylval->dreal = _mesa_strtod(yytext, NULL); + return DOUBLECONSTANT; + } + +true { + yylval->n = 1; + return BOOLCONSTANT; + } +false { + yylval->n = 0; + return BOOLCONSTANT; + } + + + /* Reserved words in GLSL 1.10. 
*/ +asm KEYWORD(110, 100, 0, 0, ASM); +class KEYWORD(110, 100, 0, 0, CLASS); +union KEYWORD(110, 100, 0, 0, UNION); +enum KEYWORD(110, 100, 0, 0, ENUM); +typedef KEYWORD(110, 100, 0, 0, TYPEDEF); +template KEYWORD(110, 100, 0, 0, TEMPLATE); +this KEYWORD(110, 100, 0, 0, THIS); +packed KEYWORD_WITH_ALT(110, 100, 140, 300, yyextra->ARB_uniform_buffer_object_enable, PACKED_TOK); +goto KEYWORD(110, 100, 0, 0, GOTO); +switch KEYWORD(110, 100, 130, 300, SWITCH); +default KEYWORD(110, 100, 130, 300, DEFAULT); +inline KEYWORD(110, 100, 0, 0, INLINE_TOK); +noinline KEYWORD(110, 100, 0, 0, NOINLINE); +public KEYWORD(110, 100, 0, 0, PUBLIC_TOK); +static KEYWORD(110, 100, 0, 0, STATIC); +extern KEYWORD(110, 100, 0, 0, EXTERN); +external KEYWORD(110, 100, 0, 0, EXTERNAL); +interface KEYWORD(110, 100, 0, 0, INTERFACE); +long KEYWORD(110, 100, 0, 0, LONG_TOK); +short KEYWORD(110, 100, 0, 0, SHORT_TOK); +double KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DOUBLE_TOK); +half KEYWORD(110, 100, 0, 0, HALF); +fixed KEYWORD(110, 100, 0, 0, FIXED_TOK); +unsigned KEYWORD(110, 100, 0, 0, UNSIGNED); +input KEYWORD(110, 100, 0, 0, INPUT_TOK); +output KEYWORD(110, 100, 0, 0, OUTPUT); +hvec2 KEYWORD(110, 100, 0, 0, HVEC2); +hvec3 KEYWORD(110, 100, 0, 0, HVEC3); +hvec4 KEYWORD(110, 100, 0, 0, HVEC4); +dvec2 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DVEC2); +dvec3 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DVEC3); +dvec4 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DVEC4); +dmat2 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X2); +dmat3 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X3); +dmat4 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X4); +dmat2x2 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X2); +dmat2x3 KEYWORD_WITH_ALT(110, 100, 400, 0, 
yyextra->ARB_gpu_shader_fp64_enable, DMAT2X3); +dmat2x4 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X4); +dmat3x2 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X2); +dmat3x3 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X3); +dmat3x4 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X4); +dmat4x2 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X2); +dmat4x3 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X3); +dmat4x4 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X4); +fvec2 KEYWORD(110, 100, 0, 0, FVEC2); +fvec3 KEYWORD(110, 100, 0, 0, FVEC3); +fvec4 KEYWORD(110, 100, 0, 0, FVEC4); +sampler2DRect DEPRECATED_ES_KEYWORD(SAMPLER2DRECT); +sampler3DRect KEYWORD(110, 100, 0, 0, SAMPLER3DRECT); +sampler2DRectShadow DEPRECATED_ES_KEYWORD(SAMPLER2DRECTSHADOW); +sizeof KEYWORD(110, 100, 0, 0, SIZEOF); +cast KEYWORD(110, 100, 0, 0, CAST); +namespace KEYWORD(110, 100, 0, 0, NAMESPACE); +using KEYWORD(110, 100, 0, 0, USING); + + /* Additional reserved words in GLSL 1.20. */ +lowp KEYWORD(120, 100, 130, 100, LOWP); +mediump KEYWORD(120, 100, 130, 100, MEDIUMP); +highp KEYWORD(120, 100, 130, 100, HIGHP); +precision KEYWORD(120, 100, 130, 100, PRECISION); + + /* Additional reserved words in GLSL 1.30. 
*/ +case KEYWORD(130, 300, 130, 300, CASE); +common KEYWORD(130, 300, 0, 0, COMMON); +partition KEYWORD(130, 300, 0, 0, PARTITION); +active KEYWORD(130, 300, 0, 0, ACTIVE); +superp KEYWORD(130, 100, 0, 0, SUPERP); +samplerBuffer KEYWORD(130, 300, 140, 0, SAMPLERBUFFER); +filter KEYWORD(130, 300, 0, 0, FILTER); +row_major KEYWORD_WITH_ALT(130, 0, 140, 0, yyextra->ARB_uniform_buffer_object_enable && !yyextra->es_shader, ROW_MAJOR); + + /* Additional reserved words in GLSL 1.40 */ +isampler2DRect KEYWORD(140, 300, 140, 0, ISAMPLER2DRECT); +usampler2DRect KEYWORD(140, 300, 140, 0, USAMPLER2DRECT); +isamplerBuffer KEYWORD(140, 300, 140, 0, ISAMPLERBUFFER); +usamplerBuffer KEYWORD(140, 300, 140, 0, USAMPLERBUFFER); + + /* Additional reserved words in GLSL ES 3.00 */ +resource KEYWORD(0, 300, 0, 0, RESOURCE); +sample KEYWORD_WITH_ALT(400, 300, 400, 0, yyextra->ARB_gpu_shader5_enable, SAMPLE); +subroutine KEYWORD_WITH_ALT(400, 300, 400, 0, yyextra->ARB_shader_subroutine_enable, SUBROUTINE); + + +[_a-zA-Z][_a-zA-Z0-9]* { + struct _mesa_glsl_parse_state *state = yyextra; + void *ctx = state; + if (state->es_shader && strlen(yytext) > 1024) { + _mesa_glsl_error(yylloc, state, + "Identifier `%s' exceeds 1024 characters", + yytext); + } else { + yylval->identifier = ralloc_strdup(ctx, yytext); + } + return classify_identifier(state, yytext); + } + +\. { struct _mesa_glsl_parse_state *state = yyextra; + state->is_field = true; + return DOT_TOK; } + +. 
{ return yytext[0]; } + +%% + +int +classify_identifier(struct _mesa_glsl_parse_state *state, const char *name) +{ + if (state->is_field) { + state->is_field = false; + return FIELD_SELECTION; + } + if (state->symbols->get_variable(name) || state->symbols->get_function(name)) + return IDENTIFIER; + else if (state->symbols->get_type(name)) + return TYPE_IDENTIFIER; + else + return NEW_IDENTIFIER; +} + +void +_mesa_glsl_lexer_ctor(struct _mesa_glsl_parse_state *state, const char *string) +{ + yylex_init_extra(state, & state->scanner); + yy_scan_string(string, state->scanner); +} + +void +_mesa_glsl_lexer_dtor(struct _mesa_glsl_parse_state *state) +{ + yylex_destroy(state->scanner); +} diff --git a/src/compiler/glsl/glsl_parser.yy b/src/compiler/glsl/glsl_parser.yy new file mode 100644 index 0000000..99bd0e6 --- /dev/null +++ b/src/compiler/glsl/glsl_parser.yy @@ -0,0 +1,2855 @@ +%{ +/* + * Copyright © 2008, 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#ifndef _MSC_VER +#include <strings.h> +#endif +#include <assert.h> + +#include "ast.h" +#include "glsl_parser_extras.h" +#include "compiler/glsl_types.h" +#include "main/context.h" + +#ifdef _MSC_VER +#pragma warning( disable : 4065 ) // switch statement contains 'default' but no 'case' labels +#endif + +#undef yyerror + +static void yyerror(YYLTYPE *loc, _mesa_glsl_parse_state *st, const char *msg) +{ + _mesa_glsl_error(loc, st, "%s", msg); +} + +static int +_mesa_glsl_lex(YYSTYPE *val, YYLTYPE *loc, _mesa_glsl_parse_state *state) +{ + return _mesa_glsl_lexer_lex(val, loc, state->scanner); +} + +static bool match_layout_qualifier(const char *s1, const char *s2, + _mesa_glsl_parse_state *state) +{ + /* From the GLSL 1.50 spec, section 4.3.8 (Layout Qualifiers): + * + * "The tokens in any layout-qualifier-id-list ... are not case + * sensitive, unless explicitly noted otherwise." + * + * The text "unless explicitly noted otherwise" appears to be + * vacuous--no desktop GLSL spec (up through GLSL 4.40) notes + * otherwise. + * + * However, the GLSL ES 3.00 spec says, in section 4.3.8 (Layout + * Qualifiers): + * + * "As for other identifiers, they are case sensitive." + * + * So we need to do a case-sensitive or a case-insensitive match, + * depending on whether we are compiling for GLSL ES. 
+ */ + if (state->es_shader) + return strcmp(s1, s2); + else + return strcasecmp(s1, s2); +} +%} + +%expect 0 + +%pure-parser +%error-verbose + +%locations +%initial-action { + @$.first_line = 1; + @$.first_column = 1; + @$.last_line = 1; + @$.last_column = 1; + @$.source = 0; +} + +%lex-param {struct _mesa_glsl_parse_state *state} +%parse-param {struct _mesa_glsl_parse_state *state} + +%union { + int n; + float real; + double dreal; + const char *identifier; + + struct ast_type_qualifier type_qualifier; + + ast_node *node; + ast_type_specifier *type_specifier; + ast_array_specifier *array_specifier; + ast_fully_specified_type *fully_specified_type; + ast_function *function; + ast_parameter_declarator *parameter_declarator; + ast_function_definition *function_definition; + ast_compound_statement *compound_statement; + ast_expression *expression; + ast_declarator_list *declarator_list; + ast_struct_specifier *struct_specifier; + ast_declaration *declaration; + ast_switch_body *switch_body; + ast_case_label *case_label; + ast_case_label_list *case_label_list; + ast_case_statement *case_statement; + ast_case_statement_list *case_statement_list; + ast_interface_block *interface_block; + ast_subroutine_list *subroutine_list; + struct { + ast_node *cond; + ast_expression *rest; + } for_rest_statement; + + struct { + ast_node *then_statement; + ast_node *else_statement; + } selection_rest_statement; +} + +%token ATTRIBUTE CONST_TOK BOOL_TOK FLOAT_TOK INT_TOK UINT_TOK DOUBLE_TOK +%token BREAK BUFFER CONTINUE DO ELSE FOR IF DISCARD RETURN SWITCH CASE DEFAULT +%token BVEC2 BVEC3 BVEC4 IVEC2 IVEC3 IVEC4 UVEC2 UVEC3 UVEC4 VEC2 VEC3 VEC4 DVEC2 DVEC3 DVEC4 +%token CENTROID IN_TOK OUT_TOK INOUT_TOK UNIFORM VARYING SAMPLE +%token NOPERSPECTIVE FLAT SMOOTH +%token MAT2X2 MAT2X3 MAT2X4 +%token MAT3X2 MAT3X3 MAT3X4 +%token MAT4X2 MAT4X3 MAT4X4 +%token DMAT2X2 DMAT2X3 DMAT2X4 +%token DMAT3X2 DMAT3X3 DMAT3X4 +%token DMAT4X2 DMAT4X3 DMAT4X4 +%token SAMPLER1D SAMPLER2D SAMPLER3D 
SAMPLERCUBE SAMPLER1DSHADOW SAMPLER2DSHADOW +%token SAMPLERCUBESHADOW SAMPLER1DARRAY SAMPLER2DARRAY SAMPLER1DARRAYSHADOW +%token SAMPLER2DARRAYSHADOW SAMPLERCUBEARRAY SAMPLERCUBEARRAYSHADOW +%token ISAMPLER1D ISAMPLER2D ISAMPLER3D ISAMPLERCUBE +%token ISAMPLER1DARRAY ISAMPLER2DARRAY ISAMPLERCUBEARRAY +%token USAMPLER1D USAMPLER2D USAMPLER3D USAMPLERCUBE USAMPLER1DARRAY +%token USAMPLER2DARRAY USAMPLERCUBEARRAY +%token SAMPLER2DRECT ISAMPLER2DRECT USAMPLER2DRECT SAMPLER2DRECTSHADOW +%token SAMPLERBUFFER ISAMPLERBUFFER USAMPLERBUFFER +%token SAMPLER2DMS ISAMPLER2DMS USAMPLER2DMS +%token SAMPLER2DMSARRAY ISAMPLER2DMSARRAY USAMPLER2DMSARRAY +%token SAMPLEREXTERNALOES +%token IMAGE1D IMAGE2D IMAGE3D IMAGE2DRECT IMAGECUBE IMAGEBUFFER +%token IMAGE1DARRAY IMAGE2DARRAY IMAGECUBEARRAY IMAGE2DMS IMAGE2DMSARRAY +%token IIMAGE1D IIMAGE2D IIMAGE3D IIMAGE2DRECT IIMAGECUBE IIMAGEBUFFER +%token IIMAGE1DARRAY IIMAGE2DARRAY IIMAGECUBEARRAY IIMAGE2DMS IIMAGE2DMSARRAY +%token UIMAGE1D UIMAGE2D UIMAGE3D UIMAGE2DRECT UIMAGECUBE UIMAGEBUFFER +%token UIMAGE1DARRAY UIMAGE2DARRAY UIMAGECUBEARRAY UIMAGE2DMS UIMAGE2DMSARRAY +%token IMAGE1DSHADOW IMAGE2DSHADOW IMAGE1DARRAYSHADOW IMAGE2DARRAYSHADOW +%token COHERENT VOLATILE RESTRICT READONLY WRITEONLY +%token ATOMIC_UINT +%token SHARED +%token STRUCT VOID_TOK WHILE +%token <identifier> IDENTIFIER TYPE_IDENTIFIER NEW_IDENTIFIER +%type <identifier> any_identifier +%type <interface_block> instance_name_opt +%type <interface_block> buffer_instance_name_opt +%token <real> FLOATCONSTANT +%token <dreal> DOUBLECONSTANT +%token <n> INTCONSTANT UINTCONSTANT BOOLCONSTANT +%token <identifier> FIELD_SELECTION +%token LEFT_OP RIGHT_OP +%token INC_OP DEC_OP LE_OP GE_OP EQ_OP NE_OP +%token AND_OP OR_OP XOR_OP MUL_ASSIGN DIV_ASSIGN ADD_ASSIGN +%token MOD_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN XOR_ASSIGN OR_ASSIGN +%token SUB_ASSIGN +%token INVARIANT PRECISE +%token LOWP MEDIUMP HIGHP SUPERP PRECISION + +%token VERSION_TOK EXTENSION LINE COLON EOL INTERFACE 
OUTPUT +%token PRAGMA_DEBUG_ON PRAGMA_DEBUG_OFF +%token PRAGMA_OPTIMIZE_ON PRAGMA_OPTIMIZE_OFF +%token PRAGMA_INVARIANT_ALL +%token LAYOUT_TOK +%token DOT_TOK + /* Reserved words that are not actually used in the grammar. + */ +%token ASM CLASS UNION ENUM TYPEDEF TEMPLATE THIS PACKED_TOK GOTO +%token INLINE_TOK NOINLINE PUBLIC_TOK STATIC EXTERN EXTERNAL +%token LONG_TOK SHORT_TOK HALF FIXED_TOK UNSIGNED INPUT_TOK +%token HVEC2 HVEC3 HVEC4 FVEC2 FVEC3 FVEC4 +%token SAMPLER3DRECT +%token SIZEOF CAST NAMESPACE USING +%token RESOURCE PATCH +%token SUBROUTINE + +%token ERROR_TOK + +%token COMMON PARTITION ACTIVE FILTER ROW_MAJOR + +%type <identifier> variable_identifier +%type <node> statement +%type <node> statement_list +%type <node> simple_statement +%type <n> precision_qualifier +%type <type_qualifier> type_qualifier +%type <type_qualifier> auxiliary_storage_qualifier +%type <type_qualifier> storage_qualifier +%type <type_qualifier> interpolation_qualifier +%type <type_qualifier> layout_qualifier +%type <type_qualifier> layout_qualifier_id_list layout_qualifier_id +%type <type_qualifier> interface_block_layout_qualifier +%type <type_qualifier> memory_qualifier +%type <type_qualifier> subroutine_qualifier +%type <subroutine_list> subroutine_type_list +%type <type_qualifier> interface_qualifier +%type <type_qualifier> buffer_interface_qualifier +%type <type_specifier> type_specifier +%type <type_specifier> type_specifier_nonarray +%type <array_specifier> array_specifier +%type <identifier> basic_type_specifier_nonarray +%type <fully_specified_type> fully_specified_type +%type <function> function_prototype +%type <function> function_header +%type <function> function_header_with_parameters +%type <function> function_declarator +%type <parameter_declarator> parameter_declarator +%type <parameter_declarator> parameter_declaration +%type <type_qualifier> parameter_qualifier +%type <type_qualifier> parameter_direction_qualifier +%type <type_specifier> 
parameter_type_specifier +%type <function_definition> function_definition +%type <compound_statement> compound_statement_no_new_scope +%type <compound_statement> compound_statement +%type <node> statement_no_new_scope +%type <node> expression_statement +%type <expression> expression +%type <expression> primary_expression +%type <expression> assignment_expression +%type <expression> conditional_expression +%type <expression> logical_or_expression +%type <expression> logical_xor_expression +%type <expression> logical_and_expression +%type <expression> inclusive_or_expression +%type <expression> exclusive_or_expression +%type <expression> and_expression +%type <expression> equality_expression +%type <expression> relational_expression +%type <expression> shift_expression +%type <expression> additive_expression +%type <expression> multiplicative_expression +%type <expression> unary_expression +%type <expression> constant_expression +%type <expression> integer_expression +%type <expression> postfix_expression +%type <expression> function_call_header_with_parameters +%type <expression> function_call_header_no_parameters +%type <expression> function_call_header +%type <expression> function_call_generic +%type <expression> function_call_or_method +%type <expression> function_call +%type <n> assignment_operator +%type <n> unary_operator +%type <expression> function_identifier +%type <node> external_declaration +%type <declarator_list> init_declarator_list +%type <declarator_list> single_declaration +%type <expression> initializer +%type <expression> initializer_list +%type <node> declaration +%type <node> declaration_statement +%type <node> jump_statement +%type <node> interface_block +%type <interface_block> basic_interface_block +%type <struct_specifier> struct_specifier +%type <declarator_list> struct_declaration_list +%type <declarator_list> struct_declaration +%type <declaration> struct_declarator +%type <declaration> struct_declarator_list +%type <declarator_list> 
member_list +%type <declarator_list> member_declaration +%type <node> selection_statement +%type <selection_rest_statement> selection_rest_statement +%type <node> switch_statement +%type <switch_body> switch_body +%type <case_label_list> case_label_list +%type <case_label> case_label +%type <case_statement> case_statement +%type <case_statement_list> case_statement_list +%type <node> iteration_statement +%type <node> condition +%type <node> conditionopt +%type <node> for_init_statement +%type <for_rest_statement> for_rest_statement +%type <node> layout_defaults +%type <node> layout_uniform_defaults +%type <node> layout_buffer_defaults +%type <node> layout_in_defaults +%type <node> layout_out_defaults + +%right THEN ELSE +%% + +translation_unit: + version_statement extension_statement_list + { + _mesa_glsl_initialize_types(state); + } + external_declaration_list + { + delete state->symbols; + state->symbols = new(ralloc_parent(state)) glsl_symbol_table; + if (state->es_shader) { + if (state->stage == MESA_SHADER_FRAGMENT) { + state->symbols->add_default_precision_qualifier("int", ast_precision_medium); + } else { + state->symbols->add_default_precision_qualifier("float", ast_precision_high); + state->symbols->add_default_precision_qualifier("int", ast_precision_high); + } + state->symbols->add_default_precision_qualifier("sampler2D", ast_precision_low); + state->symbols->add_default_precision_qualifier("samplerExternalOES", ast_precision_low); + state->symbols->add_default_precision_qualifier("samplerCube", ast_precision_low); + state->symbols->add_default_precision_qualifier("atomic_uint", ast_precision_high); + } + _mesa_glsl_initialize_types(state); + } + ; + +version_statement: + /* blank - no #version specified: defaults are already set */ + | VERSION_TOK INTCONSTANT EOL + { + state->process_version_directive(&@2, $2, NULL); + if (state->error) { + YYERROR; + } + } + | VERSION_TOK INTCONSTANT any_identifier EOL + { + state->process_version_directive(&@2, $2, 
$3); + if (state->error) { + YYERROR; + } + } + ; + +pragma_statement: + PRAGMA_DEBUG_ON EOL + | PRAGMA_DEBUG_OFF EOL + | PRAGMA_OPTIMIZE_ON EOL + | PRAGMA_OPTIMIZE_OFF EOL + | PRAGMA_INVARIANT_ALL EOL + { + /* Pragma invariant(all) cannot be used in a fragment shader. + * + * Page 27 of the GLSL 1.20 spec, Page 53 of the GLSL ES 3.00 spec: + * + * "It is an error to use this pragma in a fragment shader." + */ + if (state->is_version(120, 300) && + state->stage == MESA_SHADER_FRAGMENT) { + _mesa_glsl_error(& @1, state, + "pragma `invariant(all)' cannot be used " + "in a fragment shader."); + } else if (!state->is_version(120, 100)) { + _mesa_glsl_warning(& @1, state, + "pragma `invariant(all)' not supported in %s " + "(GLSL ES 1.00 or GLSL 1.20 required)", + state->get_version_string()); + } else { + state->all_invariant = true; + } + } + ; + +extension_statement_list: + + | extension_statement_list extension_statement + ; + +any_identifier: + IDENTIFIER + | TYPE_IDENTIFIER + | NEW_IDENTIFIER + ; + +extension_statement: + EXTENSION any_identifier COLON any_identifier EOL + { + if (!_mesa_glsl_process_extension($2, & @2, $4, & @4, state)) { + YYERROR; + } + } + ; + +external_declaration_list: + external_declaration + { + /* FINISHME: The NULL test is required because pragmas are set to + * FINISHME: NULL. (See production rule for external_declaration.) + */ + if ($1 != NULL) + state->translation_unit.push_tail(& $1->link); + } + | external_declaration_list external_declaration + { + /* FINISHME: The NULL test is required because pragmas are set to + * FINISHME: NULL. (See production rule for external_declaration.) 
+ */ + if ($2 != NULL) + state->translation_unit.push_tail(& $2->link); + } + | external_declaration_list extension_statement { + if (!state->allow_extension_directive_midshader) { + _mesa_glsl_error(& @2, state, + "#extension directive is not allowed " + "in the middle of a shader"); + YYERROR; + } + } + ; + +variable_identifier: + IDENTIFIER + | NEW_IDENTIFIER + ; + +primary_expression: + variable_identifier + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_identifier, NULL, NULL, NULL); + $$->set_location(@1); + $$->primary_expression.identifier = $1; + } + | INTCONSTANT + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_int_constant, NULL, NULL, NULL); + $$->set_location(@1); + $$->primary_expression.int_constant = $1; + } + | UINTCONSTANT + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_uint_constant, NULL, NULL, NULL); + $$->set_location(@1); + $$->primary_expression.uint_constant = $1; + } + | FLOATCONSTANT + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_float_constant, NULL, NULL, NULL); + $$->set_location(@1); + $$->primary_expression.float_constant = $1; + } + | DOUBLECONSTANT + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_double_constant, NULL, NULL, NULL); + $$->set_location(@1); + $$->primary_expression.double_constant = $1; + } + | BOOLCONSTANT + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_bool_constant, NULL, NULL, NULL); + $$->set_location(@1); + $$->primary_expression.bool_constant = $1; + } + | '(' expression ')' + { + $$ = $2; + } + ; + +postfix_expression: + primary_expression + | postfix_expression '[' integer_expression ']' + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_array_index, $1, $3, NULL); + $$->set_location_range(@1, @4); + } + | function_call + { + $$ = $1; + } + | postfix_expression DOT_TOK FIELD_SELECTION + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_field_selection, $1, NULL, NULL); + $$->set_location_range(@1, @3); + 
$$->primary_expression.identifier = $3; + } + | postfix_expression INC_OP + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_post_inc, $1, NULL, NULL); + $$->set_location_range(@1, @2); + } + | postfix_expression DEC_OP + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_post_dec, $1, NULL, NULL); + $$->set_location_range(@1, @2); + } + ; + +integer_expression: + expression + ; + +function_call: + function_call_or_method + ; + +function_call_or_method: + function_call_generic + ; + +function_call_generic: + function_call_header_with_parameters ')' + | function_call_header_no_parameters ')' + ; + +function_call_header_no_parameters: + function_call_header VOID_TOK + | function_call_header + ; + +function_call_header_with_parameters: + function_call_header assignment_expression + { + $$ = $1; + $$->set_location(@1); + $$->expressions.push_tail(& $2->link); + } + | function_call_header_with_parameters ',' assignment_expression + { + $$ = $1; + $$->set_location(@1); + $$->expressions.push_tail(& $3->link); + } + ; + + // Grammar Note: Constructors look like functions, but lexical + // analysis recognized most of them as keywords. They are now + // recognized through "type_specifier". +function_call_header: + function_identifier '(' + ; + +function_identifier: + type_specifier + { + void *ctx = state; + $$ = new(ctx) ast_function_expression($1); + $$->set_location(@1); + } + | postfix_expression + { + void *ctx = state; + $$ = new(ctx) ast_function_expression($1); + $$->set_location(@1); + } + ; + + // Grammar Note: Constructors look like methods, but lexical + // analysis recognized most of them as keywords. They are now + // recognized through "type_specifier". + + // Grammar Note: No traditional style type casts. 
+unary_expression: + postfix_expression + | INC_OP unary_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_pre_inc, $2, NULL, NULL); + $$->set_location(@1); + } + | DEC_OP unary_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_pre_dec, $2, NULL, NULL); + $$->set_location(@1); + } + | unary_operator unary_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression($1, $2, NULL, NULL); + $$->set_location_range(@1, @2); + } + ; + + // Grammar Note: No '*' or '&' unary ops. Pointers are not supported. +unary_operator: + '+' { $$ = ast_plus; } + | '-' { $$ = ast_neg; } + | '!' { $$ = ast_logic_not; } + | '~' { $$ = ast_bit_not; } + ; + +multiplicative_expression: + unary_expression + | multiplicative_expression '*' unary_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_mul, $1, $3); + $$->set_location_range(@1, @3); + } + | multiplicative_expression '/' unary_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_div, $1, $3); + $$->set_location_range(@1, @3); + } + | multiplicative_expression '%' unary_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_mod, $1, $3); + $$->set_location_range(@1, @3); + } + ; + +additive_expression: + multiplicative_expression + | additive_expression '+' multiplicative_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_add, $1, $3); + $$->set_location_range(@1, @3); + } + | additive_expression '-' multiplicative_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_sub, $1, $3); + $$->set_location_range(@1, @3); + } + ; + +shift_expression: + additive_expression + | shift_expression LEFT_OP additive_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_lshift, $1, $3); + $$->set_location_range(@1, @3); + } + | shift_expression RIGHT_OP additive_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_rshift, $1, $3); + 
$$->set_location_range(@1, @3); + } + ; + +relational_expression: + shift_expression + | relational_expression '<' shift_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_less, $1, $3); + $$->set_location_range(@1, @3); + } + | relational_expression '>' shift_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_greater, $1, $3); + $$->set_location_range(@1, @3); + } + | relational_expression LE_OP shift_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_lequal, $1, $3); + $$->set_location_range(@1, @3); + } + | relational_expression GE_OP shift_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_gequal, $1, $3); + $$->set_location_range(@1, @3); + } + ; + +equality_expression: + relational_expression + | equality_expression EQ_OP relational_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_equal, $1, $3); + $$->set_location_range(@1, @3); + } + | equality_expression NE_OP relational_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_nequal, $1, $3); + $$->set_location_range(@1, @3); + } + ; + +and_expression: + equality_expression + | and_expression '&' equality_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_bit_and, $1, $3); + $$->set_location_range(@1, @3); + } + ; + +exclusive_or_expression: + and_expression + | exclusive_or_expression '^' and_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_bit_xor, $1, $3); + $$->set_location_range(@1, @3); + } + ; + +inclusive_or_expression: + exclusive_or_expression + | inclusive_or_expression '|' exclusive_or_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_bit_or, $1, $3); + $$->set_location_range(@1, @3); + } + ; + +logical_and_expression: + inclusive_or_expression + | logical_and_expression AND_OP inclusive_or_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_logic_and, $1, 
$3); + $$->set_location_range(@1, @3); + } + ; + +logical_xor_expression: + logical_and_expression + | logical_xor_expression XOR_OP logical_and_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_logic_xor, $1, $3); + $$->set_location_range(@1, @3); + } + ; + +logical_or_expression: + logical_xor_expression + | logical_or_expression OR_OP logical_xor_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_logic_or, $1, $3); + $$->set_location_range(@1, @3); + } + ; + +conditional_expression: + logical_or_expression + | logical_or_expression '?' expression ':' assignment_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_conditional, $1, $3, $5); + $$->set_location_range(@1, @5); + } + ; + +assignment_expression: + conditional_expression + | unary_expression assignment_operator assignment_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression($2, $1, $3, NULL); + $$->set_location_range(@1, @3); + } + ; + +assignment_operator: + '=' { $$ = ast_assign; } + | MUL_ASSIGN { $$ = ast_mul_assign; } + | DIV_ASSIGN { $$ = ast_div_assign; } + | MOD_ASSIGN { $$ = ast_mod_assign; } + | ADD_ASSIGN { $$ = ast_add_assign; } + | SUB_ASSIGN { $$ = ast_sub_assign; } + | LEFT_ASSIGN { $$ = ast_ls_assign; } + | RIGHT_ASSIGN { $$ = ast_rs_assign; } + | AND_ASSIGN { $$ = ast_and_assign; } + | XOR_ASSIGN { $$ = ast_xor_assign; } + | OR_ASSIGN { $$ = ast_or_assign; } + ; + +expression: + assignment_expression + { + $$ = $1; + } + | expression ',' assignment_expression + { + void *ctx = state; + if ($1->oper != ast_sequence) { + $$ = new(ctx) ast_expression(ast_sequence, NULL, NULL, NULL); + $$->set_location_range(@1, @3); + $$->expressions.push_tail(& $1->link); + } else { + $$ = $1; + } + + $$->expressions.push_tail(& $3->link); + } + ; + +constant_expression: + conditional_expression + ; + +declaration: + function_prototype ';' + { + state->symbols->pop_scope(); + $$ = $1; + } + | init_declarator_list ';' + { + $$ 
= $1; + } + | PRECISION precision_qualifier type_specifier ';' + { + $3->default_precision = $2; + $$ = $3; + } + | interface_block + { + $$ = $1; + } + ; + +function_prototype: + function_declarator ')' + ; + +function_declarator: + function_header + | function_header_with_parameters + ; + +function_header_with_parameters: + function_header parameter_declaration + { + $$ = $1; + $$->parameters.push_tail(& $2->link); + } + | function_header_with_parameters ',' parameter_declaration + { + $$ = $1; + $$->parameters.push_tail(& $3->link); + } + ; + +function_header: + fully_specified_type variable_identifier '(' + { + void *ctx = state; + $$ = new(ctx) ast_function(); + $$->set_location(@2); + $$->return_type = $1; + $$->identifier = $2; + + if ($1->qualifier.flags.q.subroutine) { + /* add type for IDENTIFIER search */ + state->symbols->add_type($2, glsl_type::get_subroutine_instance($2)); + } else + state->symbols->add_function(new(state) ir_function($2)); + state->symbols->push_scope(); + } + ; + +parameter_declarator: + type_specifier any_identifier + { + void *ctx = state; + $$ = new(ctx) ast_parameter_declarator(); + $$->set_location_range(@1, @2); + $$->type = new(ctx) ast_fully_specified_type(); + $$->type->set_location(@1); + $$->type->specifier = $1; + $$->identifier = $2; + } + | type_specifier any_identifier array_specifier + { + void *ctx = state; + $$ = new(ctx) ast_parameter_declarator(); + $$->set_location_range(@1, @3); + $$->type = new(ctx) ast_fully_specified_type(); + $$->type->set_location(@1); + $$->type->specifier = $1; + $$->identifier = $2; + $$->array_specifier = $3; + } + ; + +parameter_declaration: + parameter_qualifier parameter_declarator + { + $$ = $2; + $$->type->qualifier = $1; + } + | parameter_qualifier parameter_type_specifier + { + void *ctx = state; + $$ = new(ctx) ast_parameter_declarator(); + $$->set_location(@2); + $$->type = new(ctx) ast_fully_specified_type(); + $$->type->set_location_range(@1, @2); + $$->type->qualifier = $1; 
+ $$->type->specifier = $2; + } + ; + +parameter_qualifier: + /* empty */ + { + memset(& $$, 0, sizeof($$)); + } + | CONST_TOK parameter_qualifier + { + if ($2.flags.q.constant) + _mesa_glsl_error(&@1, state, "duplicate const qualifier"); + + $$ = $2; + $$.flags.q.constant = 1; + } + | PRECISE parameter_qualifier + { + if ($2.flags.q.precise) + _mesa_glsl_error(&@1, state, "duplicate precise qualifier"); + + $$ = $2; + $$.flags.q.precise = 1; + } + | parameter_direction_qualifier parameter_qualifier + { + if (($1.flags.q.in || $1.flags.q.out) && ($2.flags.q.in || $2.flags.q.out)) + _mesa_glsl_error(&@1, state, "duplicate in/out/inout qualifier"); + + if (!state->has_420pack_or_es31() && $2.flags.q.constant) + _mesa_glsl_error(&@1, state, "in/out/inout must come after const " + "or precise"); + + $$ = $1; + $$.merge_qualifier(&@1, state, $2, false); + } + | precision_qualifier parameter_qualifier + { + if ($2.precision != ast_precision_none) + _mesa_glsl_error(&@1, state, "duplicate precision qualifier"); + + if (!state->has_420pack_or_es31() && + $2.flags.i != 0) + _mesa_glsl_error(&@1, state, "precision qualifiers must come last"); + + $$ = $2; + $$.precision = $1; + } + | memory_qualifier parameter_qualifier + { + $$ = $1; + $$.merge_qualifier(&@1, state, $2, false); + } + +parameter_direction_qualifier: + IN_TOK + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.in = 1; + } + | OUT_TOK + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.out = 1; + } + | INOUT_TOK + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.in = 1; + $$.flags.q.out = 1; + } + ; + +parameter_type_specifier: + type_specifier + ; + +init_declarator_list: + single_declaration + | init_declarator_list ',' any_identifier + { + void *ctx = state; + ast_declaration *decl = new(ctx) ast_declaration($3, NULL, NULL); + decl->set_location(@3); + + $$ = $1; + $$->declarations.push_tail(&decl->link); + state->symbols->add_variable(new(state) ir_variable(NULL, $3, ir_var_auto)); + } + | init_declarator_list ',' 
any_identifier array_specifier + { + void *ctx = state; + ast_declaration *decl = new(ctx) ast_declaration($3, $4, NULL); + decl->set_location_range(@3, @4); + + $$ = $1; + $$->declarations.push_tail(&decl->link); + state->symbols->add_variable(new(state) ir_variable(NULL, $3, ir_var_auto)); + } + | init_declarator_list ',' any_identifier array_specifier '=' initializer + { + void *ctx = state; + ast_declaration *decl = new(ctx) ast_declaration($3, $4, $6); + decl->set_location_range(@3, @4); + + $$ = $1; + $$->declarations.push_tail(&decl->link); + state->symbols->add_variable(new(state) ir_variable(NULL, $3, ir_var_auto)); + } + | init_declarator_list ',' any_identifier '=' initializer + { + void *ctx = state; + ast_declaration *decl = new(ctx) ast_declaration($3, NULL, $5); + decl->set_location(@3); + + $$ = $1; + $$->declarations.push_tail(&decl->link); + state->symbols->add_variable(new(state) ir_variable(NULL, $3, ir_var_auto)); + } + ; + + // Grammar Note: No 'enum', or 'typedef'. +single_declaration: + fully_specified_type + { + void *ctx = state; + /* Empty declaration list is valid. 
*/ + $$ = new(ctx) ast_declarator_list($1); + $$->set_location(@1); + } + | fully_specified_type any_identifier + { + void *ctx = state; + ast_declaration *decl = new(ctx) ast_declaration($2, NULL, NULL); + decl->set_location(@2); + + $$ = new(ctx) ast_declarator_list($1); + $$->set_location_range(@1, @2); + $$->declarations.push_tail(&decl->link); + } + | fully_specified_type any_identifier array_specifier + { + void *ctx = state; + ast_declaration *decl = new(ctx) ast_declaration($2, $3, NULL); + decl->set_location_range(@2, @3); + + $$ = new(ctx) ast_declarator_list($1); + $$->set_location_range(@1, @3); + $$->declarations.push_tail(&decl->link); + } + | fully_specified_type any_identifier array_specifier '=' initializer + { + void *ctx = state; + ast_declaration *decl = new(ctx) ast_declaration($2, $3, $5); + decl->set_location_range(@2, @3); + + $$ = new(ctx) ast_declarator_list($1); + $$->set_location_range(@1, @3); + $$->declarations.push_tail(&decl->link); + } + | fully_specified_type any_identifier '=' initializer + { + void *ctx = state; + ast_declaration *decl = new(ctx) ast_declaration($2, NULL, $4); + decl->set_location(@2); + + $$ = new(ctx) ast_declarator_list($1); + $$->set_location_range(@1, @2); + $$->declarations.push_tail(&decl->link); + } + | INVARIANT variable_identifier + { + void *ctx = state; + ast_declaration *decl = new(ctx) ast_declaration($2, NULL, NULL); + decl->set_location(@2); + + $$ = new(ctx) ast_declarator_list(NULL); + $$->set_location_range(@1, @2); + $$->invariant = true; + + $$->declarations.push_tail(&decl->link); + } + | PRECISE variable_identifier + { + void *ctx = state; + ast_declaration *decl = new(ctx) ast_declaration($2, NULL, NULL); + decl->set_location(@2); + + $$ = new(ctx) ast_declarator_list(NULL); + $$->set_location_range(@1, @2); + $$->precise = true; + + $$->declarations.push_tail(&decl->link); + } + ; + +fully_specified_type: + type_specifier + { + void *ctx = state; + $$ = new(ctx) 
ast_fully_specified_type(); + $$->set_location(@1); + $$->specifier = $1; + } + | type_qualifier type_specifier + { + void *ctx = state; + $$ = new(ctx) ast_fully_specified_type(); + $$->set_location_range(@1, @2); + $$->qualifier = $1; + $$->specifier = $2; + if ($$->specifier->structure != NULL && + $$->specifier->structure->is_declaration) { + $$->specifier->structure->layout = &$$->qualifier; + } + } + ; + +layout_qualifier: + LAYOUT_TOK '(' layout_qualifier_id_list ')' + { + $$ = $3; + } + ; + +layout_qualifier_id_list: + layout_qualifier_id + | layout_qualifier_id_list ',' layout_qualifier_id + { + $$ = $1; + if (!$$.merge_qualifier(& @3, state, $3, true)) { + YYERROR; + } + } + ; + +layout_qualifier_id: + any_identifier + { + memset(& $$, 0, sizeof($$)); + + /* Layout qualifiers for ARB_fragment_coord_conventions. */ + if (!$$.flags.i && (state->ARB_fragment_coord_conventions_enable || + state->is_version(150, 0))) { + if (match_layout_qualifier($1, "origin_upper_left", state) == 0) { + $$.flags.q.origin_upper_left = 1; + } else if (match_layout_qualifier($1, "pixel_center_integer", + state) == 0) { + $$.flags.q.pixel_center_integer = 1; + } + + if ($$.flags.i && state->ARB_fragment_coord_conventions_warn) { + _mesa_glsl_warning(& @1, state, + "GL_ARB_fragment_coord_conventions layout " + "identifier `%s' used", $1); + } + } + + /* Layout qualifiers for AMD/ARB_conservative_depth. 
*/ + if (!$$.flags.i && + (state->AMD_conservative_depth_enable || + state->ARB_conservative_depth_enable || + state->is_version(420, 0))) { + if (match_layout_qualifier($1, "depth_any", state) == 0) { + $$.flags.q.depth_any = 1; + } else if (match_layout_qualifier($1, "depth_greater", state) == 0) { + $$.flags.q.depth_greater = 1; + } else if (match_layout_qualifier($1, "depth_less", state) == 0) { + $$.flags.q.depth_less = 1; + } else if (match_layout_qualifier($1, "depth_unchanged", + state) == 0) { + $$.flags.q.depth_unchanged = 1; + } + + if ($$.flags.i && state->AMD_conservative_depth_warn) { + _mesa_glsl_warning(& @1, state, + "GL_AMD_conservative_depth " + "layout qualifier `%s' is used", $1); + } + if ($$.flags.i && state->ARB_conservative_depth_warn) { + _mesa_glsl_warning(& @1, state, + "GL_ARB_conservative_depth " + "layout qualifier `%s' is used", $1); + } + } + + /* See also interface_block_layout_qualifier. */ + if (!$$.flags.i && state->has_uniform_buffer_objects()) { + if (match_layout_qualifier($1, "std140", state) == 0) { + $$.flags.q.std140 = 1; + } else if (match_layout_qualifier($1, "shared", state) == 0) { + $$.flags.q.shared = 1; + } else if (match_layout_qualifier($1, "std430", state) == 0) { + $$.flags.q.std430 = 1; + } else if (match_layout_qualifier($1, "column_major", state) == 0) { + $$.flags.q.column_major = 1; + /* "row_major" is a reserved word in GLSL 1.30+. Its token is parsed + * below in the interface_block_layout_qualifier rule. + * + * It is not a reserved word in GLSL ES 3.00, so it's handled here as + * an identifier. + * + * Also, this takes care of alternate capitalizations of + * "row_major" (which is necessary because layout qualifiers + * are case-insensitive in desktop GLSL). + */ + } else if (match_layout_qualifier($1, "row_major", state) == 0) { + $$.flags.q.row_major = 1; + /* "packed" is a reserved word in GLSL, and its token is + * parsed below in the interface_block_layout_qualifier rule. 
+ * However, we must take care of alternate capitalizations of + * "packed", because layout qualifiers are case-insensitive + * in desktop GLSL. + */ + } else if (match_layout_qualifier($1, "packed", state) == 0) { + $$.flags.q.packed = 1; + } + + if ($$.flags.i && state->ARB_uniform_buffer_object_warn) { + _mesa_glsl_warning(& @1, state, + "#version 140 / GL_ARB_uniform_buffer_object " + "layout qualifier `%s' is used", $1); + } + } + + /* Layout qualifiers for GLSL 1.50 geometry shaders. */ + if (!$$.flags.i) { + static const struct { + const char *s; + GLenum e; + } map[] = { + { "points", GL_POINTS }, + { "lines", GL_LINES }, + { "lines_adjacency", GL_LINES_ADJACENCY }, + { "line_strip", GL_LINE_STRIP }, + { "triangles", GL_TRIANGLES }, + { "triangles_adjacency", GL_TRIANGLES_ADJACENCY }, + { "triangle_strip", GL_TRIANGLE_STRIP }, + }; + for (unsigned i = 0; i < ARRAY_SIZE(map); i++) { + if (match_layout_qualifier($1, map[i].s, state) == 0) { + $$.flags.q.prim_type = 1; + $$.prim_type = map[i].e; + break; + } + } + + if ($$.flags.i && !state->has_geometry_shader()) { + _mesa_glsl_error(& @1, state, "#version 150 layout " + "qualifier `%s' used", $1); + } + } + + /* Layout qualifiers for ARB_shader_image_load_store. */ + if (state->ARB_shader_image_load_store_enable || + state->is_version(420, 310)) { + if (!$$.flags.i) { + static const struct { + const char *name; + GLenum format; + glsl_base_type base_type; + /** Minimum desktop GLSL version required for the image + * format. Use 130 if already present in the original + * ARB extension. + */ + unsigned required_glsl; + /** Minimum GLSL ES version required for the image format. 
*/ + unsigned required_essl; + } map[] = { + { "rgba32f", GL_RGBA32F, GLSL_TYPE_FLOAT, 130, 310 }, + { "rgba16f", GL_RGBA16F, GLSL_TYPE_FLOAT, 130, 310 }, + { "rg32f", GL_RG32F, GLSL_TYPE_FLOAT, 130, 0 }, + { "rg16f", GL_RG16F, GLSL_TYPE_FLOAT, 130, 0 }, + { "r11f_g11f_b10f", GL_R11F_G11F_B10F, GLSL_TYPE_FLOAT, 130, 0 }, + { "r32f", GL_R32F, GLSL_TYPE_FLOAT, 130, 310 }, + { "r16f", GL_R16F, GLSL_TYPE_FLOAT, 130, 0 }, + { "rgba32ui", GL_RGBA32UI, GLSL_TYPE_UINT, 130, 310 }, + { "rgba16ui", GL_RGBA16UI, GLSL_TYPE_UINT, 130, 310 }, + { "rgb10_a2ui", GL_RGB10_A2UI, GLSL_TYPE_UINT, 130, 0 }, + { "rgba8ui", GL_RGBA8UI, GLSL_TYPE_UINT, 130, 310 }, + { "rg32ui", GL_RG32UI, GLSL_TYPE_UINT, 130, 0 }, + { "rg16ui", GL_RG16UI, GLSL_TYPE_UINT, 130, 0 }, + { "rg8ui", GL_RG8UI, GLSL_TYPE_UINT, 130, 0 }, + { "r32ui", GL_R32UI, GLSL_TYPE_UINT, 130, 310 }, + { "r16ui", GL_R16UI, GLSL_TYPE_UINT, 130, 0 }, + { "r8ui", GL_R8UI, GLSL_TYPE_UINT, 130, 0 }, + { "rgba32i", GL_RGBA32I, GLSL_TYPE_INT, 130, 310 }, + { "rgba16i", GL_RGBA16I, GLSL_TYPE_INT, 130, 310 }, + { "rgba8i", GL_RGBA8I, GLSL_TYPE_INT, 130, 310 }, + { "rg32i", GL_RG32I, GLSL_TYPE_INT, 130, 0 }, + { "rg16i", GL_RG16I, GLSL_TYPE_INT, 130, 0 }, + { "rg8i", GL_RG8I, GLSL_TYPE_INT, 130, 0 }, + { "r32i", GL_R32I, GLSL_TYPE_INT, 130, 310 }, + { "r16i", GL_R16I, GLSL_TYPE_INT, 130, 0 }, + { "r8i", GL_R8I, GLSL_TYPE_INT, 130, 0 }, + { "rgba16", GL_RGBA16, GLSL_TYPE_FLOAT, 130, 0 }, + { "rgb10_a2", GL_RGB10_A2, GLSL_TYPE_FLOAT, 130, 0 }, + { "rgba8", GL_RGBA8, GLSL_TYPE_FLOAT, 130, 310 }, + { "rg16", GL_RG16, GLSL_TYPE_FLOAT, 130, 0 }, + { "rg8", GL_RG8, GLSL_TYPE_FLOAT, 130, 0 }, + { "r16", GL_R16, GLSL_TYPE_FLOAT, 130, 0 }, + { "r8", GL_R8, GLSL_TYPE_FLOAT, 130, 0 }, + { "rgba16_snorm", GL_RGBA16_SNORM, GLSL_TYPE_FLOAT, 130, 0 }, + { "rgba8_snorm", GL_RGBA8_SNORM, GLSL_TYPE_FLOAT, 130, 310 }, + { "rg16_snorm", GL_RG16_SNORM, GLSL_TYPE_FLOAT, 130, 0 }, + { "rg8_snorm", GL_RG8_SNORM, GLSL_TYPE_FLOAT, 130, 0 }, + { "r16_snorm", 
GL_R16_SNORM, GLSL_TYPE_FLOAT, 130, 0 }, + { "r8_snorm", GL_R8_SNORM, GLSL_TYPE_FLOAT, 130, 0 } + }; + + for (unsigned i = 0; i < ARRAY_SIZE(map); i++) { + if (state->is_version(map[i].required_glsl, + map[i].required_essl) && + match_layout_qualifier($1, map[i].name, state) == 0) { + $$.flags.q.explicit_image_format = 1; + $$.image_format = map[i].format; + $$.image_base_type = map[i].base_type; + break; + } + } + } + + if (!$$.flags.i && + match_layout_qualifier($1, "early_fragment_tests", state) == 0) { + /* From section 4.4.1.3 of the GLSL 4.50 specification + * (Fragment Shader Inputs): + * + * "Fragment shaders also allow the following layout + * qualifier on in only (not with variable declarations) + * layout-qualifier-id + * early_fragment_tests + * [...]" + */ + if (state->stage != MESA_SHADER_FRAGMENT) { + _mesa_glsl_error(& @1, state, + "early_fragment_tests layout qualifier only " + "valid in fragment shaders"); + } + + $$.flags.q.early_fragment_tests = 1; + } + } + + /* Layout qualifiers for tessellation evaluation shaders. 
*/ + if (!$$.flags.i) { + struct { + const char *s; + GLenum e; + } map[] = { + /* triangles already parsed by gs-specific code */ + { "quads", GL_QUADS }, + { "isolines", GL_ISOLINES }, + }; + for (unsigned i = 0; i < ARRAY_SIZE(map); i++) { + if (match_layout_qualifier($1, map[i].s, state) == 0) { + $$.flags.q.prim_type = 1; + $$.prim_type = map[i].e; + break; + } + } + + if ($$.flags.i && + !state->ARB_tessellation_shader_enable && + !state->is_version(400, 0)) { + _mesa_glsl_error(& @1, state, + "primitive mode qualifier `%s' requires " + "GLSL 4.00 or ARB_tessellation_shader", $1); + } + } + if (!$$.flags.i) { + struct { + const char *s; + GLenum e; + } map[] = { + { "equal_spacing", GL_EQUAL }, + { "fractional_odd_spacing", GL_FRACTIONAL_ODD }, + { "fractional_even_spacing", GL_FRACTIONAL_EVEN }, + }; + for (unsigned i = 0; i < ARRAY_SIZE(map); i++) { + if (match_layout_qualifier($1, map[i].s, state) == 0) { + $$.flags.q.vertex_spacing = 1; + $$.vertex_spacing = map[i].e; + break; + } + } + + if ($$.flags.i && + !state->ARB_tessellation_shader_enable && + !state->is_version(400, 0)) { + _mesa_glsl_error(& @1, state, + "vertex spacing qualifier `%s' requires " + "GLSL 4.00 or ARB_tessellation_shader", $1); + } + } + if (!$$.flags.i) { + if (match_layout_qualifier($1, "cw", state) == 0) { + $$.flags.q.ordering = 1; + $$.ordering = GL_CW; + } else if (match_layout_qualifier($1, "ccw", state) == 0) { + $$.flags.q.ordering = 1; + $$.ordering = GL_CCW; + } + + if ($$.flags.i && + !state->ARB_tessellation_shader_enable && + !state->is_version(400, 0)) { + _mesa_glsl_error(& @1, state, + "ordering qualifier `%s' requires " + "GLSL 4.00 or ARB_tessellation_shader", $1); + } + } + if (!$$.flags.i) { + if (match_layout_qualifier($1, "point_mode", state) == 0) { + $$.flags.q.point_mode = 1; + $$.point_mode = true; + } + + if ($$.flags.i && + !state->ARB_tessellation_shader_enable && + !state->is_version(400, 0)) { + _mesa_glsl_error(& @1, state, + "qualifier `point_mode' 
requires " + "GLSL 4.00 or ARB_tessellation_shader"); + } + } + + if (!$$.flags.i) { + _mesa_glsl_error(& @1, state, "unrecognized layout identifier " + "`%s'", $1); + YYERROR; + } + } + | any_identifier '=' constant_expression + { + /* Layout qualifiers written as `identifier = constant_expression'. */ + memset(& $$, 0, sizeof($$)); + void *ctx = state; + + if ($3->oper != ast_int_constant && + $3->oper != ast_uint_constant && + !state->has_enhanced_layouts()) { + _mesa_glsl_error(& @1, state, + "compile-time constant expressions require " + "GLSL 4.40 or ARB_enhanced_layouts"); + } + + if (match_layout_qualifier("location", $1, state) == 0) { + $$.flags.q.explicit_location = 1; + + if ($$.flags.q.attribute == 1 && + state->ARB_explicit_attrib_location_warn) { + _mesa_glsl_warning(& @1, state, + "GL_ARB_explicit_attrib_location layout " + "identifier `%s' used", $1); + } + $$.location = $3; + } + + if (match_layout_qualifier("index", $1, state) == 0) { + if (state->es_shader && !state->EXT_blend_func_extended_enable) { + _mesa_glsl_error(& @3, state, "index layout qualifier requires EXT_blend_func_extended"); + YYERROR; + } + + $$.flags.q.explicit_index = 1; + $$.index = $3; + } + + if ((state->has_420pack_or_es31() || + state->has_atomic_counters() || + state->has_shader_storage_buffer_objects()) && + match_layout_qualifier("binding", $1, state) == 0) { + $$.flags.q.explicit_binding = 1; + $$.binding = $3; + } + + if (state->has_atomic_counters() && + match_layout_qualifier("offset", $1, state) == 0) { + $$.flags.q.explicit_offset = 1; + $$.offset = $3; + } + + if (match_layout_qualifier("max_vertices", $1, state) == 0) { + $$.flags.q.max_vertices = 1; + $$.max_vertices = new(ctx) ast_layout_expression(@1, $3); + if (!state->has_geometry_shader()) { + /* The message has no conversion specifiers: pass no arguments. */ + _mesa_glsl_error(& @3, state, + "#version 150 max_vertices qualifier " + "specified"); + } + } + + if (state->stage == MESA_SHADER_GEOMETRY) { + if (match_layout_qualifier("stream", $1, state) == 0 && + state->check_explicit_attrib_stream_allowed(& @3)) { + $$.flags.q.stream = 1; + 
$$.flags.q.explicit_stream = 1; + $$.stream = $3; + } + } + + static const char * const local_size_qualifiers[3] = { + "local_size_x", + "local_size_y", + "local_size_z", + }; + for (int i = 0; i < 3; i++) { + if (match_layout_qualifier(local_size_qualifiers[i], $1, + state) == 0) { + if (!state->has_compute_shader()) { + _mesa_glsl_error(& @3, state, + "%s qualifier requires GLSL 4.30 or " + "GLSL ES 3.10 or ARB_compute_shader", + local_size_qualifiers[i]); + YYERROR; + } else { + $$.flags.q.local_size |= (1 << i); + $$.local_size[i] = new(ctx) ast_layout_expression(@1, $3); + } + break; + } + } + + if (match_layout_qualifier("invocations", $1, state) == 0) { + $$.flags.q.invocations = 1; + $$.invocations = new(ctx) ast_layout_expression(@1, $3); + if (!state->is_version(400, 0) && + !state->ARB_gpu_shader5_enable) { + /* The message has no conversion specifiers: pass no arguments. */ + _mesa_glsl_error(& @3, state, + "GL_ARB_gpu_shader5 invocations " + "qualifier specified"); + } + } + + /* Layout qualifiers for tessellation control shaders. */ + if (match_layout_qualifier("vertices", $1, state) == 0) { + $$.flags.q.vertices = 1; + $$.vertices = new(ctx) ast_layout_expression(@1, $3); + if (!state->ARB_tessellation_shader_enable && + !state->is_version(400, 0)) { + _mesa_glsl_error(& @1, state, + "vertices qualifier requires GLSL 4.00 or " + "ARB_tessellation_shader"); + } + } + + /* If the identifier didn't match any known layout identifiers, + * emit an error. + */ + if (!$$.flags.i) { + _mesa_glsl_error(& @1, state, "unrecognized layout identifier " + "`%s'", $1); + YYERROR; + } + } + | interface_block_layout_qualifier + { + $$ = $1; + /* Layout qualifiers for ARB_uniform_buffer_object. 
*/ + /* $1 is a qualifier struct, not a string, so it cannot be passed to + * `%s'. Recover the keyword from the single flag that the + * interface_block_layout_qualifier rule sets. + * + * NOTE(review): none of the interface_block_layout_qualifier + * alternatives set flags.q.uniform, so both diagnostics below look + * unreachable -- confirm whether a different flag test was intended. + */ + const char *qual_name = $$.flags.q.row_major ? "row_major" : + $$.flags.q.packed ? "packed" : "shared"; + if ($$.flags.q.uniform && !state->has_uniform_buffer_objects()) { + _mesa_glsl_error(& @1, state, + "#version 140 / GL_ARB_uniform_buffer_object " + "layout qualifier `%s' is used", qual_name); + } else if ($$.flags.q.uniform && state->ARB_uniform_buffer_object_warn) { + _mesa_glsl_warning(& @1, state, + "#version 140 / GL_ARB_uniform_buffer_object " + "layout qualifier `%s' is used", qual_name); + } + } + ; + +/* This is a separate language rule because we parse these as tokens + * (due to them being reserved keywords) instead of identifiers like + * most qualifiers. See the any_identifier path of + * layout_qualifier_id for the others. + * + * Note that since layout qualifiers are case-insensitive in desktop + * GLSL, all of these qualifiers need to be handled as identifiers as + * well (by the any_identifier path of layout_qualifier_id). + */ +interface_block_layout_qualifier: + ROW_MAJOR + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.row_major = 1; + } + | PACKED_TOK + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.packed = 1; + } + | SHARED + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.shared = 1; + } + ; + +/* A bare SUBROUTINE token sets flags.q.subroutine; the parenthesized form + * sets flags.q.subroutine_def and stores the parsed list in subroutine_list. + */ +subroutine_qualifier: + SUBROUTINE + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.subroutine = 1; + } + | SUBROUTINE '(' subroutine_type_list ')' + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.subroutine_def = 1; + $$.subroutine_list = $3; + } + ; + +/* Comma-separated identifiers; each one is wrapped in an ast_declaration and + * appended to a single ast_subroutine_list. + */ +subroutine_type_list: + any_identifier + { + void *ctx = state; + ast_declaration *decl = new(ctx) ast_declaration($1, NULL, NULL); + decl->set_location(@1); + + $$ = new(ctx) ast_subroutine_list(); + $$->declarations.push_tail(&decl->link); + } + | subroutine_type_list ',' any_identifier + { + void *ctx = state; + ast_declaration *decl = new(ctx) ast_declaration($3, NULL, NULL); + decl->set_location(@3); + + $$ = $1; + $$->declarations.push_tail(&decl->link); + } + ; + +interpolation_qualifier: + SMOOTH + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.smooth = 1; + } + | FLAT + { + memset(& $$, 0, sizeof($$)); + 
$$.flags.q.flat = 1; + } + | NOPERSPECTIVE + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.noperspective = 1; + } + ; + +type_qualifier: + /* Single qualifiers */ + INVARIANT + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.invariant = 1; + } + | PRECISE + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.precise = 1; + } + | auxiliary_storage_qualifier + | storage_qualifier + | interpolation_qualifier + | layout_qualifier + | memory_qualifier + | subroutine_qualifier + | precision_qualifier + { + memset(&$$, 0, sizeof($$)); + $$.precision = $1; + } + + /* Multiple qualifiers: + * In GLSL 4.20, these can be specified in any order. In earlier versions, + * they appear in this order (see GLSL 1.50 section 4.7 & comments below): + * + * invariant interpolation auxiliary storage precision ...or... + * layout storage precision + * + * Each qualifier's rule ensures that the accumulated qualifiers on the right + * side don't contain any that must appear on the left hand side. + * For example, when processing a storage qualifier, we check that there are + * no auxiliary, interpolation, layout, invariant, or precise qualifiers to the right. + */ + | PRECISE type_qualifier + { + if ($2.flags.q.precise) + _mesa_glsl_error(&@1, state, "duplicate \"precise\" qualifier"); + + $$ = $2; + $$.flags.q.precise = 1; + } + | INVARIANT type_qualifier + { + if ($2.flags.q.invariant) + _mesa_glsl_error(&@1, state, "duplicate \"invariant\" qualifier"); + + if (!state->has_420pack_or_es31() && $2.flags.q.precise) + _mesa_glsl_error(&@1, state, + "\"invariant\" must come after \"precise\""); + + $$ = $2; + $$.flags.q.invariant = 1; + + /* GLSL ES 3.00 spec, section 4.6.1 "The Invariant Qualifier": + * + * "Only variables output from a shader can be candidates for invariance. + * This includes user-defined output variables and the built-in output + * variables. 
As only outputs can be declared as invariant, an invariant + * output from one shader stage will still match an input of a subsequent + * stage without the input being declared as invariant." + */ + if (state->es_shader && state->language_version >= 300 && $$.flags.q.in) + _mesa_glsl_error(&@1, state, "invariant qualifiers cannot be used with shader inputs"); + } + | interpolation_qualifier type_qualifier + { + /* Section 4.3 of the GLSL 1.40 specification states: + * "...qualified with one of these interpolation qualifiers" + * + * GLSL 1.30 claims to allow "one or more", but insists that: + * "These interpolation qualifiers may only precede the qualifiers in, + * centroid in, out, or centroid out in a declaration." + * + * ...which means that e.g. smooth can't precede smooth, so there can be + * only one after all, and the 1.40 text is a clarification, not a change. + */ + if ($2.has_interpolation()) + _mesa_glsl_error(&@1, state, "duplicate interpolation qualifier"); + + if (!state->has_420pack_or_es31() && + ($2.flags.q.precise || $2.flags.q.invariant)) { + _mesa_glsl_error(&@1, state, "interpolation qualifiers must come " + "after \"precise\" or \"invariant\""); + } + + $$ = $1; + $$.merge_qualifier(&@1, state, $2, false); + } + | layout_qualifier type_qualifier + { + /* In the absence of ARB_shading_language_420pack, layout qualifiers may + * appear no later than auxiliary storage qualifiers. There is no + * particularly clear spec language mandating this, but in all examples + * the layout qualifier precedes the storage qualifier. + * + * We allow combinations of layout with interpolation, invariant or + * precise qualifiers since these are useful in ARB_separate_shader_objects. + * There is no clear spec guidance on this either. + */ + if (!state->has_420pack_or_es31() && $2.has_layout()) + _mesa_glsl_error(&@1, state, "duplicate layout(...) 
qualifiers"); + + $$ = $1; + $$.merge_qualifier(&@1, state, $2, false); + } + | subroutine_qualifier type_qualifier + { + $$ = $1; + $$.merge_qualifier(&@1, state, $2, false); + } + | auxiliary_storage_qualifier type_qualifier + { + if ($2.has_auxiliary_storage()) { + _mesa_glsl_error(&@1, state, + "duplicate auxiliary storage qualifier (centroid or sample)"); + } + + if (!state->has_420pack_or_es31() && + ($2.flags.q.precise || $2.flags.q.invariant || + $2.has_interpolation() || $2.has_layout())) { + _mesa_glsl_error(&@1, state, "auxiliary storage qualifiers must come " + "just before storage qualifiers"); + } + $$ = $1; + $$.merge_qualifier(&@1, state, $2, false); + } + | storage_qualifier type_qualifier + { + /* Section 4.3 of the GLSL 1.20 specification states: + * "Variable declarations may have a storage qualifier specified..." + * 1.30 clarifies this to "may have one storage qualifier". + */ + if ($2.has_storage()) + _mesa_glsl_error(&@1, state, "duplicate storage qualifier"); + + if (!state->has_420pack_or_es31() && + ($2.flags.q.precise || $2.flags.q.invariant || $2.has_interpolation() || + $2.has_layout() || $2.has_auxiliary_storage())) { + _mesa_glsl_error(&@1, state, "storage qualifiers must come after " + "precise, invariant, interpolation, layout and auxiliary " + "storage qualifiers"); + } + + $$ = $1; + $$.merge_qualifier(&@1, state, $2, false); + } + | precision_qualifier type_qualifier + { + if ($2.precision != ast_precision_none) + _mesa_glsl_error(&@1, state, "duplicate precision qualifier"); + + if (!(state->has_420pack_or_es31()) && + $2.flags.i != 0) + _mesa_glsl_error(&@1, state, "precision qualifiers must come last"); + + $$ = $2; + $$.precision = $1; + } + | memory_qualifier type_qualifier + { + $$ = $1; + $$.merge_qualifier(&@1, state, $2, false); + } + ; + +auxiliary_storage_qualifier: + CENTROID + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.centroid = 1; + } + | SAMPLE + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.sample = 1; + } + 
| PATCH + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.patch = 1; + } + +storage_qualifier: + CONST_TOK + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.constant = 1; + } + | ATTRIBUTE + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.attribute = 1; + } + | VARYING + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.varying = 1; + } + | IN_TOK + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.in = 1; + } + | OUT_TOK + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.out = 1; + + if (state->stage == MESA_SHADER_GEOMETRY && + state->has_explicit_attrib_stream()) { + /* Section 4.3.8.2 (Output Layout Qualifiers) of the GLSL 4.00 + * spec says: + * + * "If the block or variable is declared with the stream + * identifier, it is associated with the specified stream; + * otherwise, it is associated with the current default stream." + */ + $$.flags.q.stream = 1; + $$.flags.q.explicit_stream = 0; + $$.stream = state->out_qualifier->stream; + } + } + | UNIFORM + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.uniform = 1; + } + | BUFFER + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.buffer = 1; + } + | SHARED + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.shared_storage = 1; + } + ; + +memory_qualifier: + COHERENT + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.coherent = 1; + } + | VOLATILE + { + memset(& $$, 0, sizeof($$)); + $$.flags.q._volatile = 1; + } + | RESTRICT + { + STATIC_ASSERT(sizeof($$.flags.q) <= sizeof($$.flags.i)); + memset(& $$, 0, sizeof($$)); + $$.flags.q.restrict_flag = 1; + } + | READONLY + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.read_only = 1; + } + | WRITEONLY + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.write_only = 1; + } + ; + +array_specifier: + '[' ']' + { + void *ctx = state; + $$ = new(ctx) ast_array_specifier(@1, new(ctx) ast_expression( + ast_unsized_array_dim, NULL, + NULL, NULL)); + $$->set_location_range(@1, @2); + } + | '[' constant_expression ']' + { + void *ctx = state; + $$ = new(ctx) ast_array_specifier(@1, $2); + 
$$->set_location_range(@1, @3); + } + | array_specifier '[' ']' + { + void *ctx = state; + $$ = $1; + + if (state->check_arrays_of_arrays_allowed(& @1)) { + $$->add_dimension(new(ctx) ast_expression(ast_unsized_array_dim, NULL, + NULL, NULL)); + } + } + | array_specifier '[' constant_expression ']' + { + $$ = $1; + + if (state->check_arrays_of_arrays_allowed(& @1)) { + $$->add_dimension($3); + } + } + ; + +type_specifier: + type_specifier_nonarray + | type_specifier_nonarray array_specifier + { + $$ = $1; + $$->array_specifier = $2; + } + ; + +type_specifier_nonarray: + basic_type_specifier_nonarray + { + void *ctx = state; + $$ = new(ctx) ast_type_specifier($1); + $$->set_location(@1); + } + | struct_specifier + { + void *ctx = state; + $$ = new(ctx) ast_type_specifier($1); + $$->set_location(@1); + } + | TYPE_IDENTIFIER + { + void *ctx = state; + $$ = new(ctx) ast_type_specifier($1); + $$->set_location(@1); + } + ; + +basic_type_specifier_nonarray: + VOID_TOK { $$ = "void"; } + | FLOAT_TOK { $$ = "float"; } + | DOUBLE_TOK { $$ = "double"; } + | INT_TOK { $$ = "int"; } + | UINT_TOK { $$ = "uint"; } + | BOOL_TOK { $$ = "bool"; } + | VEC2 { $$ = "vec2"; } + | VEC3 { $$ = "vec3"; } + | VEC4 { $$ = "vec4"; } + | BVEC2 { $$ = "bvec2"; } + | BVEC3 { $$ = "bvec3"; } + | BVEC4 { $$ = "bvec4"; } + | IVEC2 { $$ = "ivec2"; } + | IVEC3 { $$ = "ivec3"; } + | IVEC4 { $$ = "ivec4"; } + | UVEC2 { $$ = "uvec2"; } + | UVEC3 { $$ = "uvec3"; } + | UVEC4 { $$ = "uvec4"; } + | DVEC2 { $$ = "dvec2"; } + | DVEC3 { $$ = "dvec3"; } + | DVEC4 { $$ = "dvec4"; } + | MAT2X2 { $$ = "mat2"; } + | MAT2X3 { $$ = "mat2x3"; } + | MAT2X4 { $$ = "mat2x4"; } + | MAT3X2 { $$ = "mat3x2"; } + | MAT3X3 { $$ = "mat3"; } + | MAT3X4 { $$ = "mat3x4"; } + | MAT4X2 { $$ = "mat4x2"; } + | MAT4X3 { $$ = "mat4x3"; } + | MAT4X4 { $$ = "mat4"; } + | DMAT2X2 { $$ = "dmat2"; } + | DMAT2X3 { $$ = "dmat2x3"; } + | DMAT2X4 { $$ = "dmat2x4"; } + | DMAT3X2 { $$ = "dmat3x2"; } + | DMAT3X3 { $$ = "dmat3"; } + | DMAT3X4 { $$ = 
"dmat3x4"; } + | DMAT4X2 { $$ = "dmat4x2"; } + | DMAT4X3 { $$ = "dmat4x3"; } + | DMAT4X4 { $$ = "dmat4"; } + | SAMPLER1D { $$ = "sampler1D"; } + | SAMPLER2D { $$ = "sampler2D"; } + | SAMPLER2DRECT { $$ = "sampler2DRect"; } + | SAMPLER3D { $$ = "sampler3D"; } + | SAMPLERCUBE { $$ = "samplerCube"; } + | SAMPLEREXTERNALOES { $$ = "samplerExternalOES"; } + | SAMPLER1DSHADOW { $$ = "sampler1DShadow"; } + | SAMPLER2DSHADOW { $$ = "sampler2DShadow"; } + | SAMPLER2DRECTSHADOW { $$ = "sampler2DRectShadow"; } + | SAMPLERCUBESHADOW { $$ = "samplerCubeShadow"; } + | SAMPLER1DARRAY { $$ = "sampler1DArray"; } + | SAMPLER2DARRAY { $$ = "sampler2DArray"; } + | SAMPLER1DARRAYSHADOW { $$ = "sampler1DArrayShadow"; } + | SAMPLER2DARRAYSHADOW { $$ = "sampler2DArrayShadow"; } + | SAMPLERBUFFER { $$ = "samplerBuffer"; } + | SAMPLERCUBEARRAY { $$ = "samplerCubeArray"; } + | SAMPLERCUBEARRAYSHADOW { $$ = "samplerCubeArrayShadow"; } + | ISAMPLER1D { $$ = "isampler1D"; } + | ISAMPLER2D { $$ = "isampler2D"; } + | ISAMPLER2DRECT { $$ = "isampler2DRect"; } + | ISAMPLER3D { $$ = "isampler3D"; } + | ISAMPLERCUBE { $$ = "isamplerCube"; } + | ISAMPLER1DARRAY { $$ = "isampler1DArray"; } + | ISAMPLER2DARRAY { $$ = "isampler2DArray"; } + | ISAMPLERBUFFER { $$ = "isamplerBuffer"; } + | ISAMPLERCUBEARRAY { $$ = "isamplerCubeArray"; } + | USAMPLER1D { $$ = "usampler1D"; } + | USAMPLER2D { $$ = "usampler2D"; } + | USAMPLER2DRECT { $$ = "usampler2DRect"; } + | USAMPLER3D { $$ = "usampler3D"; } + | USAMPLERCUBE { $$ = "usamplerCube"; } + | USAMPLER1DARRAY { $$ = "usampler1DArray"; } + | USAMPLER2DARRAY { $$ = "usampler2DArray"; } + | USAMPLERBUFFER { $$ = "usamplerBuffer"; } + | USAMPLERCUBEARRAY { $$ = "usamplerCubeArray"; } + | SAMPLER2DMS { $$ = "sampler2DMS"; } + | ISAMPLER2DMS { $$ = "isampler2DMS"; } + | USAMPLER2DMS { $$ = "usampler2DMS"; } + | SAMPLER2DMSARRAY { $$ = "sampler2DMSArray"; } + | ISAMPLER2DMSARRAY { $$ = "isampler2DMSArray"; } + | USAMPLER2DMSARRAY { $$ = "usampler2DMSArray"; } + | 
IMAGE1D { $$ = "image1D"; } + | IMAGE2D { $$ = "image2D"; } + | IMAGE3D { $$ = "image3D"; } + | IMAGE2DRECT { $$ = "image2DRect"; } + | IMAGECUBE { $$ = "imageCube"; } + | IMAGEBUFFER { $$ = "imageBuffer"; } + | IMAGE1DARRAY { $$ = "image1DArray"; } + | IMAGE2DARRAY { $$ = "image2DArray"; } + | IMAGECUBEARRAY { $$ = "imageCubeArray"; } + | IMAGE2DMS { $$ = "image2DMS"; } + | IMAGE2DMSARRAY { $$ = "image2DMSArray"; } + | IIMAGE1D { $$ = "iimage1D"; } + | IIMAGE2D { $$ = "iimage2D"; } + | IIMAGE3D { $$ = "iimage3D"; } + | IIMAGE2DRECT { $$ = "iimage2DRect"; } + | IIMAGECUBE { $$ = "iimageCube"; } + | IIMAGEBUFFER { $$ = "iimageBuffer"; } + | IIMAGE1DARRAY { $$ = "iimage1DArray"; } + | IIMAGE2DARRAY { $$ = "iimage2DArray"; } + | IIMAGECUBEARRAY { $$ = "iimageCubeArray"; } + | IIMAGE2DMS { $$ = "iimage2DMS"; } + | IIMAGE2DMSARRAY { $$ = "iimage2DMSArray"; } + | UIMAGE1D { $$ = "uimage1D"; } + | UIMAGE2D { $$ = "uimage2D"; } + | UIMAGE3D { $$ = "uimage3D"; } + | UIMAGE2DRECT { $$ = "uimage2DRect"; } + | UIMAGECUBE { $$ = "uimageCube"; } + | UIMAGEBUFFER { $$ = "uimageBuffer"; } + | UIMAGE1DARRAY { $$ = "uimage1DArray"; } + | UIMAGE2DARRAY { $$ = "uimage2DArray"; } + | UIMAGECUBEARRAY { $$ = "uimageCubeArray"; } + | UIMAGE2DMS { $$ = "uimage2DMS"; } + | UIMAGE2DMSARRAY { $$ = "uimage2DMSArray"; } + | ATOMIC_UINT { $$ = "atomic_uint"; } + ; + +precision_qualifier: + HIGHP + { + state->check_precision_qualifiers_allowed(&@1); + $$ = ast_precision_high; + } + | MEDIUMP + { + state->check_precision_qualifiers_allowed(&@1); + $$ = ast_precision_medium; + } + | LOWP + { + state->check_precision_qualifiers_allowed(&@1); + $$ = ast_precision_low; + } + ; + +struct_specifier: + STRUCT any_identifier '{' struct_declaration_list '}' + { + void *ctx = state; + $$ = new(ctx) ast_struct_specifier($2, $4); + $$->set_location_range(@2, @5); + state->symbols->add_type($2, glsl_type::void_type); + } + | STRUCT '{' struct_declaration_list '}' + { + void *ctx = state; + $$ = new(ctx) 
ast_struct_specifier(NULL, $3); + $$->set_location_range(@2, @4); + } + ; + +struct_declaration_list: + struct_declaration + { + $$ = $1; + $1->link.self_link(); + } + | struct_declaration_list struct_declaration + { + $$ = $1; + $$->link.insert_before(& $2->link); + } + ; + +struct_declaration: + fully_specified_type struct_declarator_list ';' + { + void *ctx = state; + ast_fully_specified_type *const type = $1; + type->set_location(@1); + + if (type->qualifier.flags.i != 0) + _mesa_glsl_error(&@1, state, + "only precision qualifiers may be applied to " + "structure members"); + + $$ = new(ctx) ast_declarator_list(type); + $$->set_location(@2); + + $$->declarations.push_degenerate_list_at_head(& $2->link); + } + ; + +struct_declarator_list: + struct_declarator + { + $$ = $1; + $1->link.self_link(); + } + | struct_declarator_list ',' struct_declarator + { + $$ = $1; + $$->link.insert_before(& $3->link); + } + ; + +struct_declarator: + any_identifier + { + void *ctx = state; + $$ = new(ctx) ast_declaration($1, NULL, NULL); + $$->set_location(@1); + } + | any_identifier array_specifier + { + void *ctx = state; + $$ = new(ctx) ast_declaration($1, $2, NULL); + $$->set_location_range(@1, @2); + } + ; + +initializer: + assignment_expression + | '{' initializer_list '}' + { + $$ = $2; + } + | '{' initializer_list ',' '}' + { + $$ = $2; + } + ; + +initializer_list: + initializer + { + void *ctx = state; + $$ = new(ctx) ast_aggregate_initializer(); + $$->set_location(@1); + $$->expressions.push_tail(& $1->link); + } + | initializer_list ',' initializer + { + $1->expressions.push_tail(& $3->link); + } + ; + +declaration_statement: + declaration + ; + + // Grammar Note: labeled statements for SWITCH only; 'goto' is not + // supported. 
+statement: + compound_statement { $$ = (ast_node *) $1; } + | simple_statement + ; + +simple_statement: + declaration_statement + | expression_statement + | selection_statement + | switch_statement + | iteration_statement + | jump_statement + ; + +/* A braced block. The non-empty form pushes a symbol-table scope in a + * mid-rule action before statement_list is parsed and pops it again once + * the closing brace has been reduced. + */ +compound_statement: + '{' '}' + { + void *ctx = state; + $$ = new(ctx) ast_compound_statement(true, NULL); + $$->set_location_range(@1, @2); + } + | '{' + { + state->symbols->push_scope(); + } + statement_list '}' + { + void *ctx = state; + $$ = new(ctx) ast_compound_statement(true, $3); + $$->set_location_range(@1, @4); + state->symbols->pop_scope(); + } + ; + +statement_no_new_scope: + compound_statement_no_new_scope { $$ = (ast_node *) $1; } + | simple_statement + ; + +/* Same shape as compound_statement, but no scope is pushed or popped here + * and the node is constructed with `false' as its first argument. + */ +compound_statement_no_new_scope: + '{' '}' + { + void *ctx = state; + $$ = new(ctx) ast_compound_statement(false, NULL); + $$->set_location_range(@1, @2); + } + | '{' statement_list '}' + { + void *ctx = state; + $$ = new(ctx) ast_compound_statement(false, $2); + $$->set_location_range(@1, @3); + } + ; + +/* One or more statements chained together through their own link nodes. + * + * A NULL statement is reported as an error and the rule is then abandoned + * with YYERROR: assert() alone is compiled out under NDEBUG, which would + * let the NULL pointer be dereferenced by the list operations below. + */ +statement_list: + statement + { + if ($1 == NULL) { + _mesa_glsl_error(& @1, state, "<nil> statement"); + assert($1 != NULL); + YYERROR; + } + + $$ = $1; + $$->link.self_link(); + } + | statement_list statement + { + if ($2 == NULL) { + _mesa_glsl_error(& @2, state, "<nil> statement"); + assert($2 != NULL); + YYERROR; + } + $$ = $1; + $$->link.insert_before(& $2->link); + } + ; + +expression_statement: + ';' + { + void *ctx = state; + $$ = new(ctx) ast_expression_statement(NULL); + $$->set_location(@1); + } + | expression ';' + { + void *ctx = state; + $$ = new(ctx) ast_expression_statement($1); + $$->set_location(@1); + } + ; + +selection_statement: + IF '(' expression ')' selection_rest_statement + { + $$ = new(state) ast_selection_statement($3, $5.then_statement, + $5.else_statement); + $$->set_location_range(@1, @5); + } + ; + +selection_rest_statement: + statement ELSE statement + { + $$.then_statement = $1; + $$.else_statement = $3; + } + | statement %prec THEN + { + 
$$.then_statement = $1; + $$.else_statement = NULL; + } + ; + +/* A condition is either a plain expression or a single initialized + * declaration; the latter is wrapped in a one-entry ast_declarator_list. + */ +condition: + expression + { + $$ = (ast_node *) $1; + } + | fully_specified_type any_identifier '=' initializer + { + void *ctx = state; + ast_declaration *decl = new(ctx) ast_declaration($2, NULL, $4); + ast_declarator_list *declarator = new(ctx) ast_declarator_list($1); + decl->set_location_range(@2, @4); + declarator->set_location(@1); + + declarator->declarations.push_tail(&decl->link); + $$ = declarator; + } + ; + +/* + * switch_statement grammar is based on the syntax described in the body + * of the GLSL spec, not in its appendix. + */ +switch_statement: + SWITCH '(' expression ')' switch_body + { + $$ = new(state) ast_switch_statement($3, $5); + $$->set_location_range(@1, @5); + } + ; + +/* An empty body is represented by an ast_switch_body built from NULL. */ +switch_body: + '{' '}' + { + $$ = new(state) ast_switch_body(NULL); + $$->set_location_range(@1, @2); + } + | '{' case_statement_list '}' + { + $$ = new(state) ast_switch_body($2); + $$->set_location_range(@1, @3); + } + ; + +/* `default:' is an ast_case_label constructed with a NULL test expression. */ +case_label: + CASE expression ':' + { + $$ = new(state) ast_case_label($2); + $$->set_location(@2); + } + | DEFAULT ':' + { + $$ = new(state) ast_case_label(NULL); + $$->set_location(@2); + } + ; + +/* Consecutive labels are collected into a single ast_case_label_list. */ +case_label_list: + case_label + { + ast_case_label_list *labels = new(state) ast_case_label_list(); + + labels->labels.push_tail(& $1->link); + $$ = labels; + $$->set_location(@1); + } + | case_label_list case_label + { + $$ = $1; + $$->labels.push_tail(& $2->link); + } + ; + +/* A label list followed by one or more statements; each further statement + * is appended to the same ast_case_statement. + */ +case_statement: + case_label_list statement + { + ast_case_statement *stmts = new(state) ast_case_statement($1); + stmts->set_location(@2); + + stmts->stmts.push_tail(& $2->link); + $$ = stmts; + } + | case_statement statement + { + $$ = $1; + $$->stmts.push_tail(& $2->link); + } + ; + +case_statement_list: + case_statement + { + ast_case_statement_list *cases= new(state) ast_case_statement_list(); + cases->set_location(@1); + + cases->cases.push_tail(& $1->link); + $$ = cases; + } + | case_statement_list case_statement 
+ { + $$ = $1; + $$->cases.push_tail(& $2->link); + } + ; + +iteration_statement: + WHILE '(' condition ')' statement_no_new_scope + { + void *ctx = state; + $$ = new(ctx) ast_iteration_statement(ast_iteration_statement::ast_while, + NULL, $3, NULL, $5); + $$->set_location_range(@1, @4); + } + | DO statement WHILE '(' expression ')' ';' + { + void *ctx = state; + $$ = new(ctx) ast_iteration_statement(ast_iteration_statement::ast_do_while, + NULL, $5, NULL, $2); + $$->set_location_range(@1, @6); + } + | FOR '(' for_init_statement for_rest_statement ')' statement_no_new_scope + { + void *ctx = state; + $$ = new(ctx) ast_iteration_statement(ast_iteration_statement::ast_for, + $3, $4.cond, $4.rest, $6); + $$->set_location_range(@1, @6); + } + ; + +for_init_statement: + expression_statement + | declaration_statement + ; + +conditionopt: + condition + | /* empty */ + { + $$ = NULL; + } + ; + +for_rest_statement: + conditionopt ';' + { + $$.cond = $1; + $$.rest = NULL; + } + | conditionopt ';' expression + { + $$.cond = $1; + $$.rest = $3; + } + ; + + // Grammar Note: No 'goto'. Gotos are not supported. +jump_statement: + CONTINUE ';' + { + void *ctx = state; + $$ = new(ctx) ast_jump_statement(ast_jump_statement::ast_continue, NULL); + $$->set_location(@1); + } + | BREAK ';' + { + void *ctx = state; + $$ = new(ctx) ast_jump_statement(ast_jump_statement::ast_break, NULL); + $$->set_location(@1); + } + | RETURN ';' + { + void *ctx = state; + $$ = new(ctx) ast_jump_statement(ast_jump_statement::ast_return, NULL); + $$->set_location(@1); + } + | RETURN expression ';' + { + void *ctx = state; + $$ = new(ctx) ast_jump_statement(ast_jump_statement::ast_return, $2); + $$->set_location_range(@1, @2); + } + | DISCARD ';' // Fragment shader only. 
+ { + void *ctx = state; + $$ = new(ctx) ast_jump_statement(ast_jump_statement::ast_discard, NULL); + $$->set_location(@1); + } + ; + +external_declaration: + function_definition { $$ = $1; } + | declaration { $$ = $1; } + | pragma_statement { $$ = NULL; } + | layout_defaults { $$ = $1; } + ; + +function_definition: + function_prototype compound_statement_no_new_scope + { + void *ctx = state; + $$ = new(ctx) ast_function_definition(); + $$->set_location_range(@1, @2); + $$->prototype = $1; + $$->body = $2; + + state->symbols->pop_scope(); + } + ; + +/* layout_qualifieropt is packed into this rule */ +interface_block: + basic_interface_block + { + $$ = $1; + } + | layout_qualifier interface_block + { + ast_interface_block *block = (ast_interface_block *) $2; + + if (!state->has_420pack_or_es31() && block->layout.has_layout() && + !block->layout.is_default_qualifier) { + _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers"); + YYERROR; + } + + if (!block->layout.merge_qualifier(& @1, state, $1, false)) { + YYERROR; + } + + block->layout.is_default_qualifier = false; + + $$ = block; + } + | memory_qualifier interface_block + { + ast_interface_block *block = (ast_interface_block *)$2; + + if (!block->layout.flags.q.buffer) { + _mesa_glsl_error(& @1, state, + "memory qualifiers can only be used in the " + "declaration of shader storage blocks"); + } + if (!block->layout.merge_qualifier(& @1, state, $1, false)) { + YYERROR; + } + $$ = block; + } + ; + +basic_interface_block: + interface_qualifier NEW_IDENTIFIER '{' member_list '}' instance_name_opt ';' + { + ast_interface_block *const block = $6; + + block->block_name = $2; + block->declarations.push_degenerate_list_at_head(& $4->link); + + _mesa_ast_process_interface_block(& @1, state, block, $1); + + $$ = block; + } + | buffer_interface_qualifier NEW_IDENTIFIER '{' member_list '}' buffer_instance_name_opt ';' + { + ast_interface_block *const block = $6; + + block->block_name = $2; + 
block->declarations.push_degenerate_list_at_head(& $4->link); + + _mesa_ast_process_interface_block(& @1, state, block, $1); + + $$ = block; + } + ; + +interface_qualifier: + IN_TOK + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.in = 1; + } + | OUT_TOK + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.out = 1; + } + | UNIFORM + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.uniform = 1; + } + ; + +buffer_interface_qualifier: + BUFFER + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.buffer = 1; + } + ; + +instance_name_opt: + /* empty */ + { + $$ = new(state) ast_interface_block(*state->default_uniform_qualifier, + NULL, NULL); + } + | NEW_IDENTIFIER + { + $$ = new(state) ast_interface_block(*state->default_uniform_qualifier, + $1, NULL); + $$->set_location(@1); + } + | NEW_IDENTIFIER array_specifier + { + $$ = new(state) ast_interface_block(*state->default_uniform_qualifier, + $1, $2); + $$->set_location_range(@1, @2); + } + ; + +buffer_instance_name_opt: + /* empty */ + { + $$ = new(state) ast_interface_block(*state->default_shader_storage_qualifier, + NULL, NULL); + } + | NEW_IDENTIFIER + { + $$ = new(state) ast_interface_block(*state->default_shader_storage_qualifier, + $1, NULL); + $$->set_location(@1); + } + | NEW_IDENTIFIER array_specifier + { + $$ = new(state) ast_interface_block(*state->default_shader_storage_qualifier, + $1, $2); + $$->set_location_range(@1, @2); + } + ; + +member_list: + member_declaration + { + $$ = $1; + $1->link.self_link(); + } + | member_declaration member_list + { + $$ = $1; + $2->link.insert_before(& $$->link); + } + ; + +member_declaration: + fully_specified_type struct_declarator_list ';' + { + void *ctx = state; + ast_fully_specified_type *type = $1; + type->set_location(@1); + + if (type->qualifier.flags.q.attribute) { + _mesa_glsl_error(& @1, state, + "keyword 'attribute' cannot be used with " + "interface block member"); + } else if (type->qualifier.flags.q.varying) { + _mesa_glsl_error(& @1, state, + "keyword 'varying' cannot 
be used with " + "interface block member"); + } + + $$ = new(ctx) ast_declarator_list(type); + $$->set_location(@2); + + $$->declarations.push_degenerate_list_at_head(& $2->link); + } + ; + +layout_uniform_defaults: + layout_qualifier layout_uniform_defaults + { + $$ = NULL; + if (!state->has_420pack_or_es31()) { + _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers"); + YYERROR; + } else { + if (!state->default_uniform_qualifier-> + merge_qualifier(& @1, state, $1, false)) { + YYERROR; + } + } + } + | layout_qualifier UNIFORM ';' + { + if (!state->default_uniform_qualifier-> + merge_qualifier(& @1, state, $1, false)) { + YYERROR; + } + $$ = NULL; + } + ; + +layout_buffer_defaults: + layout_qualifier layout_buffer_defaults + { + $$ = NULL; + if (!state->has_420pack_or_es31()) { + _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers"); + YYERROR; + } else { + if (!state->default_shader_storage_qualifier-> + merge_qualifier(& @1, state, $1, false)) { + YYERROR; + } + } + } + | layout_qualifier BUFFER ';' + { + if (!state->default_shader_storage_qualifier-> + merge_qualifier(& @1, state, $1, false)) { + YYERROR; + } + + /* From the GLSL 4.50 spec, section 4.4.5: + * + * "It is a compile-time error to specify the binding identifier for + * the global scope or for block member declarations." + */ + if (state->default_shader_storage_qualifier->flags.q.explicit_binding) { + _mesa_glsl_error(& @1, state, + "binding qualifier cannot be set for default layout"); + } + + $$ = NULL; + } + ; + +layout_in_defaults: + layout_qualifier layout_in_defaults + { + $$ = NULL; + if (!state->has_420pack_or_es31()) { + _mesa_glsl_error(&@1, state, "duplicate layout(...) 
qualifiers"); + YYERROR; + } else { + if (!state->in_qualifier-> + merge_in_qualifier(& @1, state, $1, $$, false)) { + YYERROR; + } + } + } + | layout_qualifier IN_TOK ';' + { + $$ = NULL; + if (!state->in_qualifier-> + merge_in_qualifier(& @1, state, $1, $$, true)) { + YYERROR; + } + } + ; + +layout_out_defaults: + layout_qualifier layout_out_defaults + { + $$ = NULL; + if (!state->has_420pack_or_es31()) { + _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers"); + YYERROR; + } else { + if (!state->out_qualifier-> + merge_out_qualifier(& @1, state, $1, $$, false)) { + YYERROR; + } + } + } + | layout_qualifier OUT_TOK ';' + { + $$ = NULL; + if (!state->out_qualifier-> + merge_out_qualifier(& @1, state, $1, $$, true)) + YYERROR; + } + ; + +layout_defaults: + layout_uniform_defaults + | layout_buffer_defaults + | layout_in_defaults + | layout_out_defaults + ; diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp new file mode 100644 index 0000000..6038954 --- /dev/null +++ b/src/compiler/glsl/glsl_parser_extras.cpp @@ -0,0 +1,1952 @@ +/* + * Copyright © 2008, 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include <assert.h> + +#include "main/core.h" /* for struct gl_context */ +#include "main/context.h" +#include "main/shaderobj.h" +#include "util/u_atomic.h" /* for p_atomic_cmpxchg */ +#include "util/ralloc.h" +#include "ast.h" +#include "glsl_parser_extras.h" +#include "glsl_parser.h" +#include "ir_optimization.h" +#include "loop_analysis.h" + +/** + * Format a short human-readable description of the given GLSL version. + */ +const char * +glsl_compute_version_string(void *mem_ctx, bool is_es, unsigned version) +{ + return ralloc_asprintf(mem_ctx, "GLSL%s %d.%02d", is_es ? 
" ES" : "", + version / 100, version % 100); +} + + +static const unsigned known_desktop_glsl_versions[] = + { 110, 120, 130, 140, 150, 330, 400, 410, 420, 430, 440, 450 }; + + +_mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, + gl_shader_stage stage, + void *mem_ctx) + : ctx(_ctx), cs_input_local_size_specified(false), cs_input_local_size(), + switch_state() +{ + assert(stage < MESA_SHADER_STAGES); + this->stage = stage; + + this->scanner = NULL; + this->translation_unit.make_empty(); + this->symbols = new(mem_ctx) glsl_symbol_table; + + this->info_log = ralloc_strdup(mem_ctx, ""); + this->error = false; + this->loop_nesting_ast = NULL; + + this->struct_specifier_depth = 0; + + this->uses_builtin_functions = false; + + /* Set default language version and extensions */ + this->language_version = 110; + this->forced_language_version = ctx->Const.ForceGLSLVersion; + this->es_shader = false; + this->ARB_texture_rectangle_enable = true; + + /* OpenGL ES 2.0 has different defaults from desktop GL. 
*/ + if (ctx->API == API_OPENGLES2) { + this->language_version = 100; + this->es_shader = true; + this->ARB_texture_rectangle_enable = false; + } + + this->extensions = &ctx->Extensions; + + this->Const.MaxLights = ctx->Const.MaxLights; + this->Const.MaxClipPlanes = ctx->Const.MaxClipPlanes; + this->Const.MaxTextureUnits = ctx->Const.MaxTextureUnits; + this->Const.MaxTextureCoords = ctx->Const.MaxTextureCoordUnits; + this->Const.MaxVertexAttribs = ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs; + this->Const.MaxVertexUniformComponents = ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents; + this->Const.MaxVertexTextureImageUnits = ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits; + this->Const.MaxCombinedTextureImageUnits = ctx->Const.MaxCombinedTextureImageUnits; + this->Const.MaxTextureImageUnits = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; + this->Const.MaxFragmentUniformComponents = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents; + this->Const.MinProgramTexelOffset = ctx->Const.MinProgramTexelOffset; + this->Const.MaxProgramTexelOffset = ctx->Const.MaxProgramTexelOffset; + + this->Const.MaxDrawBuffers = ctx->Const.MaxDrawBuffers; + + this->Const.MaxDualSourceDrawBuffers = ctx->Const.MaxDualSourceDrawBuffers; + + /* 1.50 constants */ + this->Const.MaxVertexOutputComponents = ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents; + this->Const.MaxGeometryInputComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents; + this->Const.MaxGeometryOutputComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents; + this->Const.MaxFragmentInputComponents = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents; + this->Const.MaxGeometryTextureImageUnits = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits; + this->Const.MaxGeometryOutputVertices = ctx->Const.MaxGeometryOutputVertices; + this->Const.MaxGeometryTotalOutputComponents = 
ctx->Const.MaxGeometryTotalOutputComponents; + this->Const.MaxGeometryUniformComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxUniformComponents; + + this->Const.MaxVertexAtomicCounters = ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters; + this->Const.MaxTessControlAtomicCounters = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxAtomicCounters; + this->Const.MaxTessEvaluationAtomicCounters = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxAtomicCounters; + this->Const.MaxGeometryAtomicCounters = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters; + this->Const.MaxFragmentAtomicCounters = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters; + this->Const.MaxCombinedAtomicCounters = ctx->Const.MaxCombinedAtomicCounters; + this->Const.MaxAtomicBufferBindings = ctx->Const.MaxAtomicBufferBindings; + this->Const.MaxVertexAtomicCounterBuffers = + ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers; + this->Const.MaxTessControlAtomicCounterBuffers = + ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxAtomicBuffers; + this->Const.MaxTessEvaluationAtomicCounterBuffers = + ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxAtomicBuffers; + this->Const.MaxGeometryAtomicCounterBuffers = + ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers; + this->Const.MaxFragmentAtomicCounterBuffers = + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers; + this->Const.MaxCombinedAtomicCounterBuffers = + ctx->Const.MaxCombinedAtomicBuffers; + this->Const.MaxAtomicCounterBufferSize = + ctx->Const.MaxAtomicBufferSize; + + /* Compute shader constants */ + for (unsigned i = 0; i < ARRAY_SIZE(this->Const.MaxComputeWorkGroupCount); i++) + this->Const.MaxComputeWorkGroupCount[i] = ctx->Const.MaxComputeWorkGroupCount[i]; + for (unsigned i = 0; i < ARRAY_SIZE(this->Const.MaxComputeWorkGroupSize); i++) + this->Const.MaxComputeWorkGroupSize[i] = ctx->Const.MaxComputeWorkGroupSize[i]; + + this->Const.MaxImageUnits = ctx->Const.MaxImageUnits; + 
this->Const.MaxCombinedShaderOutputResources = ctx->Const.MaxCombinedShaderOutputResources; + this->Const.MaxImageSamples = ctx->Const.MaxImageSamples; + this->Const.MaxVertexImageUniforms = ctx->Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms; + this->Const.MaxTessControlImageUniforms = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxImageUniforms; + this->Const.MaxTessEvaluationImageUniforms = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxImageUniforms; + this->Const.MaxGeometryImageUniforms = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms; + this->Const.MaxFragmentImageUniforms = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms; + this->Const.MaxCombinedImageUniforms = ctx->Const.MaxCombinedImageUniforms; + + /* ARB_viewport_array */ + this->Const.MaxViewports = ctx->Const.MaxViewports; + + /* tessellation shader constants */ + this->Const.MaxPatchVertices = ctx->Const.MaxPatchVertices; + this->Const.MaxTessGenLevel = ctx->Const.MaxTessGenLevel; + this->Const.MaxTessControlInputComponents = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents; + this->Const.MaxTessControlOutputComponents = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents; + this->Const.MaxTessControlTextureImageUnits = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits; + this->Const.MaxTessEvaluationInputComponents = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents; + this->Const.MaxTessEvaluationOutputComponents = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents; + this->Const.MaxTessEvaluationTextureImageUnits = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits; + this->Const.MaxTessPatchComponents = ctx->Const.MaxTessPatchComponents; + this->Const.MaxTessControlTotalOutputComponents = ctx->Const.MaxTessControlTotalOutputComponents; + this->Const.MaxTessControlUniformComponents = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxUniformComponents; + this->Const.MaxTessEvaluationUniformComponents = 
ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxUniformComponents; + + this->current_function = NULL; + this->toplevel_ir = NULL; + this->found_return = false; + this->all_invariant = false; + this->user_structures = NULL; + this->num_user_structures = 0; + this->num_subroutines = 0; + this->subroutines = NULL; + this->num_subroutine_types = 0; + this->subroutine_types = NULL; + + /* supported_versions should be large enough to support the known desktop + * GLSL versions plus 3 GLES versions (ES 1.00, ES 3.00, and ES 3.10)) + */ + STATIC_ASSERT((ARRAY_SIZE(known_desktop_glsl_versions) + 3) == + ARRAY_SIZE(this->supported_versions)); + + /* Populate the list of supported GLSL versions */ + /* FINISHME: Once the OpenGL 3.0 'forward compatible' context or + * the OpenGL 3.2 Core context is supported, this logic will need + * change. Older versions of GLSL are no longer supported + * outside the compatibility contexts of 3.x. + */ + this->num_supported_versions = 0; + if (_mesa_is_desktop_gl(ctx)) { + for (unsigned i = 0; i < ARRAY_SIZE(known_desktop_glsl_versions); i++) { + if (known_desktop_glsl_versions[i] <= ctx->Const.GLSLVersion) { + this->supported_versions[this->num_supported_versions].ver + = known_desktop_glsl_versions[i]; + this->supported_versions[this->num_supported_versions].es = false; + this->num_supported_versions++; + } + } + } + if (ctx->API == API_OPENGLES2 || ctx->Extensions.ARB_ES2_compatibility) { + this->supported_versions[this->num_supported_versions].ver = 100; + this->supported_versions[this->num_supported_versions].es = true; + this->num_supported_versions++; + } + if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility) { + this->supported_versions[this->num_supported_versions].ver = 300; + this->supported_versions[this->num_supported_versions].es = true; + this->num_supported_versions++; + } + if (_mesa_is_gles31(ctx)) { + this->supported_versions[this->num_supported_versions].ver = 310; + 
this->supported_versions[this->num_supported_versions].es = true; + this->num_supported_versions++; + } + + /* Create a string for use in error messages to tell the user which GLSL + * versions are supported. + */ + char *supported = ralloc_strdup(this, ""); + for (unsigned i = 0; i < this->num_supported_versions; i++) { + unsigned ver = this->supported_versions[i].ver; + const char *const prefix = (i == 0) + ? "" + : ((i == this->num_supported_versions - 1) ? ", and " : ", "); + const char *const suffix = (this->supported_versions[i].es) ? " ES" : ""; + + ralloc_asprintf_append(& supported, "%s%u.%02u%s", + prefix, + ver / 100, ver % 100, + suffix); + } + + this->supported_version_string = supported; + + if (ctx->Const.ForceGLSLExtensionsWarn) + _mesa_glsl_process_extension("all", NULL, "warn", NULL, this); + + this->default_uniform_qualifier = new(this) ast_type_qualifier(); + this->default_uniform_qualifier->flags.q.shared = 1; + this->default_uniform_qualifier->flags.q.column_major = 1; + this->default_uniform_qualifier->is_default_qualifier = true; + + this->default_shader_storage_qualifier = new(this) ast_type_qualifier(); + this->default_shader_storage_qualifier->flags.q.shared = 1; + this->default_shader_storage_qualifier->flags.q.column_major = 1; + this->default_shader_storage_qualifier->is_default_qualifier = true; + + this->fs_uses_gl_fragcoord = false; + this->fs_redeclares_gl_fragcoord = false; + this->fs_origin_upper_left = false; + this->fs_pixel_center_integer = false; + this->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers = false; + + this->gs_input_prim_type_specified = false; + this->tcs_output_vertices_specified = false; + this->gs_input_size = 0; + this->in_qualifier = new(this) ast_type_qualifier(); + this->out_qualifier = new(this) ast_type_qualifier(); + this->fs_early_fragment_tests = false; + memset(this->atomic_counter_offsets, 0, + sizeof(this->atomic_counter_offsets)); + this->allow_extension_directive_midshader = + 
ctx->Const.AllowGLSLExtensionDirectiveMidShader; +} + +/** + * Determine whether the current GLSL version is sufficiently high to support + * a certain feature, and generate an error message if it isn't. + * + * \param required_glsl_version and \c required_glsl_es_version are + * interpreted as they are in _mesa_glsl_parse_state::is_version(). + * + * \param locp is the parser location where the error should be reported. + * + * \param fmt (and additional arguments) constitute a printf-style error + * message to report if the version check fails. Information about the + * current and required GLSL versions will be appended. So, for example, if + * the GLSL version being compiled is 1.20, and check_version(130, 300, locp, + * "foo unsupported") is called, the error message will be "foo unsupported in + * GLSL 1.20 (GLSL 1.30 or GLSL 3.00 ES required)". + */ +bool +_mesa_glsl_parse_state::check_version(unsigned required_glsl_version, + unsigned required_glsl_es_version, + YYLTYPE *locp, const char *fmt, ...) 
+{ + if (this->is_version(required_glsl_version, required_glsl_es_version)) + return true; + + va_list args; + va_start(args, fmt); + char *problem = ralloc_vasprintf(this, fmt, args); + va_end(args); + const char *glsl_version_string + = glsl_compute_version_string(this, false, required_glsl_version); + const char *glsl_es_version_string + = glsl_compute_version_string(this, true, required_glsl_es_version); + const char *requirement_string = ""; + if (required_glsl_version && required_glsl_es_version) { + requirement_string = ralloc_asprintf(this, " (%s or %s required)", + glsl_version_string, + glsl_es_version_string); + } else if (required_glsl_version) { + requirement_string = ralloc_asprintf(this, " (%s required)", + glsl_version_string); + } else if (required_glsl_es_version) { + requirement_string = ralloc_asprintf(this, " (%s required)", + glsl_es_version_string); + } + _mesa_glsl_error(locp, this, "%s in %s%s", + problem, this->get_version_string(), + requirement_string); + + return false; +} + +/** + * Process a GLSL #version directive. + * + * \param version is the integer that follows the #version token. + * + * \param ident is a string identifier that follows the integer, if any is + * present. Otherwise NULL. + */ +void +_mesa_glsl_parse_state::process_version_directive(YYLTYPE *locp, int version, + const char *ident) +{ + bool es_token_present = false; + if (ident) { + if (strcmp(ident, "es") == 0) { + es_token_present = true; + } else if (version >= 150) { + if (strcmp(ident, "core") == 0) { + /* Accept the token. There's no need to record that this is + * a core profile shader since that's the only profile we support. 
+ */ + } else if (strcmp(ident, "compatibility") == 0) { + _mesa_glsl_error(locp, this, + "the compatibility profile is not supported"); + } else { + _mesa_glsl_error(locp, this, + "\"%s\" is not a valid shading language profile; " + "if present, it must be \"core\"", ident); + } + } else { + _mesa_glsl_error(locp, this, + "illegal text following version number"); + } + } + + this->es_shader = es_token_present; + if (version == 100) { + if (es_token_present) { + _mesa_glsl_error(locp, this, + "GLSL 1.00 ES should be selected using " + "`#version 100'"); + } else { + this->es_shader = true; + } + } + + if (this->es_shader) { + this->ARB_texture_rectangle_enable = false; + } + + if (this->forced_language_version) + this->language_version = this->forced_language_version; + else + this->language_version = version; + + bool supported = false; + for (unsigned i = 0; i < this->num_supported_versions; i++) { + if (this->supported_versions[i].ver == this->language_version + && this->supported_versions[i].es == this->es_shader) { + supported = true; + break; + } + } + + if (!supported) { + _mesa_glsl_error(locp, this, "%s is not supported. " + "Supported versions are: %s", + this->get_version_string(), + this->supported_version_string); + + /* On exit, the language_version must be set to a valid value. + * Later calls to _mesa_glsl_initialize_types will misbehave if + * the version is invalid. + */ + switch (this->ctx->API) { + case API_OPENGL_COMPAT: + case API_OPENGL_CORE: + this->language_version = this->ctx->Const.GLSLVersion; + break; + + case API_OPENGLES: + assert(!"Should not get here."); + /* FALLTHROUGH */ + + case API_OPENGLES2: + this->language_version = 100; + break; + } + } +} + + +/* This helper function will append the given message to the shader's + info log and report it via GL_ARB_debug_output. Per that extension, + 'type' is one of the enum values classifying the message, and + 'id' is the implementation-defined ID of the given message. 
*/ +static void +_mesa_glsl_msg(const YYLTYPE *locp, _mesa_glsl_parse_state *state, + GLenum type, const char *fmt, va_list ap) +{ + bool error = (type == MESA_DEBUG_TYPE_ERROR); + GLuint msg_id = 0; + + assert(state->info_log != NULL); + + /* Get the offset that the new message will be written to. */ + int msg_offset = strlen(state->info_log); + + ralloc_asprintf_append(&state->info_log, "%u:%u(%u): %s: ", + locp->source, + locp->first_line, + locp->first_column, + error ? "error" : "warning"); + ralloc_vasprintf_append(&state->info_log, fmt, ap); + + const char *const msg = &state->info_log[msg_offset]; + struct gl_context *ctx = state->ctx; + + /* Report the error via GL_ARB_debug_output. */ + _mesa_shader_debug(ctx, type, &msg_id, msg); + + ralloc_strcat(&state->info_log, "\n"); +} + +void +_mesa_glsl_error(YYLTYPE *locp, _mesa_glsl_parse_state *state, + const char *fmt, ...) +{ + va_list ap; + + state->error = true; + + va_start(ap, fmt); + _mesa_glsl_msg(locp, state, MESA_DEBUG_TYPE_ERROR, fmt, ap); + va_end(ap); +} + + +void +_mesa_glsl_warning(const YYLTYPE *locp, _mesa_glsl_parse_state *state, + const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + _mesa_glsl_msg(locp, state, MESA_DEBUG_TYPE_OTHER, fmt, ap); + va_end(ap); +} + + +/** + * Enum representing the possible behaviors that can be specified in + * an #extension directive. 
+ */ +enum ext_behavior { + extension_disable, + extension_enable, + extension_require, + extension_warn +}; + +/** + * Element type for _mesa_glsl_supported_extensions + */ +struct _mesa_glsl_extension { + /** + * Name of the extension when referred to in a GLSL extension + * statement + */ + const char *name; + + /** True if this extension is available to desktop GL shaders */ + bool avail_in_GL; + + /** True if this extension is available to GLES shaders */ + bool avail_in_ES; + + /** + * Flag in the gl_extensions struct indicating whether this + * extension is supported by the driver, or + * &gl_extensions::dummy_true if supported by all drivers. + * + * Note: the type (GLboolean gl_extensions::*) is a "pointer to + * member" type, the type-safe alternative to the "offsetof" macro. + * In a nutshell: + * + * - foo bar::* p declares p to be an "offset" to a field of type + * foo that exists within struct bar + * - &bar::baz computes the "offset" of field baz within struct bar + * - x.*p accesses the field of x that exists at "offset" p + * - x->*p is equivalent to (*x).*p + */ + const GLboolean gl_extensions::* supported_flag; + + /** + * Flag in the _mesa_glsl_parse_state struct that should be set + * when this extension is enabled. + * + * See note in _mesa_glsl_extension::supported_flag about "pointer + * to member" types. + */ + bool _mesa_glsl_parse_state::* enable_flag; + + /** + * Flag in the _mesa_glsl_parse_state struct that should be set + * when the shader requests "warn" behavior for this extension. + * + * See note in _mesa_glsl_extension::supported_flag about "pointer + * to member" types. 
+ */ + bool _mesa_glsl_parse_state::* warn_flag; + + + bool compatible_with_state(const _mesa_glsl_parse_state *state) const; + void set_flags(_mesa_glsl_parse_state *state, ext_behavior behavior) const; +}; + +#define EXT(NAME, GL, ES, SUPPORTED_FLAG) \ + { "GL_" #NAME, GL, ES, &gl_extensions::SUPPORTED_FLAG, \ + &_mesa_glsl_parse_state::NAME##_enable, \ + &_mesa_glsl_parse_state::NAME##_warn } + +/** + * Table of extensions that can be enabled/disabled within a shader, + * and the conditions under which they are supported. + */ +static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { + /* API availability */ + /* name GL ES supported flag */ + + /* ARB extensions go here, sorted alphabetically. + */ + EXT(ARB_arrays_of_arrays, true, false, ARB_arrays_of_arrays), + EXT(ARB_compute_shader, true, false, ARB_compute_shader), + EXT(ARB_conservative_depth, true, false, ARB_conservative_depth), + EXT(ARB_derivative_control, true, false, ARB_derivative_control), + EXT(ARB_draw_buffers, true, false, dummy_true), + EXT(ARB_draw_instanced, true, false, ARB_draw_instanced), + EXT(ARB_enhanced_layouts, true, false, ARB_enhanced_layouts), + EXT(ARB_explicit_attrib_location, true, false, ARB_explicit_attrib_location), + EXT(ARB_explicit_uniform_location, true, false, ARB_explicit_uniform_location), + EXT(ARB_fragment_coord_conventions, true, false, ARB_fragment_coord_conventions), + EXT(ARB_fragment_layer_viewport, true, false, ARB_fragment_layer_viewport), + EXT(ARB_gpu_shader5, true, false, ARB_gpu_shader5), + EXT(ARB_gpu_shader_fp64, true, false, ARB_gpu_shader_fp64), + EXT(ARB_sample_shading, true, false, ARB_sample_shading), + EXT(ARB_separate_shader_objects, true, false, dummy_true), + EXT(ARB_shader_atomic_counters, true, false, ARB_shader_atomic_counters), + EXT(ARB_shader_bit_encoding, true, false, ARB_shader_bit_encoding), + EXT(ARB_shader_clock, true, false, ARB_shader_clock), + EXT(ARB_shader_draw_parameters, true, false, ARB_shader_draw_parameters), 
+ EXT(ARB_shader_image_load_store, true, false, ARB_shader_image_load_store), + EXT(ARB_shader_image_size, true, false, ARB_shader_image_size), + EXT(ARB_shader_precision, true, false, ARB_shader_precision), + EXT(ARB_shader_stencil_export, true, false, ARB_shader_stencil_export), + EXT(ARB_shader_storage_buffer_object, true, true, ARB_shader_storage_buffer_object), + EXT(ARB_shader_subroutine, true, false, ARB_shader_subroutine), + EXT(ARB_shader_texture_image_samples, true, false, ARB_shader_texture_image_samples), + EXT(ARB_shader_texture_lod, true, false, ARB_shader_texture_lod), + EXT(ARB_shading_language_420pack, true, false, ARB_shading_language_420pack), + EXT(ARB_shading_language_packing, true, false, ARB_shading_language_packing), + EXT(ARB_tessellation_shader, true, false, ARB_tessellation_shader), + EXT(ARB_texture_cube_map_array, true, false, ARB_texture_cube_map_array), + EXT(ARB_texture_gather, true, false, ARB_texture_gather), + EXT(ARB_texture_multisample, true, false, ARB_texture_multisample), + EXT(ARB_texture_query_levels, true, false, ARB_texture_query_levels), + EXT(ARB_texture_query_lod, true, false, ARB_texture_query_lod), + EXT(ARB_texture_rectangle, true, false, dummy_true), + EXT(ARB_uniform_buffer_object, true, false, ARB_uniform_buffer_object), + EXT(ARB_vertex_attrib_64bit, true, false, ARB_vertex_attrib_64bit), + EXT(ARB_viewport_array, true, false, ARB_viewport_array), + + /* KHR extensions go here, sorted alphabetically. + */ + + /* OES extensions go here, sorted alphabetically. + */ + EXT(OES_EGL_image_external, false, true, OES_EGL_image_external), + EXT(OES_geometry_shader, false, true, OES_geometry_shader), + EXT(OES_standard_derivatives, false, true, OES_standard_derivatives), + EXT(OES_texture_3D, false, true, dummy_true), + EXT(OES_texture_storage_multisample_2d_array, false, true, ARB_texture_multisample), + + /* All other extensions go here, sorted alphabetically. 
+ */ + EXT(AMD_conservative_depth, true, false, ARB_conservative_depth), + EXT(AMD_shader_stencil_export, true, false, ARB_shader_stencil_export), + EXT(AMD_shader_trinary_minmax, true, false, dummy_true), + EXT(AMD_vertex_shader_layer, true, false, AMD_vertex_shader_layer), + EXT(AMD_vertex_shader_viewport_index, true, false, AMD_vertex_shader_viewport_index), + EXT(EXT_blend_func_extended, false, true, ARB_blend_func_extended), + EXT(EXT_draw_buffers, false, true, dummy_true), + EXT(EXT_separate_shader_objects, false, true, dummy_true), + EXT(EXT_shader_integer_mix, true, true, EXT_shader_integer_mix), + EXT(EXT_shader_samples_identical, true, true, EXT_shader_samples_identical), + EXT(EXT_texture_array, true, false, EXT_texture_array), +}; + +#undef EXT + + +/** + * Determine whether a given extension is compatible with the target, + * API, and extension information in the current parser state. + */ +bool _mesa_glsl_extension::compatible_with_state(const _mesa_glsl_parse_state * + state) const +{ + /* Check that this extension matches whether we are compiling + * for desktop GL or GLES. + */ + if (state->es_shader) { + if (!this->avail_in_ES) return false; + } else { + if (!this->avail_in_GL) return false; + } + + /* Check that this extension is supported by the OpenGL + * implementation. + * + * Note: the ->* operator indexes into state->extensions by the + * offset this->supported_flag. See + * _mesa_glsl_extension::supported_flag for more info. + */ + return state->extensions->*(this->supported_flag); +} + +/** + * Set the appropriate flags in the parser state to establish the + * given behavior for this extension. + */ +void _mesa_glsl_extension::set_flags(_mesa_glsl_parse_state *state, + ext_behavior behavior) const +{ + /* Note: the ->* operator indexes into state by the + * offsets this->enable_flag and this->warn_flag. See + * _mesa_glsl_extension::supported_flag for more info. 
+ */ + state->*(this->enable_flag) = (behavior != extension_disable); + state->*(this->warn_flag) = (behavior == extension_warn); +} + +/** + * Find an extension by name in _mesa_glsl_supported_extensions. If + * the name is not found, return NULL. + */ +static const _mesa_glsl_extension *find_extension(const char *name) +{ + for (unsigned i = 0; i < ARRAY_SIZE(_mesa_glsl_supported_extensions); ++i) { + if (strcmp(name, _mesa_glsl_supported_extensions[i].name) == 0) { + return &_mesa_glsl_supported_extensions[i]; + } + } + return NULL; +} + + +bool +_mesa_glsl_process_extension(const char *name, YYLTYPE *name_locp, + const char *behavior_string, YYLTYPE *behavior_locp, + _mesa_glsl_parse_state *state) +{ + ext_behavior behavior; + if (strcmp(behavior_string, "warn") == 0) { + behavior = extension_warn; + } else if (strcmp(behavior_string, "require") == 0) { + behavior = extension_require; + } else if (strcmp(behavior_string, "enable") == 0) { + behavior = extension_enable; + } else if (strcmp(behavior_string, "disable") == 0) { + behavior = extension_disable; + } else { + _mesa_glsl_error(behavior_locp, state, + "unknown extension behavior `%s'", + behavior_string); + return false; + } + + if (strcmp(name, "all") == 0) { + if ((behavior == extension_enable) || (behavior == extension_require)) { + _mesa_glsl_error(name_locp, state, "cannot %s all extensions", + (behavior == extension_enable) + ? 
"enable" : "require"); + return false; + } else { + for (unsigned i = 0; + i < ARRAY_SIZE(_mesa_glsl_supported_extensions); ++i) { + const _mesa_glsl_extension *extension + = &_mesa_glsl_supported_extensions[i]; + if (extension->compatible_with_state(state)) { + _mesa_glsl_supported_extensions[i].set_flags(state, behavior); + } + } + } + } else { + const _mesa_glsl_extension *extension = find_extension(name); + if (extension && extension->compatible_with_state(state)) { + extension->set_flags(state, behavior); + } else { + static const char fmt[] = "extension `%s' unsupported in %s shader"; + + if (behavior == extension_require) { + _mesa_glsl_error(name_locp, state, fmt, + name, _mesa_shader_stage_to_string(state->stage)); + return false; + } else { + _mesa_glsl_warning(name_locp, state, fmt, + name, _mesa_shader_stage_to_string(state->stage)); + } + } + } + + return true; +} + + +/** + * Recurses through <type> and <expr> if <expr> is an aggregate initializer + * and sets <expr>'s <constructor_type> field to <type>. Gives later functions + * (process_array_constructor, et al) sufficient information to do type + * checking. + * + * Operates on assignments involving an aggregate initializer. E.g., + * + * vec4 pos = {1.0, -1.0, 0.0, 1.0}; + * + * or more ridiculously, + * + * struct S { + * vec4 v[2]; + * }; + * + * struct { + * S a[2], b; + * int c; + * } aggregate = { + * { + * { + * { + * {1.0, 2.0, 3.0, 4.0}, // a[0].v[0] + * {5.0, 6.0, 7.0, 8.0} // a[0].v[1] + * } // a[0].v + * }, // a[0] + * { + * { + * {1.0, 2.0, 3.0, 4.0}, // a[1].v[0] + * {5.0, 6.0, 7.0, 8.0} // a[1].v[1] + * } // a[1].v + * } // a[1] + * }, // a + * { + * { + * {1.0, 2.0, 3.0, 4.0}, // b.v[0] + * {5.0, 6.0, 7.0, 8.0} // b.v[1] + * } // b.v + * }, // b + * 4 // c + * }; + * + * This pass is necessary because the right-hand side of <type> e = { ... } + * doesn't contain sufficient information to determine if the types match. 
+ */ +void +_mesa_ast_set_aggregate_type(const glsl_type *type, + ast_expression *expr) +{ + ast_aggregate_initializer *ai = (ast_aggregate_initializer *)expr; + ai->constructor_type = type; + + /* If the aggregate is an array, recursively set its elements' types. */ + if (type->is_array()) { + /* Each array element has the type type->fields.array. + * + * E.g., if <type> if struct S[2] we want to set each element's type to + * struct S. + */ + for (exec_node *expr_node = ai->expressions.head; + !expr_node->is_tail_sentinel(); + expr_node = expr_node->next) { + ast_expression *expr = exec_node_data(ast_expression, expr_node, + link); + + if (expr->oper == ast_aggregate) + _mesa_ast_set_aggregate_type(type->fields.array, expr); + } + + /* If the aggregate is a struct, recursively set its fields' types. */ + } else if (type->is_record()) { + exec_node *expr_node = ai->expressions.head; + + /* Iterate through the struct's fields. */ + for (unsigned i = 0; !expr_node->is_tail_sentinel() && i < type->length; + i++, expr_node = expr_node->next) { + ast_expression *expr = exec_node_data(ast_expression, expr_node, + link); + + if (expr->oper == ast_aggregate) { + _mesa_ast_set_aggregate_type(type->fields.structure[i].type, expr); + } + } + /* If the aggregate is a matrix, set its columns' types. 
*/ + } else if (type->is_matrix()) { + for (exec_node *expr_node = ai->expressions.head; + !expr_node->is_tail_sentinel(); + expr_node = expr_node->next) { + ast_expression *expr = exec_node_data(ast_expression, expr_node, + link); + + if (expr->oper == ast_aggregate) + _mesa_ast_set_aggregate_type(type->column_type(), expr); + } + } +} + +void +_mesa_ast_process_interface_block(YYLTYPE *locp, + _mesa_glsl_parse_state *state, + ast_interface_block *const block, + const struct ast_type_qualifier &q) +{ + if (q.flags.q.buffer) { + if (!state->has_shader_storage_buffer_objects()) { + _mesa_glsl_error(locp, state, + "#version 430 / GL_ARB_shader_storage_buffer_object " + "required for defining shader storage blocks"); + } else if (state->ARB_shader_storage_buffer_object_warn) { + _mesa_glsl_warning(locp, state, + "#version 430 / GL_ARB_shader_storage_buffer_object " + "required for defining shader storage blocks"); + } + } else if (q.flags.q.uniform) { + if (!state->has_uniform_buffer_objects()) { + _mesa_glsl_error(locp, state, + "#version 140 / GL_ARB_uniform_buffer_object " + "required for defining uniform blocks"); + } else if (state->ARB_uniform_buffer_object_warn) { + _mesa_glsl_warning(locp, state, + "#version 140 / GL_ARB_uniform_buffer_object " + "required for defining uniform blocks"); + } + } else { + if (state->es_shader || state->language_version < 150) { + _mesa_glsl_error(locp, state, + "#version 150 required for using " + "interface blocks"); + } + } + + /* From the GLSL 1.50.11 spec, section 4.3.7 ("Interface Blocks"): + * "It is illegal to have an input block in a vertex shader + * or an output block in a fragment shader" + */ + if ((state->stage == MESA_SHADER_VERTEX) && q.flags.q.in) { + _mesa_glsl_error(locp, state, + "`in' interface block is not allowed for " + "a vertex shader"); + } else if ((state->stage == MESA_SHADER_FRAGMENT) && q.flags.q.out) { + _mesa_glsl_error(locp, state, + "`out' interface block is not allowed for " + "a fragment 
shader"); + } + + /* Since block arrays require names, and both features are added in + * the same language versions, we don't have to explicitly + * version-check both things. + */ + if (block->instance_name != NULL) { + state->check_version(150, 300, locp, "interface blocks with " + "an instance name are not allowed"); + } + + uint64_t interface_type_mask; + struct ast_type_qualifier temp_type_qualifier; + + /* Get a bitmask containing only the in/out/uniform/buffer + * flags, allowing us to ignore other irrelevant flags like + * interpolation qualifiers. + */ + temp_type_qualifier.flags.i = 0; + temp_type_qualifier.flags.q.uniform = true; + temp_type_qualifier.flags.q.in = true; + temp_type_qualifier.flags.q.out = true; + temp_type_qualifier.flags.q.buffer = true; + interface_type_mask = temp_type_qualifier.flags.i; + + /* Get the block's interface qualifier. The interface_qualifier + * production rule guarantees that only one bit will be set (and + * it will be in/out/uniform). + */ + uint64_t block_interface_qualifier = q.flags.i; + + block->layout.flags.i |= block_interface_qualifier; + + if (state->stage == MESA_SHADER_GEOMETRY && + state->has_explicit_attrib_stream()) { + /* Assign global layout's stream value. */ + block->layout.flags.q.stream = 1; + block->layout.flags.q.explicit_stream = 0; + block->layout.stream = state->out_qualifier->stream; + } + + foreach_list_typed (ast_declarator_list, member, link, &block->declarations) { + ast_type_qualifier& qualifier = member->type->qualifier; + if ((qualifier.flags.i & interface_type_mask) == 0) { + /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks): + * "If no optional qualifier is used in a member declaration, the + * qualifier of the variable is just in, out, or uniform as declared + * by interface-qualifier." 
+ */ + qualifier.flags.i |= block_interface_qualifier; + } else if ((qualifier.flags.i & interface_type_mask) != + block_interface_qualifier) { + /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks): + * "If optional qualifiers are used, they can include interpolation + * and storage qualifiers and they must declare an input, output, + * or uniform variable consistent with the interface qualifier of + * the block." + */ + _mesa_glsl_error(locp, state, + "uniform/in/out qualifier on " + "interface block member does not match " + "the interface block"); + } + + /* From GLSL ES 3.0, chapter 4.3.7 "Interface Blocks": + * + * "GLSL ES 3.0 does not support interface blocks for shader inputs or + * outputs." + * + * And from GLSL ES 3.0, chapter 4.6.1 "The invariant qualifier":. + * + * "Only variables output from a shader can be candidates for + * invariance." + * + * From GLSL 4.40 and GLSL 1.50, section "Interface Blocks": + * + * "If optional qualifiers are used, they can include interpolation + * qualifiers, auxiliary storage qualifiers, and storage qualifiers + * and they must declare an input, output, or uniform member + * consistent with the interface qualifier of the block" + */ + if (qualifier.flags.q.invariant) + _mesa_glsl_error(locp, state, + "invariant qualifiers cannot be used " + "with interface blocks members"); + } +} + +void +_mesa_ast_type_qualifier_print(const struct ast_type_qualifier *q) +{ + if (q->flags.q.subroutine) + printf("subroutine "); + + if (q->flags.q.subroutine_def) { + printf("subroutine ("); + q->subroutine_list->print(); + printf(")"); + } + + if (q->flags.q.constant) + printf("const "); + + if (q->flags.q.invariant) + printf("invariant "); + + if (q->flags.q.attribute) + printf("attribute "); + + if (q->flags.q.varying) + printf("varying "); + + if (q->flags.q.in && q->flags.q.out) + printf("inout "); + else { + if (q->flags.q.in) + printf("in "); + + if (q->flags.q.out) + printf("out "); + } + + if (q->flags.q.centroid) + 
printf("centroid "); + if (q->flags.q.sample) + printf("sample "); + if (q->flags.q.patch) + printf("patch "); + if (q->flags.q.uniform) + printf("uniform "); + if (q->flags.q.buffer) + printf("buffer "); + if (q->flags.q.smooth) + printf("smooth "); + if (q->flags.q.flat) + printf("flat "); + if (q->flags.q.noperspective) + printf("noperspective "); +} + + +void +ast_node::print(void) const +{ + printf("unhandled node "); +} + + +ast_node::ast_node(void) +{ + this->location.source = 0; + this->location.first_line = 0; + this->location.first_column = 0; + this->location.last_line = 0; + this->location.last_column = 0; +} + + +static void +ast_opt_array_dimensions_print(const ast_array_specifier *array_specifier) +{ + if (array_specifier) + array_specifier->print(); +} + + +void +ast_compound_statement::print(void) const +{ + printf("{\n"); + + foreach_list_typed(ast_node, ast, link, &this->statements) { + ast->print(); + } + + printf("}\n"); +} + + +ast_compound_statement::ast_compound_statement(int new_scope, + ast_node *statements) +{ + this->new_scope = new_scope; + + if (statements != NULL) { + this->statements.push_degenerate_list_at_head(&statements->link); + } +} + + +void +ast_expression::print(void) const +{ + switch (oper) { + case ast_assign: + case ast_mul_assign: + case ast_div_assign: + case ast_mod_assign: + case ast_add_assign: + case ast_sub_assign: + case ast_ls_assign: + case ast_rs_assign: + case ast_and_assign: + case ast_xor_assign: + case ast_or_assign: + subexpressions[0]->print(); + printf("%s ", operator_string(oper)); + subexpressions[1]->print(); + break; + + case ast_field_selection: + subexpressions[0]->print(); + printf(". 
%s ", primary_expression.identifier); + break; + + case ast_plus: + case ast_neg: + case ast_bit_not: + case ast_logic_not: + case ast_pre_inc: + case ast_pre_dec: + printf("%s ", operator_string(oper)); + subexpressions[0]->print(); + break; + + case ast_post_inc: + case ast_post_dec: + subexpressions[0]->print(); + printf("%s ", operator_string(oper)); + break; + + case ast_conditional: + subexpressions[0]->print(); + printf("? "); + subexpressions[1]->print(); + printf(": "); + subexpressions[2]->print(); + break; + + case ast_array_index: + subexpressions[0]->print(); + printf("[ "); + subexpressions[1]->print(); + printf("] "); + break; + + case ast_function_call: { + subexpressions[0]->print(); + printf("( "); + + foreach_list_typed (ast_node, ast, link, &this->expressions) { + if (&ast->link != this->expressions.get_head()) + printf(", "); + + ast->print(); + } + + printf(") "); + break; + } + + case ast_identifier: + printf("%s ", primary_expression.identifier); + break; + + case ast_int_constant: + printf("%d ", primary_expression.int_constant); + break; + + case ast_uint_constant: + printf("%u ", primary_expression.uint_constant); + break; + + case ast_float_constant: + printf("%f ", primary_expression.float_constant); + break; + + case ast_double_constant: + printf("%f ", primary_expression.double_constant); + break; + + case ast_bool_constant: + printf("%s ", + primary_expression.bool_constant + ? 
"true" : "false"); + break; + + case ast_sequence: { + printf("( "); + foreach_list_typed (ast_node, ast, link, & this->expressions) { + if (&ast->link != this->expressions.get_head()) + printf(", "); + + ast->print(); + } + printf(") "); + break; + } + + case ast_aggregate: { + printf("{ "); + foreach_list_typed (ast_node, ast, link, & this->expressions) { + if (&ast->link != this->expressions.get_head()) + printf(", "); + + ast->print(); + } + printf("} "); + break; + } + + default: + assert(0); + break; + } +} + +ast_expression::ast_expression(int oper, + ast_expression *ex0, + ast_expression *ex1, + ast_expression *ex2) : + primary_expression() +{ + this->oper = ast_operators(oper); + this->subexpressions[0] = ex0; + this->subexpressions[1] = ex1; + this->subexpressions[2] = ex2; + this->non_lvalue_description = NULL; +} + + +void +ast_expression_statement::print(void) const +{ + if (expression) + expression->print(); + + printf("; "); +} + + +ast_expression_statement::ast_expression_statement(ast_expression *ex) : + expression(ex) +{ + /* empty */ +} + + +void +ast_function::print(void) const +{ + return_type->print(); + printf(" %s (", identifier); + + foreach_list_typed(ast_node, ast, link, & this->parameters) { + ast->print(); + } + + printf(")"); +} + + +ast_function::ast_function(void) + : return_type(NULL), identifier(NULL), is_definition(false), + signature(NULL) +{ + /* empty */ +} + + +void +ast_fully_specified_type::print(void) const +{ + _mesa_ast_type_qualifier_print(& qualifier); + specifier->print(); +} + + +void +ast_parameter_declarator::print(void) const +{ + type->print(); + if (identifier) + printf("%s ", identifier); + ast_opt_array_dimensions_print(array_specifier); +} + + +void +ast_function_definition::print(void) const +{ + prototype->print(); + body->print(); +} + + +void +ast_declaration::print(void) const +{ + printf("%s ", identifier); + ast_opt_array_dimensions_print(array_specifier); + + if (initializer) { + printf("= "); + 
initializer->print(); + } +} + + +ast_declaration::ast_declaration(const char *identifier, + ast_array_specifier *array_specifier, + ast_expression *initializer) +{ + this->identifier = identifier; + this->array_specifier = array_specifier; + this->initializer = initializer; +} + + +void +ast_declarator_list::print(void) const +{ + assert(type || invariant); + + if (type) + type->print(); + else if (invariant) + printf("invariant "); + else + printf("precise "); + + foreach_list_typed (ast_node, ast, link, & this->declarations) { + if (&ast->link != this->declarations.get_head()) + printf(", "); + + ast->print(); + } + + printf("; "); +} + + +ast_declarator_list::ast_declarator_list(ast_fully_specified_type *type) +{ + this->type = type; + this->invariant = false; + this->precise = false; +} + +void +ast_jump_statement::print(void) const +{ + switch (mode) { + case ast_continue: + printf("continue; "); + break; + case ast_break: + printf("break; "); + break; + case ast_return: + printf("return "); + if (opt_return_value) + opt_return_value->print(); + + printf("; "); + break; + case ast_discard: + printf("discard; "); + break; + } +} + + +ast_jump_statement::ast_jump_statement(int mode, ast_expression *return_value) + : opt_return_value(NULL) +{ + this->mode = ast_jump_modes(mode); + + if (mode == ast_return) + opt_return_value = return_value; +} + + +void +ast_selection_statement::print(void) const +{ + printf("if ( "); + condition->print(); + printf(") "); + + then_statement->print(); + + if (else_statement) { + printf("else "); + else_statement->print(); + } +} + + +ast_selection_statement::ast_selection_statement(ast_expression *condition, + ast_node *then_statement, + ast_node *else_statement) +{ + this->condition = condition; + this->then_statement = then_statement; + this->else_statement = else_statement; +} + + +void +ast_switch_statement::print(void) const +{ + printf("switch ( "); + test_expression->print(); + printf(") "); + + body->print(); +} + + 
+ast_switch_statement::ast_switch_statement(ast_expression *test_expression, + ast_node *body) +{ + this->test_expression = test_expression; + this->body = body; +} + + +void +ast_switch_body::print(void) const +{ + printf("{\n"); + if (stmts != NULL) { + stmts->print(); + } + printf("}\n"); +} + + +ast_switch_body::ast_switch_body(ast_case_statement_list *stmts) +{ + this->stmts = stmts; +} + + +void ast_case_label::print(void) const +{ + if (test_value != NULL) { + printf("case "); + test_value->print(); + printf(": "); + } else { + printf("default: "); + } +} + + +ast_case_label::ast_case_label(ast_expression *test_value) +{ + this->test_value = test_value; +} + + +void ast_case_label_list::print(void) const +{ + foreach_list_typed(ast_node, ast, link, & this->labels) { + ast->print(); + } + printf("\n"); +} + + +ast_case_label_list::ast_case_label_list(void) +{ +} + + +void ast_case_statement::print(void) const +{ + labels->print(); + foreach_list_typed(ast_node, ast, link, & this->stmts) { + ast->print(); + printf("\n"); + } +} + + +ast_case_statement::ast_case_statement(ast_case_label_list *labels) +{ + this->labels = labels; +} + + +void ast_case_statement_list::print(void) const +{ + foreach_list_typed(ast_node, ast, link, & this->cases) { + ast->print(); + } +} + + +ast_case_statement_list::ast_case_statement_list(void) +{ +} + + +void +ast_iteration_statement::print(void) const +{ + switch (mode) { + case ast_for: + printf("for( "); + if (init_statement) + init_statement->print(); + printf("; "); + + if (condition) + condition->print(); + printf("; "); + + if (rest_expression) + rest_expression->print(); + printf(") "); + + body->print(); + break; + + case ast_while: + printf("while ( "); + if (condition) + condition->print(); + printf(") "); + body->print(); + break; + + case ast_do_while: + printf("do "); + body->print(); + printf("while ( "); + if (condition) + condition->print(); + printf("); "); + break; + } +} + + 
+ast_iteration_statement::ast_iteration_statement(int mode, + ast_node *init, + ast_node *condition, + ast_expression *rest_expression, + ast_node *body) +{ + this->mode = ast_iteration_modes(mode); + this->init_statement = init; + this->condition = condition; + this->rest_expression = rest_expression; + this->body = body; +} + + +void +ast_struct_specifier::print(void) const +{ + printf("struct %s { ", name); + foreach_list_typed(ast_node, ast, link, &this->declarations) { + ast->print(); + } + printf("} "); +} + + +ast_struct_specifier::ast_struct_specifier(const char *identifier, + ast_declarator_list *declarator_list) +{ + if (identifier == NULL) { + static mtx_t mutex = _MTX_INITIALIZER_NP; + static unsigned anon_count = 1; + unsigned count; + + mtx_lock(&mutex); + count = anon_count++; + mtx_unlock(&mutex); + + identifier = ralloc_asprintf(this, "#anon_struct_%04x", count); + } + name = identifier; + this->declarations.push_degenerate_list_at_head(&declarator_list->link); + is_declaration = true; +} + +void ast_subroutine_list::print(void) const +{ + foreach_list_typed (ast_node, ast, link, & this->declarations) { + if (&ast->link != this->declarations.get_head()) + printf(", "); + ast->print(); + } +} + +static void +set_shader_inout_layout(struct gl_shader *shader, + struct _mesa_glsl_parse_state *state) +{ + /* Should have been prevented by the parser. */ + if (shader->Stage == MESA_SHADER_TESS_CTRL) { + assert(!state->in_qualifier->flags.i); + } else if (shader->Stage == MESA_SHADER_TESS_EVAL) { + assert(!state->out_qualifier->flags.i); + } else if (shader->Stage != MESA_SHADER_GEOMETRY) { + assert(!state->in_qualifier->flags.i); + assert(!state->out_qualifier->flags.i); + } + + if (shader->Stage != MESA_SHADER_COMPUTE) { + /* Should have been prevented by the parser. */ + assert(!state->cs_input_local_size_specified); + } + + if (shader->Stage != MESA_SHADER_FRAGMENT) { + /* Should have been prevented by the parser. 
*/ + assert(!state->fs_uses_gl_fragcoord); + assert(!state->fs_redeclares_gl_fragcoord); + assert(!state->fs_pixel_center_integer); + assert(!state->fs_origin_upper_left); + assert(!state->fs_early_fragment_tests); + } + + switch (shader->Stage) { + case MESA_SHADER_TESS_CTRL: + shader->TessCtrl.VerticesOut = 0; + if (state->tcs_output_vertices_specified) { + unsigned vertices; + if (state->out_qualifier->vertices-> + process_qualifier_constant(state, "vertices", &vertices, + false)) { + + YYLTYPE loc = state->out_qualifier->vertices->get_location(); + if (vertices > state->Const.MaxPatchVertices) { + _mesa_glsl_error(&loc, state, "vertices (%d) exceeds " + "GL_MAX_PATCH_VERTICES", vertices); + } + shader->TessCtrl.VerticesOut = vertices; + } + } + break; + case MESA_SHADER_TESS_EVAL: + shader->TessEval.PrimitiveMode = PRIM_UNKNOWN; + if (state->in_qualifier->flags.q.prim_type) + shader->TessEval.PrimitiveMode = state->in_qualifier->prim_type; + + shader->TessEval.Spacing = 0; + if (state->in_qualifier->flags.q.vertex_spacing) + shader->TessEval.Spacing = state->in_qualifier->vertex_spacing; + + shader->TessEval.VertexOrder = 0; + if (state->in_qualifier->flags.q.ordering) + shader->TessEval.VertexOrder = state->in_qualifier->ordering; + + shader->TessEval.PointMode = -1; + if (state->in_qualifier->flags.q.point_mode) + shader->TessEval.PointMode = state->in_qualifier->point_mode; + break; + case MESA_SHADER_GEOMETRY: + shader->Geom.VerticesOut = 0; + if (state->out_qualifier->flags.q.max_vertices) { + unsigned qual_max_vertices; + if (state->out_qualifier->max_vertices-> + process_qualifier_constant(state, "max_vertices", + &qual_max_vertices, true)) { + shader->Geom.VerticesOut = qual_max_vertices; + } + } + + if (state->gs_input_prim_type_specified) { + shader->Geom.InputType = state->in_qualifier->prim_type; + } else { + shader->Geom.InputType = PRIM_UNKNOWN; + } + + if (state->out_qualifier->flags.q.prim_type) { + shader->Geom.OutputType = 
state->out_qualifier->prim_type; + } else { + shader->Geom.OutputType = PRIM_UNKNOWN; + } + + shader->Geom.Invocations = 0; + if (state->in_qualifier->flags.q.invocations) { + unsigned invocations; + if (state->in_qualifier->invocations-> + process_qualifier_constant(state, "invocations", + &invocations, false)) { + + YYLTYPE loc = state->in_qualifier->invocations->get_location(); + if (invocations > MAX_GEOMETRY_SHADER_INVOCATIONS) { + _mesa_glsl_error(&loc, state, + "invocations (%d) exceeds " + "GL_MAX_GEOMETRY_SHADER_INVOCATIONS", + invocations); + } + shader->Geom.Invocations = invocations; + } + } + break; + + case MESA_SHADER_COMPUTE: + if (state->cs_input_local_size_specified) { + for (int i = 0; i < 3; i++) + shader->Comp.LocalSize[i] = state->cs_input_local_size[i]; + } else { + for (int i = 0; i < 3; i++) + shader->Comp.LocalSize[i] = 0; + } + break; + + case MESA_SHADER_FRAGMENT: + shader->redeclares_gl_fragcoord = state->fs_redeclares_gl_fragcoord; + shader->uses_gl_fragcoord = state->fs_uses_gl_fragcoord; + shader->pixel_center_integer = state->fs_pixel_center_integer; + shader->origin_upper_left = state->fs_origin_upper_left; + shader->ARB_fragment_coord_conventions_enable = + state->ARB_fragment_coord_conventions_enable; + shader->EarlyFragmentTests = state->fs_early_fragment_tests; + break; + + default: + /* Nothing to do. 
*/ + break; + } +} + +extern "C" { + +void +_mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader, + bool dump_ast, bool dump_hir) +{ + struct _mesa_glsl_parse_state *state = + new(shader) _mesa_glsl_parse_state(ctx, shader->Stage, shader); + const char *source = shader->Source; + + if (ctx->Const.GenerateTemporaryNames) + (void) p_atomic_cmpxchg(&ir_variable::temporaries_allocate_names, + false, true); + + state->error = glcpp_preprocess(state, &source, &state->info_log, + &ctx->Extensions, ctx); + + if (!state->error) { + _mesa_glsl_lexer_ctor(state, source); + _mesa_glsl_parse(state); + _mesa_glsl_lexer_dtor(state); + } + + if (dump_ast) { + foreach_list_typed(ast_node, ast, link, &state->translation_unit) { + ast->print(); + } + printf("\n\n"); + } + + ralloc_free(shader->ir); + shader->ir = new(shader) exec_list; + if (!state->error && !state->translation_unit.is_empty()) + _mesa_ast_to_hir(shader->ir, state); + + if (!state->error) { + validate_ir_tree(shader->ir); + + /* Print out the unoptimized IR. */ + if (dump_hir) { + _mesa_print_ir(stdout, shader->ir, state); + } + } + + + if (!state->error && !shader->ir->is_empty()) { + struct gl_shader_compiler_options *options = + &ctx->Const.ShaderCompilerOptions[shader->Stage]; + + lower_subroutine(shader->ir, state); + /* Do some optimization at compile time to reduce shader IR size + * and reduce later work if the same shader is linked multiple times + */ + while (do_common_optimization(shader->ir, false, false, options, + ctx->Const.NativeIntegers)) + ; + + validate_ir_tree(shader->ir); + + enum ir_variable_mode other; + switch (shader->Stage) { + case MESA_SHADER_VERTEX: + other = ir_var_shader_in; + break; + case MESA_SHADER_FRAGMENT: + other = ir_var_shader_out; + break; + default: + /* Something invalid to ensure optimize_dead_builtin_uniforms + * doesn't remove anything other than uniforms or constants. 
+ */ + other = ir_var_mode_count; + break; + } + + optimize_dead_builtin_variables(shader->ir, other); + + validate_ir_tree(shader->ir); + } + + if (shader->InfoLog) + ralloc_free(shader->InfoLog); + + if (!state->error) + set_shader_inout_layout(shader, state); + + shader->symbols = new(shader->ir) glsl_symbol_table; + shader->CompileStatus = !state->error; + shader->InfoLog = state->info_log; + shader->Version = state->language_version; + shader->IsES = state->es_shader; + shader->uses_builtin_functions = state->uses_builtin_functions; + + /* Retain any live IR, but trash the rest. */ + reparent_ir(shader->ir, shader->ir); + + /* Destroy the symbol table. Create a new symbol table that contains only + * the variables and functions that still exist in the IR. The symbol + * table will be used later during linking. + * + * There must NOT be any freed objects still referenced by the symbol + * table. That could cause the linker to dereference freed memory. + * + * We don't have to worry about types or interface-types here because those + * are fly-weights that are looked up by glsl_type. + */ + foreach_in_list (ir_instruction, ir, shader->ir) { + switch (ir->ir_type) { + case ir_type_function: + shader->symbols->add_function((ir_function *) ir); + break; + case ir_type_variable: { + ir_variable *const var = (ir_variable *) ir; + + if (var->data.mode != ir_var_temporary) + shader->symbols->add_variable(var); + break; + } + default: + break; + } + } + + _mesa_glsl_initialize_derived_variables(shader); + + delete state->symbols; + ralloc_free(state); +} + +} /* extern "C" */ +/** + * Do the set of common optimizations passes + * + * \param ir List of instructions to be optimized + * \param linked Is the shader linked? This enables + * optimizations passes that remove code at + * global scope and could cause linking to + * fail. + * \param uniform_locations_assigned Have locations already been assigned for + * uniforms? 
This prevents the declarations + * of unused uniforms from being removed. + * The setting of this flag only matters if + * \c linked is \c true. + * \param max_unroll_iterations Maximum number of loop iterations to be + * unrolled. Setting to 0 disables loop + * unrolling. + * \param options The driver's preferred shader options. + */ +bool +do_common_optimization(exec_list *ir, bool linked, + bool uniform_locations_assigned, + const struct gl_shader_compiler_options *options, + bool native_integers) +{ + GLboolean progress = GL_FALSE; + + progress = lower_instructions(ir, SUB_TO_ADD_NEG) || progress; + + if (linked) { + progress = do_function_inlining(ir) || progress; + progress = do_dead_functions(ir) || progress; + progress = do_structure_splitting(ir) || progress; + } + progress = do_if_simplification(ir) || progress; + progress = opt_flatten_nested_if_blocks(ir) || progress; + progress = opt_conditional_discard(ir) || progress; + progress = do_copy_propagation(ir) || progress; + progress = do_copy_propagation_elements(ir) || progress; + + if (options->OptimizeForAOS && !linked) + progress = opt_flip_matrices(ir) || progress; + + if (linked && options->OptimizeForAOS) { + progress = do_vectorize(ir) || progress; + } + + if (linked) + progress = do_dead_code(ir, uniform_locations_assigned) || progress; + else + progress = do_dead_code_unlinked(ir) || progress; + progress = do_dead_code_local(ir) || progress; + progress = do_tree_grafting(ir) || progress; + progress = do_constant_propagation(ir) || progress; + if (linked) + progress = do_constant_variable(ir) || progress; + else + progress = do_constant_variable_unlinked(ir) || progress; + progress = do_constant_folding(ir) || progress; + progress = do_minmax_prune(ir) || progress; + progress = do_rebalance_tree(ir) || progress; + progress = do_algebraic(ir, native_integers, options) || progress; + progress = do_lower_jumps(ir) || progress; + progress = do_vec_index_to_swizzle(ir) || progress; + progress = 
lower_vector_insert(ir, false) || progress; + progress = do_swizzle_swizzle(ir) || progress; + progress = do_noop_swizzle(ir) || progress; + + progress = optimize_split_arrays(ir, linked) || progress; + progress = optimize_redundant_jumps(ir) || progress; + + loop_state *ls = analyze_loop_variables(ir); + if (ls->loop_found) { + progress = set_loop_controls(ir, ls) || progress; + progress = unroll_loops(ir, ls, options) || progress; + } + delete ls; + + return progress; +} + +extern "C" { + +/** + * To be called at GL teardown time, this frees compiler datastructures. + * + * After calling this, any previously compiled shaders and shader + * programs would be invalid. So this should happen at approximately + * program exit. + */ +void +_mesa_destroy_shader_compiler(void) +{ + _mesa_destroy_shader_compiler_caches(); + + _mesa_glsl_release_types(); +} + +/** + * Releases compiler caches to trade off performance for memory. + * + * Intended to be used with glReleaseShaderCompiler(). + */ +void +_mesa_destroy_shader_compiler_caches(void) +{ + _mesa_glsl_release_builtin_functions(); +} + +} diff --git a/src/compiler/glsl/glsl_parser_extras.h b/src/compiler/glsl/glsl_parser_extras.h new file mode 100644 index 0000000..3f88e01 --- /dev/null +++ b/src/compiler/glsl/glsl_parser_extras.h @@ -0,0 +1,752 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#pragma once +#ifndef GLSL_PARSER_EXTRAS_H +#define GLSL_PARSER_EXTRAS_H + +/* + * Most of the definitions here only apply to C++ + */ +#ifdef __cplusplus + + +#include <stdlib.h> +#include "glsl_symbol_table.h" + +struct gl_context; + +struct glsl_switch_state { + /** Temporary variables needed for switch statement. */ + ir_variable *test_var; + ir_variable *is_fallthru_var; + class ast_switch_statement *switch_nesting_ast; + + /** Used to detect if 'continue' was called inside a switch. */ + ir_variable *continue_inside; + + /** Used to set condition if 'default' label should be chosen. */ + ir_variable *run_default; + + /** Table of constant values already used in case labels */ + struct hash_table *labels_ht; + class ast_case_label *previous_default; + + bool is_switch_innermost; // if switch stmt is closest to break, ... 
+}; + +const char * +glsl_compute_version_string(void *mem_ctx, bool is_es, unsigned version); + +typedef struct YYLTYPE { + int first_line; + int first_column; + int last_line; + int last_column; + unsigned source; +} YYLTYPE; +# define YYLTYPE_IS_DECLARED 1 +# define YYLTYPE_IS_TRIVIAL 1 + +extern void _mesa_glsl_error(YYLTYPE *locp, _mesa_glsl_parse_state *state, + const char *fmt, ...); + + +struct _mesa_glsl_parse_state { + _mesa_glsl_parse_state(struct gl_context *_ctx, gl_shader_stage stage, + void *mem_ctx); + + DECLARE_RALLOC_CXX_OPERATORS(_mesa_glsl_parse_state); + + /** + * Generate a string representing the GLSL version currently being compiled + * (useful for error messages). + */ + const char *get_version_string() + { + return glsl_compute_version_string(this, this->es_shader, + this->language_version); + } + + /** + * Determine whether the current GLSL version is sufficiently high to + * support a certain feature. + * + * \param required_glsl_version is the desktop GLSL version that is + * required to support the feature, or 0 if no version of desktop GLSL + * supports the feature. + * + * \param required_glsl_es_version is the GLSL ES version that is required + * to support the feature, or 0 if no version of GLSL ES supports the + * feature. + */ + bool is_version(unsigned required_glsl_version, + unsigned required_glsl_es_version) const + { + unsigned required_version = this->es_shader ? + required_glsl_es_version : required_glsl_version; + unsigned this_version = this->forced_language_version + ? this->forced_language_version : this->language_version; + return required_version != 0 + && this_version >= required_version; + } + + bool check_version(unsigned required_glsl_version, + unsigned required_glsl_es_version, + YYLTYPE *locp, const char *fmt, ...) PRINTFLIKE(5, 6); + + bool check_arrays_of_arrays_allowed(YYLTYPE *locp) + { + if (!(ARB_arrays_of_arrays_enable || is_version(430, 310))) { + const char *const requirement = this->es_shader + ? 
"GLSL ES 3.10" + : "GL_ARB_arrays_of_arrays or GLSL 4.30"; + _mesa_glsl_error(locp, this, + "%s required for defining arrays of arrays.", + requirement); + return false; + } + return true; + } + + bool check_precision_qualifiers_allowed(YYLTYPE *locp) + { + return check_version(130, 100, locp, + "precision qualifiers are forbidden"); + } + + bool check_bitwise_operations_allowed(YYLTYPE *locp) + { + return check_version(130, 300, locp, "bit-wise operations are forbidden"); + } + + bool check_explicit_attrib_stream_allowed(YYLTYPE *locp) + { + if (!this->has_explicit_attrib_stream()) { + const char *const requirement = "GL_ARB_gpu_shader5 extension or GLSL 4.00"; + + _mesa_glsl_error(locp, this, "explicit stream requires %s", + requirement); + return false; + } + + return true; + } + + bool check_explicit_attrib_location_allowed(YYLTYPE *locp, + const ir_variable *var) + { + if (!this->has_explicit_attrib_location()) { + const char *const requirement = this->es_shader + ? "GLSL ES 3.00" + : "GL_ARB_explicit_attrib_location extension or GLSL 3.30"; + + _mesa_glsl_error(locp, this, "%s explicit location requires %s", + mode_string(var), requirement); + return false; + } + + return true; + } + + bool check_separate_shader_objects_allowed(YYLTYPE *locp, + const ir_variable *var) + { + if (!this->has_separate_shader_objects()) { + const char *const requirement = this->es_shader + ? "GL_EXT_separate_shader_objects extension or GLSL ES 3.10" + : "GL_ARB_separate_shader_objects extension or GLSL 4.20"; + + _mesa_glsl_error(locp, this, "%s explicit location requires %s", + mode_string(var), requirement); + return false; + } + + return true; + } + + bool check_explicit_uniform_location_allowed(YYLTYPE *locp, + const ir_variable *) + { + if (!this->has_explicit_attrib_location() || + !this->has_explicit_uniform_location()) { + const char *const requirement = this->es_shader + ? 
"GLSL ES 3.10" + : "GL_ARB_explicit_uniform_location and either " + "GL_ARB_explicit_attrib_location or GLSL 3.30."; + + _mesa_glsl_error(locp, this, + "uniform explicit location requires %s", + requirement); + return false; + } + + return true; + } + + bool has_atomic_counters() const + { + return ARB_shader_atomic_counters_enable || is_version(420, 310); + } + + bool has_enhanced_layouts() const + { + return ARB_enhanced_layouts_enable || is_version(440, 0); + } + + bool has_explicit_attrib_stream() const + { + return ARB_gpu_shader5_enable || is_version(400, 0); + } + + bool has_explicit_attrib_location() const + { + return ARB_explicit_attrib_location_enable || is_version(330, 300); + } + + bool has_explicit_uniform_location() const + { + return ARB_explicit_uniform_location_enable || is_version(430, 310); + } + + bool has_uniform_buffer_objects() const + { + return ARB_uniform_buffer_object_enable || is_version(140, 300); + } + + bool has_shader_storage_buffer_objects() const + { + return ARB_shader_storage_buffer_object_enable || is_version(430, 310); + } + + bool has_separate_shader_objects() const + { + return ARB_separate_shader_objects_enable || is_version(410, 310) + || EXT_separate_shader_objects_enable; + } + + bool has_double() const + { + return ARB_gpu_shader_fp64_enable || is_version(400, 0); + } + + bool has_420pack() const + { + return ARB_shading_language_420pack_enable || is_version(420, 0); + } + + bool has_420pack_or_es31() const + { + return ARB_shading_language_420pack_enable || is_version(420, 310); + } + + bool has_compute_shader() const + { + return ARB_compute_shader_enable || is_version(430, 310); + } + + bool has_geometry_shader() const + { + return OES_geometry_shader_enable || is_version(150, 320); + } + + void process_version_directive(YYLTYPE *locp, int version, + const char *ident); + + struct gl_context *const ctx; + void *scanner; + exec_list translation_unit; + glsl_symbol_table *symbols; + + unsigned num_supported_versions; + 
struct { + unsigned ver; + bool es; + } supported_versions[15]; + + bool es_shader; + unsigned language_version; + unsigned forced_language_version; + gl_shader_stage stage; + + /** + * Number of nested struct_specifier levels + * + * Outside a struct_specifier, this is zero. + */ + unsigned struct_specifier_depth; + + /** + * Default uniform layout qualifiers tracked during parsing. + * Currently affects uniform blocks and uniform buffer variables in + * those blocks. + */ + struct ast_type_qualifier *default_uniform_qualifier; + + /** + * Default shader storage layout qualifiers tracked during parsing. + * Currently affects shader storage blocks and shader storage buffer + * variables in those blocks. + */ + struct ast_type_qualifier *default_shader_storage_qualifier; + + /** + * Variables to track different cases if a fragment shader redeclares + * built-in variable gl_FragCoord. + * + * Note: These values are computed at ast_to_hir time rather than at parse + * time. + */ + bool fs_redeclares_gl_fragcoord; + bool fs_origin_upper_left; + bool fs_pixel_center_integer; + bool fs_redeclares_gl_fragcoord_with_no_layout_qualifiers; + + /** + * True if a geometry shader input primitive type or tessellation control + * output vertices were specified using a layout directive. + * + * Note: these values are computed at ast_to_hir time rather than at parse + * time. + */ + bool gs_input_prim_type_specified; + bool tcs_output_vertices_specified; + + /** + * Input layout qualifiers from GLSL 1.50 (geometry shader controls), + * and GLSL 4.00 (tessellation evaluation shader) + */ + struct ast_type_qualifier *in_qualifier; + + /** + * True if a compute shader input local size was specified using a layout + * directive. + * + * Note: this value is computed at ast_to_hir time rather than at parse + * time. + */ + bool cs_input_local_size_specified; + + /** + * If cs_input_local_size_specified is true, the local size that was + * specified. Otherwise ignored. 
+ */ + unsigned cs_input_local_size[3]; + + /** + * Output layout qualifiers from GLSL 1.50 (geometry shader controls), + * and GLSL 4.00 (tessellation control shader). + */ + struct ast_type_qualifier *out_qualifier; + + /** + * Printable list of GLSL versions supported by the current context + * + * \note + * This string should probably be generated per-context instead of per + * invokation of the compiler. This should be changed when the method of + * tracking supported GLSL versions changes. + */ + const char *supported_version_string; + + /** + * Implementation defined limits that affect built-in variables, etc. + * + * \sa struct gl_constants (in mtypes.h) + */ + struct { + /* 1.10 */ + unsigned MaxLights; + unsigned MaxClipPlanes; + unsigned MaxTextureUnits; + unsigned MaxTextureCoords; + unsigned MaxVertexAttribs; + unsigned MaxVertexUniformComponents; + unsigned MaxVertexTextureImageUnits; + unsigned MaxCombinedTextureImageUnits; + unsigned MaxTextureImageUnits; + unsigned MaxFragmentUniformComponents; + + /* ARB_draw_buffers */ + unsigned MaxDrawBuffers; + + /* ARB_blend_func_extended */ + unsigned MaxDualSourceDrawBuffers; + + /* 3.00 ES */ + int MinProgramTexelOffset; + int MaxProgramTexelOffset; + + /* 1.50 */ + unsigned MaxVertexOutputComponents; + unsigned MaxGeometryInputComponents; + unsigned MaxGeometryOutputComponents; + unsigned MaxFragmentInputComponents; + unsigned MaxGeometryTextureImageUnits; + unsigned MaxGeometryOutputVertices; + unsigned MaxGeometryTotalOutputComponents; + unsigned MaxGeometryUniformComponents; + + /* ARB_shader_atomic_counters */ + unsigned MaxVertexAtomicCounters; + unsigned MaxTessControlAtomicCounters; + unsigned MaxTessEvaluationAtomicCounters; + unsigned MaxGeometryAtomicCounters; + unsigned MaxFragmentAtomicCounters; + unsigned MaxCombinedAtomicCounters; + unsigned MaxAtomicBufferBindings; + + /* These are also atomic counter related, but they weren't added to + * until atomic counters were added to core in GLSL 
4.20 and GLSL ES + * 3.10. + */ + unsigned MaxVertexAtomicCounterBuffers; + unsigned MaxTessControlAtomicCounterBuffers; + unsigned MaxTessEvaluationAtomicCounterBuffers; + unsigned MaxGeometryAtomicCounterBuffers; + unsigned MaxFragmentAtomicCounterBuffers; + unsigned MaxCombinedAtomicCounterBuffers; + unsigned MaxAtomicCounterBufferSize; + + /* ARB_compute_shader */ + unsigned MaxComputeWorkGroupCount[3]; + unsigned MaxComputeWorkGroupSize[3]; + + /* ARB_shader_image_load_store */ + unsigned MaxImageUnits; + unsigned MaxCombinedShaderOutputResources; + unsigned MaxImageSamples; + unsigned MaxVertexImageUniforms; + unsigned MaxTessControlImageUniforms; + unsigned MaxTessEvaluationImageUniforms; + unsigned MaxGeometryImageUniforms; + unsigned MaxFragmentImageUniforms; + unsigned MaxCombinedImageUniforms; + + /* ARB_viewport_array */ + unsigned MaxViewports; + + /* ARB_tessellation_shader */ + unsigned MaxPatchVertices; + unsigned MaxTessGenLevel; + unsigned MaxTessControlInputComponents; + unsigned MaxTessControlOutputComponents; + unsigned MaxTessControlTextureImageUnits; + unsigned MaxTessEvaluationInputComponents; + unsigned MaxTessEvaluationOutputComponents; + unsigned MaxTessEvaluationTextureImageUnits; + unsigned MaxTessPatchComponents; + unsigned MaxTessControlTotalOutputComponents; + unsigned MaxTessControlUniformComponents; + unsigned MaxTessEvaluationUniformComponents; + } Const; + + /** + * During AST to IR conversion, pointer to current IR function + * + * Will be \c NULL whenever the AST to IR conversion is not inside a + * function definition. + */ + class ir_function_signature *current_function; + + /** + * During AST to IR conversion, pointer to the toplevel IR + * instruction list being generated. + */ + exec_list *toplevel_ir; + + /** Have we found a return statement in this function? */ + bool found_return; + + /** Was there an error during compilation? */ + bool error; + + /** + * Are all shader inputs / outputs invariant? 
+ * + * This is set when the 'STDGL invariant(all)' pragma is used. + */ + bool all_invariant; + + /** Loop or switch statement containing the current instructions. */ + class ast_iteration_statement *loop_nesting_ast; + + struct glsl_switch_state switch_state; + + /** List of structures defined in user code. */ + const glsl_type **user_structures; + unsigned num_user_structures; + + char *info_log; + + /** + * \name Enable bits for GLSL extensions + */ + /*@{*/ + /* ARB extensions go here, sorted alphabetically. + */ + bool ARB_arrays_of_arrays_enable; + bool ARB_arrays_of_arrays_warn; + bool ARB_compute_shader_enable; + bool ARB_compute_shader_warn; + bool ARB_conservative_depth_enable; + bool ARB_conservative_depth_warn; + bool ARB_derivative_control_enable; + bool ARB_derivative_control_warn; + bool ARB_draw_buffers_enable; + bool ARB_draw_buffers_warn; + bool ARB_draw_instanced_enable; + bool ARB_draw_instanced_warn; + bool ARB_enhanced_layouts_enable; + bool ARB_enhanced_layouts_warn; + bool ARB_explicit_attrib_location_enable; + bool ARB_explicit_attrib_location_warn; + bool ARB_explicit_uniform_location_enable; + bool ARB_explicit_uniform_location_warn; + bool ARB_fragment_coord_conventions_enable; + bool ARB_fragment_coord_conventions_warn; + bool ARB_fragment_layer_viewport_enable; + bool ARB_fragment_layer_viewport_warn; + bool ARB_gpu_shader5_enable; + bool ARB_gpu_shader5_warn; + bool ARB_gpu_shader_fp64_enable; + bool ARB_gpu_shader_fp64_warn; + bool ARB_sample_shading_enable; + bool ARB_sample_shading_warn; + bool ARB_separate_shader_objects_enable; + bool ARB_separate_shader_objects_warn; + bool ARB_shader_atomic_counters_enable; + bool ARB_shader_atomic_counters_warn; + bool ARB_shader_bit_encoding_enable; + bool ARB_shader_bit_encoding_warn; + bool ARB_shader_clock_enable; + bool ARB_shader_clock_warn; + bool ARB_shader_draw_parameters_enable; + bool ARB_shader_draw_parameters_warn; + bool ARB_shader_image_load_store_enable; + bool 
ARB_shader_image_load_store_warn; + bool ARB_shader_image_size_enable; + bool ARB_shader_image_size_warn; + bool ARB_shader_precision_enable; + bool ARB_shader_precision_warn; + bool ARB_shader_stencil_export_enable; + bool ARB_shader_stencil_export_warn; + bool ARB_shader_storage_buffer_object_enable; + bool ARB_shader_storage_buffer_object_warn; + bool ARB_shader_subroutine_enable; + bool ARB_shader_subroutine_warn; + bool ARB_shader_texture_image_samples_enable; + bool ARB_shader_texture_image_samples_warn; + bool ARB_shader_texture_lod_enable; + bool ARB_shader_texture_lod_warn; + bool ARB_shading_language_420pack_enable; + bool ARB_shading_language_420pack_warn; + bool ARB_shading_language_packing_enable; + bool ARB_shading_language_packing_warn; + bool ARB_tessellation_shader_enable; + bool ARB_tessellation_shader_warn; + bool ARB_texture_cube_map_array_enable; + bool ARB_texture_cube_map_array_warn; + bool ARB_texture_gather_enable; + bool ARB_texture_gather_warn; + bool ARB_texture_multisample_enable; + bool ARB_texture_multisample_warn; + bool ARB_texture_query_levels_enable; + bool ARB_texture_query_levels_warn; + bool ARB_texture_query_lod_enable; + bool ARB_texture_query_lod_warn; + bool ARB_texture_rectangle_enable; + bool ARB_texture_rectangle_warn; + bool ARB_uniform_buffer_object_enable; + bool ARB_uniform_buffer_object_warn; + bool ARB_vertex_attrib_64bit_enable; + bool ARB_vertex_attrib_64bit_warn; + bool ARB_viewport_array_enable; + bool ARB_viewport_array_warn; + + /* KHR extensions go here, sorted alphabetically. + */ + + /* OES extensions go here, sorted alphabetically. 
+ */ + bool OES_EGL_image_external_enable; + bool OES_EGL_image_external_warn; + bool OES_geometry_shader_enable; + bool OES_geometry_shader_warn; + bool OES_standard_derivatives_enable; + bool OES_standard_derivatives_warn; + bool OES_texture_3D_enable; + bool OES_texture_3D_warn; + bool OES_texture_storage_multisample_2d_array_enable; + bool OES_texture_storage_multisample_2d_array_warn; + + /* All other extensions go here, sorted alphabetically. + */ + bool AMD_conservative_depth_enable; + bool AMD_conservative_depth_warn; + bool AMD_shader_stencil_export_enable; + bool AMD_shader_stencil_export_warn; + bool AMD_shader_trinary_minmax_enable; + bool AMD_shader_trinary_minmax_warn; + bool AMD_vertex_shader_layer_enable; + bool AMD_vertex_shader_layer_warn; + bool AMD_vertex_shader_viewport_index_enable; + bool AMD_vertex_shader_viewport_index_warn; + bool EXT_blend_func_extended_enable; + bool EXT_blend_func_extended_warn; + bool EXT_draw_buffers_enable; + bool EXT_draw_buffers_warn; + bool EXT_separate_shader_objects_enable; + bool EXT_separate_shader_objects_warn; + bool EXT_shader_integer_mix_enable; + bool EXT_shader_integer_mix_warn; + bool EXT_shader_samples_identical_enable; + bool EXT_shader_samples_identical_warn; + bool EXT_texture_array_enable; + bool EXT_texture_array_warn; + /*@}*/ + + /** Extensions supported by the OpenGL implementation. */ + const struct gl_extensions *extensions; + + bool uses_builtin_functions; + bool fs_uses_gl_fragcoord; + + /** + * For geometry shaders, size of the most recently seen input declaration + * that was a sized array, or 0 if no sized input array declarations have + * been seen. + * + * Unused for other shader types. + */ + unsigned gs_input_size; + + bool fs_early_fragment_tests; + + /** + * For tessellation control shaders, size of the most recently seen output + * declaration that was a sized array, or 0 if no sized output array + * declarations have been seen. + * + * Unused for other shader types. 
+ */ + unsigned tcs_output_size; + + /** Atomic counter offsets by binding */ + unsigned atomic_counter_offsets[MAX_COMBINED_ATOMIC_BUFFERS]; + + bool allow_extension_directive_midshader; + + /** + * Known subroutine type declarations. + */ + int num_subroutine_types; + ir_function **subroutine_types; + + /** + * Functions that are associated with + * subroutine types. + */ + int num_subroutines; + ir_function **subroutines; + + /** + * field selection temporary parser storage - + * did the parser just parse a dot. + */ + bool is_field; +}; + +# define YYLLOC_DEFAULT(Current, Rhs, N) \ +do { \ + if (N) \ + { \ + (Current).first_line = YYRHSLOC(Rhs, 1).first_line; \ + (Current).first_column = YYRHSLOC(Rhs, 1).first_column; \ + (Current).last_line = YYRHSLOC(Rhs, N).last_line; \ + (Current).last_column = YYRHSLOC(Rhs, N).last_column; \ + } \ + else \ + { \ + (Current).first_line = (Current).last_line = \ + YYRHSLOC(Rhs, 0).last_line; \ + (Current).first_column = (Current).last_column = \ + YYRHSLOC(Rhs, 0).last_column; \ + } \ + (Current).source = 0; \ +} while (0) + +/** + * Emit a warning to the shader log + * + * \sa _mesa_glsl_error + */ +extern void _mesa_glsl_warning(const YYLTYPE *locp, + _mesa_glsl_parse_state *state, + const char *fmt, ...); + +extern void _mesa_glsl_lexer_ctor(struct _mesa_glsl_parse_state *state, + const char *string); + +extern void _mesa_glsl_lexer_dtor(struct _mesa_glsl_parse_state *state); + +union YYSTYPE; +extern int _mesa_glsl_lexer_lex(union YYSTYPE *yylval, YYLTYPE *yylloc, + void *scanner); + +extern int _mesa_glsl_parse(struct _mesa_glsl_parse_state *); + +/** + * Process elements of the #extension directive + * + * \return + * If \c name and \c behavior are valid, \c true is returned. Otherwise + * \c false is returned. 
+ */ +extern bool _mesa_glsl_process_extension(const char *name, YYLTYPE *name_locp, + const char *behavior, + YYLTYPE *behavior_locp, + _mesa_glsl_parse_state *state); + +#endif /* __cplusplus */ + + +/* + * These definitions apply to C and C++ + */ +#ifdef __cplusplus +extern "C" { +#endif + +extern int glcpp_preprocess(void *ctx, const char **shader, char **info_log, + const struct gl_extensions *extensions, struct gl_context *gl_ctx); + +extern void _mesa_destroy_shader_compiler(void); +extern void _mesa_destroy_shader_compiler_caches(void); + +#ifdef __cplusplus +} +#endif + + +#endif /* GLSL_PARSER_EXTRAS_H */ diff --git a/src/compiler/glsl/glsl_symbol_table.cpp b/src/compiler/glsl/glsl_symbol_table.cpp new file mode 100644 index 0000000..6c682ac --- /dev/null +++ b/src/compiler/glsl/glsl_symbol_table.cpp @@ -0,0 +1,280 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "glsl_symbol_table.h" +#include "ast.h" + +class symbol_table_entry { +public: + DECLARE_RALLOC_CXX_OPERATORS(symbol_table_entry); + + bool add_interface(const glsl_type *i, enum ir_variable_mode mode) + { + const glsl_type **dest; + + switch (mode) { + case ir_var_uniform: + dest = &ibu; + break; + case ir_var_shader_storage: + dest = &iss; + break; + case ir_var_shader_in: + dest = &ibi; + break; + case ir_var_shader_out: + dest = &ibo; + break; + default: + assert(!"Unsupported interface variable mode!"); + return false; + } + + if (*dest != NULL) { + return false; + } else { + *dest = i; + return true; + } + } + + const glsl_type *get_interface(enum ir_variable_mode mode) + { + switch (mode) { + case ir_var_uniform: + return ibu; + case ir_var_shader_storage: + return iss; + case ir_var_shader_in: + return ibi; + case ir_var_shader_out: + return ibo; + default: + assert(!"Unsupported interface variable mode!"); + return NULL; + } + } + + symbol_table_entry(ir_variable *v) : + v(v), f(0), t(0), ibu(0), iss(0), ibi(0), ibo(0), a(0) {} + symbol_table_entry(ir_function *f) : + v(0), f(f), t(0), ibu(0), iss(0), ibi(0), ibo(0), a(0) {} + symbol_table_entry(const glsl_type *t) : + v(0), f(0), t(t), ibu(0), iss(0), ibi(0), ibo(0), a(0) {} + symbol_table_entry(const glsl_type *t, enum ir_variable_mode mode) : + v(0), f(0), t(0), ibu(0), iss(0), ibi(0), ibo(0), a(0) + { + assert(t->is_interface()); + add_interface(t, mode); + } + symbol_table_entry(const class ast_type_specifier *a): + v(0), f(0), t(0), ibu(0), iss(0), ibi(0), ibo(0), a(a) {} + + ir_variable *v; + ir_function *f; + const glsl_type *t; + const glsl_type *ibu; + const glsl_type *iss; + const glsl_type *ibi; + const 
glsl_type *ibo; + const class ast_type_specifier *a; +}; + +glsl_symbol_table::glsl_symbol_table() +{ + this->separate_function_namespace = false; + this->table = _mesa_symbol_table_ctor(); + this->mem_ctx = ralloc_context(NULL); +} + +glsl_symbol_table::~glsl_symbol_table() +{ + _mesa_symbol_table_dtor(table); + ralloc_free(mem_ctx); +} + +void glsl_symbol_table::push_scope() +{ + _mesa_symbol_table_push_scope(table); +} + +void glsl_symbol_table::pop_scope() +{ + _mesa_symbol_table_pop_scope(table); +} + +bool glsl_symbol_table::name_declared_this_scope(const char *name) +{ + return _mesa_symbol_table_symbol_scope(table, -1, name) == 0; +} + +bool glsl_symbol_table::add_variable(ir_variable *v) +{ + assert(v->data.mode != ir_var_temporary); + + if (this->separate_function_namespace) { + /* In 1.10, functions and variables have separate namespaces. */ + symbol_table_entry *existing = get_entry(v->name); + if (name_declared_this_scope(v->name)) { + /* If there's already an existing function (not a constructor!) in + * the current scope, just update the existing entry to include 'v'. + */ + if (existing->v == NULL && existing->t == NULL) { + existing->v = v; + return true; + } + } else { + /* If not declared at this scope, add a new entry. But if an existing + * entry includes a function, propagate that to this block - otherwise + * the new variable declaration would shadow the function. 
+ */ + symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(v); + if (existing != NULL) + entry->f = existing->f; + int added = _mesa_symbol_table_add_symbol(table, -1, v->name, entry); + assert(added == 0); + (void)added; + return true; + } + return false; + } + + /* 1.20+ rules: */ + symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(v); + return _mesa_symbol_table_add_symbol(table, -1, v->name, entry) == 0; +} + +bool glsl_symbol_table::add_type(const char *name, const glsl_type *t) +{ + symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(t); + return _mesa_symbol_table_add_symbol(table, -1, name, entry) == 0; +} + +bool glsl_symbol_table::add_interface(const char *name, const glsl_type *i, + enum ir_variable_mode mode) +{ + assert(i->is_interface()); + symbol_table_entry *entry = get_entry(name); + if (entry == NULL) { + symbol_table_entry *entry = + new(mem_ctx) symbol_table_entry(i, mode); + bool add_interface_symbol_result = + _mesa_symbol_table_add_symbol(table, -1, name, entry) == 0; + assert(add_interface_symbol_result); + return add_interface_symbol_result; + } else { + return entry->add_interface(i, mode); + } +} + +bool glsl_symbol_table::add_function(ir_function *f) +{ + if (this->separate_function_namespace && name_declared_this_scope(f->name)) { + /* In 1.10, functions and variables have separate namespaces. 
*/ + symbol_table_entry *existing = get_entry(f->name); + if ((existing->f == NULL) && (existing->t == NULL)) { + existing->f = f; + return true; + } + } + symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(f); + return _mesa_symbol_table_add_symbol(table, -1, f->name, entry) == 0; +} + +bool glsl_symbol_table::add_default_precision_qualifier(const char *type_name, + int precision) +{ + char *name = ralloc_asprintf(mem_ctx, "#default_precision_%s", type_name); + + ast_type_specifier *default_specifier = new(mem_ctx) ast_type_specifier(name); + default_specifier->default_precision = precision; + + symbol_table_entry *entry = + new(mem_ctx) symbol_table_entry(default_specifier); + + return _mesa_symbol_table_add_symbol(table, -1, name, entry) == 0; +} + +void glsl_symbol_table::add_global_function(ir_function *f) +{ + symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(f); + int added = _mesa_symbol_table_add_global_symbol(table, -1, f->name, entry); + assert(added == 0); + (void)added; +} + +ir_variable *glsl_symbol_table::get_variable(const char *name) +{ + symbol_table_entry *entry = get_entry(name); + return entry != NULL ? entry->v : NULL; +} + +const glsl_type *glsl_symbol_table::get_type(const char *name) +{ + symbol_table_entry *entry = get_entry(name); + return entry != NULL ? entry->t : NULL; +} + +const glsl_type *glsl_symbol_table::get_interface(const char *name, + enum ir_variable_mode mode) +{ + symbol_table_entry *entry = get_entry(name); + return entry != NULL ? entry->get_interface(mode) : NULL; +} + +ir_function *glsl_symbol_table::get_function(const char *name) +{ + symbol_table_entry *entry = get_entry(name); + return entry != NULL ? 
entry->f : NULL; +} + +int glsl_symbol_table::get_default_precision_qualifier(const char *type_name) +{ + char *name = ralloc_asprintf(mem_ctx, "#default_precision_%s", type_name); + symbol_table_entry *entry = get_entry(name); + if (!entry) + return ast_precision_none; + return entry->a->default_precision; +} + +symbol_table_entry *glsl_symbol_table::get_entry(const char *name) +{ + return (symbol_table_entry *) + _mesa_symbol_table_find_symbol(table, -1, name); +} + +void +glsl_symbol_table::disable_variable(const char *name) +{ + /* Ideally we would remove the variable's entry from the symbol table, but + * that would be difficult. Fortunately, since this is only used for + * built-in variables, it won't be possible for the shader to re-introduce + * the variable later, so all we really need to do is to make sure that + * further attempts to access it using get_variable() will return NULL. + */ + symbol_table_entry *entry = get_entry(name); + if (entry != NULL) { + entry->v = NULL; + } +} diff --git a/src/compiler/glsl/glsl_symbol_table.h b/src/compiler/glsl/glsl_symbol_table.h new file mode 100644 index 0000000..5d654e5 --- /dev/null +++ b/src/compiler/glsl/glsl_symbol_table.h @@ -0,0 +1,110 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#pragma once +#ifndef GLSL_SYMBOL_TABLE +#define GLSL_SYMBOL_TABLE + +#include <new> + +extern "C" { +#include "program/symbol_table.h" +} +#include "ir.h" + +class symbol_table_entry; +struct glsl_type; + +/** + * Facade class for _mesa_symbol_table + * + * Wraps the existing \c _mesa_symbol_table data structure to enforce some + * type safe and some symbol table invariants. + */ +struct glsl_symbol_table { + DECLARE_RALLOC_CXX_OPERATORS(glsl_symbol_table) + + glsl_symbol_table(); + ~glsl_symbol_table(); + + /* In 1.10, functions and variables have separate namespaces. */ + bool separate_function_namespace; + + void push_scope(); + void pop_scope(); + + /** + * Determine whether a name was declared at the current scope + */ + bool name_declared_this_scope(const char *name); + + /** + * \name Methods to add symbols to the table + * + * There is some temptation to rename all these functions to \c add_symbol + * or similar. However, this breaks symmetry with the getter functions and + * reduces the clarity of the intention of code that uses these methods. + */ + /*@{*/ + bool add_variable(ir_variable *v); + bool add_type(const char *name, const glsl_type *t); + bool add_function(ir_function *f); + bool add_interface(const char *name, const glsl_type *i, + enum ir_variable_mode mode); + bool add_default_precision_qualifier(const char *type_name, int precision); + /*@}*/ + + /** + * Add an function at global scope without checking for scoping conflicts. 
+ */ + void add_global_function(ir_function *f); + + /** + * \name Methods to get symbols from the table + */ + /*@{*/ + ir_variable *get_variable(const char *name); + const glsl_type *get_type(const char *name); + ir_function *get_function(const char *name); + const glsl_type *get_interface(const char *name, + enum ir_variable_mode mode); + int get_default_precision_qualifier(const char *type_name); + /*@}*/ + + /** + * Disable a previously-added variable so that it no longer appears to be + * in the symbol table. This is necessary when gl_PerVertex is redeclared, + * to ensure that previously-available built-in variables are no longer + * available. + */ + void disable_variable(const char *name); + +private: + symbol_table_entry *get_entry(const char *name); + + struct _mesa_symbol_table *table; + void *mem_ctx; +}; + +#endif /* GLSL_SYMBOL_TABLE */ diff --git a/src/compiler/glsl/hir_field_selection.cpp b/src/compiler/glsl/hir_field_selection.cpp new file mode 100644 index 0000000..eab08ad --- /dev/null +++ b/src/compiler/glsl/hir_field_selection.cpp @@ -0,0 +1,81 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "ir.h" +#include "glsl_parser_extras.h" +#include "ast.h" +#include "compiler/glsl_types.h" + +ir_rvalue * +_mesa_ast_field_selection_to_hir(const ast_expression *expr, + exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + ir_rvalue *result = NULL; + ir_rvalue *op; + + op = expr->subexpressions[0]->hir(instructions, state); + + /* There are two kinds of field selection. There is the selection of a + * specific field from a structure, and there is the selection of a + * swizzle / mask from a vector. Which is which is determined entirely + * by the base type of the thing to which the field selection operator is + * being applied. + */ + YYLTYPE loc = expr->get_location(); + if (op->type->is_error()) { + /* silently propagate the error */ + } else if (op->type->base_type == GLSL_TYPE_STRUCT + || op->type->base_type == GLSL_TYPE_INTERFACE) { + result = new(ctx) ir_dereference_record(op, + expr->primary_expression.identifier); + + if (result->type->is_error()) { + _mesa_glsl_error(& loc, state, "cannot access field `%s' of " + "structure", + expr->primary_expression.identifier); + } + } else if (op->type->is_vector() || + (state->has_420pack() && op->type->is_scalar())) { + ir_swizzle *swiz = ir_swizzle::create(op, + expr->primary_expression.identifier, + op->type->vector_elements); + if (swiz != NULL) { + result = swiz; + } else { + /* FINISHME: Logging of error messages should be moved into + * FINISHME: ir_swizzle::create. This allows the generation of more + * FINISHME: specific error messages. 
+ */ + _mesa_glsl_error(& loc, state, "invalid swizzle / mask `%s'", + expr->primary_expression.identifier); + } + } else { + _mesa_glsl_error(& loc, state, "cannot access field `%s' of " + "non-structure / non-vector", + expr->primary_expression.identifier); + } + + return result ? result : ir_rvalue::error_value(ctx); +} diff --git a/src/compiler/glsl/ir.cpp b/src/compiler/glsl/ir.cpp new file mode 100644 index 0000000..de9d314 --- /dev/null +++ b/src/compiler/glsl/ir.cpp @@ -0,0 +1,2039 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#include <string.h> +#include "main/core.h" /* for MAX2 */ +#include "ir.h" +#include "compiler/glsl_types.h" + +ir_rvalue::ir_rvalue(enum ir_node_type t) + : ir_instruction(t) +{ + this->type = glsl_type::error_type; +} + +bool ir_rvalue::is_zero() const +{ + return false; +} + +bool ir_rvalue::is_one() const +{ + return false; +} + +bool ir_rvalue::is_negative_one() const +{ + return false; +} + +/** + * Modify the swizzle make to move one component to another + * + * \param m IR swizzle to be modified + * \param from Component in the RHS that is to be swizzled + * \param to Desired swizzle location of \c from + */ +static void +update_rhs_swizzle(ir_swizzle_mask &m, unsigned from, unsigned to) +{ + switch (to) { + case 0: m.x = from; break; + case 1: m.y = from; break; + case 2: m.z = from; break; + case 3: m.w = from; break; + default: assert(!"Should not get here."); + } +} + +void +ir_assignment::set_lhs(ir_rvalue *lhs) +{ + void *mem_ctx = this; + bool swizzled = false; + + while (lhs != NULL) { + ir_swizzle *swiz = lhs->as_swizzle(); + + if (swiz == NULL) + break; + + unsigned write_mask = 0; + ir_swizzle_mask rhs_swiz = { 0, 0, 0, 0, 0, 0 }; + + for (unsigned i = 0; i < swiz->mask.num_components; i++) { + unsigned c = 0; + + switch (i) { + case 0: c = swiz->mask.x; break; + case 1: c = swiz->mask.y; break; + case 2: c = swiz->mask.z; break; + case 3: c = swiz->mask.w; break; + default: assert(!"Should not get here."); + } + + write_mask |= (((this->write_mask >> i) & 1) << c); + update_rhs_swizzle(rhs_swiz, i, c); + rhs_swiz.num_components = swiz->val->type->vector_elements; + } + + this->write_mask = write_mask; + lhs = swiz->val; + + this->rhs = new(mem_ctx) ir_swizzle(this->rhs, rhs_swiz); + swizzled = true; + } + + if (swizzled) { + /* Now, RHS channels line up with the LHS writemask. Collapse it + * to just the channels that will be written. 
+ */ + ir_swizzle_mask rhs_swiz = { 0, 0, 0, 0, 0, 0 }; + int rhs_chan = 0; + for (int i = 0; i < 4; i++) { + if (write_mask & (1 << i)) + update_rhs_swizzle(rhs_swiz, i, rhs_chan++); + } + rhs_swiz.num_components = rhs_chan; + this->rhs = new(mem_ctx) ir_swizzle(this->rhs, rhs_swiz); + } + + assert((lhs == NULL) || lhs->as_dereference()); + + this->lhs = (ir_dereference *) lhs; +} + +ir_variable * +ir_assignment::whole_variable_written() +{ + ir_variable *v = this->lhs->whole_variable_referenced(); + + if (v == NULL) + return NULL; + + if (v->type->is_scalar()) + return v; + + if (v->type->is_vector()) { + const unsigned mask = (1U << v->type->vector_elements) - 1; + + if (mask != this->write_mask) + return NULL; + } + + /* Either all the vector components are assigned or the variable is some + * composite type (and the whole thing is assigned. + */ + return v; +} + +ir_assignment::ir_assignment(ir_dereference *lhs, ir_rvalue *rhs, + ir_rvalue *condition, unsigned write_mask) + : ir_instruction(ir_type_assignment) +{ + this->condition = condition; + this->rhs = rhs; + this->lhs = lhs; + this->write_mask = write_mask; + + if (lhs->type->is_scalar() || lhs->type->is_vector()) { + int lhs_components = 0; + for (int i = 0; i < 4; i++) { + if (write_mask & (1 << i)) + lhs_components++; + } + + assert(lhs_components == this->rhs->type->vector_elements); + } +} + +ir_assignment::ir_assignment(ir_rvalue *lhs, ir_rvalue *rhs, + ir_rvalue *condition) + : ir_instruction(ir_type_assignment) +{ + this->condition = condition; + this->rhs = rhs; + + /* If the RHS is a vector type, assume that all components of the vector + * type are being written to the LHS. The write mask comes from the RHS + * because we can have a case where the LHS is a vec4 and the RHS is a + * vec3. In that case, the assignment is: + * + * (assign (...) 
(xyz) (var_ref lhs) (var_ref rhs)) + */ + if (rhs->type->is_vector()) + this->write_mask = (1U << rhs->type->vector_elements) - 1; + else if (rhs->type->is_scalar()) + this->write_mask = 1; + else + this->write_mask = 0; + + this->set_lhs(lhs); +} + +ir_expression::ir_expression(int op, const struct glsl_type *type, + ir_rvalue *op0, ir_rvalue *op1, + ir_rvalue *op2, ir_rvalue *op3) + : ir_rvalue(ir_type_expression) +{ + this->type = type; + this->operation = ir_expression_operation(op); + this->operands[0] = op0; + this->operands[1] = op1; + this->operands[2] = op2; + this->operands[3] = op3; +#ifndef NDEBUG + int num_operands = get_num_operands(this->operation); + for (int i = num_operands; i < 4; i++) { + assert(this->operands[i] == NULL); + } +#endif +} + +ir_expression::ir_expression(int op, ir_rvalue *op0) + : ir_rvalue(ir_type_expression) +{ + this->operation = ir_expression_operation(op); + this->operands[0] = op0; + this->operands[1] = NULL; + this->operands[2] = NULL; + this->operands[3] = NULL; + + assert(op <= ir_last_unop); + + switch (this->operation) { + case ir_unop_bit_not: + case ir_unop_logic_not: + case ir_unop_neg: + case ir_unop_abs: + case ir_unop_sign: + case ir_unop_rcp: + case ir_unop_rsq: + case ir_unop_sqrt: + case ir_unop_exp: + case ir_unop_log: + case ir_unop_exp2: + case ir_unop_log2: + case ir_unop_trunc: + case ir_unop_ceil: + case ir_unop_floor: + case ir_unop_fract: + case ir_unop_round_even: + case ir_unop_sin: + case ir_unop_cos: + case ir_unop_dFdx: + case ir_unop_dFdx_coarse: + case ir_unop_dFdx_fine: + case ir_unop_dFdy: + case ir_unop_dFdy_coarse: + case ir_unop_dFdy_fine: + case ir_unop_bitfield_reverse: + case ir_unop_interpolate_at_centroid: + case ir_unop_saturate: + this->type = op0->type; + break; + + case ir_unop_f2i: + case ir_unop_b2i: + case ir_unop_u2i: + case ir_unop_d2i: + case ir_unop_bitcast_f2i: + case ir_unop_bit_count: + case ir_unop_find_msb: + case ir_unop_find_lsb: + case ir_unop_subroutine_to_int: + 
this->type = glsl_type::get_instance(GLSL_TYPE_INT, + op0->type->vector_elements, 1); + break; + + case ir_unop_b2f: + case ir_unop_i2f: + case ir_unop_u2f: + case ir_unop_d2f: + case ir_unop_bitcast_i2f: + case ir_unop_bitcast_u2f: + this->type = glsl_type::get_instance(GLSL_TYPE_FLOAT, + op0->type->vector_elements, 1); + break; + + case ir_unop_f2b: + case ir_unop_i2b: + case ir_unop_d2b: + this->type = glsl_type::get_instance(GLSL_TYPE_BOOL, + op0->type->vector_elements, 1); + break; + + case ir_unop_f2d: + case ir_unop_i2d: + case ir_unop_u2d: + this->type = glsl_type::get_instance(GLSL_TYPE_DOUBLE, + op0->type->vector_elements, 1); + break; + + case ir_unop_i2u: + case ir_unop_f2u: + case ir_unop_d2u: + case ir_unop_bitcast_f2u: + this->type = glsl_type::get_instance(GLSL_TYPE_UINT, + op0->type->vector_elements, 1); + break; + + case ir_unop_noise: + case ir_unop_unpack_half_2x16_split_x: + case ir_unop_unpack_half_2x16_split_y: + this->type = glsl_type::float_type; + break; + + case ir_unop_unpack_double_2x32: + this->type = glsl_type::uvec2_type; + break; + + case ir_unop_pack_snorm_2x16: + case ir_unop_pack_snorm_4x8: + case ir_unop_pack_unorm_2x16: + case ir_unop_pack_unorm_4x8: + case ir_unop_pack_half_2x16: + this->type = glsl_type::uint_type; + break; + + case ir_unop_pack_double_2x32: + this->type = glsl_type::double_type; + break; + + case ir_unop_unpack_snorm_2x16: + case ir_unop_unpack_unorm_2x16: + case ir_unop_unpack_half_2x16: + this->type = glsl_type::vec2_type; + break; + + case ir_unop_unpack_snorm_4x8: + case ir_unop_unpack_unorm_4x8: + this->type = glsl_type::vec4_type; + break; + + case ir_unop_frexp_sig: + this->type = op0->type; + break; + case ir_unop_frexp_exp: + this->type = glsl_type::get_instance(GLSL_TYPE_INT, + op0->type->vector_elements, 1); + break; + + case ir_unop_get_buffer_size: + case ir_unop_ssbo_unsized_array_length: + this->type = glsl_type::int_type; + break; + + default: + assert(!"not reached: missing automatic type 
setup for ir_expression"); + this->type = op0->type; + break; + } +} + +ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1) + : ir_rvalue(ir_type_expression) +{ + this->operation = ir_expression_operation(op); + this->operands[0] = op0; + this->operands[1] = op1; + this->operands[2] = NULL; + this->operands[3] = NULL; + + assert(op > ir_last_unop); + + switch (this->operation) { + case ir_binop_all_equal: + case ir_binop_any_nequal: + this->type = glsl_type::bool_type; + break; + + case ir_binop_add: + case ir_binop_sub: + case ir_binop_min: + case ir_binop_max: + case ir_binop_pow: + case ir_binop_mul: + case ir_binop_div: + case ir_binop_mod: + if (op0->type->is_scalar()) { + this->type = op1->type; + } else if (op1->type->is_scalar()) { + this->type = op0->type; + } else { + if (this->operation == ir_binop_mul) { + this->type = glsl_type::get_mul_type(op0->type, op1->type); + } else { + assert(op0->type == op1->type); + this->type = op0->type; + } + } + break; + + case ir_binop_logic_and: + case ir_binop_logic_xor: + case ir_binop_logic_or: + case ir_binop_bit_and: + case ir_binop_bit_xor: + case ir_binop_bit_or: + assert(!op0->type->is_matrix()); + assert(!op1->type->is_matrix()); + if (op0->type->is_scalar()) { + this->type = op1->type; + } else if (op1->type->is_scalar()) { + this->type = op0->type; + } else { + assert(op0->type->vector_elements == op1->type->vector_elements); + this->type = op0->type; + } + break; + + case ir_binop_equal: + case ir_binop_nequal: + case ir_binop_lequal: + case ir_binop_gequal: + case ir_binop_less: + case ir_binop_greater: + assert(op0->type == op1->type); + this->type = glsl_type::get_instance(GLSL_TYPE_BOOL, + op0->type->vector_elements, 1); + break; + + case ir_binop_dot: + this->type = op0->type->get_base_type(); + break; + + case ir_binop_pack_half_2x16_split: + this->type = glsl_type::uint_type; + break; + + case ir_binop_imul_high: + case ir_binop_carry: + case ir_binop_borrow: + case ir_binop_lshift: 
+ case ir_binop_rshift: + case ir_binop_ldexp: + case ir_binop_interpolate_at_offset: + case ir_binop_interpolate_at_sample: + this->type = op0->type; + break; + + case ir_binop_vector_extract: + this->type = op0->type->get_scalar_type(); + break; + + default: + assert(!"not reached: missing automatic type setup for ir_expression"); + this->type = glsl_type::float_type; + } +} + +ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1, + ir_rvalue *op2) + : ir_rvalue(ir_type_expression) +{ + this->operation = ir_expression_operation(op); + this->operands[0] = op0; + this->operands[1] = op1; + this->operands[2] = op2; + this->operands[3] = NULL; + + assert(op > ir_last_binop && op <= ir_last_triop); + + switch (this->operation) { + case ir_triop_fma: + case ir_triop_lrp: + case ir_triop_bitfield_extract: + case ir_triop_vector_insert: + this->type = op0->type; + break; + + case ir_triop_csel: + this->type = op1->type; + break; + + default: + assert(!"not reached: missing automatic type setup for ir_expression"); + this->type = glsl_type::float_type; + } +} + +unsigned int +ir_expression::get_num_operands(ir_expression_operation op) +{ + assert(op <= ir_last_opcode); + + if (op <= ir_last_unop) + return 1; + + if (op <= ir_last_binop) + return 2; + + if (op <= ir_last_triop) + return 3; + + if (op <= ir_last_quadop) + return 4; + + assert(false); + return 0; +} + +static const char *const operator_strs[] = { + "~", + "!", + "neg", + "abs", + "sign", + "rcp", + "rsq", + "sqrt", + "exp", + "log", + "exp2", + "log2", + "f2i", + "f2u", + "i2f", + "f2b", + "b2f", + "i2b", + "b2i", + "u2f", + "i2u", + "u2i", + "d2f", + "f2d", + "d2i", + "i2d", + "d2u", + "u2d", + "d2b", + "bitcast_i2f", + "bitcast_f2i", + "bitcast_u2f", + "bitcast_f2u", + "trunc", + "ceil", + "floor", + "fract", + "round_even", + "sin", + "cos", + "dFdx", + "dFdxCoarse", + "dFdxFine", + "dFdy", + "dFdyCoarse", + "dFdyFine", + "packSnorm2x16", + "packSnorm4x8", + "packUnorm2x16", + 
"packUnorm4x8", + "packHalf2x16", + "unpackSnorm2x16", + "unpackSnorm4x8", + "unpackUnorm2x16", + "unpackUnorm4x8", + "unpackHalf2x16", + "unpackHalf2x16_split_x", + "unpackHalf2x16_split_y", + "bitfield_reverse", + "bit_count", + "find_msb", + "find_lsb", + "sat", + "packDouble2x32", + "unpackDouble2x32", + "frexp_sig", + "frexp_exp", + "noise", + "subroutine_to_int", + "interpolate_at_centroid", + "get_buffer_size", + "ssbo_unsized_array_length", + "+", + "-", + "*", + "imul_high", + "/", + "carry", + "borrow", + "%", + "<", + ">", + "<=", + ">=", + "==", + "!=", + "all_equal", + "any_nequal", + "<<", + ">>", + "&", + "^", + "|", + "&&", + "^^", + "||", + "dot", + "min", + "max", + "pow", + "packHalf2x16_split", + "ubo_load", + "ldexp", + "vector_extract", + "interpolate_at_offset", + "interpolate_at_sample", + "fma", + "lrp", + "csel", + "bitfield_extract", + "vector_insert", + "bitfield_insert", + "vector", +}; + +const char *ir_expression::operator_string(ir_expression_operation op) +{ + assert((unsigned int) op < ARRAY_SIZE(operator_strs)); + assert(ARRAY_SIZE(operator_strs) == (ir_quadop_vector + 1)); + return operator_strs[op]; +} + +const char *ir_expression::operator_string() +{ + return operator_string(this->operation); +} + +const char* +depth_layout_string(ir_depth_layout layout) +{ + switch(layout) { + case ir_depth_layout_none: return ""; + case ir_depth_layout_any: return "depth_any"; + case ir_depth_layout_greater: return "depth_greater"; + case ir_depth_layout_less: return "depth_less"; + case ir_depth_layout_unchanged: return "depth_unchanged"; + + default: + assert(0); + return ""; + } +} + +ir_expression_operation +ir_expression::get_operator(const char *str) +{ + const int operator_count = sizeof(operator_strs) / sizeof(operator_strs[0]); + for (int op = 0; op < operator_count; op++) { + if (strcmp(str, operator_strs[op]) == 0) + return (ir_expression_operation) op; + } + return (ir_expression_operation) -1; +} + +ir_variable * 
+ir_expression::variable_referenced() const +{ + switch (operation) { + case ir_binop_vector_extract: + case ir_triop_vector_insert: + /* We get these for things like a[0] where a is a vector type. In these + * cases we want variable_referenced() to return the actual vector + * variable this is wrapping. + */ + return operands[0]->variable_referenced(); + default: + return ir_rvalue::variable_referenced(); + } +} + +ir_constant::ir_constant() + : ir_rvalue(ir_type_constant) +{ +} + +ir_constant::ir_constant(const struct glsl_type *type, + const ir_constant_data *data) + : ir_rvalue(ir_type_constant) +{ + assert((type->base_type >= GLSL_TYPE_UINT) + && (type->base_type <= GLSL_TYPE_BOOL)); + + this->type = type; + memcpy(& this->value, data, sizeof(this->value)); +} + +ir_constant::ir_constant(float f, unsigned vector_elements) + : ir_rvalue(ir_type_constant) +{ + assert(vector_elements <= 4); + this->type = glsl_type::get_instance(GLSL_TYPE_FLOAT, vector_elements, 1); + for (unsigned i = 0; i < vector_elements; i++) { + this->value.f[i] = f; + } + for (unsigned i = vector_elements; i < 16; i++) { + this->value.f[i] = 0; + } +} + +ir_constant::ir_constant(double d, unsigned vector_elements) + : ir_rvalue(ir_type_constant) +{ + assert(vector_elements <= 4); + this->type = glsl_type::get_instance(GLSL_TYPE_DOUBLE, vector_elements, 1); + for (unsigned i = 0; i < vector_elements; i++) { + this->value.d[i] = d; + } + for (unsigned i = vector_elements; i < 16; i++) { + this->value.d[i] = 0.0; + } +} + +ir_constant::ir_constant(unsigned int u, unsigned vector_elements) + : ir_rvalue(ir_type_constant) +{ + assert(vector_elements <= 4); + this->type = glsl_type::get_instance(GLSL_TYPE_UINT, vector_elements, 1); + for (unsigned i = 0; i < vector_elements; i++) { + this->value.u[i] = u; + } + for (unsigned i = vector_elements; i < 16; i++) { + this->value.u[i] = 0; + } +} + +ir_constant::ir_constant(int integer, unsigned vector_elements) + : ir_rvalue(ir_type_constant) +{ + 
assert(vector_elements <= 4); + this->type = glsl_type::get_instance(GLSL_TYPE_INT, vector_elements, 1); + for (unsigned i = 0; i < vector_elements; i++) { + this->value.i[i] = integer; + } + for (unsigned i = vector_elements; i < 16; i++) { + this->value.i[i] = 0; + } +} + +ir_constant::ir_constant(bool b, unsigned vector_elements) + : ir_rvalue(ir_type_constant) +{ + assert(vector_elements <= 4); + this->type = glsl_type::get_instance(GLSL_TYPE_BOOL, vector_elements, 1); + for (unsigned i = 0; i < vector_elements; i++) { + this->value.b[i] = b; + } + for (unsigned i = vector_elements; i < 16; i++) { + this->value.b[i] = false; + } +} + +ir_constant::ir_constant(const ir_constant *c, unsigned i) + : ir_rvalue(ir_type_constant) +{ + this->type = c->type->get_base_type(); + + switch (this->type->base_type) { + case GLSL_TYPE_UINT: this->value.u[0] = c->value.u[i]; break; + case GLSL_TYPE_INT: this->value.i[0] = c->value.i[i]; break; + case GLSL_TYPE_FLOAT: this->value.f[0] = c->value.f[i]; break; + case GLSL_TYPE_BOOL: this->value.b[0] = c->value.b[i]; break; + case GLSL_TYPE_DOUBLE: this->value.d[0] = c->value.d[i]; break; + default: assert(!"Should not get here."); break; + } +} + +ir_constant::ir_constant(const struct glsl_type *type, exec_list *value_list) + : ir_rvalue(ir_type_constant) +{ + this->type = type; + + assert(type->is_scalar() || type->is_vector() || type->is_matrix() + || type->is_record() || type->is_array()); + + if (type->is_array()) { + this->array_elements = ralloc_array(this, ir_constant *, type->length); + unsigned i = 0; + foreach_in_list(ir_constant, value, value_list) { + assert(value->as_constant() != NULL); + + this->array_elements[i++] = value; + } + return; + } + + /* If the constant is a record, the types of each of the entries in + * value_list must be a 1-for-1 match with the structure components. Each + * entry must also be a constant. Just move the nodes from the value_list + * to the list in the ir_constant. 
+ */ + /* FINISHME: Should there be some type checking and / or assertions here? */ + /* FINISHME: Should the new constant take ownership of the nodes from + * FINISHME: value_list, or should it make copies? + */ + if (type->is_record()) { + value_list->move_nodes_to(& this->components); + return; + } + + for (unsigned i = 0; i < 16; i++) { + this->value.u[i] = 0; + } + + ir_constant *value = (ir_constant *) (value_list->head); + + /* Constructors with exactly one scalar argument are special for vectors + * and matrices. For vectors, the scalar value is replicated to fill all + * the components. For matrices, the scalar fills the components of the + * diagonal while the rest is filled with 0. + */ + if (value->type->is_scalar() && value->next->is_tail_sentinel()) { + if (type->is_matrix()) { + /* Matrix - fill diagonal (rest is already set to 0) */ + assert(type->base_type == GLSL_TYPE_FLOAT || + type->base_type == GLSL_TYPE_DOUBLE); + for (unsigned i = 0; i < type->matrix_columns; i++) { + if (type->base_type == GLSL_TYPE_FLOAT) + this->value.f[i * type->vector_elements + i] = + value->value.f[0]; + else + this->value.d[i * type->vector_elements + i] = + value->value.d[0]; + } + } else { + /* Vector or scalar - fill all components */ + switch (type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + for (unsigned i = 0; i < type->components(); i++) + this->value.u[i] = value->value.u[0]; + break; + case GLSL_TYPE_FLOAT: + for (unsigned i = 0; i < type->components(); i++) + this->value.f[i] = value->value.f[0]; + break; + case GLSL_TYPE_DOUBLE: + for (unsigned i = 0; i < type->components(); i++) + this->value.d[i] = value->value.d[0]; + break; + case GLSL_TYPE_BOOL: + for (unsigned i = 0; i < type->components(); i++) + this->value.b[i] = value->value.b[0]; + break; + default: + assert(!"Should not get here."); + break; + } + } + return; + } + + if (type->is_matrix() && value->type->is_matrix()) { + assert(value->next->is_tail_sentinel()); + + /* From 
section 5.4.2 of the GLSL 1.20 spec: + * "If a matrix is constructed from a matrix, then each component + * (column i, row j) in the result that has a corresponding component + * (column i, row j) in the argument will be initialized from there." + */ + unsigned cols = MIN2(type->matrix_columns, value->type->matrix_columns); + unsigned rows = MIN2(type->vector_elements, value->type->vector_elements); + for (unsigned i = 0; i < cols; i++) { + for (unsigned j = 0; j < rows; j++) { + const unsigned src = i * value->type->vector_elements + j; + const unsigned dst = i * type->vector_elements + j; + this->value.f[dst] = value->value.f[src]; + } + } + + /* "All other components will be initialized to the identity matrix." */ + for (unsigned i = cols; i < type->matrix_columns; i++) + this->value.f[i * type->vector_elements + i] = 1.0; + + return; + } + + /* Use each component from each entry in the value_list to initialize one + * component of the constant being constructed. + */ + for (unsigned i = 0; i < type->components(); /* empty */) { + assert(value->as_constant() != NULL); + assert(!value->is_tail_sentinel()); + + for (unsigned j = 0; j < value->type->components(); j++) { + switch (type->base_type) { + case GLSL_TYPE_UINT: + this->value.u[i] = value->get_uint_component(j); + break; + case GLSL_TYPE_INT: + this->value.i[i] = value->get_int_component(j); + break; + case GLSL_TYPE_FLOAT: + this->value.f[i] = value->get_float_component(j); + break; + case GLSL_TYPE_BOOL: + this->value.b[i] = value->get_bool_component(j); + break; + case GLSL_TYPE_DOUBLE: + this->value.d[i] = value->get_double_component(j); + break; + default: + /* FINISHME: What to do? Exceptions are not the answer. 
+ */ + break; + } + + i++; + if (i >= type->components()) + break; + } + + value = (ir_constant *) value->next; + } +} + +ir_constant * +ir_constant::zero(void *mem_ctx, const glsl_type *type) +{ + assert(type->is_scalar() || type->is_vector() || type->is_matrix() + || type->is_record() || type->is_array()); + + ir_constant *c = new(mem_ctx) ir_constant; + c->type = type; + memset(&c->value, 0, sizeof(c->value)); + + if (type->is_array()) { + c->array_elements = ralloc_array(c, ir_constant *, type->length); + + for (unsigned i = 0; i < type->length; i++) + c->array_elements[i] = ir_constant::zero(c, type->fields.array); + } + + if (type->is_record()) { + for (unsigned i = 0; i < type->length; i++) { + ir_constant *comp = ir_constant::zero(mem_ctx, type->fields.structure[i].type); + c->components.push_tail(comp); + } + } + + return c; +} + +bool +ir_constant::get_bool_component(unsigned i) const +{ + switch (this->type->base_type) { + case GLSL_TYPE_UINT: return this->value.u[i] != 0; + case GLSL_TYPE_INT: return this->value.i[i] != 0; + case GLSL_TYPE_FLOAT: return ((int)this->value.f[i]) != 0; + case GLSL_TYPE_BOOL: return this->value.b[i]; + case GLSL_TYPE_DOUBLE: return this->value.d[i] != 0.0; + default: assert(!"Should not get here."); break; + } + + /* Must return something to make the compiler happy. This is clearly an + * error case. + */ + return false; +} + +float +ir_constant::get_float_component(unsigned i) const +{ + switch (this->type->base_type) { + case GLSL_TYPE_UINT: return (float) this->value.u[i]; + case GLSL_TYPE_INT: return (float) this->value.i[i]; + case GLSL_TYPE_FLOAT: return this->value.f[i]; + case GLSL_TYPE_BOOL: return this->value.b[i] ? 1.0f : 0.0f; + case GLSL_TYPE_DOUBLE: return (float) this->value.d[i]; + default: assert(!"Should not get here."); break; + } + + /* Must return something to make the compiler happy. This is clearly an + * error case. 
+ */ + return 0.0; +} + +double +ir_constant::get_double_component(unsigned i) const +{ + switch (this->type->base_type) { + case GLSL_TYPE_UINT: return (double) this->value.u[i]; + case GLSL_TYPE_INT: return (double) this->value.i[i]; + case GLSL_TYPE_FLOAT: return (double) this->value.f[i]; + case GLSL_TYPE_BOOL: return this->value.b[i] ? 1.0 : 0.0; + case GLSL_TYPE_DOUBLE: return this->value.d[i]; + default: assert(!"Should not get here."); break; + } + + /* Must return something to make the compiler happy. This is clearly an + * error case. + */ + return 0.0; +} + +int +ir_constant::get_int_component(unsigned i) const +{ + switch (this->type->base_type) { + case GLSL_TYPE_UINT: return this->value.u[i]; + case GLSL_TYPE_INT: return this->value.i[i]; + case GLSL_TYPE_FLOAT: return (int) this->value.f[i]; + case GLSL_TYPE_BOOL: return this->value.b[i] ? 1 : 0; + case GLSL_TYPE_DOUBLE: return (int) this->value.d[i]; + default: assert(!"Should not get here."); break; + } + + /* Must return something to make the compiler happy. This is clearly an + * error case. + */ + return 0; +} + +unsigned +ir_constant::get_uint_component(unsigned i) const +{ + switch (this->type->base_type) { + case GLSL_TYPE_UINT: return this->value.u[i]; + case GLSL_TYPE_INT: return this->value.i[i]; + case GLSL_TYPE_FLOAT: return (unsigned) this->value.f[i]; + case GLSL_TYPE_BOOL: return this->value.b[i] ? 1 : 0; + case GLSL_TYPE_DOUBLE: return (unsigned) this->value.d[i]; + default: assert(!"Should not get here."); break; + } + + /* Must return something to make the compiler happy. This is clearly an + * error case. + */ + return 0; +} + +ir_constant * +ir_constant::get_array_element(unsigned i) const +{ + assert(this->type->is_array()); + + /* From page 35 (page 41 of the PDF) of the GLSL 1.20 spec: + * + * "Behavior is undefined if a shader subscripts an array with an index + * less than 0 or greater than or equal to the size the array was + * declared with." 
+ * + * Most out-of-bounds accesses are removed before things could get this far. + * There are cases where non-constant array index values can get constant + * folded. + */ + if (int(i) < 0) + i = 0; + else if (i >= this->type->length) + i = this->type->length - 1; + + return array_elements[i]; +} + +ir_constant * +ir_constant::get_record_field(const char *name) +{ + int idx = this->type->field_index(name); + + if (idx < 0) + return NULL; + + if (this->components.is_empty()) + return NULL; + + exec_node *node = this->components.head; + for (int i = 0; i < idx; i++) { + node = node->next; + + /* If the end of the list is encountered before the element matching the + * requested field is found, return NULL. + */ + if (node->is_tail_sentinel()) + return NULL; + } + + return (ir_constant *) node; +} + +void +ir_constant::copy_offset(ir_constant *src, int offset) +{ + switch (this->type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_BOOL: { + unsigned int size = src->type->components(); + assert (size <= this->type->components() - offset); + for (unsigned int i=0; i<size; i++) { + switch (this->type->base_type) { + case GLSL_TYPE_UINT: + value.u[i+offset] = src->get_uint_component(i); + break; + case GLSL_TYPE_INT: + value.i[i+offset] = src->get_int_component(i); + break; + case GLSL_TYPE_FLOAT: + value.f[i+offset] = src->get_float_component(i); + break; + case GLSL_TYPE_BOOL: + value.b[i+offset] = src->get_bool_component(i); + break; + case GLSL_TYPE_DOUBLE: + value.d[i+offset] = src->get_double_component(i); + break; + default: // Shut up the compiler + break; + } + } + break; + } + + case GLSL_TYPE_STRUCT: { + assert (src->type == this->type); + this->components.make_empty(); + foreach_in_list(ir_constant, orig, &src->components) { + this->components.push_tail(orig->clone(this, NULL)); + } + break; + } + + case GLSL_TYPE_ARRAY: { + assert (src->type == this->type); + for (unsigned i = 0; i 
< this->type->length; i++) { + this->array_elements[i] = src->array_elements[i]->clone(this, NULL); + } + break; + } + + default: + assert(!"Should not get here."); + break; + } +} + +void +ir_constant::copy_masked_offset(ir_constant *src, int offset, unsigned int mask) +{ + assert (!type->is_array() && !type->is_record()); + + if (!type->is_vector() && !type->is_matrix()) { + offset = 0; + mask = 1; + } + + int id = 0; + for (int i=0; i<4; i++) { + if (mask & (1 << i)) { + switch (this->type->base_type) { + case GLSL_TYPE_UINT: + value.u[i+offset] = src->get_uint_component(id++); + break; + case GLSL_TYPE_INT: + value.i[i+offset] = src->get_int_component(id++); + break; + case GLSL_TYPE_FLOAT: + value.f[i+offset] = src->get_float_component(id++); + break; + case GLSL_TYPE_BOOL: + value.b[i+offset] = src->get_bool_component(id++); + break; + case GLSL_TYPE_DOUBLE: + value.d[i+offset] = src->get_double_component(id++); + break; + default: + assert(!"Should not get here."); + return; + } + } + } +} + +bool +ir_constant::has_value(const ir_constant *c) const +{ + if (this->type != c->type) + return false; + + if (this->type->is_array()) { + for (unsigned i = 0; i < this->type->length; i++) { + if (!this->array_elements[i]->has_value(c->array_elements[i])) + return false; + } + return true; + } + + if (this->type->base_type == GLSL_TYPE_STRUCT) { + const exec_node *a_node = this->components.head; + const exec_node *b_node = c->components.head; + + while (!a_node->is_tail_sentinel()) { + assert(!b_node->is_tail_sentinel()); + + const ir_constant *const a_field = (ir_constant *) a_node; + const ir_constant *const b_field = (ir_constant *) b_node; + + if (!a_field->has_value(b_field)) + return false; + + a_node = a_node->next; + b_node = b_node->next; + } + + return true; + } + + for (unsigned i = 0; i < this->type->components(); i++) { + switch (this->type->base_type) { + case GLSL_TYPE_UINT: + if (this->value.u[i] != c->value.u[i]) + return false; + break; + case 
GLSL_TYPE_INT: + if (this->value.i[i] != c->value.i[i]) + return false; + break; + case GLSL_TYPE_FLOAT: + if (this->value.f[i] != c->value.f[i]) + return false; + break; + case GLSL_TYPE_BOOL: + if (this->value.b[i] != c->value.b[i]) + return false; + break; + case GLSL_TYPE_DOUBLE: + if (this->value.d[i] != c->value.d[i]) + return false; + break; + default: + assert(!"Should not get here."); + return false; + } + } + + return true; +} + +bool +ir_constant::is_value(float f, int i) const +{ + if (!this->type->is_scalar() && !this->type->is_vector()) + return false; + + /* Only accept boolean values for 0/1. */ + if (int(bool(i)) != i && this->type->is_boolean()) + return false; + + for (unsigned c = 0; c < this->type->vector_elements; c++) { + switch (this->type->base_type) { + case GLSL_TYPE_FLOAT: + if (this->value.f[c] != f) + return false; + break; + case GLSL_TYPE_INT: + if (this->value.i[c] != i) + return false; + break; + case GLSL_TYPE_UINT: + if (this->value.u[c] != unsigned(i)) + return false; + break; + case GLSL_TYPE_BOOL: + if (this->value.b[c] != bool(i)) + return false; + break; + case GLSL_TYPE_DOUBLE: + if (this->value.d[c] != double(f)) + return false; + break; + default: + /* The only other base types are structures, arrays, and samplers. + * Samplers cannot be constants, and the others should have been + * filtered out above. 
+ */ + assert(!"Should not get here."); + return false; + } + } + + return true; +} + +bool +ir_constant::is_zero() const +{ + return is_value(0.0, 0); +} + +bool +ir_constant::is_one() const +{ + return is_value(1.0, 1); +} + +bool +ir_constant::is_negative_one() const +{ + return is_value(-1.0, -1); +} + +bool +ir_constant::is_uint16_constant() const +{ + if (!type->is_integer()) + return false; + + return value.u[0] < (1 << 16); +} + +ir_loop::ir_loop() + : ir_instruction(ir_type_loop) +{ +} + + +ir_dereference_variable::ir_dereference_variable(ir_variable *var) + : ir_dereference(ir_type_dereference_variable) +{ + assert(var != NULL); + + this->var = var; + this->type = var->type; +} + + +ir_dereference_array::ir_dereference_array(ir_rvalue *value, + ir_rvalue *array_index) + : ir_dereference(ir_type_dereference_array) +{ + this->array_index = array_index; + this->set_array(value); +} + + +ir_dereference_array::ir_dereference_array(ir_variable *var, + ir_rvalue *array_index) + : ir_dereference(ir_type_dereference_array) +{ + void *ctx = ralloc_parent(var); + + this->array_index = array_index; + this->set_array(new(ctx) ir_dereference_variable(var)); +} + + +void +ir_dereference_array::set_array(ir_rvalue *value) +{ + assert(value != NULL); + + this->array = value; + + const glsl_type *const vt = this->array->type; + + if (vt->is_array()) { + type = vt->fields.array; + } else if (vt->is_matrix()) { + type = vt->column_type(); + } else if (vt->is_vector()) { + type = vt->get_base_type(); + } +} + + +ir_dereference_record::ir_dereference_record(ir_rvalue *value, + const char *field) + : ir_dereference(ir_type_dereference_record) +{ + assert(value != NULL); + + this->record = value; + this->field = ralloc_strdup(this, field); + this->type = this->record->type->field_type(field); +} + + +ir_dereference_record::ir_dereference_record(ir_variable *var, + const char *field) + : ir_dereference(ir_type_dereference_record) +{ + void *ctx = ralloc_parent(var); + + 
this->record = new(ctx) ir_dereference_variable(var); + this->field = ralloc_strdup(this, field); + this->type = this->record->type->field_type(field); +} + +bool +ir_dereference::is_lvalue() const +{ + ir_variable *var = this->variable_referenced(); + + /* Every l-value derference chain eventually ends in a variable. + */ + if ((var == NULL) || var->data.read_only) + return false; + + /* From section 4.1.7 of the GLSL 4.40 spec: + * + * "Opaque variables cannot be treated as l-values; hence cannot + * be used as out or inout function parameters, nor can they be + * assigned into." + */ + if (this->type->contains_opaque()) + return false; + + return true; +} + + +static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txf_ms", "txs", "lod", "tg4", "query_levels", "texture_samples", "samples_identical" }; + +const char *ir_texture::opcode_string() +{ + assert((unsigned int) op < ARRAY_SIZE(tex_opcode_strs)); + return tex_opcode_strs[op]; +} + +ir_texture_opcode +ir_texture::get_opcode(const char *str) +{ + const int count = sizeof(tex_opcode_strs) / sizeof(tex_opcode_strs[0]); + for (int op = 0; op < count; op++) { + if (strcmp(str, tex_opcode_strs[op]) == 0) + return (ir_texture_opcode) op; + } + return (ir_texture_opcode) -1; +} + + +void +ir_texture::set_sampler(ir_dereference *sampler, const glsl_type *type) +{ + assert(sampler != NULL); + assert(type != NULL); + this->sampler = sampler; + this->type = type; + + if (this->op == ir_txs || this->op == ir_query_levels || + this->op == ir_texture_samples) { + assert(type->base_type == GLSL_TYPE_INT); + } else if (this->op == ir_lod) { + assert(type->vector_elements == 2); + assert(type->base_type == GLSL_TYPE_FLOAT); + } else if (this->op == ir_samples_identical) { + assert(type == glsl_type::bool_type); + assert(sampler->type->base_type == GLSL_TYPE_SAMPLER); + assert(sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS); + } else { + assert(sampler->type->sampler_type == (int) 
type->base_type); + if (sampler->type->sampler_shadow) + assert(type->vector_elements == 4 || type->vector_elements == 1); + else + assert(type->vector_elements == 4); + } +} + + +void +ir_swizzle::init_mask(const unsigned *comp, unsigned count) +{ + assert((count >= 1) && (count <= 4)); + + memset(&this->mask, 0, sizeof(this->mask)); + this->mask.num_components = count; + + unsigned dup_mask = 0; /* non-zero once any component index is selected twice */ + switch (count) { + case 4: + assert(comp[3] <= 3); + dup_mask |= (1U << comp[3]) + & ((1U << comp[0]) | (1U << comp[1]) | (1U << comp[2])); + this->mask.w = comp[3]; /* fallthrough: the lower components are filled in by the cases below */ + + case 3: + assert(comp[2] <= 3); + dup_mask |= (1U << comp[2]) + & ((1U << comp[0]) | (1U << comp[1])); + this->mask.z = comp[2]; /* fallthrough */ + + case 2: + assert(comp[1] <= 3); + dup_mask |= (1U << comp[1]) + & ((1U << comp[0])); + this->mask.y = comp[1]; /* fallthrough */ + + case 1: + assert(comp[0] <= 3); + this->mask.x = comp[0]; + } + + this->mask.has_duplicates = dup_mask != 0; + + /* Based on the number of elements in the swizzle and the base type + * (i.e., float, int, unsigned, or bool) of the vector being swizzled, + * generate the type of the resulting value.
+ */ + type = glsl_type::get_instance(val->type->base_type, mask.num_components, 1); +} + +ir_swizzle::ir_swizzle(ir_rvalue *val, unsigned x, unsigned y, unsigned z, + unsigned w, unsigned count) + : ir_rvalue(ir_type_swizzle), val(val) +{ + const unsigned components[4] = { x, y, z, w }; + this->init_mask(components, count); +} + +ir_swizzle::ir_swizzle(ir_rvalue *val, const unsigned *comp, + unsigned count) + : ir_rvalue(ir_type_swizzle), val(val) +{ + this->init_mask(comp, count); +} + +ir_swizzle::ir_swizzle(ir_rvalue *val, ir_swizzle_mask mask) + : ir_rvalue(ir_type_swizzle) +{ + this->val = val; + this->mask = mask; + this->type = glsl_type::get_instance(val->type->base_type, + mask.num_components, 1); +} + +#define X 1 +#define R 5 +#define S 9 +#define I 13 + +ir_swizzle * +ir_swizzle::create(ir_rvalue *val, const char *str, unsigned vector_length) +{ + void *ctx = ralloc_parent(val); + + /* For each possible swizzle character, this table encodes the value in + * \c idx_map that represents the 0th element of the vector. For invalid + * swizzle characters (e.g., 'k'), a special value is used that will allow + * detection of errors. + */ + static const unsigned char base_idx[26] = { + /* a b c d e f g h i j k l m */ + R, R, I, I, I, I, R, I, I, I, I, I, I, + /* n o p q r s t u v w x y z */ + I, I, S, S, R, S, S, I, I, X, X, X, X + }; + + /* Each valid swizzle character has an entry in the previous table. This + * table encodes the base index encoded in the previous table plus the actual + * index of the swizzle character. When processing swizzles, the first + * character in the string is indexed in the previous table. Each character + * in the string is indexed in this table, and the value found there has the + * value from the first table subtracted. The result must be on the range + * [0,3]. + * + * For example, the string "wzyx" will get X from the first table. Each of + * the characters will get X+3, X+2, X+1, and X+0 from this table.
After + * subtraction, the swizzle values are { 3, 2, 1, 0 }. + * + * The string "wzrg" will get X from the first table. Each of the characters + * will get X+3, X+2, R+0, and R+1 from this table. After subtraction, the + * swizzle values are { 3, 2, 4, 5 }. Since 4 and 5 are outside the range + * [0,3], the error is detected. + */ + static const unsigned char idx_map[26] = { + /* a b c d e f g h i j k l m */ + R+3, R+2, 0, 0, 0, 0, R+1, 0, 0, 0, 0, 0, 0, + /* n o p q r s t u v w x y z */ + 0, 0, S+2, S+3, R+0, S+0, S+1, 0, 0, X+3, X+0, X+1, X+2 + }; + + int swiz_idx[4] = { 0, 0, 0, 0 }; + unsigned i; + + + /* Validate the first character in the swizzle string and look up the base + * index value as described above. + */ + if ((str[0] < 'a') || (str[0] > 'z')) + return NULL; + + const unsigned base = base_idx[str[0] - 'a']; + + + for (i = 0; (i < 4) && (str[i] != '\0'); i++) { + /* Validate the next character, and, as described above, convert it to a + * swizzle index. + */ + if ((str[i] < 'a') || (str[i] > 'z')) + return NULL; + + /* NOTE(review): base is unsigned, so this subtraction is done in unsigned + * arithmetic and the "< 0" test below relies on the conversion back to + * int yielding a negative value - confirm this is acceptable on all + * supported compilers. + */ swiz_idx[i] = idx_map[str[i] - 'a'] - base; + if ((swiz_idx[i] < 0) || (swiz_idx[i] >= (int) vector_length)) + return NULL; + } + + /* The loop stops after four characters; anything left over means the + * swizzle string is too long. + */ if (str[i] != '\0') + return NULL; + + return new(ctx) ir_swizzle(val, swiz_idx[0], swiz_idx[1], swiz_idx[2], + swiz_idx[3], i); +} + +#undef X +#undef R +#undef S +#undef I + +ir_variable * +ir_swizzle::variable_referenced() const +{ + return this->val->variable_referenced(); +} + + +bool ir_variable::temporaries_allocate_names = false; + +const char ir_variable::tmp_name[] = "compiler_temp"; + +ir_variable::ir_variable(const struct glsl_type *type, const char *name, + ir_variable_mode mode) + : ir_instruction(ir_type_variable) +{ + this->type = type; + + if (mode == ir_var_temporary && !ir_variable::temporaries_allocate_names) + name = NULL; + + /* The ir_variable clone method may call this constructor with name set to + * tmp_name.
+ */ + assert(name != NULL + || mode == ir_var_temporary + || mode == ir_var_function_in + || mode == ir_var_function_out + || mode == ir_var_function_inout); + assert(name != ir_variable::tmp_name + || mode == ir_var_temporary); + if (mode == ir_var_temporary + && (name == NULL || name == ir_variable::tmp_name)) { + this->name = ir_variable::tmp_name; + } else { + this->name = ralloc_strdup(this, name); + } + + this->u.max_ifc_array_access = NULL; + + this->data.explicit_location = false; + this->data.has_initializer = false; + this->data.location = -1; + this->data.location_frac = 0; + this->data.binding = 0; + this->data.warn_extension_index = 0; + this->constant_value = NULL; + this->constant_initializer = NULL; + this->data.origin_upper_left = false; + this->data.pixel_center_integer = false; + this->data.depth_layout = ir_depth_layout_none; + this->data.used = false; + this->data.always_active_io = false; + this->data.read_only = false; + this->data.centroid = false; + this->data.sample = false; + this->data.patch = false; + this->data.invariant = false; + this->data.how_declared = ir_var_declared_normally; + this->data.mode = mode; + this->data.interpolation = INTERP_QUALIFIER_NONE; + this->data.max_array_access = 0; + this->data.offset = 0; + this->data.precision = GLSL_PRECISION_NONE; + this->data.image_read_only = false; + this->data.image_write_only = false; + this->data.image_coherent = false; + this->data.image_volatile = false; + this->data.image_restrict = false; + this->data.from_ssbo_unsized_array = false; + + if (type != NULL) { + if (type->base_type == GLSL_TYPE_SAMPLER) + this->data.read_only = true; + + if (type->is_interface()) + this->init_interface_type(type); + else if (type->without_array()->is_interface()) + this->init_interface_type(type->without_array()); + } +} + + +const char * +interpolation_string(unsigned interpolation) +{ + switch (interpolation) { + case INTERP_QUALIFIER_NONE: return "no"; + case INTERP_QUALIFIER_SMOOTH: return 
"smooth"; + case INTERP_QUALIFIER_FLAT: return "flat"; + case INTERP_QUALIFIER_NOPERSPECTIVE: return "noperspective"; + } + + assert(!"Should not get here."); + return ""; +} + + +glsl_interp_qualifier +ir_variable::determine_interpolation_mode(bool flat_shade) +{ + if (this->data.interpolation != INTERP_QUALIFIER_NONE) + return (glsl_interp_qualifier) this->data.interpolation; + int location = this->data.location; + bool is_gl_Color = + location == VARYING_SLOT_COL0 || location == VARYING_SLOT_COL1; + if (flat_shade && is_gl_Color) + return INTERP_QUALIFIER_FLAT; + else + return INTERP_QUALIFIER_SMOOTH; +} + +const char *const ir_variable::warn_extension_table[] = { + "", + "GL_ARB_shader_stencil_export", + "GL_AMD_shader_stencil_export", +}; + +void +ir_variable::enable_extension_warning(const char *extension) +{ + for (unsigned i = 0; i < ARRAY_SIZE(warn_extension_table); i++) { + if (strcmp(warn_extension_table[i], extension) == 0) { + this->data.warn_extension_index = i; + return; + } + } + + assert(!"Should not get here."); + this->data.warn_extension_index = 0; +} + +const char * +ir_variable::get_extension_warning() const +{ + return this->data.warn_extension_index == 0 + ? NULL : warn_extension_table[this->data.warn_extension_index]; +} + +ir_function_signature::ir_function_signature(const glsl_type *return_type, + builtin_available_predicate b) + : ir_instruction(ir_type_function_signature), + return_type(return_type), is_defined(false), is_intrinsic(false), + builtin_avail(b), _function(NULL) +{ + this->origin = NULL; +} + + +bool +ir_function_signature::is_builtin() const +{ + return builtin_avail != NULL; +} + + +bool +ir_function_signature::is_builtin_available(const _mesa_glsl_parse_state *state) const +{ + /* We can't call the predicate without a state pointer, so just say that + * the signature is available. At compile time, we need the filtering, + * but also receive a valid state pointer. 
At link time, we're resolving + * imported built-in prototypes to their definitions, which will always + * be an exact match. So we can skip the filtering. + */ + if (state == NULL) + return true; + + assert(builtin_avail != NULL); + return builtin_avail(state); +} + + +static bool +modes_match(unsigned a, unsigned b) +{ + if (a == b) + return true; + + /* Accept "in" vs. "const in" */ + if ((a == ir_var_const_in && b == ir_var_function_in) || + (b == ir_var_const_in && a == ir_var_function_in)) + return true; + + return false; +} + + +const char * +ir_function_signature::qualifiers_match(exec_list *params) +{ + /* check that the qualifiers match. */ + foreach_two_lists(a_node, &this->parameters, b_node, params) { + ir_variable *a = (ir_variable *) a_node; + ir_variable *b = (ir_variable *) b_node; + + if (a->data.read_only != b->data.read_only || + !modes_match(a->data.mode, b->data.mode) || + a->data.interpolation != b->data.interpolation || + a->data.centroid != b->data.centroid || + a->data.sample != b->data.sample || + a->data.patch != b->data.patch || + a->data.image_read_only != b->data.image_read_only || + a->data.image_write_only != b->data.image_write_only || + a->data.image_coherent != b->data.image_coherent || + a->data.image_volatile != b->data.image_volatile || + a->data.image_restrict != b->data.image_restrict) { + + /* parameter a's qualifiers don't match */ + return a->name; + } + } + return NULL; +} + + +void +ir_function_signature::replace_parameters(exec_list *new_params) +{ + /* Destroy all of the previous parameter information. If the previous + * parameter information comes from the function prototype, it may either + * specify incorrect parameter names or not have names at all. 
+ */ + new_params->move_nodes_to(&parameters); +} + + +ir_function::ir_function(const char *name) + : ir_instruction(ir_type_function) +{ + this->subroutine_index = -1; + this->name = ralloc_strdup(this, name); +} + + +bool +ir_function::has_user_signature() +{ + foreach_in_list(ir_function_signature, sig, &this->signatures) { + if (!sig->is_builtin()) + return true; + } + return false; +} + + +ir_rvalue * +ir_rvalue::error_value(void *mem_ctx) +{ + ir_rvalue *v = new(mem_ctx) ir_rvalue(ir_type_unset); + + v->type = glsl_type::error_type; + return v; +} + + +void +visit_exec_list(exec_list *list, ir_visitor *visitor) +{ + foreach_in_list_safe(ir_instruction, node, list) { + node->accept(visitor); + } +} + + +static void +steal_memory(ir_instruction *ir, void *new_ctx) +{ + ir_variable *var = ir->as_variable(); + ir_function *fn = ir->as_function(); + ir_constant *constant = ir->as_constant(); + if (var != NULL && var->constant_value != NULL) + steal_memory(var->constant_value, ir); + + if (var != NULL && var->constant_initializer != NULL) + steal_memory(var->constant_initializer, ir); + + if (fn != NULL && fn->subroutine_types) + ralloc_steal(new_ctx, fn->subroutine_types); + + /* The components of aggregate constants are not visited by the normal + * visitor, so steal their values by hand.
+ */ + if (constant != NULL) { + if (constant->type->is_record()) { + foreach_in_list(ir_constant, field, &constant->components) { + steal_memory(field, ir); + } + } else if (constant->type->is_array()) { + for (unsigned int i = 0; i < constant->type->length; i++) { + steal_memory(constant->array_elements[i], ir); + } + } + } + + ralloc_steal(new_ctx, ir); +} + + +void +reparent_ir(exec_list *list, void *mem_ctx) +{ + foreach_in_list(ir_instruction, node, list) { + visit_tree(node, steal_memory, mem_ctx); + } +} + + +static ir_rvalue * +try_min_one(ir_rvalue *ir) +{ + ir_expression *expr = ir->as_expression(); + + if (!expr || expr->operation != ir_binop_min) + return NULL; + + if (expr->operands[0]->is_one()) + return expr->operands[1]; + + if (expr->operands[1]->is_one()) + return expr->operands[0]; + + return NULL; +} + +static ir_rvalue * +try_max_zero(ir_rvalue *ir) +{ + ir_expression *expr = ir->as_expression(); + + if (!expr || expr->operation != ir_binop_max) + return NULL; + + if (expr->operands[0]->is_zero()) + return expr->operands[1]; + + if (expr->operands[1]->is_zero()) + return expr->operands[0]; + + return NULL; +} + +ir_rvalue * +ir_rvalue::as_rvalue_to_saturate() +{ + ir_expression *expr = this->as_expression(); + + if (!expr) + return NULL; + + ir_rvalue *max_zero = try_max_zero(expr); + if (max_zero) { + return try_min_one(max_zero); + } else { + ir_rvalue *min_one = try_min_one(expr); + if (min_one) { + return try_max_zero(min_one); + } + } + + return NULL; +} + + +unsigned +vertices_per_prim(GLenum prim) +{ + switch (prim) { + case GL_POINTS: + return 1; + case GL_LINES: + return 2; + case GL_TRIANGLES: + return 3; + case GL_LINES_ADJACENCY: + return 4; + case GL_TRIANGLES_ADJACENCY: + return 6; + default: + assert(!"Bad primitive"); + return 3; + } +} + +/** + * Generate a string describing the mode of a variable + */ +const char * +mode_string(const ir_variable *var) +{ + switch (var->data.mode) { + case ir_var_auto: + return 
(var->data.read_only) ? "global constant" : "global variable"; + + case ir_var_uniform: + return "uniform"; + + case ir_var_shader_storage: + return "buffer"; + + case ir_var_shader_shared: /* variable declared as shared */ + return "shared"; + + case ir_var_shader_in: + return "shader input"; + + case ir_var_shader_out: + return "shader output"; + + case ir_var_function_in: + case ir_var_const_in: + return "function input"; + + case ir_var_function_out: + return "function output"; + + case ir_var_function_inout: + return "function inout"; + + case ir_var_system_value: + return "shader input"; + + case ir_var_temporary: + return "compiler temporary"; + + case ir_var_mode_count: /* not a real mode; handled by the assert below */ + break; + } + + assert(!"Should not get here."); + return "invalid variable"; +} diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h new file mode 100644 index 0000000..bd7b550 --- /dev/null +++ b/src/compiler/glsl/ir.h @@ -0,0 +1,2632 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#pragma once +#ifndef IR_H +#define IR_H + +#include <stdio.h> +#include <stdlib.h> + +#include "util/ralloc.h" +#include "compiler/glsl_types.h" +#include "list.h" +#include "ir_visitor.h" +#include "ir_hierarchical_visitor.h" +#include "main/mtypes.h" + +#ifdef __cplusplus + +/** + * \defgroup IR Intermediate representation nodes + * + * @{ + */ + +/** + * Class tags + * + * Each concrete class derived from \c ir_instruction has a value in this + * enumerant. The value for the type is stored in \c ir_instruction::ir_type + * by the constructor. While using type tags is not very C++, it is extremely + * convenient. For example, during debugging you can simply inspect + * \c ir_instruction::ir_type to find out the actual type of the object. + * + * In addition, it is possible to use a switch-statement based on + * \c ir_instruction::ir_type to select different behavior for different object + * types. For functions that have only slight differences for several object + * types, this allows writing very straightforward, readable code.
+ */ +enum ir_node_type { + ir_type_dereference_array, + ir_type_dereference_record, + ir_type_dereference_variable, + ir_type_constant, + ir_type_expression, + ir_type_swizzle, + ir_type_texture, + ir_type_variable, + ir_type_assignment, + ir_type_call, + ir_type_function, + ir_type_function_signature, + ir_type_if, + ir_type_loop, + ir_type_loop_jump, + ir_type_return, + ir_type_discard, + ir_type_emit_vertex, + ir_type_end_primitive, + ir_type_barrier, + ir_type_max, /**< maximum ir_type enum number, for validation */ + ir_type_unset = ir_type_max +}; + + +/** + * Base class of all IR instructions + */ +class ir_instruction : public exec_node { +public: + enum ir_node_type ir_type; + + /** + * GCC 4.7+ and clang warn when deleting an ir_instruction unless + * there's a virtual destructor present. Because we almost + * universally use ralloc for our memory management of + * ir_instructions, the destructor doesn't need to do any work. + */ + virtual ~ir_instruction() + { + } + + /** ir_print_visitor helper for debugging. 
*/ + void print(void) const; + void fprint(FILE *f) const; + + virtual void accept(ir_visitor *) = 0; + virtual ir_visitor_status accept(ir_hierarchical_visitor *) = 0; + virtual ir_instruction *clone(void *mem_ctx, + struct hash_table *ht) const = 0; + + bool is_rvalue() const + { + return ir_type == ir_type_dereference_array || + ir_type == ir_type_dereference_record || + ir_type == ir_type_dereference_variable || + ir_type == ir_type_constant || + ir_type == ir_type_expression || + ir_type == ir_type_swizzle || + ir_type == ir_type_texture; + } + + bool is_dereference() const + { + return ir_type == ir_type_dereference_array || + ir_type == ir_type_dereference_record || + ir_type == ir_type_dereference_variable; + } + + bool is_jump() const + { + return ir_type == ir_type_loop_jump || + ir_type == ir_type_return || + ir_type == ir_type_discard; + } + + /** + * \name IR instruction downcast functions + * + * These functions either cast the object to a derived class or return + * \c NULL if the object's type does not match the specified derived class. + * Additional downcast functions will be added as needed. + */ + /*@{*/ + #define AS_BASE(TYPE) \ + class ir_##TYPE *as_##TYPE() \ + { \ + assume(this != NULL); \ + return is_##TYPE() ? (ir_##TYPE *) this : NULL; \ + } \ + const class ir_##TYPE *as_##TYPE() const \ + { \ + assume(this != NULL); \ + return is_##TYPE() ? (ir_##TYPE *) this : NULL; \ + } + + AS_BASE(rvalue) + AS_BASE(dereference) + AS_BASE(jump) + #undef AS_BASE + + #define AS_CHILD(TYPE) \ + class ir_##TYPE * as_##TYPE() \ + { \ + assume(this != NULL); \ + return ir_type == ir_type_##TYPE ? (ir_##TYPE *) this : NULL; \ + } \ + const class ir_##TYPE * as_##TYPE() const \ + { \ + assume(this != NULL); \ + return ir_type == ir_type_##TYPE ? 
(const ir_##TYPE *) this : NULL; \ + } + AS_CHILD(variable) + AS_CHILD(function) + AS_CHILD(dereference_array) + AS_CHILD(dereference_variable) + AS_CHILD(dereference_record) + AS_CHILD(expression) + AS_CHILD(loop) + AS_CHILD(assignment) + AS_CHILD(call) + AS_CHILD(return) + AS_CHILD(if) + AS_CHILD(swizzle) + AS_CHILD(texture) + AS_CHILD(constant) + AS_CHILD(discard) + #undef AS_CHILD + /*@}*/ + + /** + * IR equality method: Return true if the referenced instruction would + * return the same value as this one. + * + * This is intended to be used for CSE and algebraic optimizations, on rvalues + * in particular. No support for other instruction types (assignments, + * jumps, calls, etc.) is planned. + */ + virtual bool equals(const ir_instruction *ir, + enum ir_node_type ignore = ir_type_unset) const; + +protected: + ir_instruction(enum ir_node_type t) + : ir_type(t) + { + } + +private: + ir_instruction() + { + assert(!"Should not get here."); + } +}; + + +/** + * The base class for all "values"/expression trees. + */ +class ir_rvalue : public ir_instruction { +public: + const struct glsl_type *type; + + virtual ir_rvalue *clone(void *mem_ctx, struct hash_table *) const; + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); + + ir_rvalue *as_rvalue_to_saturate(); + + virtual bool is_lvalue() const + { + return false; + } + + /** + * Get the variable that is ultimately referenced by an r-value + */ + virtual ir_variable *variable_referenced() const + { + return NULL; + } + + + /** + * If an r-value is a reference to a whole variable, get that variable + * + * \return + * Pointer to a variable that is completely dereferenced by the r-value.
If + * the r-value is not a dereference or the dereference does not access the + * entire variable (i.e., it's just one array element, struct field), \c NULL + * is returned. + */ + virtual ir_variable *whole_variable_referenced() + { + return NULL; + } + + /** + * Determine if an r-value has the value zero + * + * The base implementation of this function always returns \c false. The + * \c ir_constant class over-rides this function to return \c true \b only + * for vector and scalar types that have all elements set to the value + * zero (or \c false for booleans). + * + * \sa ir_constant::has_value, ir_rvalue::is_one, ir_rvalue::is_negative_one + */ + virtual bool is_zero() const; + + /** + * Determine if an r-value has the value one + * + * The base implementation of this function always returns \c false. The + * \c ir_constant class over-rides this function to return \c true \b only + * for vector and scalar types that have all elements set to the value + * one (or \c true for booleans). + * + * \sa ir_constant::has_value, ir_rvalue::is_zero, ir_rvalue::is_negative_one + */ + virtual bool is_one() const; + + /** + * Determine if an r-value has the value negative one + * + * The base implementation of this function always returns \c false. The + * \c ir_constant class over-rides this function to return \c true \b only + * for vector and scalar types that have all elements set to the value + * negative one. For boolean types, the result is always \c false. + * + * \sa ir_constant::has_value, ir_rvalue::is_zero, ir_rvalue::is_one + */ + virtual bool is_negative_one() const; + + /** + * Determine if an r-value is an unsigned integer constant which can be + * stored in 16 bits. + * + * \sa ir_constant::is_uint16_constant. + */ + virtual bool is_uint16_constant() const { return false; } + + /** + * Return a generic value of error_type. + * + * Allocation will be performed with 'mem_ctx' as ralloc owner. 
+ */ + static ir_rvalue *error_value(void *mem_ctx); + +protected: + ir_rvalue(enum ir_node_type t); +}; + + +/** + * Variable storage classes + */ +enum ir_variable_mode { + ir_var_auto = 0, /**< Function local variables and globals. */ + ir_var_uniform, /**< Variable declared as a uniform. */ + ir_var_shader_storage, /**< Variable declared as an ssbo. */ + ir_var_shader_shared, /**< Variable declared as shared. */ + ir_var_shader_in, + ir_var_shader_out, + ir_var_function_in, + ir_var_function_out, + ir_var_function_inout, + ir_var_const_in, /**< "in" param that must be a constant expression */ + ir_var_system_value, /**< Ex: front-face, instance-id, etc. */ + ir_var_temporary, /**< Temporary variable generated during compilation. */ + ir_var_mode_count /**< Number of variable modes */ +}; + +/** + * Enum keeping track of how a variable was declared. For error checking of + * the gl_PerVertex redeclaration rules. + */ +enum ir_var_declaration_type { + /** + * Normal declaration (for most variables, this means an explicit + * declaration. Exception: temporaries are always implicitly declared, but + * they still use ir_var_declared_normally). + * + * Note: an ir_variable that represents a named interface block uses + * ir_var_declared_normally. + */ + ir_var_declared_normally = 0, + + /** + * Variable was explicitly declared (or re-declared) in an unnamed + * interface block. + */ + ir_var_declared_in_block, + + /** + * Variable is an implicitly declared built-in that has not been explicitly + * re-declared by the shader. + */ + ir_var_declared_implicitly, + + /** + * Variable is implicitly generated by the compiler and should not be + * visible via the API. + */ + ir_var_hidden, +}; + +/** + * \brief Layout qualifiers for gl_FragDepth. + * + * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared + * with a layout qualifier. + */ +enum ir_depth_layout { + ir_depth_layout_none, /**< No depth layout is specified. 
*/ + ir_depth_layout_any, + ir_depth_layout_greater, + ir_depth_layout_less, + ir_depth_layout_unchanged +}; + +/** + * \brief Convert depth layout qualifier to string. + */ +const char* +depth_layout_string(ir_depth_layout layout); + +/** + * Description of built-in state associated with a uniform + * + * \sa ir_variable::state_slots + */ +struct ir_state_slot { + int tokens[5]; + int swizzle; +}; + + +/** + * Get the string value for an interpolation qualifier + * + * \return The string that would be used in a shader to specify \c + * mode will be returned. + * + * This function is used to generate error messages of the form "shader + * uses %s interpolation qualifier", so in the case where there is no + * interpolation qualifier, it returns "no". + * + * This function should only be used on a shader input or output variable. + */ +const char *interpolation_string(unsigned interpolation); + + +class ir_variable : public ir_instruction { +public: + ir_variable(const struct glsl_type *, const char *, ir_variable_mode); + + virtual ir_variable *clone(void *mem_ctx, struct hash_table *ht) const; + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + + /** + * Determine how this variable should be interpolated based on its + * interpolation qualifier (if present), whether it is gl_Color or + * gl_SecondaryColor, and whether flatshading is enabled in the current GL + * state. + * + * The return value will always be either INTERP_QUALIFIER_SMOOTH, + * INTERP_QUALIFIER_NOPERSPECTIVE, or INTERP_QUALIFIER_FLAT. + */ + glsl_interp_qualifier determine_interpolation_mode(bool flat_shade); + + /** + * Determine whether or not a variable is part of a uniform or + * shader storage block. 
+ */ + inline bool is_in_buffer_block() const + { + return (this->data.mode == ir_var_uniform || + this->data.mode == ir_var_shader_storage) && + this->interface_type != NULL; + } + + /** + * Determine whether or not a variable is part of a shader storage block. + */ + inline bool is_in_shader_storage_block() const + { + return this->data.mode == ir_var_shader_storage && + this->interface_type != NULL; + } + + /** + * Determine whether or not a variable is the declaration of an interface + * block + * + * For the first declaration below, there will be an \c ir_variable named + * "instance" whose type and whose interface_type will be the same + * \c glsl_type. For the second declaration, there will be an \c ir_variable + * named "f" whose type is float and whose interface_type is B2. + * + * "instance" is an interface instance variable, but "f" is not. + * + * uniform B1 { + * float f; + * } instance; + * + * uniform B2 { + * float f; + * }; + */ + inline bool is_interface_instance() const + { + return this->type->without_array() == this->interface_type; + } + + /** + * Set this->interface_type on a newly created variable. + */ + void init_interface_type(const struct glsl_type *type) + { + assert(this->interface_type == NULL); + this->interface_type = type; + if (this->is_interface_instance()) { + this->u.max_ifc_array_access = + rzalloc_array(this, unsigned, type->length); + } + } + + /** + * Change this->interface_type on a variable that previously had a + * different, but compatible, interface_type. This is used during linking + * to set the size of arrays in interface blocks. + */ + void change_interface_type(const struct glsl_type *type) + { + if (this->u.max_ifc_array_access != NULL) { + /* max_ifc_array_access has already been allocated, so make sure the + * new interface has the same number of fields as the old one.
+ */ + assert(this->interface_type->length == type->length); + } + this->interface_type = type; + } + + /** + * Change this->interface_type on a variable that previously had a + * different, and incompatible, interface_type. This is used during + * compilation to handle redeclaration of the built-in gl_PerVertex + * interface block. + */ + void reinit_interface_type(const struct glsl_type *type) + { + if (this->u.max_ifc_array_access != NULL) { +#ifndef NDEBUG + /* Redeclaring gl_PerVertex is only allowed if none of the built-ins + * it defines have been accessed yet; so it's safe to throw away the + * old max_ifc_array_access pointer, since all of its values are + * zero. + */ + for (unsigned i = 0; i < this->interface_type->length; i++) + assert(this->u.max_ifc_array_access[i] == 0); +#endif + ralloc_free(this->u.max_ifc_array_access); + this->u.max_ifc_array_access = NULL; + } + this->interface_type = NULL; + init_interface_type(type); + } + + const glsl_type *get_interface_type() const + { + return this->interface_type; + } + + /** + * Get the max_ifc_array_access pointer + * + * A "set" function is not needed because the array is dynamically allocated + * as necessary. + */ + inline unsigned *get_max_ifc_array_access() + { + assert(this->data._num_state_slots == 0); + return this->u.max_ifc_array_access; + } + + inline unsigned get_num_state_slots() const + { + assert(!this->is_interface_instance() + || this->data._num_state_slots == 0); + return this->data._num_state_slots; + } + + inline void set_num_state_slots(unsigned n) + { + assert(!this->is_interface_instance() + || n == 0); + this->data._num_state_slots = n; + } + + inline ir_state_slot *get_state_slots() + { + return this->is_interface_instance() ? NULL : this->u.state_slots; + } + + inline const ir_state_slot *get_state_slots() const + { + return this->is_interface_instance() ?
NULL : this->u.state_slots; + } + + inline ir_state_slot *allocate_state_slots(unsigned n) + { + assert(!this->is_interface_instance()); + + this->u.state_slots = ralloc_array(this, ir_state_slot, n); + this->data._num_state_slots = 0; + + if (this->u.state_slots != NULL) + this->data._num_state_slots = n; + + return this->u.state_slots; + } + + inline bool is_name_ralloced() const + { + return this->name != ir_variable::tmp_name; + } + + /** + * Enable emitting extension warnings for this variable + */ + void enable_extension_warning(const char *extension); + + /** + * Get the extension warning string for this variable + * + * If warnings are not enabled, \c NULL is returned. + */ + const char *get_extension_warning() const; + + /** + * Declared type of the variable + */ + const struct glsl_type *type; + + /** + * Declared name of the variable + */ + const char *name; + + struct ir_variable_data { + + /** + * Is the variable read-only? + * + * This is set for variables declared as \c const, shader inputs, + * and uniforms. + */ + unsigned read_only:1; + unsigned centroid:1; + unsigned sample:1; + unsigned patch:1; + unsigned invariant:1; + unsigned precise:1; + + /** + * Has this variable been used for reading or writing? + * + * Several GLSL semantic checks require knowledge of whether or not a + * variable has been used. For example, it is an error to redeclare a + * variable as invariant after it has been used. + * + * This is only maintained in the ast_to_hir.cpp path, not in + * Mesa's fixed function or ARB program paths. + */ + unsigned used:1; + + /** + * Has this variable been statically assigned? + * + * This answers whether the variable was assigned in any path of + * the shader during ast_to_hir. This doesn't answer whether it is + * still written after dead code removal, nor is it maintained in + * non-ast_to_hir.cpp (GLSL parsing) paths. 
+ */ + unsigned assigned:1; + + /** + * When separate shader programs are enabled, only input/outputs between + * the stages of a multi-stage separate program can be safely removed + * from the shader interface. Other input/outputs must remains active. + */ + unsigned always_active_io:1; + + /** + * Enum indicating how the variable was declared. See + * ir_var_declaration_type. + * + * This is used to detect certain kinds of illegal variable redeclarations. + */ + unsigned how_declared:2; + + /** + * Storage class of the variable. + * + * \sa ir_variable_mode + */ + unsigned mode:4; + + /** + * Interpolation mode for shader inputs / outputs + * + * \sa ir_variable_interpolation + */ + unsigned interpolation:2; + + /** + * \name ARB_fragment_coord_conventions + * @{ + */ + unsigned origin_upper_left:1; + unsigned pixel_center_integer:1; + /*@}*/ + + /** + * Was the location explicitly set in the shader? + * + * If the location is explicitly set in the shader, it \b cannot be changed + * by the linker or by the API (e.g., calls to \c glBindAttribLocation have + * no effect). + */ + unsigned explicit_location:1; + unsigned explicit_index:1; + + /** + * Was an initial binding explicitly set in the shader? + * + * If so, constant_value contains an integer ir_constant representing the + * initial binding point. + */ + unsigned explicit_binding:1; + + /** + * Does this variable have an initializer? + * + * This is used by the linker to cross-validiate initializers of global + * variables. + */ + unsigned has_initializer:1; + + /** + * Is this variable a generic output or input that has not yet been matched + * up to a variable in another stage of the pipeline? + * + * This is used by the linker as scratch storage while assigning locations + * to generic inputs and outputs. 
+ */ + unsigned is_unmatched_generic_inout:1; + + /** + * If non-zero, then this variable may be packed along with other variables + * into a single varying slot, so this offset should be applied when + * accessing components. For example, an offset of 1 means that the x + * component of this variable is actually stored in component y of the + * location specified by \c location. + */ + unsigned location_frac:2; + + /** + * Layout of the matrix. Uses glsl_matrix_layout values. + */ + unsigned matrix_layout:2; + + /** + * Non-zero if this variable was created by lowering a named interface + * block which was not an array. + * + * Note that this variable and \c from_named_ifc_block_array will never + * both be non-zero. + */ + unsigned from_named_ifc_block_nonarray:1; + + /** + * Non-zero if this variable was created by lowering a named interface + * block which was an array. + * + * Note that this variable and \c from_named_ifc_block_nonarray will never + * both be non-zero. + */ + unsigned from_named_ifc_block_array:1; + + /** + * Non-zero if the variable must be a shader input. This is useful for + * constraints on function parameters. + */ + unsigned must_be_shader_input:1; + + /** + * Output index for dual source blending. + * + * \note + * The GLSL spec only allows the values 0 or 1 for the index in \b dual + * source blending. + */ + unsigned index:1; + + /** + * Precision qualifier. + * + * In desktop GLSL we do not care about precision qualifiers at all, in + * fact, the spec says that precision qualifiers are ignored. + * + * To make things easy, we make it so that this field is always + * GLSL_PRECISION_NONE on desktop shaders. This way all the variables + * have the same precision value and the checks we add in the compiler + * for this field will never break a desktop shader compile. + */ + unsigned precision:2; + + /** + * \brief Layout qualifier for gl_FragDepth. 
+ * + * This is not equal to \c ir_depth_layout_none if and only if this + * variable is \c gl_FragDepth and a layout qualifier is specified. + */ + ir_depth_layout depth_layout:3; + + /** + * ARB_shader_image_load_store qualifiers. + */ + unsigned image_read_only:1; /**< "readonly" qualifier. */ + unsigned image_write_only:1; /**< "writeonly" qualifier. */ + unsigned image_coherent:1; + unsigned image_volatile:1; + unsigned image_restrict:1; + + /** + * ARB_shader_storage_buffer_object + */ + unsigned from_ssbo_unsized_array:1; /**< unsized array buffer variable. */ + + /** + * Emit a warning if this variable is accessed. + */ + private: + uint8_t warn_extension_index; + + public: + /** Image internal format if specified explicitly, otherwise GL_NONE. */ + uint16_t image_format; + + private: + /** + * Number of state slots used + * + * \note + * This could be stored in as few as 7-bits, if necessary. If it is made + * smaller, add an assertion to \c ir_variable::allocate_state_slots to + * be safe. + */ + uint16_t _num_state_slots; + + public: + /** + * Initial binding point for a sampler, atomic, or UBO. + * + * For array types, this represents the binding point for the first element. + */ + int16_t binding; + + /** + * Storage location of the base of this variable + * + * The precise meaning of this field depends on the nature of the variable. + * + * - Vertex shader input: one of the values from \c gl_vert_attrib. + * - Vertex shader output: one of the values from \c gl_varying_slot. + * - Geometry shader input: one of the values from \c gl_varying_slot. + * - Geometry shader output: one of the values from \c gl_varying_slot. + * - Fragment shader input: one of the values from \c gl_varying_slot. + * - Fragment shader output: one of the values from \c gl_frag_result. + * - Uniforms: Per-stage uniform slot number for default uniform block. + * - Uniforms: Index within the uniform block definition for UBO members. 
+ * - Non-UBO Uniforms: explicit location until linking then reused to + * store uniform slot number. + * - Other: This field is not currently used. + * + * If the variable is a uniform, shader input, or shader output, and the + * slot has not been assigned, the value will be -1. + */ + int location; + + /** + * Vertex stream output identifier. + */ + unsigned stream; + + /** + * Location an atomic counter is stored at. + */ + unsigned offset; + + /** + * Highest element accessed with a constant expression array index + * + * Not used for non-array variables. + */ + unsigned max_array_access; + + /** + * Allow (only) ir_variable direct access private members. + */ + friend class ir_variable; + } data; + + /** + * Value assigned in the initializer of a variable declared "const" + */ + ir_constant *constant_value; + + /** + * Constant expression assigned in the initializer of the variable + * + * \warning + * This field and \c ::constant_value are distinct. Even if the two fields + * refer to constants with the same value, they must point to separate + * objects. + */ + ir_constant *constant_initializer; + +private: + static const char *const warn_extension_table[]; + + union { + /** + * For variables which satisfy the is_interface_instance() predicate, + * this points to an array of integers such that if the ith member of + * the interface block is an array, max_ifc_array_access[i] is the + * maximum array element of that member that has been accessed. If the + * ith member of the interface block is not an array, + * max_ifc_array_access[i] is unused. + * + * For variables whose type is not an interface block, this pointer is + * NULL. + */ + unsigned *max_ifc_array_access; + + /** + * Built-in state that backs this uniform + * + * Once set at variable creation, \c state_slots must remain invariant. + * + * If the variable is not a uniform, \c _num_state_slots will be zero + * and \c state_slots will be \c NULL. 
+ */ + ir_state_slot *state_slots; + } u; + + /** + * For variables that are in an interface block or are an instance of an + * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block. + * + * \sa ir_variable::location + */ + const glsl_type *interface_type; + + /** + * Name used for anonymous compiler temporaries + */ + static const char tmp_name[]; + +public: + /** + * Should the constructor keep names for ir_var_temporary variables? + * + * When this global is false, names passed to the constructor for + * \c ir_var_temporary variables will be dropped. Instead, the variable will + * be named "compiler_temp". This name will be in static storage. + * + * \warning + * \b NEVER change the mode of an \c ir_var_temporary. + * + * \warning + * This variable is \b not thread-safe. It is global, \b not + * per-context. It begins life false. A context can, at some point, make + * it true. From that point on, it will be true forever. This should be + * okay since it will only be set true while debugging. + */ + static bool temporaries_allocate_names; +}; + +/** + * A function that returns whether a built-in function is available in the + * current shading language (based on version, ES or desktop, and extensions). + */ +typedef bool (*builtin_available_predicate)(const _mesa_glsl_parse_state *); + +/*@{*/ +/** + * The representation of a function instance; may be the full definition or + * simply a prototype. + */ +class ir_function_signature : public ir_instruction { + /* An ir_function_signature will be part of the list of signatures in + * an ir_function. 
+ */ +public: + ir_function_signature(const glsl_type *return_type, + builtin_available_predicate builtin_avail = NULL); + + virtual ir_function_signature *clone(void *mem_ctx, + struct hash_table *ht) const; + ir_function_signature *clone_prototype(void *mem_ctx, + struct hash_table *ht) const; + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + /** + * Attempt to evaluate this function as a constant expression, + * given a list of the actual parameters and the variable context. + * Returns NULL for non-built-ins. + */ + ir_constant *constant_expression_value(exec_list *actual_parameters, struct hash_table *variable_context); + + /** + * Get the name of the function for which this is a signature + */ + const char *function_name() const; + + /** + * Get a handle to the function for which this is a signature + * + * There is no setter function; this function returns a \c const pointer, + * and \c ir_function_signature::_function is private for a reason. The + * only way to make a connection between a function and function signature + * is via \c ir_function::add_signature. This helps ensure that certain + * invariants (i.e., a function signature is in the list of signatures for + * its \c _function) are met. + * + * \sa ir_function::add_signature + */ + inline const class ir_function *function() const + { + return this->_function; + } + + /** + * Check whether the qualifiers match between this signature's parameters + * and the supplied parameter list. If not, returns the name of the first + * parameter with mismatched qualifiers (for use in error messages). + */ + const char *qualifiers_match(exec_list *params); + + /** + * Replace the current parameter list with the given one. This is useful + * if the current information came from a prototype, and either has invalid + * or missing parameter names. 
+ */ + void replace_parameters(exec_list *new_params); + + /** + * Function return type. + * + * \note This discards the optional precision qualifier. + */ + const struct glsl_type *return_type; + + /** + * List of ir_variable of function parameters. + * + * This represents the storage. The parameters passed in a particular + * call will be in ir_call::actual_parameters. + */ + struct exec_list parameters; + + /** Whether or not this function has a body (which may be empty). */ + unsigned is_defined:1; + + /** Whether or not this function signature is a built-in. */ + bool is_builtin() const; + + /** + * Whether or not this function is an intrinsic to be implemented + * by the driver. + */ + bool is_intrinsic; + + /** Whether or not a built-in is available for this shader. */ + bool is_builtin_available(const _mesa_glsl_parse_state *state) const; + + /** Body of instructions in the function. */ + struct exec_list body; + +private: + /** + * A function pointer to a predicate that answers whether a built-in + * function is available in the current shader. NULL if not a built-in. + */ + builtin_available_predicate builtin_avail; + + /** Function of which this signature is one overload. */ + class ir_function *_function; + + /** Function signature of which this one is a prototype clone */ + const ir_function_signature *origin; + + friend class ir_function; + + /** + * Helper function to run a list of instructions for constant + * expression evaluation. + * + * The hash table represents the values of the visible variables. + * There are no scoping issues because the table is indexed on + * ir_variable pointers, not variable names. + * + * Returns false if the expression is not constant, true otherwise, + * and the value in *result if result is non-NULL. 
+ */ + bool constant_expression_evaluate_expression_list(const struct exec_list &body, + struct hash_table *variable_context, + ir_constant **result); +}; + + +/** + * Header for tracking multiple overloaded functions with the same name. + * Contains a list of ir_function_signatures representing each of the + * actual functions. + */ +class ir_function : public ir_instruction { +public: + ir_function(const char *name); + + virtual ir_function *clone(void *mem_ctx, struct hash_table *ht) const; + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + void add_signature(ir_function_signature *sig) + { + sig->_function = this; + this->signatures.push_tail(sig); + } + + /** + * Find a signature that matches a set of actual parameters, taking implicit + * conversions into account. Also flags whether the match was exact. + */ + ir_function_signature *matching_signature(_mesa_glsl_parse_state *state, + const exec_list *actual_param, + bool allow_builtins, + bool *match_is_exact); + + /** + * Find a signature that matches a set of actual parameters, taking implicit + * conversions into account. + */ + ir_function_signature *matching_signature(_mesa_glsl_parse_state *state, + const exec_list *actual_param, + bool allow_builtins); + + /** + * Find a signature that exactly matches a set of actual parameters without + * any implicit type conversions. + */ + ir_function_signature *exact_matching_signature(_mesa_glsl_parse_state *state, + const exec_list *actual_ps); + + /** + * Name of the function. + */ + const char *name; + + /** Whether or not this function has a signature that isn't a built-in. */ + bool has_user_signature(); + + /** + * List of ir_function_signature for each overloaded function with this name. + */ + struct exec_list signatures; + + /** + * is this function a subroutine type declaration + * e.g. 
subroutine void type1(float arg1); + */ + bool is_subroutine; + + /** + * is this function associated to a subroutine type + * e.g. subroutine (type1, type2) function_name { function_body }; + * would have num_subroutine_types 2, + * and pointers to the type1 and type2 types. + */ + int num_subroutine_types; + const struct glsl_type **subroutine_types; + + int subroutine_index; +}; + +inline const char *ir_function_signature::function_name() const +{ + return this->_function->name; +} +/*@}*/ + + +/** + * IR instruction representing high-level if-statements + */ +class ir_if : public ir_instruction { +public: + ir_if(ir_rvalue *condition) + : ir_instruction(ir_type_if), condition(condition) + { + } + + virtual ir_if *clone(void *mem_ctx, struct hash_table *ht) const; + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + ir_rvalue *condition; + /** List of ir_instruction for the body of the then branch */ + exec_list then_instructions; + /** List of ir_instruction for the body of the else branch */ + exec_list else_instructions; +}; + + +/** + * IR instruction representing a high-level loop structure. + */ +class ir_loop : public ir_instruction { +public: + ir_loop(); + + virtual ir_loop *clone(void *mem_ctx, struct hash_table *ht) const; + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + /** List of ir_instruction that make up the body of the loop. */ + exec_list body_instructions; +}; + + +class ir_assignment : public ir_instruction { +public: + ir_assignment(ir_rvalue *lhs, ir_rvalue *rhs, ir_rvalue *condition = NULL); + + /** + * Construct an assignment with an explicit write mask + * + * \note + * Since a write mask is supplied, the LHS must already be a bare + * \c ir_dereference. There cannot be any swizzles in the LHS. 
+ */ + ir_assignment(ir_dereference *lhs, ir_rvalue *rhs, ir_rvalue *condition, + unsigned write_mask); + + virtual ir_assignment *clone(void *mem_ctx, struct hash_table *ht) const; + + virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + /** + * Get a whole variable written by an assignment + * + * If the LHS of the assignment writes a whole variable, the variable is + * returned. Otherwise \c NULL is returned. Examples of whole-variable + * assignment are: + * + * - Assigning to a scalar + * - Assigning to all components of a vector + * - Whole array (or matrix) assignment + * - Whole structure assignment + */ + ir_variable *whole_variable_written(); + + /** + * Set the LHS of an assignment + */ + void set_lhs(ir_rvalue *lhs); + + /** + * Left-hand side of the assignment. + * + * This should be treated as read only. If you need to set the LHS of an + * assignment, use \c ir_assignment::set_lhs. + */ + ir_dereference *lhs; + + /** + * Value being assigned + */ + ir_rvalue *rhs; + + /** + * Optional condition for the assignment. + */ + ir_rvalue *condition; + + + /** + * Component mask written + * + * For non-vector types in the LHS, this field will be zero. For vector + * types, a bit will be set for each component that is written. Note that + * for \c vec2 and \c vec3 types only the lower bits will ever be set. + * + * A partially-set write mask means that each enabled channel gets + * the value from a consecutive channel of the rhs. For example, + * to write just .xyw of gl_FragColor with color: + * + * (assign (constant bool (1)) (xyw) + * (var_ref gl_FragColor) + * (swiz xyw (var_ref color))) + */ + unsigned write_mask:4; +}; + +/* Update ir_expression::get_num_operands() and operator_strs when + * updating this list. 
+ */ +enum ir_expression_operation { + ir_unop_bit_not, + ir_unop_logic_not, + ir_unop_neg, + ir_unop_abs, + ir_unop_sign, + ir_unop_rcp, + ir_unop_rsq, + ir_unop_sqrt, + ir_unop_exp, /**< Exponential base e on gentype */ + ir_unop_log, /**< Natural log on gentype */ + ir_unop_exp2, + ir_unop_log2, + ir_unop_f2i, /**< Float-to-integer conversion. */ + ir_unop_f2u, /**< Float-to-unsigned conversion. */ + ir_unop_i2f, /**< Integer-to-float conversion. */ + ir_unop_f2b, /**< Float-to-boolean conversion */ + ir_unop_b2f, /**< Boolean-to-float conversion */ + ir_unop_i2b, /**< int-to-boolean conversion */ + ir_unop_b2i, /**< Boolean-to-int conversion */ + ir_unop_u2f, /**< Unsigned-to-float conversion. */ + ir_unop_i2u, /**< Integer-to-unsigned conversion. */ + ir_unop_u2i, /**< Unsigned-to-integer conversion. */ + ir_unop_d2f, /**< Double-to-float conversion. */ + ir_unop_f2d, /**< Float-to-double conversion. */ + ir_unop_d2i, /**< Double-to-integer conversion. */ + ir_unop_i2d, /**< Integer-to-double conversion. */ + ir_unop_d2u, /**< Double-to-unsigned conversion. */ + ir_unop_u2d, /**< Unsigned-to-double conversion. */ + ir_unop_d2b, /**< Double-to-boolean conversion. */ + ir_unop_bitcast_i2f, /**< Bit-identical int-to-float "conversion" */ + ir_unop_bitcast_f2i, /**< Bit-identical float-to-int "conversion" */ + ir_unop_bitcast_u2f, /**< Bit-identical uint-to-float "conversion" */ + ir_unop_bitcast_f2u, /**< Bit-identical float-to-uint "conversion" */ + + /** + * \name Unary floating-point rounding operations. + */ + /*@{*/ + ir_unop_trunc, + ir_unop_ceil, + ir_unop_floor, + ir_unop_fract, + ir_unop_round_even, + /*@}*/ + + /** + * \name Trigonometric operations. + */ + /*@{*/ + ir_unop_sin, + ir_unop_cos, + /*@}*/ + + /** + * \name Partial derivatives. + */ + /*@{*/ + ir_unop_dFdx, + ir_unop_dFdx_coarse, + ir_unop_dFdx_fine, + ir_unop_dFdy, + ir_unop_dFdy_coarse, + ir_unop_dFdy_fine, + /*@}*/ + + /** + * \name Floating point pack and unpack operations. 
+ */ + /*@{*/ + ir_unop_pack_snorm_2x16, + ir_unop_pack_snorm_4x8, + ir_unop_pack_unorm_2x16, + ir_unop_pack_unorm_4x8, + ir_unop_pack_half_2x16, + ir_unop_unpack_snorm_2x16, + ir_unop_unpack_snorm_4x8, + ir_unop_unpack_unorm_2x16, + ir_unop_unpack_unorm_4x8, + ir_unop_unpack_half_2x16, + /*@}*/ + + /** + * \name Lowered floating point unpacking operations. + * + * \see lower_packing_builtins_visitor::split_unpack_half_2x16 + */ + /*@{*/ + ir_unop_unpack_half_2x16_split_x, + ir_unop_unpack_half_2x16_split_y, + /*@}*/ + + /** + * \name Bit operations, part of ARB_gpu_shader5. + */ + /*@{*/ + ir_unop_bitfield_reverse, + ir_unop_bit_count, + ir_unop_find_msb, + ir_unop_find_lsb, + /*@}*/ + + ir_unop_saturate, + + /** + * \name Double packing, part of ARB_gpu_shader_fp64. + */ + /*@{*/ + ir_unop_pack_double_2x32, + ir_unop_unpack_double_2x32, + /*@}*/ + + ir_unop_frexp_sig, + ir_unop_frexp_exp, + + ir_unop_noise, + + ir_unop_subroutine_to_int, + /** + * Interpolate fs input at centroid + * + * operand0 is the fs input. + */ + ir_unop_interpolate_at_centroid, + + /** + * Ask the driver for the total size of a buffer block. + * + * operand0 is the ir_constant buffer block index in the linked shader. + */ + ir_unop_get_buffer_size, + + /** + * Calculate length of an unsized array inside a buffer block. + * This opcode is going to be replaced in a lowering pass inside + * the linker. + * + * operand0 is the unsized array's ir_rvalue for the calculation + * of its length. + */ + ir_unop_ssbo_unsized_array_length, + + /** + * A sentinel marking the last of the unary operations. + */ + ir_last_unop = ir_unop_ssbo_unsized_array_length, + + ir_binop_add, + ir_binop_sub, + ir_binop_mul, /**< Floating-point or low 32-bit integer multiply. */ + ir_binop_imul_high, /**< Calculates the high 32-bits of a 64-bit multiply. */ + ir_binop_div, + + /** + * Returns the carry resulting from the addition of the two arguments. 
+ */ + /*@{*/ + ir_binop_carry, + /*@}*/ + + /** + * Returns the borrow resulting from the subtraction of the second argument + * from the first argument. + */ + /*@{*/ + ir_binop_borrow, + /*@}*/ + + /** + * Takes one of two combinations of arguments: + * + * - mod(vecN, vecN) + * - mod(vecN, float) + * + * Does not take integer types. + */ + ir_binop_mod, + + /** + * \name Binary comparison operators which return a boolean vector. + * The type of both operands must be equal. + */ + /*@{*/ + ir_binop_less, + ir_binop_greater, + ir_binop_lequal, + ir_binop_gequal, + ir_binop_equal, + ir_binop_nequal, + /** + * Returns single boolean for whether all components of operands[0] + * equal the components of operands[1]. + */ + ir_binop_all_equal, + /** + * Returns single boolean for whether any component of operands[0] + * is not equal to the corresponding component of operands[1]. + */ + ir_binop_any_nequal, + /*@}*/ + + /** + * \name Bit-wise binary operations. + */ + /*@{*/ + ir_binop_lshift, + ir_binop_rshift, + ir_binop_bit_and, + ir_binop_bit_xor, + ir_binop_bit_or, + /*@}*/ + + ir_binop_logic_and, + ir_binop_logic_xor, + ir_binop_logic_or, + + ir_binop_dot, + ir_binop_min, + ir_binop_max, + + ir_binop_pow, + + /** + * \name Lowered floating point packing operations. + * + * \see lower_packing_builtins_visitor::split_pack_half_2x16 + */ + /*@{*/ + ir_binop_pack_half_2x16_split, + /*@}*/ + + /** + * Load a value the size of a given GLSL type from a uniform block. + * + * operand0 is the ir_constant uniform block index in the linked shader. + * operand1 is a byte offset within the uniform block. + */ + ir_binop_ubo_load, + + /** + * \name Multiplies a number by two to a power, part of ARB_gpu_shader5. 
+ */ + /*@{*/ + ir_binop_ldexp, + /*@}*/ + + /** + * Extract a scalar from a vector + * + * operand0 is the vector + * operand1 is the index of the field to read from operand0 + */ + ir_binop_vector_extract, + + /** + * Interpolate fs input at offset + * + * operand0 is the fs input + * operand1 is the offset from the pixel center + */ + ir_binop_interpolate_at_offset, + + /** + * Interpolate fs input at sample position + * + * operand0 is the fs input + * operand1 is the sample ID + */ + ir_binop_interpolate_at_sample, + + /** + * A sentinel marking the last of the binary operations. + */ + ir_last_binop = ir_binop_interpolate_at_sample, + + /** + * \name Fused floating-point multiply-add, part of ARB_gpu_shader5. + */ + /*@{*/ + ir_triop_fma, + /*@}*/ + + ir_triop_lrp, + + /** + * \name Conditional Select + * + * A vector conditional select instruction (like ?:, but operating per- + * component on vectors). + * + * \see lower_instructions_visitor::ldexp_to_arith + */ + /*@{*/ + ir_triop_csel, + /*@}*/ + + ir_triop_bitfield_extract, + + /** + * Generate a value with one field of a vector changed + * + * operand0 is the vector + * operand1 is the value to write into the vector result + * operand2 is the index in operand0 to be modified + */ + ir_triop_vector_insert, + + /** + * A sentinel marking the last of the ternary operations. + */ + ir_last_triop = ir_triop_vector_insert, + + ir_quadop_bitfield_insert, + + ir_quadop_vector, + + /** + * A sentinel marking the last of the quaternary operations. + */ + ir_last_quadop = ir_quadop_vector, + + /** + * A sentinel marking the last of all operations. 
+ */ + ir_last_opcode = ir_quadop_vector +}; + +class ir_expression : public ir_rvalue { +public: + ir_expression(int op, const struct glsl_type *type, + ir_rvalue *op0, ir_rvalue *op1 = NULL, + ir_rvalue *op2 = NULL, ir_rvalue *op3 = NULL); + + /** + * Constructor for unary operation expressions + */ + ir_expression(int op, ir_rvalue *); + + /** + * Constructor for binary operation expressions + */ + ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1); + + /** + * Constructor for ternary operation expressions + */ + ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1, ir_rvalue *op2); + + virtual bool equals(const ir_instruction *ir, + enum ir_node_type ignore = ir_type_unset) const; + + virtual ir_expression *clone(void *mem_ctx, struct hash_table *ht) const; + + /** + * Attempt to constant-fold the expression + * + * The "variable_context" hash table links ir_variable * to ir_constant * + * that represent the variables' values. \c NULL represents an empty + * context. + * + * If the expression cannot be constant folded, this method will return + * \c NULL. + */ + virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); + + /** + * Determine the number of operands used by an expression + */ + static unsigned int get_num_operands(ir_expression_operation); + + /** + * Determine the number of operands used by this expression + * + * For \c ir_quadop_vector the count is the number of vector elements of + * the expression's type; for every other operation it is the per-opcode + * count returned by the static overload. + */ + unsigned int get_num_operands() const + { + return (this->operation == ir_quadop_vector) + ? this->type->vector_elements : get_num_operands(operation); + } + + /** + * Return whether the expression operates on vectors horizontally. 
+ */ + bool is_horizontal() const + { + return operation == ir_binop_all_equal || + operation == ir_binop_any_nequal || + operation == ir_binop_dot || + operation == ir_binop_vector_extract || + operation == ir_triop_vector_insert || + operation == ir_quadop_vector; + } + + /** + * Return a string representing this expression's operator. 
+ */ + const char *operator_string(); + + /** + * Return a string representing this expression's operator. + */ + static const char *operator_string(ir_expression_operation); + + + /** + * Do a reverse-lookup to translate the given string into an operator. + */ + static ir_expression_operation get_operator(const char *); + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + virtual ir_variable *variable_referenced() const; + + ir_expression_operation operation; + ir_rvalue *operands[4]; +}; + + +/** + * HIR instruction representing a high-level function call, containing a list + * of parameters and returning a value in the supplied temporary. + */ +class ir_call : public ir_instruction { +public: + ir_call(ir_function_signature *callee, + ir_dereference_variable *return_deref, + exec_list *actual_parameters) + : ir_instruction(ir_type_call), return_deref(return_deref), callee(callee), sub_var(NULL), array_idx(NULL) + { + assert(callee->return_type != NULL); + actual_parameters->move_nodes_to(& this->actual_parameters); + this->use_builtin = callee->is_builtin(); + } + + ir_call(ir_function_signature *callee, + ir_dereference_variable *return_deref, + exec_list *actual_parameters, + ir_variable *var, ir_rvalue *array_idx) + : ir_instruction(ir_type_call), return_deref(return_deref), callee(callee), sub_var(var), array_idx(array_idx) + { + assert(callee->return_type != NULL); + actual_parameters->move_nodes_to(& this->actual_parameters); + this->use_builtin = callee->is_builtin(); + } + + virtual ir_call *clone(void *mem_ctx, struct hash_table *ht) const; + + virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + /** + * Get the name of the function being called. 
+ */ + const char *callee_name() const + { + return callee->function_name(); + } + + /** + * Generates an inline version of the function before @ir, + * storing the return value in return_deref. + */ + void generate_inline(ir_instruction *ir); + + /** + * Storage for the function's return value. + * This must be NULL if the return type is void. + */ + ir_dereference_variable *return_deref; + + /** + * The specific function signature being called. + */ + ir_function_signature *callee; + + /** List of ir_rvalue of parameters passed in this call. */ + exec_list actual_parameters; + + /** Should this call only bind to a built-in function? */ + bool use_builtin; + + /* + * ARB_shader_subroutine support - + * the subroutine uniform variable and array index + * rvalue to be used in the lowering pass later. + */ + ir_variable *sub_var; + ir_rvalue *array_idx; +}; + + +/** + * \name Jump-like IR instructions. + * + * These include \c break, \c continue, \c return, and \c discard. + */ +/*@{*/ +class ir_jump : public ir_instruction { +protected: + ir_jump(enum ir_node_type t) + : ir_instruction(t) + { + } +}; + +class ir_return : public ir_jump { +public: + ir_return() + : ir_jump(ir_type_return), value(NULL) + { + } + + ir_return(ir_rvalue *value) + : ir_jump(ir_type_return), value(value) + { + } + + virtual ir_return *clone(void *mem_ctx, struct hash_table *) const; + + ir_rvalue *get_value() const + { + return value; + } + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + ir_rvalue *value; +}; + + +/** + * Jump instructions used inside loops + * + * These include \c break and \c continue. The \c break within a loop is + * different from the \c break within a switch-statement. 
+ * + * \sa ir_switch_jump + */ +class ir_loop_jump : public ir_jump { +public: + enum jump_mode { + jump_break, + jump_continue + }; + + ir_loop_jump(jump_mode mode) + : ir_jump(ir_type_loop_jump) + { + this->mode = mode; + } + + virtual ir_loop_jump *clone(void *mem_ctx, struct hash_table *) const; + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + bool is_break() const + { + return mode == jump_break; + } + + bool is_continue() const + { + return mode == jump_continue; + } + + /** Mode selector for the jump instruction. */ + enum jump_mode mode; +}; + +/** + * IR instruction representing discard statements. + */ +class ir_discard : public ir_jump { +public: + ir_discard() + : ir_jump(ir_type_discard) + { + this->condition = NULL; + } + + ir_discard(ir_rvalue *cond) + : ir_jump(ir_type_discard) + { + this->condition = cond; + } + + virtual ir_discard *clone(void *mem_ctx, struct hash_table *ht) const; + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + ir_rvalue *condition; +}; +/*@}*/ + + +/** + * Texture sampling opcodes used in ir_texture + */ +enum ir_texture_opcode { + ir_tex, /**< Regular texture look-up */ + ir_txb, /**< Texture look-up with LOD bias */ + ir_txl, /**< Texture look-up with explicit LOD */ + ir_txd, /**< Texture look-up with partial derivatvies */ + ir_txf, /**< Texel fetch with explicit LOD */ + ir_txf_ms, /**< Multisample texture fetch */ + ir_txs, /**< Texture size */ + ir_lod, /**< Texture lod query */ + ir_tg4, /**< Texture gather */ + ir_query_levels, /**< Texture levels query */ + ir_texture_samples, /**< Texture samples query */ + ir_samples_identical, /**< Query whether all samples are definitely identical. 
*/ +}; + + +/** + * IR instruction to sample a texture + * + * The specific form of the IR instruction depends on the \c op value + * selected from \c ir_texture_opcode. In the printed IR, these will + * appear as: + * + * Texel offset (0 or an expression) + * | Projection divisor + * | | Shadow comparitor + * | | | + * v v v + * (tex <type> <sampler> <coordinate> 0 1 ( )) + * (txb <type> <sampler> <coordinate> 0 1 ( ) <bias>) + * (txl <type> <sampler> <coordinate> 0 1 ( ) <lod>) + * (txd <type> <sampler> <coordinate> 0 1 ( ) (dPdx dPdy)) + * (txf <type> <sampler> <coordinate> 0 <lod>) + * (txf_ms + * <type> <sampler> <coordinate> <sample_index>) + * (txs <type> <sampler> <lod>) + * (lod <type> <sampler> <coordinate>) + * (tg4 <type> <sampler> <coordinate> <offset> <component>) + * (query_levels <type> <sampler>) + * (samples_identical <sampler> <coordinate>) + */ +class ir_texture : public ir_rvalue { +public: + ir_texture(enum ir_texture_opcode op) + : ir_rvalue(ir_type_texture), + op(op), sampler(NULL), coordinate(NULL), projector(NULL), + shadow_comparitor(NULL), offset(NULL) + { + memset(&lod_info, 0, sizeof(lod_info)); + } + + virtual ir_texture *clone(void *mem_ctx, struct hash_table *) const; + + virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + virtual bool equals(const ir_instruction *ir, + enum ir_node_type ignore = ir_type_unset) const; + + /** + * Return a string representing the ir_texture_opcode. + */ + const char *opcode_string(); + + /** Set the sampler and type. */ + void set_sampler(ir_dereference *sampler, const glsl_type *type); + + /** + * Do a reverse-lookup to translate a string into an ir_texture_opcode. + */ + static ir_texture_opcode get_opcode(const char *); + + enum ir_texture_opcode op; /**< Texture opcode given to the constructor */ + + /** Sampler to use for the texture access.
*/ + ir_dereference *sampler; + + /** Texture coordinate to sample */ + ir_rvalue *coordinate; + + /** + * Value used for projective divide. + * + * If there is no projective divide (the common case), this will be + * \c NULL. Optimization passes should check for this to point to a constant + * of 1.0 and replace that with \c NULL. + */ + ir_rvalue *projector; + + /** + * Coordinate used for comparison on shadow look-ups. + * + * If there is no shadow comparison, this will be \c NULL. For the + * \c ir_txf opcode, this *must* be \c NULL. + */ + ir_rvalue *shadow_comparitor; + + /** Texel offset. */ + ir_rvalue *offset; + + union { + ir_rvalue *lod; /**< Floating point LOD */ + ir_rvalue *bias; /**< Floating point LOD bias */ + ir_rvalue *sample_index; /**< MSAA sample index */ + ir_rvalue *component; /**< Gather component selector */ + struct { + ir_rvalue *dPdx; /**< Partial derivative of coordinate wrt X */ + ir_rvalue *dPdy; /**< Partial derivative of coordinate wrt Y */ + } grad; + } lod_info; +}; + + +struct ir_swizzle_mask { + unsigned x:2; + unsigned y:2; + unsigned z:2; + unsigned w:2; + + /** + * Number of components in the swizzle. + */ + unsigned num_components:3; + + /** + * Does the swizzle contain duplicate components? + * + * L-value swizzles cannot contain duplicate components. + */ + unsigned has_duplicates:1; +}; + + +class ir_swizzle : public ir_rvalue { +public: + ir_swizzle(ir_rvalue *, unsigned x, unsigned y, unsigned z, unsigned w, + unsigned count); + + ir_swizzle(ir_rvalue *val, const unsigned *components, unsigned count); + + ir_swizzle(ir_rvalue *val, ir_swizzle_mask mask); + + virtual ir_swizzle *clone(void *mem_ctx, struct hash_table *) const; + + virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); + + /** + * Construct an ir_swizzle from the textual representation. Can fail. 
+ */ + static ir_swizzle *create(ir_rvalue *, const char *, unsigned vector_length); + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + virtual bool equals(const ir_instruction *ir, + enum ir_node_type ignore = ir_type_unset) const; + + bool is_lvalue() const + { + return val->is_lvalue() && !mask.has_duplicates; + } + + /** + * Get the variable that is ultimately referenced by an r-value + */ + virtual ir_variable *variable_referenced() const; + + ir_rvalue *val; + ir_swizzle_mask mask; + +private: + /** + * Initialize the mask component of a swizzle + * + * This is used by the \c ir_swizzle constructors. + */ + void init_mask(const unsigned *components, unsigned count); +}; + + +class ir_dereference : public ir_rvalue { +public: + virtual ir_dereference *clone(void *mem_ctx, struct hash_table *) const = 0; + + bool is_lvalue() const; + + /** + * Get the variable that is ultimately referenced by an r-value + */ + virtual ir_variable *variable_referenced() const = 0; + +protected: + ir_dereference(enum ir_node_type t) + : ir_rvalue(t) + { + } +}; + + +class ir_dereference_variable : public ir_dereference { +public: + ir_dereference_variable(ir_variable *var); + + virtual ir_dereference_variable *clone(void *mem_ctx, + struct hash_table *) const; + + virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); + + virtual bool equals(const ir_instruction *ir, + enum ir_node_type ignore = ir_type_unset) const; + + /** + * Get the variable that is ultimately referenced by an r-value + */ + virtual ir_variable *variable_referenced() const + { + return this->var; + } + + virtual ir_variable *whole_variable_referenced() + { + /* ir_dereference_variable objects always dereference the entire + * variable. However, if this dereference is dereferenced by anything + * else, the complete dereference chain is not a whole-variable + * dereference.
This method should only be called on the top most + * ir_rvalue in a dereference chain. + */ + return this->var; + } + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + /** + * Object being dereferenced. + */ + ir_variable *var; +}; + + +class ir_dereference_array : public ir_dereference { +public: + ir_dereference_array(ir_rvalue *value, ir_rvalue *array_index); + + ir_dereference_array(ir_variable *var, ir_rvalue *array_index); + + virtual ir_dereference_array *clone(void *mem_ctx, + struct hash_table *) const; + + virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); + + virtual bool equals(const ir_instruction *ir, + enum ir_node_type ignore = ir_type_unset) const; + + /** + * Get the variable that is ultimately referenced by an r-value + */ + virtual ir_variable *variable_referenced() const + { + return this->array->variable_referenced(); + } + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + ir_rvalue *array; + ir_rvalue *array_index; + +private: + void set_array(ir_rvalue *value); +}; + + +class ir_dereference_record : public ir_dereference { +public: + ir_dereference_record(ir_rvalue *value, const char *field); + + ir_dereference_record(ir_variable *var, const char *field); + + virtual ir_dereference_record *clone(void *mem_ctx, + struct hash_table *) const; + + virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); + + /** + * Get the variable that is ultimately referenced by an r-value + */ + virtual ir_variable *variable_referenced() const + { + return this->record->variable_referenced(); + } + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + ir_rvalue *record; + const char *field; +}; + + +/** + * Data stored in an ir_constant + */ 
+union ir_constant_data { + unsigned u[16]; + int i[16]; + float f[16]; + bool b[16]; + double d[16]; +}; + + +class ir_constant : public ir_rvalue { +public: + ir_constant(const struct glsl_type *type, const ir_constant_data *data); + ir_constant(bool b, unsigned vector_elements=1); + ir_constant(unsigned int u, unsigned vector_elements=1); + ir_constant(int i, unsigned vector_elements=1); + ir_constant(float f, unsigned vector_elements=1); + ir_constant(double d, unsigned vector_elements=1); + + /** + * Construct an ir_constant from a list of ir_constant values + */ + ir_constant(const struct glsl_type *type, exec_list *values); + + /** + * Construct an ir_constant from a scalar component of another ir_constant + * + * The new \c ir_constant inherits the type of the component from the + * source constant. + * + * \note + * In the case of a matrix constant, the new constant is a scalar, \b not + * a vector. + */ + ir_constant(const ir_constant *c, unsigned i); + + /** + * Return a new ir_constant of the specified type containing all zeros. + */ + static ir_constant *zero(void *mem_ctx, const glsl_type *type); + + virtual ir_constant *clone(void *mem_ctx, struct hash_table *) const; + + virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + virtual bool equals(const ir_instruction *ir, + enum ir_node_type ignore = ir_type_unset) const; + + /** + * Get a particular component of a constant as a specific type + * + * This is useful, for example, to get a value from an integer constant + * as a float or bool. This appears frequently when constructors are + * called with all constant parameters. 
+ */ + /*@{*/ + bool get_bool_component(unsigned i) const; + float get_float_component(unsigned i) const; + double get_double_component(unsigned i) const; + int get_int_component(unsigned i) const; + unsigned get_uint_component(unsigned i) const; + /*@}*/ + + ir_constant *get_array_element(unsigned i) const; + + ir_constant *get_record_field(const char *name); + + /** + * Copy the values on another constant at a given offset. + * + * The offset is ignored for array or struct copies, it's only for + * scalars or vectors into vectors or matrices. + * + * With identical types on both sides and zero offset it's clone() + * without creating a new object. + */ + + void copy_offset(ir_constant *src, int offset); + + /** + * Copy the values on another constant at a given offset and + * following an assign-like mask. + * + * The mask is ignored for scalars. + * + * Note that this function only handles what assign can handle, + * i.e. at most a vector as source and a column of a matrix as + * destination. + */ + + void copy_masked_offset(ir_constant *src, int offset, unsigned int mask); + + /** + * Determine whether a constant has the same value as another constant + * + * \sa ir_constant::is_zero, ir_constant::is_one, + * ir_constant::is_negative_one + */ + bool has_value(const ir_constant *) const; + + /** + * Return true if this ir_constant represents the given value. + * + * For vectors, this checks that each component is the given value. + */ + virtual bool is_value(float f, int i) const; + virtual bool is_zero() const; + virtual bool is_one() const; + virtual bool is_negative_one() const; + + /** + * Return true for constants that could be stored as 16-bit unsigned values. + * + * Note that this will return true even for signed integer ir_constants, as + * long as the value is non-negative and fits in 16-bits. + */ + virtual bool is_uint16_constant() const; + + /** + * Value of the constant. 
+ * + * The field used to back the values supplied by the constant is determined + * by the type associated with the \c ir_instruction. Constants may be + * scalars, vectors, or matrices. + */ + union ir_constant_data value; + + /* Array elements */ + ir_constant **array_elements; + + /* Structure fields */ + exec_list components; + +private: + /** + * Parameterless constructor only used by the clone method + */ + ir_constant(void); +}; + +/** + * IR instruction to emit a vertex in a geometry shader. + */ +class ir_emit_vertex : public ir_instruction { +public: + ir_emit_vertex(ir_rvalue *stream) + : ir_instruction(ir_type_emit_vertex), + stream(stream) + { + assert(stream); + } + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_emit_vertex *clone(void *mem_ctx, struct hash_table *ht) const + { + return new(mem_ctx) ir_emit_vertex(this->stream->clone(mem_ctx, ht)); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + int stream_id() const + { + return stream->as_constant()->value.i[0]; + } + + ir_rvalue *stream; +}; + +/** + * IR instruction to complete the current primitive and start a new one in a + * geometry shader. + */ +class ir_end_primitive : public ir_instruction { +public: + ir_end_primitive(ir_rvalue *stream) + : ir_instruction(ir_type_end_primitive), + stream(stream) + { + assert(stream); + } + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_end_primitive *clone(void *mem_ctx, struct hash_table *ht) const + { + return new(mem_ctx) ir_end_primitive(this->stream->clone(mem_ctx, ht)); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + int stream_id() const + { + return stream->as_constant()->value.i[0]; + } + + ir_rvalue *stream; +}; + +/** + * IR instruction for tessellation control and compute shader barrier. 
+ */ +class ir_barrier : public ir_instruction { +public: + ir_barrier() + : ir_instruction(ir_type_barrier) + { + } + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_barrier *clone(void *mem_ctx, struct hash_table *) const + { + return new(mem_ctx) ir_barrier(); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); +}; + +/*@}*/ + +/** + * Apply a visitor to each IR node in a list + */ +void +visit_exec_list(exec_list *list, ir_visitor *visitor); + +/** + * Validate invariants on each IR node in a list + */ +void validate_ir_tree(exec_list *instructions); + +struct _mesa_glsl_parse_state; +struct gl_shader_program; + +/** + * Detect whether an unlinked shader contains static recursion + * + * If the list of instructions is determined to contain static recursion, + * \c _mesa_glsl_error will be called to emit error messages for each function + * that is in the recursion cycle. + */ +void +detect_recursion_unlinked(struct _mesa_glsl_parse_state *state, + exec_list *instructions); + +/** + * Detect whether a linked shader contains static recursion + * + * If the list of instructions is determined to contain static recursion, + * \c link_error_printf will be called to emit error messages for each function + * that is in the recursion cycle. In addition, + * \c gl_shader_program::LinkStatus will be set to false. 
+ */ +void +detect_recursion_linked(struct gl_shader_program *prog, + exec_list *instructions); + +/** + * Make a clone of each IR instruction in a list + * + * \param in List of IR instructions that are to be cloned + * \param out List to hold the cloned instructions + */ +void +clone_ir_list(void *mem_ctx, exec_list *out, const exec_list *in); + +extern void +_mesa_glsl_initialize_variables(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + +extern void +_mesa_glsl_initialize_derived_variables(gl_shader *shader); + +extern void +_mesa_glsl_initialize_functions(_mesa_glsl_parse_state *state); + +extern void +_mesa_glsl_initialize_builtin_functions(); + +extern ir_function_signature * +_mesa_glsl_find_builtin_function(_mesa_glsl_parse_state *state, + const char *name, exec_list *actual_parameters); + +extern ir_function * +_mesa_glsl_find_builtin_function_by_name(const char *name); + +extern gl_shader * +_mesa_glsl_get_builtin_function_shader(void); + +extern ir_function_signature * +_mesa_get_main_function_signature(gl_shader *sh); + +extern void +_mesa_glsl_release_functions(void); + +extern void +_mesa_glsl_release_builtin_functions(void); + +extern void +reparent_ir(exec_list *list, void *mem_ctx); + +struct glsl_symbol_table; + +extern void +import_prototypes(const exec_list *source, exec_list *dest, + struct glsl_symbol_table *symbols, void *mem_ctx); + +extern bool +ir_has_call(ir_instruction *ir); + +extern void +do_set_program_inouts(exec_list *instructions, struct gl_program *prog, + gl_shader_stage shader_stage); + +extern char * +prototype_string(const glsl_type *return_type, const char *name, + exec_list *parameters); + +const char * +mode_string(const ir_variable *var); + +/** + * Built-in / reserved GL variables names start with "gl_" + */ +static inline bool +is_gl_identifier(const char *s) +{ + return s && s[0] == 'g' && s[1] == 'l' && s[2] == '_'; +} + +extern "C" { +#endif /* __cplusplus */ + +extern void _mesa_print_ir(FILE *f, 
struct exec_list *instructions, + struct _mesa_glsl_parse_state *state); + +extern void +fprint_ir(FILE *f, const void *instruction); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +unsigned +vertices_per_prim(GLenum prim); + +#endif /* IR_H */ diff --git a/src/compiler/glsl/ir_basic_block.cpp b/src/compiler/glsl/ir_basic_block.cpp new file mode 100644 index 0000000..15481aa --- /dev/null +++ b/src/compiler/glsl/ir_basic_block.cpp @@ -0,0 +1,99 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file ir_basic_block.cpp + * + * Basic block analysis of instruction streams. + */ + +#include "ir.h" +#include "ir_basic_block.h" + +/** + * Calls a user function for every basic block in the instruction stream. + * + * Basic block analysis is pretty easy in our IR thanks to the lack of + * unstructured control flow.
We've got: + * + * ir_loop (for () {}, while () {}, do {} while ()) + * ir_loop_jump (break, continue) + * ir_if () {} + * ir_return + * ir_call() + * + * Note that the basic blocks returned by this don't encompass all + * operations performed by the program -- for example, if conditions + * don't get returned, nor do the assignments that will be generated + * for ir_call parameters. + */ +void call_for_basic_blocks(exec_list *instructions, + void (*callback)(ir_instruction *first, + ir_instruction *last, + void *data), + void *data) +{ + ir_instruction *leader = NULL; + ir_instruction *last = NULL; + + foreach_in_list(ir_instruction, ir, instructions) { + ir_if *ir_if; + ir_loop *ir_loop; + ir_function *ir_function; + + if (!leader) + leader = ir; + + if ((ir_if = ir->as_if())) { + callback(leader, ir, data); + leader = NULL; + + call_for_basic_blocks(&ir_if->then_instructions, callback, data); + call_for_basic_blocks(&ir_if->else_instructions, callback, data); + } else if ((ir_loop = ir->as_loop())) { + callback(leader, ir, data); + leader = NULL; + call_for_basic_blocks(&ir_loop->body_instructions, callback, data); + } else if (ir->as_jump() || ir->as_call()) { + callback(leader, ir, data); + leader = NULL; + } else if ((ir_function = ir->as_function())) { + /* A function definition doesn't interrupt our basic block + * since execution doesn't go into it. We should process the + * bodies of its signatures for BBs, though. + * + * Note that we miss an opportunity for producing more + * maximal BBs between the instructions that precede main() + * and the body of main(). Perhaps those instructions ought + * to live inside of main().
+ */ + foreach_in_list(ir_function_signature, ir_sig, &ir_function->signatures) { + call_for_basic_blocks(&ir_sig->body, callback, data); + } + } + last = ir; + } + if (leader) { + callback(leader, last, data); + } +} diff --git a/src/compiler/glsl/ir_basic_block.h b/src/compiler/glsl/ir_basic_block.h new file mode 100644 index 0000000..dbd678b --- /dev/null +++ b/src/compiler/glsl/ir_basic_block.h @@ -0,0 +1,28 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE.
+ */ + +void call_for_basic_blocks(exec_list *instructions, + void (*callback)(ir_instruction *first, + ir_instruction *last, + void *data), + void *data); diff --git a/src/compiler/glsl/ir_builder.cpp b/src/compiler/glsl/ir_builder.cpp new file mode 100644 index 0000000..c9cf124 --- /dev/null +++ b/src/compiler/glsl/ir_builder.cpp @@ -0,0 +1,612 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "ir_builder.h" +#include "program/prog_instruction.h" + +using namespace ir_builder; + +namespace ir_builder { + +void +ir_factory::emit(ir_instruction *ir) +{ + instructions->push_tail(ir); +} + +ir_variable * +ir_factory::make_temp(const glsl_type *type, const char *name) +{ + ir_variable *var; + + var = new(mem_ctx) ir_variable(type, name, ir_var_temporary); + emit(var); + + return var; +} + +ir_assignment * +assign(deref lhs, operand rhs, operand condition, int writemask) +{ + void *mem_ctx = ralloc_parent(lhs.val); + + ir_assignment *assign = new(mem_ctx) ir_assignment(lhs.val, + rhs.val, + condition.val, + writemask); + + return assign; +} + +ir_assignment * +assign(deref lhs, operand rhs) +{ + return assign(lhs, rhs, (1 << lhs.val->type->vector_elements) - 1); +} + +ir_assignment * +assign(deref lhs, operand rhs, int writemask) +{ + return assign(lhs, rhs, (ir_rvalue *) NULL, writemask); +} + +ir_assignment * +assign(deref lhs, operand rhs, operand condition) +{ + return assign(lhs, rhs, condition, (1 << lhs.val->type->vector_elements) - 1); +} + +ir_return * +ret(operand retval) +{ + void *mem_ctx = ralloc_parent(retval.val); + return new(mem_ctx) ir_return(retval.val); +} + +ir_swizzle * +swizzle(operand a, int swizzle, int components) +{ + void *mem_ctx = ralloc_parent(a.val); + + return new(mem_ctx) ir_swizzle(a.val, + GET_SWZ(swizzle, 0), + GET_SWZ(swizzle, 1), + GET_SWZ(swizzle, 2), + GET_SWZ(swizzle, 3), + components); +} + +ir_swizzle * +swizzle_for_size(operand a, unsigned components) +{ + void *mem_ctx = ralloc_parent(a.val); + + if (a.val->type->vector_elements < components) + components = a.val->type->vector_elements; + + unsigned s[4] = { 0, 1, 2, 3 }; + for (int i = components; i < 4; i++) + s[i] = components - 1; + + return new(mem_ctx) ir_swizzle(a.val, s, components); +} + +ir_swizzle * +swizzle_xxxx(operand a) +{ + return swizzle(a, SWIZZLE_XXXX, 4); +} + +ir_swizzle * +swizzle_yyyy(operand a) +{ + return swizzle(a, 
SWIZZLE_YYYY, 4); +} + +ir_swizzle * +swizzle_zzzz(operand a) +{ + return swizzle(a, SWIZZLE_ZZZZ, 4); +} + +ir_swizzle * +swizzle_wwww(operand a) +{ + return swizzle(a, SWIZZLE_WWWW, 4); +} + +ir_swizzle * +swizzle_x(operand a) +{ + return swizzle(a, SWIZZLE_XXXX, 1); +} + +ir_swizzle * +swizzle_y(operand a) +{ + return swizzle(a, SWIZZLE_YYYY, 1); +} + +ir_swizzle * +swizzle_z(operand a) +{ + return swizzle(a, SWIZZLE_ZZZZ, 1); +} + +ir_swizzle * +swizzle_w(operand a) +{ + return swizzle(a, SWIZZLE_WWWW, 1); +} + +ir_swizzle * +swizzle_xy(operand a) +{ + return swizzle(a, SWIZZLE_XYZW, 2); +} + +ir_swizzle * +swizzle_xyz(operand a) +{ + return swizzle(a, SWIZZLE_XYZW, 3); +} + +ir_swizzle * +swizzle_xyzw(operand a) +{ + return swizzle(a, SWIZZLE_XYZW, 4); +} + +ir_expression * +expr(ir_expression_operation op, operand a) +{ + void *mem_ctx = ralloc_parent(a.val); + + return new(mem_ctx) ir_expression(op, a.val); +} + +ir_expression * +expr(ir_expression_operation op, operand a, operand b) +{ + void *mem_ctx = ralloc_parent(a.val); + + return new(mem_ctx) ir_expression(op, a.val, b.val); +} + +ir_expression * +expr(ir_expression_operation op, operand a, operand b, operand c) +{ + void *mem_ctx = ralloc_parent(a.val); + + return new(mem_ctx) ir_expression(op, a.val, b.val, c.val); +} + +ir_expression *add(operand a, operand b) +{ + return expr(ir_binop_add, a, b); +} + +ir_expression *sub(operand a, operand b) +{ + return expr(ir_binop_sub, a, b); +} + +ir_expression *min2(operand a, operand b) +{ + return expr(ir_binop_min, a, b); +} + +ir_expression *max2(operand a, operand b) +{ + return expr(ir_binop_max, a, b); +} + +ir_expression *mul(operand a, operand b) +{ + return expr(ir_binop_mul, a, b); +} + +ir_expression *imul_high(operand a, operand b) +{ + return expr(ir_binop_imul_high, a, b); +} + +ir_expression *div(operand a, operand b) +{ + return expr(ir_binop_div, a, b); +} + +ir_expression *carry(operand a, operand b) +{ + return expr(ir_binop_carry, a, b); 
+} + +ir_expression *borrow(operand a, operand b) +{ + return expr(ir_binop_borrow, a, b); +} + +ir_expression *trunc(operand a) +{ + return expr(ir_unop_trunc, a); +} + +ir_expression *round_even(operand a) +{ + return expr(ir_unop_round_even, a); +} + +ir_expression *fract(operand a) +{ + return expr(ir_unop_fract, a); +} + +/* dot for vectors, mul for scalars */ +ir_expression *dot(operand a, operand b) +{ + assert(a.val->type == b.val->type); + + if (a.val->type->vector_elements == 1) + return expr(ir_binop_mul, a, b); + + return expr(ir_binop_dot, a, b); +} + +ir_expression* +clamp(operand a, operand b, operand c) +{ + return expr(ir_binop_min, expr(ir_binop_max, a, b), c); +} + +ir_expression * +saturate(operand a) +{ + return expr(ir_unop_saturate, a); +} + +ir_expression * +abs(operand a) +{ + return expr(ir_unop_abs, a); +} + +ir_expression * +neg(operand a) +{ + return expr(ir_unop_neg, a); +} + +ir_expression * +sin(operand a) +{ + return expr(ir_unop_sin, a); +} + +ir_expression * +cos(operand a) +{ + return expr(ir_unop_cos, a); +} + +ir_expression * +exp(operand a) +{ + return expr(ir_unop_exp, a); +} + +ir_expression * +rsq(operand a) +{ + return expr(ir_unop_rsq, a); +} + +ir_expression * +sqrt(operand a) +{ + return expr(ir_unop_sqrt, a); +} + +ir_expression * +log(operand a) +{ + return expr(ir_unop_log, a); +} + +ir_expression * +sign(operand a) +{ + return expr(ir_unop_sign, a); +} + +ir_expression * +subr_to_int(operand a) +{ + return expr(ir_unop_subroutine_to_int, a); +} + +ir_expression* +equal(operand a, operand b) +{ + return expr(ir_binop_equal, a, b); +} + +ir_expression* +nequal(operand a, operand b) +{ + return expr(ir_binop_nequal, a, b); +} + +ir_expression* +less(operand a, operand b) +{ + return expr(ir_binop_less, a, b); +} + +ir_expression* +greater(operand a, operand b) +{ + return expr(ir_binop_greater, a, b); +} + +ir_expression* +lequal(operand a, operand b) +{ + return expr(ir_binop_lequal, a, b); +} + +ir_expression* 
+gequal(operand a, operand b) +{ + return expr(ir_binop_gequal, a, b); +} + +ir_expression* +logic_not(operand a) +{ + return expr(ir_unop_logic_not, a); +} + +ir_expression* +logic_and(operand a, operand b) +{ + return expr(ir_binop_logic_and, a, b); +} + +ir_expression* +logic_or(operand a, operand b) +{ + return expr(ir_binop_logic_or, a, b); +} + +ir_expression* +bit_not(operand a) +{ + return expr(ir_unop_bit_not, a); +} + +ir_expression* +bit_and(operand a, operand b) +{ + return expr(ir_binop_bit_and, a, b); +} + +ir_expression* +bit_or(operand a, operand b) +{ + return expr(ir_binop_bit_or, a, b); +} + +ir_expression* +lshift(operand a, operand b) +{ + return expr(ir_binop_lshift, a, b); +} + +ir_expression* +rshift(operand a, operand b) +{ + return expr(ir_binop_rshift, a, b); +} + +ir_expression* +f2i(operand a) +{ + return expr(ir_unop_f2i, a); +} + +ir_expression* +bitcast_f2i(operand a) +{ + return expr(ir_unop_bitcast_f2i, a); +} + +ir_expression* +i2f(operand a) +{ + return expr(ir_unop_i2f, a); +} + +ir_expression* +bitcast_i2f(operand a) +{ + return expr(ir_unop_bitcast_i2f, a); +} + +ir_expression* +i2u(operand a) +{ + return expr(ir_unop_i2u, a); +} + +ir_expression* +u2i(operand a) +{ + return expr(ir_unop_u2i, a); +} + +ir_expression* +f2u(operand a) +{ + return expr(ir_unop_f2u, a); +} + +ir_expression* +bitcast_f2u(operand a) +{ + return expr(ir_unop_bitcast_f2u, a); +} + +ir_expression* +u2f(operand a) +{ + return expr(ir_unop_u2f, a); +} + +ir_expression* +bitcast_u2f(operand a) +{ + return expr(ir_unop_bitcast_u2f, a); +} + +ir_expression* +i2b(operand a) +{ + return expr(ir_unop_i2b, a); +} + +ir_expression* +b2i(operand a) +{ + return expr(ir_unop_b2i, a); +} + +ir_expression * +f2b(operand a) +{ + return expr(ir_unop_f2b, a); +} + +ir_expression * +b2f(operand a) +{ + return expr(ir_unop_b2f, a); +} + +ir_expression * +interpolate_at_centroid(operand a) +{ + return expr(ir_unop_interpolate_at_centroid, a); +} + +ir_expression * 
+interpolate_at_offset(operand a, operand b) +{ + return expr(ir_binop_interpolate_at_offset, a, b); +} + +ir_expression * +interpolate_at_sample(operand a, operand b) +{ + return expr(ir_binop_interpolate_at_sample, a, b); +} + +ir_expression * +f2d(operand a) +{ + return expr(ir_unop_f2d, a); +} + +ir_expression * +i2d(operand a) +{ + return expr(ir_unop_i2d, a); +} + +ir_expression * +u2d(operand a) +{ + return expr(ir_unop_u2d, a); +} + +ir_expression * +fma(operand a, operand b, operand c) +{ + return expr(ir_triop_fma, a, b, c); +} + +ir_expression * +lrp(operand x, operand y, operand a) +{ + return expr(ir_triop_lrp, x, y, a); +} + +ir_expression * +csel(operand a, operand b, operand c) +{ + return expr(ir_triop_csel, a, b, c); +} + +ir_expression * +bitfield_extract(operand a, operand b, operand c) +{ + return expr(ir_triop_bitfield_extract, a, b, c); +} + +ir_expression * +bitfield_insert(operand a, operand b, operand c, operand d) +{ + void *mem_ctx = ralloc_parent(a.val); + return new(mem_ctx) ir_expression(ir_quadop_bitfield_insert, + a.val->type, a.val, b.val, c.val, d.val); +} + +ir_if* +if_tree(operand condition, + ir_instruction *then_branch) +{ + assert(then_branch != NULL); + + void *mem_ctx = ralloc_parent(condition.val); + + ir_if *result = new(mem_ctx) ir_if(condition.val); + result->then_instructions.push_tail(then_branch); + return result; +} + +ir_if* +if_tree(operand condition, + ir_instruction *then_branch, + ir_instruction *else_branch) +{ + assert(then_branch != NULL); + assert(else_branch != NULL); + + void *mem_ctx = ralloc_parent(condition.val); + + ir_if *result = new(mem_ctx) ir_if(condition.val); + result->then_instructions.push_tail(then_branch); + result->else_instructions.push_tail(else_branch); + return result; +} + +} /* namespace ir_builder */ diff --git a/src/compiler/glsl/ir_builder.h b/src/compiler/glsl/ir_builder.h new file mode 100644 index 0000000..b483ebf --- /dev/null +++ b/src/compiler/glsl/ir_builder.h @@ -0,0 
+1,230 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "ir.h" + +namespace ir_builder { + +#ifndef WRITEMASK_X +enum writemask { + WRITEMASK_X = 0x1, + WRITEMASK_Y = 0x2, + WRITEMASK_Z = 0x4, + WRITEMASK_W = 0x8, +}; +#endif + +/** + * This little class exists to let the helper expression generators + * take either an ir_rvalue * or an ir_variable * to be automatically + * dereferenced, while still providing compile-time type checking. + * + * You don't have to explicitly call the constructor -- C++ will see + * that you passed an ir_variable, and silently call the + * operand(ir_variable *var) constructor behind your back. 
+ */ +class operand { +public: + operand(ir_rvalue *val) + : val(val) + { + } +/* Wraps the variable in a new ir_dereference_variable allocated in the ralloc parent context of the variable. */ + operand(ir_variable *var) + { + void *mem_ctx = ralloc_parent(var); + val = new(mem_ctx) ir_dereference_variable(var); + } + + ir_rvalue *val; +}; + +/** Automatic generator for ir_dereference_variable on assignment LHS. + * + * \sa operand + */ +class deref { +public: + deref(ir_dereference *val) + : val(val) + { + } +/* Same conversion as operand(ir_variable *): a new ir_dereference_variable allocated next to the variable. */ + deref(ir_variable *var) + { + void *mem_ctx = ralloc_parent(var); + val = new(mem_ctx) ir_dereference_variable(var); + } + + + ir_dereference *val; +}; +/** Bundles an instruction list pointer with the memory context handed to placement new; both default to NULL and are public members that can be set later. */ +class ir_factory { +public: + ir_factory(exec_list *instructions = NULL, void *mem_ctx = NULL) + : instructions(instructions), + mem_ctx(mem_ctx) + { + return; + } + + void emit(ir_instruction *ir); + ir_variable *make_temp(const glsl_type *type, const char *name); +/* Each constant() overload allocates a new ir_constant holding its argument in mem_ctx. */ + ir_constant* + constant(float f) + { + return new(mem_ctx) ir_constant(f); + } + + ir_constant* + constant(int i) + { + return new(mem_ctx) ir_constant(i); + } + + ir_constant* + constant(unsigned u) + { + return new(mem_ctx) ir_constant(u); + } + + ir_constant* + constant(bool b) + { + return new(mem_ctx) ir_constant(b); + } + + exec_list *instructions; + void *mem_ctx; +}; +/* Free-function builders. The lhs/rhs/operand parameters use the deref and operand wrapper classes, so callers may pass either an IR node or an ir_variable pointer. */ +ir_assignment *assign(deref lhs, operand rhs); +ir_assignment *assign(deref lhs, operand rhs, int writemask); +ir_assignment *assign(deref lhs, operand rhs, operand condition); +ir_assignment *assign(deref lhs, operand rhs, operand condition, int writemask); + +ir_return *ret(operand retval); +/* Generic expression builders taking an explicit opcode, followed by per-operation helpers. */ +ir_expression *expr(ir_expression_operation op, operand a); +ir_expression *expr(ir_expression_operation op, operand a, operand b); +ir_expression *expr(ir_expression_operation op, operand a, operand b, operand c); +ir_expression *add(operand a, operand b); +ir_expression *sub(operand a, operand b); +ir_expression *mul(operand a, operand b); +ir_expression *imul_high(operand a, operand b); +ir_expression *div(operand a, operand b); +ir_expression *carry(operand a, operand b); 
+ir_expression *borrow(operand a, operand b); +ir_expression *trunc(operand a); +ir_expression *round_even(operand a); +ir_expression *fract(operand a); +ir_expression *dot(operand a, operand b); +ir_expression *clamp(operand a, operand b, operand c); +ir_expression *saturate(operand a); +ir_expression *abs(operand a); +ir_expression *neg(operand a); +ir_expression *sin(operand a); +ir_expression *cos(operand a); +ir_expression *exp(operand a); +ir_expression *rsq(operand a); +ir_expression *sqrt(operand a); +ir_expression *log(operand a); +ir_expression *sign(operand a); + +ir_expression *subr_to_int(operand a); +ir_expression *equal(operand a, operand b); +ir_expression *nequal(operand a, operand b); +ir_expression *less(operand a, operand b); +ir_expression *greater(operand a, operand b); +ir_expression *lequal(operand a, operand b); +ir_expression *gequal(operand a, operand b); + +ir_expression *logic_not(operand a); +ir_expression *logic_and(operand a, operand b); +ir_expression *logic_or(operand a, operand b); + +ir_expression *bit_not(operand a); +ir_expression *bit_or(operand a, operand b); +ir_expression *bit_and(operand a, operand b); +ir_expression *lshift(operand a, operand b); +ir_expression *rshift(operand a, operand b); + +ir_expression *f2i(operand a); +ir_expression *bitcast_f2i(operand a); +ir_expression *i2f(operand a); +ir_expression *bitcast_i2f(operand a); +ir_expression *f2u(operand a); +ir_expression *bitcast_f2u(operand a); +ir_expression *u2f(operand a); +ir_expression *bitcast_u2f(operand a); +ir_expression *i2u(operand a); +ir_expression *u2i(operand a); +ir_expression *b2i(operand a); +ir_expression *i2b(operand a); +ir_expression *f2b(operand a); +ir_expression *b2f(operand a); + +ir_expression *f2d(operand a); +ir_expression *i2d(operand a); +ir_expression *u2d(operand a); + +ir_expression *min2(operand a, operand b); +ir_expression *max2(operand a, operand b); + +ir_expression *interpolate_at_centroid(operand a); +ir_expression 
*interpolate_at_offset(operand a, operand b); +ir_expression *interpolate_at_sample(operand a, operand b); + +ir_expression *fma(operand a, operand b, operand c); +ir_expression *lrp(operand x, operand y, operand a); +ir_expression *csel(operand a, operand b, operand c); +ir_expression *bitfield_extract(operand a, operand b, operand c); +ir_expression *bitfield_insert(operand a, operand b, operand c, operand d); + +ir_swizzle *swizzle(operand a, int swizzle, int components); +/** + * Swizzle away later components, but preserve the ordering. + */ +ir_swizzle *swizzle_for_size(operand a, unsigned components); + +ir_swizzle *swizzle_xxxx(operand a); +ir_swizzle *swizzle_yyyy(operand a); +ir_swizzle *swizzle_zzzz(operand a); +ir_swizzle *swizzle_wwww(operand a); +ir_swizzle *swizzle_x(operand a); +ir_swizzle *swizzle_y(operand a); +ir_swizzle *swizzle_z(operand a); +ir_swizzle *swizzle_w(operand a); +ir_swizzle *swizzle_xy(operand a); +ir_swizzle *swizzle_xyz(operand a); +ir_swizzle *swizzle_xyzw(operand a); + +ir_if *if_tree(operand condition, + ir_instruction *then_branch); +ir_if *if_tree(operand condition, + ir_instruction *then_branch, + ir_instruction *else_branch); + +} /* namespace ir_builder */ diff --git a/src/compiler/glsl/ir_clone.cpp b/src/compiler/glsl/ir_clone.cpp new file mode 100644 index 0000000..0965b0d --- /dev/null +++ b/src/compiler/glsl/ir_clone.cpp @@ -0,0 +1,440 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * 
paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include <string.h> +#include "main/compiler.h" +#include "ir.h" +#include "compiler/glsl_types.h" +#include "program/hash_table.h" +/* Base-class clone: ignores the hash table and returns error_value(mem_ctx). */ +ir_rvalue * +ir_rvalue::clone(void *mem_ctx, struct hash_table *) const +{ + /* The only possible instantiation is the generic error value. */ + return error_value(mem_ctx); +} + +/** + * Duplicate an IR variable + * + * The copy is allocated in \c mem_ctx. When \c ht is non-NULL the copy is + * also recorded in it against the original variable, which lets + * ir_dereference_variable::clone look the copy up later. + */ +ir_variable * +ir_variable::clone(void *mem_ctx, struct hash_table *ht) const +{ + ir_variable *var = new(mem_ctx) ir_variable(this->type, this->name, + (ir_variable_mode) this->data.mode); + + var->data.max_array_access = this->data.max_array_access; /* NOTE(review): looks redundant, the memcpy of the whole data struct below copies this field again */ + if (this->is_interface_instance()) { + var->u.max_ifc_array_access = + rzalloc_array(var, unsigned, this->interface_type->length); + memcpy(var->u.max_ifc_array_access, this->u.max_ifc_array_access, + this->interface_type->length * sizeof(unsigned)); + } + /* Bulk-copy every field of data from the original. */ + memcpy(&var->data, &this->data, sizeof(var->data)); + + if (this->get_state_slots()) { + ir_state_slot *s = var->allocate_state_slots(this->get_num_state_slots()); + memcpy(s, this->get_state_slots(), + sizeof(s[0]) * var->get_num_state_slots()); + } + + if (this->constant_value) + var->constant_value = this->constant_value->clone(mem_ctx, ht); + + if (this->constant_initializer) + var->constant_initializer = + this->constant_initializer->clone(mem_ctx, ht); + + var->interface_type = this->interface_type; + + if (ht) { + 
hash_table_insert(ht, var, (void *)const_cast<ir_variable *>(this)); + } + + return var; +} +/* Clones the swizzled value and reuses the same mask. */ +ir_swizzle * +ir_swizzle::clone(void *mem_ctx, struct hash_table *ht) const +{ + return new(mem_ctx) ir_swizzle(this->val->clone(mem_ctx, ht), this->mask); +} +/* A NULL return value stays NULL in the copy. */ +ir_return * +ir_return::clone(void *mem_ctx, struct hash_table *ht) const +{ + ir_rvalue *new_value = NULL; + + if (this->value) + new_value = this->value->clone(mem_ctx, ht); + + return new(mem_ctx) ir_return(new_value); +} +/* The condition is optional; a NULL condition stays NULL in the copy. */ +ir_discard * +ir_discard::clone(void *mem_ctx, struct hash_table *ht) const +{ + ir_rvalue *new_condition = NULL; + + if (this->condition != NULL) + new_condition = this->condition->clone(mem_ctx, ht); + + return new(mem_ctx) ir_discard(new_condition); +} +/* Only the jump mode is copied; ht is unused. */ +ir_loop_jump * +ir_loop_jump::clone(void *mem_ctx, struct hash_table *ht) const +{ + (void)ht; + + return new(mem_ctx) ir_loop_jump(this->mode); +} +/* Clones the condition, then every instruction of the then- and else-lists in their original order. */ +ir_if * +ir_if::clone(void *mem_ctx, struct hash_table *ht) const +{ + ir_if *new_if = new(mem_ctx) ir_if(this->condition->clone(mem_ctx, ht)); + + foreach_in_list(ir_instruction, ir, &this->then_instructions) { + new_if->then_instructions.push_tail(ir->clone(mem_ctx, ht)); + } + + foreach_in_list(ir_instruction, ir, &this->else_instructions) { + new_if->else_instructions.push_tail(ir->clone(mem_ctx, ht)); + } + + return new_if; +} +/* Clones every body instruction in order into a fresh ir_loop. */ +ir_loop * +ir_loop::clone(void *mem_ctx, struct hash_table *ht) const +{ + ir_loop *new_loop = new(mem_ctx) ir_loop(); + + foreach_in_list(ir_instruction, ir, &this->body_instructions) { + new_loop->body_instructions.push_tail(ir->clone(mem_ctx, ht)); + } + + return new_loop; +} +/* Clones the optional return dereference and each actual parameter; the callee pointer itself is passed through unchanged. */ +ir_call * +ir_call::clone(void *mem_ctx, struct hash_table *ht) const +{ + ir_dereference_variable *new_return_ref = NULL; + if (this->return_deref != NULL) + new_return_ref = this->return_deref->clone(mem_ctx, ht); + + exec_list new_parameters; + + foreach_in_list(ir_instruction, ir, &this->actual_parameters) { + new_parameters.push_tail(ir->clone(mem_ctx, ht)); + } + + return 
new(mem_ctx) ir_call(this->callee, new_return_ref, &new_parameters); +} +/* Clones the operands actually in use; the remaining op[] slots stay NULL when handed to the constructor. */ +ir_expression * +ir_expression::clone(void *mem_ctx, struct hash_table *ht) const +{ + ir_rvalue *op[ARRAY_SIZE(this->operands)] = { NULL, }; + unsigned int i; + + for (i = 0; i < get_num_operands(); i++) { + op[i] = this->operands[i]->clone(mem_ctx, ht); + } + + return new(mem_ctx) ir_expression(this->operation, this->type, + op[0], op[1], op[2], op[3]); +} +/* When ht holds an entry for the referenced variable the clone points at that entry, otherwise (or when ht is NULL) at the original variable. */ +ir_dereference_variable * +ir_dereference_variable::clone(void *mem_ctx, struct hash_table *ht) const +{ + ir_variable *new_var; + + if (ht) { + new_var = (ir_variable *)hash_table_find(ht, this->var); + if (!new_var) + new_var = this->var; + } else { + new_var = this->var; + } + + return new(mem_ctx) ir_dereference_variable(new_var); +} +/* Clones both the array r-value and the index. */ +ir_dereference_array * +ir_dereference_array::clone(void *mem_ctx, struct hash_table *ht) const +{ + return new(mem_ctx) ir_dereference_array(this->array->clone(mem_ctx, ht), + this->array_index->clone(mem_ctx, + ht)); +} +/* Clones the record r-value; the field is passed through as-is. */ +ir_dereference_record * +ir_dereference_record::clone(void *mem_ctx, struct hash_table *ht) const +{ + return new(mem_ctx) ir_dereference_record(this->record->clone(mem_ctx, ht), + this->field); +} +/* sampler is cloned unconditionally; coordinate, projector, shadow_comparitor and offset only when set. The switch then clones whichever lod_info member the opcode uses. */ +ir_texture * +ir_texture::clone(void *mem_ctx, struct hash_table *ht) const +{ + ir_texture *new_tex = new(mem_ctx) ir_texture(this->op); + new_tex->type = this->type; + + new_tex->sampler = this->sampler->clone(mem_ctx, ht); + if (this->coordinate) + new_tex->coordinate = this->coordinate->clone(mem_ctx, ht); + if (this->projector) + new_tex->projector = this->projector->clone(mem_ctx, ht); + if (this->shadow_comparitor) { + new_tex->shadow_comparitor = this->shadow_comparitor->clone(mem_ctx, ht); + } + + if (this->offset != NULL) + new_tex->offset = this->offset->clone(mem_ctx, ht); + + switch (this->op) { + case ir_tex: + case ir_lod: + case ir_query_levels: + case ir_texture_samples: + case ir_samples_identical: + break; + case ir_txb: + new_tex->lod_info.bias = 
this->lod_info.bias->clone(mem_ctx, ht); + break; + case ir_txl: + case ir_txf: + case ir_txs: + new_tex->lod_info.lod = this->lod_info.lod->clone(mem_ctx, ht); + break; + case ir_txf_ms: + new_tex->lod_info.sample_index = this->lod_info.sample_index->clone(mem_ctx, ht); + break; + case ir_txd: + new_tex->lod_info.grad.dPdx = this->lod_info.grad.dPdx->clone(mem_ctx, ht); + new_tex->lod_info.grad.dPdy = this->lod_info.grad.dPdy->clone(mem_ctx, ht); + break; + case ir_tg4: + new_tex->lod_info.component = this->lod_info.component->clone(mem_ctx, ht); + break; + } + + return new_tex; +} +/* Clones lhs, rhs and the optional condition, then copies write_mask onto the new assignment. */ +ir_assignment * +ir_assignment::clone(void *mem_ctx, struct hash_table *ht) const +{ + ir_rvalue *new_condition = NULL; + + if (this->condition) + new_condition = this->condition->clone(mem_ctx, ht); + + ir_assignment *cloned = + new(mem_ctx) ir_assignment(this->lhs->clone(mem_ctx, ht), + this->rhs->clone(mem_ctx, ht), + new_condition); + cloned->write_mask = this->write_mask; + return cloned; +} +/* Copies the subroutine bookkeeping, then clones every signature. When ht is given, each cloned signature is also recorded in it against the signature it was cloned from. */ +ir_function * +ir_function::clone(void *mem_ctx, struct hash_table *ht) const +{ + ir_function *copy = new(mem_ctx) ir_function(this->name); + + copy->is_subroutine = this->is_subroutine; + copy->subroutine_index = this->subroutine_index; + copy->num_subroutine_types = this->num_subroutine_types; + /* NOTE(review): the array is parented to mem_ctx rather than to copy -- confirm this lifetime is intended. */ copy->subroutine_types = ralloc_array(mem_ctx, const struct glsl_type *, copy->num_subroutine_types); + for (int i = 0; i < copy->num_subroutine_types; i++) + copy->subroutine_types[i] = this->subroutine_types[i]; + + foreach_in_list(const ir_function_signature, sig, &this->signatures) { + ir_function_signature *sig_copy = sig->clone(mem_ctx, ht); + copy->add_signature(sig_copy); + + if (ht != NULL) + hash_table_insert(ht, sig_copy, + (void *)const_cast<ir_function_signature *>(sig)); + } + + return copy; +} +/* Clones the prototype via clone_prototype(), then copies is_defined and clones the body instructions. */ +ir_function_signature * +ir_function_signature::clone(void *mem_ctx, struct hash_table *ht) const +{ + ir_function_signature *copy = this->clone_prototype(mem_ctx, ht); + + 
copy->is_defined = this->is_defined; + + /* Clone the instruction list. + */ + foreach_in_list(const ir_instruction, inst, &this->body) { + ir_instruction *const inst_copy = inst->clone(mem_ctx, ht); + copy->body.push_tail(inst_copy); + } + + return copy; +} +/* Builds a body-less copy: is_defined starts as false, builtin_avail is copied, and origin records the signature the copy was made from. */ +ir_function_signature * +ir_function_signature::clone_prototype(void *mem_ctx, struct hash_table *ht) const +{ + ir_function_signature *copy = + new(mem_ctx) ir_function_signature(this->return_type); + + copy->is_defined = false; + copy->builtin_avail = this->builtin_avail; + copy->origin = this; + + /* Clone the parameter list, but NOT the body. + */ + foreach_in_list(const ir_variable, param, &this->parameters) { + assert(const_cast<ir_variable *>(param)->as_variable() != NULL); + + ir_variable *const param_copy = param->clone(mem_ctx, ht); + copy->parameters.push_tail(param_copy); + } + + return copy; +} +/* Numeric and boolean base types are rebuilt from type and value; struct and array constants clone each component or element recursively. ht is unused. */ +ir_constant * +ir_constant::clone(void *mem_ctx, struct hash_table *ht) const +{ + (void)ht; + + switch (this->type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_BOOL: + return new(mem_ctx) ir_constant(this->type, &this->value); + + case GLSL_TYPE_STRUCT: { + ir_constant *c = new(mem_ctx) ir_constant; + + c->type = this->type; + for (exec_node *node = this->components.head + ; !node->is_tail_sentinel() + ; node = node->next) { + ir_constant *const orig = (ir_constant *) node; + + c->components.push_tail(orig->clone(mem_ctx, NULL)); + } + + return c; + } + + case GLSL_TYPE_ARRAY: { + ir_constant *c = new(mem_ctx) ir_constant; + + c->type = this->type; + c->array_elements = ralloc_array(c, ir_constant *, this->type->length); + for (unsigned i = 0; i < this->type->length; i++) { + c->array_elements[i] = this->array_elements[i]->clone(mem_ctx, NULL); + } + return c; + } + + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_ATOMIC_UINT: + case GLSL_TYPE_VOID: + case GLSL_TYPE_ERROR: + case GLSL_TYPE_SUBROUTINE: + 
case GLSL_TYPE_INTERFACE: + assert(!"Should not get here."); + break; + } + + return NULL; +} + +/** Visitor that retargets every ir_call whose callee has an entry in the hash table to the signature stored in that entry. */ +class fixup_ir_call_visitor : public ir_hierarchical_visitor { +public: + fixup_ir_call_visitor(struct hash_table *ht) + { + this->ht = ht; + } + + virtual ir_visitor_status visit_enter(ir_call *ir) + { + /* Try to find the function signature referenced by the ir_call in the + * table. If it is found, replace it with the value from the table. + */ + ir_function_signature *sig = + (ir_function_signature *) hash_table_find(this->ht, ir->callee); + if (sig != NULL) + ir->callee = sig; + + /* Since this may be used before function call parameters are flattened, + * the children also need to be processed. + */ + return visit_continue; + } + +private: + struct hash_table *ht; +}; + +/** Runs a fixup_ir_call_visitor built on ht over the given instruction list. */ +static void +fixup_function_calls(struct hash_table *ht, exec_list *instructions) +{ + fixup_ir_call_visitor v(ht); + v.run(instructions); +} + +/** Clones every instruction of in, in order, onto the tail of out (allocating in mem_ctx), then fixes up ir_call callees. The pointer-keyed hash table used for the mapping is local to this call. */ +void +clone_ir_list(void *mem_ctx, exec_list *out, const exec_list *in) +{ + struct hash_table *ht = + hash_table_ctor(0, hash_table_pointer_hash, hash_table_pointer_compare); + + foreach_in_list(const ir_instruction, original, in) { + ir_instruction *copy = original->clone(mem_ctx, ht); + + out->push_tail(copy); + } + + /* Make a pass over the cloned tree to fix up ir_call nodes to point to the + * cloned ir_function_signature nodes. This cannot be done automatically + * during cloning because the ir_call might be a forward reference (i.e., + * the function signature that it references may not have been cloned yet). 
+ */ + fixup_function_calls(ht, out); + + hash_table_dtor(ht); +} diff --git a/src/compiler/glsl/ir_constant_expression.cpp b/src/compiler/glsl/ir_constant_expression.cpp new file mode 100644 index 0000000..fbbf779 --- /dev/null +++ b/src/compiler/glsl/ir_constant_expression.cpp @@ -0,0 +1,2092 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file ir_constant_expression.cpp + * Evaluate and process constant valued expressions + * + * In GLSL, constant valued expressions are used in several places. These + * must be processed and evaluated very early in the compilation process. 
+ * + * * Sizes of arrays + * * Initializers for uniforms + * * Initializers for \c const variables + */ + +#include <math.h> +#include "main/core.h" /* for MAX2, MIN2, CLAMP */ +#include "util/rounding.h" /* for _mesa_roundeven */ +#include "util/half_float.h" +#include "ir.h" +#include "compiler/glsl_types.h" +#include "program/hash_table.h" +/** Dot product over the float components of two constants; the component count is taken from op0 and both operands are asserted to be float typed. */ +static float +dot_f(ir_constant *op0, ir_constant *op1) +{ + assert(op0->type->is_float() && op1->type->is_float()); + + float result = 0; + for (unsigned c = 0; c < op0->type->components(); c++) + result += op0->value.f[c] * op1->value.f[c]; + + return result; +} +/** Double-precision counterpart of dot_f, reading value.d and accumulating in a double. */ +static double +dot_d(ir_constant *op0, ir_constant *op1) +{ + assert(op0->type->is_double() && op1->type->is_double()); + + double result = 0; + for (unsigned c = 0; c < op0->type->components(); c++) + result += op0->value.d[c] * op1->value.d[c]; + + return result; +} + +/* This method is the only one supported by gcc. Unions in particular + * are iffy, and read-through-converted-pointer is killed by strict + * aliasing. OTOH, the compiler sees through the memcpy, so the + * resulting asm is reasonable. + */ +static float +bitcast_u2f(unsigned int u) +{ + assert(sizeof(float) == sizeof(unsigned int)); + float f; + memcpy(&f, &u, sizeof(f)); + return f; +} +/** Inverse of bitcast_u2f: returns the bit pattern of f as an unsigned int, again via memcpy. */ +static unsigned int +bitcast_f2u(float f) +{ + assert(sizeof(float) == sizeof(unsigned int)); + unsigned int u; + memcpy(&u, &f, sizeof(f)); + return u; +} + +/** + * Evaluate one component of a floating-point 4x8 packing function. + */ +typedef uint8_t +(*pack_1x8_func_t)(float); + +/** + * Evaluate one component of a floating-point 2x16 packing function. + */ +typedef uint16_t +(*pack_1x16_func_t)(float); + +/** + * Evaluate one component of a floating-point 4x8 unpacking function. + */ +typedef float +(*unpack_1x8_func_t)(uint8_t); + +/** + * Evaluate one component of a floating-point 2x16 unpacking function. 
+ */ +typedef float +(*unpack_1x16_func_t)(uint16_t); + +/** + * Evaluate a 2x16 floating-point packing function. + * + * \param pack_1x16 per-component conversion applied to \p x and \p y + * \return the converted \p x in bits 0-15 and the converted \p y in + * bits 16-31 + */ +static uint32_t +pack_2x16(pack_1x16_func_t pack_1x16, + float x, float y) +{ + /* From section 8.4 of the GLSL ES 3.00 spec: + * + * packSnorm2x16 + * ------------- + * The first component of the vector will be written to the least + * significant bits of the output; the last component will be written to + * the most significant bits. + * + * The specifications for the other packing functions contain similar + * language. + */ + uint32_t u = 0; + u |= ((uint32_t) pack_1x16(x) << 0); + u |= ((uint32_t) pack_1x16(y) << 16); + return u; +} + +/** + * Evaluate a 4x8 floating-point packing function. + * + * \param pack_1x8 per-component conversion applied to each of x, y, z, w + * \return the four converted bytes, \p x in bits 0-7 up to \p w in + * bits 24-31 + */ +static uint32_t +pack_4x8(pack_1x8_func_t pack_1x8, + float x, float y, float z, float w) +{ + /* From section 8.4 of the GLSL 4.30 spec: + * + * packSnorm4x8 + * ------------ + * The first component of the vector will be written to the least + * significant bits of the output; the last component will be written to + * the most significant bits. + * + * The specifications for the other packing functions contain similar + * language. + */ + uint32_t u = 0; + u |= ((uint32_t) pack_1x8(x) << 0); + u |= ((uint32_t) pack_1x8(y) << 8); + u |= ((uint32_t) pack_1x8(z) << 16); + u |= ((uint32_t) pack_1x8(w) << 24); + return u; +} + +/** + * Evaluate a 2x16 floating-point unpacking function. + * + * \param unpack_1x16 per-component conversion applied to each 16-bit half + * \param u packed input word + * \param x receives the value unpacked from bits 0-15 + * \param y receives the value unpacked from bits 16-31 + */ +static void +unpack_2x16(unpack_1x16_func_t unpack_1x16, + uint32_t u, + float *x, float *y) +{ + /* From section 8.4 of the GLSL ES 3.00 spec: + * + * unpackSnorm2x16 + * --------------- + * The first component of the returned vector will be extracted from + * the least significant bits of the input; the last component will be + * extracted from the most significant bits. + * + * The specifications for the other unpacking functions contain similar + * language. 
+ */ + *x = unpack_1x16((uint16_t) (u & 0xffff)); + *y = unpack_1x16((uint16_t) (u >> 16)); +} + +/** + * Evaluate a 4x8 floating-point unpacking function. + * + * \param unpack_1x8 per-component conversion applied to each byte of \p u + * \param u packed input word + * \param x,y,z,w receive the values unpacked from bits 0-7, 8-15, 16-23 + * and 24-31 respectively + */ +static void +unpack_4x8(unpack_1x8_func_t unpack_1x8, uint32_t u, + float *x, float *y, float *z, float *w) +{ + /* From section 8.4 of the GLSL 4.30 spec: + * + * unpackSnorm4x8 + * -------------- + * The first component of the returned vector will be extracted from + * the least significant bits of the input; the last component will be + * extracted from the most significant bits. + * + * The specifications for the other unpacking functions contain similar + * language. + */ + /* The uint8_t casts keep only the low eight bits of each shifted value. */ *x = unpack_1x8((uint8_t) (u & 0xff)); + *y = unpack_1x8((uint8_t) (u >> 8)); + *z = unpack_1x8((uint8_t) (u >> 16)); + *w = unpack_1x8((uint8_t) (u >> 24)); +} + +/** + * Evaluate one component of packSnorm4x8. + * + * NOTE(review): for negative inputs the rounded value is negative and the + * uint8_t cast presumably wraps it to the matching 8-bit pattern -- this + * assumes _mesa_lroundevenf returns a signed integer; confirm in + * util/rounding.h. + */ +static uint8_t +pack_snorm_1x8(float x) +{ + /* From section 8.4 of the GLSL 4.30 spec: + * + * packSnorm4x8 + * ------------ + * The conversion for component c of v to fixed point is done as + * follows: + * + * packSnorm4x8: round(clamp(c, -1, +1) * 127.0) + */ + return (uint8_t) + _mesa_lroundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f); +} + +/** + * Evaluate one component of packSnorm2x16. + */ +static uint16_t +pack_snorm_1x16(float x) +{ + /* From section 8.4 of the GLSL ES 3.00 spec: + * + * packSnorm2x16 + * ------------- + * The conversion for component c of v to fixed point is done as + * follows: + * + * packSnorm2x16: round(clamp(c, -1, +1) * 32767.0) + */ + return (uint16_t) + _mesa_lroundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f); +} + +/** + * Evaluate one component of unpackSnorm4x8. 
+ */ +static float +unpack_snorm_1x8(uint8_t u) +{ + /* From section 8.4 of the GLSL 4.30 spec: + * + * unpackSnorm4x8 + * -------------- + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackSnorm4x8: clamp(f / 127.0, -1, +1) + */ + /* The int8_t cast reinterprets the byte as signed; the clamp matters for -128, which divides to slightly below -1. */ return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f); +} + +/** + * Evaluate one component of unpackSnorm2x16. + */ +static float +unpack_snorm_1x16(uint16_t u) +{ + /* From section 8.4 of the GLSL ES 3.00 spec: + * + * unpackSnorm2x16 + * --------------- + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackSnorm2x16: clamp(f / 32767.0, -1, +1) + */ + /* Same scheme as the 8-bit variant: signed reinterpretation, then clamp for -32768. */ return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f); +} + +/** + * Evaluate one component of packUnorm4x8. + */ +static uint8_t +pack_unorm_1x8(float x) +{ + /* From section 8.4 of the GLSL 4.30 spec: + * + * packUnorm4x8 + * ------------ + * The conversion for component c of v to fixed point is done as + * follows: + * + * packUnorm4x8: round(clamp(c, 0, +1) * 255.0) + */ + return (uint8_t) (int) _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f); +} + +/** + * Evaluate one component of packUnorm2x16. + */ +static uint16_t +pack_unorm_1x16(float x) +{ + /* From section 8.4 of the GLSL ES 3.00 spec: + * + * packUnorm2x16 + * ------------- + * The conversion for component c of v to fixed point is done as + * follows: + * + * packUnorm2x16: round(clamp(c, 0, +1) * 65535.0) + */ + return (uint16_t) (int) + _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f); +} + +/** + * Evaluate one component of unpackUnorm4x8. + */ +static float +unpack_unorm_1x8(uint8_t u) +{ + /* From section 8.4 of the GLSL 4.30 spec: + * + * unpackUnorm4x8 + * -------------- + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackUnorm4x8: f / 255.0 + */ + return (float) u / 255.0f; +} + +/** + * Evaluate one component of unpackUnorm2x16. 
+ */ +static float +unpack_unorm_1x16(uint16_t u) +{ + /* From section 8.4 of the GLSL ES 3.00 spec: + * + * unpackUnorm2x16 + * --------------- + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackUnorm2x16: f / 65535.0 + */ + return (float) u / 65535.0f; +} + +/** + * Evaluate one component of packHalf2x16. + * + * Thin wrapper so _mesa_float_to_half can be passed as a pack_1x16_func_t. + */ +static uint16_t +pack_half_1x16(float x) +{ + return _mesa_float_to_half(x); +} + +/** + * Evaluate one component of unpackHalf2x16. + * + * Thin wrapper so _mesa_half_to_float can be passed as an unpack_1x16_func_t. + */ +static float +unpack_half_1x16(uint16_t u) +{ + return _mesa_half_to_float(u); +} + +/** + * Get the constant that is ultimately referenced by an r-value, in a constant + * expression evaluation context. + * + * The offset is used when the reference is to a specific column of a matrix + * or to a specific component of a vector. + * + * \param deref dereference chain to resolve + * \param variable_context table consulted for the constant currently bound + * to a variable; when NULL the lookup fails at once + * \param store receives the referenced constant, or NULL + * \param offset receives the component offset inside \p store + * \return true when \p store was set to a non-NULL constant + */ +static bool +constant_referenced(const ir_dereference *deref, + struct hash_table *variable_context, + ir_constant *&store, int &offset) +{ + store = NULL; + offset = 0; + + if (variable_context == NULL) + return false; + + switch (deref->ir_type) { + case ir_type_dereference_array: { + const ir_dereference_array *const da = + (const ir_dereference_array *) deref; + + ir_constant *const index_c = + da->array_index->constant_expression_value(variable_context); + /* The index must itself fold to a scalar integer constant. */ + if (!index_c || !index_c->type->is_scalar() || !index_c->type->is_integer()) + break; + + const int index = index_c->type->base_type == GLSL_TYPE_INT ? 
+ index_c->get_int_component(0) : + index_c->get_uint_component(0); + + ir_constant *substore; + int suboffset; + /* NOTE: this local shadows the deref parameter for the rest of the case block. */ + const ir_dereference *const deref = da->array->as_dereference(); + if (!deref) + break; + /* Resolve the thing being indexed first; give up if it is not a known constant. */ + if (!constant_referenced(deref, variable_context, substore, suboffset)) + break; + /* NOTE(review): index is not range-checked in this function before it is turned into an offset -- confirm callers validate it. */ + const glsl_type *const vt = da->array->type; + if (vt->is_array()) { + store = substore->get_array_element(index); + offset = 0; + } else if (vt->is_matrix()) { + store = substore; + /* first component of column index; suboffset is not added in this branch */ offset = index * vt->vector_elements; + } else if (vt->is_vector()) { + store = substore; + offset = suboffset + index; + } + + break; + } + + case ir_type_dereference_record: { + const ir_dereference_record *const dr = + (const ir_dereference_record *) deref; + /* As in the array case, this local shadows the deref parameter. */ + const ir_dereference *const deref = dr->record->as_dereference(); + if (!deref) + break; + + ir_constant *substore; + int suboffset; + + if (!constant_referenced(deref, variable_context, substore, suboffset)) + break; + + /* Since we're dropping it on the floor... 
+ */ + assert(suboffset == 0); + + store = substore->get_record_field(dr->field); + break; + } + + case ir_type_dereference_variable: { + const ir_dereference_variable *const dv = + (const ir_dereference_variable *) deref; + /* Base case of the recursion: look the variable up in variable_context. */ + store = (ir_constant *) hash_table_find(variable_context, dv->var); + break; + } + + default: + assert(!"Should not get here."); + break; + } + + return store != NULL; +} + +/** Base-class fallback: asserts that the r-value has the error type and yields no constant. */ +ir_constant * +ir_rvalue::constant_expression_value(struct hash_table *) +{ + assert(this->type->is_error()); + return NULL; +} + +ir_constant * +ir_expression::constant_expression_value(struct hash_table *variable_context) +{ + if (this->type->is_error()) + return NULL; + + ir_constant *op[ARRAY_SIZE(this->operands)] = { NULL, }; + ir_constant_data data; + + memset(&data, 0, sizeof(data)); + + for (unsigned operand = 0; operand < this->get_num_operands(); operand++) { + op[operand] = this->operands[operand]->constant_expression_value(variable_context); + if (!op[operand]) + return NULL; + } + + if (op[1] != NULL) + switch (this->operation) { + case ir_binop_lshift: + case ir_binop_rshift: + case ir_binop_ldexp: + case ir_binop_interpolate_at_offset: + case ir_binop_interpolate_at_sample: + case ir_binop_vector_extract: + case ir_triop_csel: + case ir_triop_bitfield_extract: + break; + + default: + assert(op[0]->type->base_type == op[1]->type->base_type); + break; + } + + bool op0_scalar = op[0]->type->is_scalar(); + bool op1_scalar = op[1] != NULL && op[1]->type->is_scalar(); + + /* When iterating over a vector or matrix's components, we want to increase + * the loop counter. However, for scalars, we want to stay at 0. + */ + unsigned c0_inc = op0_scalar ? 0 : 1; + unsigned c1_inc = op1_scalar ? 0 : 1; + unsigned components; + if (op1_scalar || !op[1]) { + components = op[0]->type->components(); + } else { + components = op[1]->type->components(); + } + + void *ctx = ralloc_parent(this); + + /* Handle array operations here, rather than below. 
*/ + if (op[0]->type->is_array()) { + assert(op[1] != NULL && op[1]->type->is_array()); + switch (this->operation) { + case ir_binop_all_equal: + return new(ctx) ir_constant(op[0]->has_value(op[1])); + case ir_binop_any_nequal: + return new(ctx) ir_constant(!op[0]->has_value(op[1])); + default: + break; + } + return NULL; + } + + switch (this->operation) { + case ir_unop_bit_not: + switch (op[0]->type->base_type) { + case GLSL_TYPE_INT: + for (unsigned c = 0; c < components; c++) + data.i[c] = ~ op[0]->value.i[c]; + break; + case GLSL_TYPE_UINT: + for (unsigned c = 0; c < components; c++) + data.u[c] = ~ op[0]->value.u[c]; + break; + default: + assert(0); + } + break; + + case ir_unop_logic_not: + assert(op[0]->type->base_type == GLSL_TYPE_BOOL); + for (unsigned c = 0; c < op[0]->type->components(); c++) + data.b[c] = !op[0]->value.b[c]; + break; + + case ir_unop_f2i: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.i[c] = (int) op[0]->value.f[c]; + } + break; + case ir_unop_f2u: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.i[c] = (unsigned) op[0]->value.f[c]; + } + break; + case ir_unop_i2f: + assert(op[0]->type->base_type == GLSL_TYPE_INT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = (float) op[0]->value.i[c]; + } + break; + case ir_unop_u2f: + assert(op[0]->type->base_type == GLSL_TYPE_UINT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = (float) op[0]->value.u[c]; + } + break; + case ir_unop_b2f: + assert(op[0]->type->base_type == GLSL_TYPE_BOOL); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = op[0]->value.b[c] ? 1.0F : 0.0F; + } + break; + case ir_unop_f2b: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.b[c] = op[0]->value.f[c] != 0.0F ? 
true : false; + } + break; + case ir_unop_b2i: + assert(op[0]->type->base_type == GLSL_TYPE_BOOL); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.u[c] = op[0]->value.b[c] ? 1 : 0; + } + break; + case ir_unop_i2b: + assert(op[0]->type->is_integer()); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.b[c] = op[0]->value.u[c] ? true : false; + } + break; + case ir_unop_u2i: + assert(op[0]->type->base_type == GLSL_TYPE_UINT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.i[c] = op[0]->value.u[c]; + } + break; + case ir_unop_i2u: + assert(op[0]->type->base_type == GLSL_TYPE_INT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.u[c] = op[0]->value.i[c]; + } + break; + case ir_unop_bitcast_i2f: + assert(op[0]->type->base_type == GLSL_TYPE_INT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = bitcast_u2f(op[0]->value.i[c]); + } + break; + case ir_unop_bitcast_f2i: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.i[c] = bitcast_f2u(op[0]->value.f[c]); + } + break; + case ir_unop_bitcast_u2f: + assert(op[0]->type->base_type == GLSL_TYPE_UINT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = bitcast_u2f(op[0]->value.u[c]); + } + break; + case ir_unop_bitcast_f2u: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.u[c] = bitcast_f2u(op[0]->value.f[c]); + } + break; + case ir_unop_d2f: + assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = op[0]->value.d[c]; + } + break; + case ir_unop_f2d: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.d[c] = op[0]->value.f[c]; + } + break; + case ir_unop_d2i: + assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE); + for 
(unsigned c = 0; c < op[0]->type->components(); c++) { + data.i[c] = op[0]->value.d[c]; + } + break; + case ir_unop_i2d: + assert(op[0]->type->base_type == GLSL_TYPE_INT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.d[c] = op[0]->value.i[c]; + } + break; + case ir_unop_d2u: + assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.u[c] = op[0]->value.d[c]; + } + break; + case ir_unop_u2d: + assert(op[0]->type->base_type == GLSL_TYPE_UINT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.d[c] = op[0]->value.u[c]; + } + break; + case ir_unop_d2b: + assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.b[c] = op[0]->value.d[c] != 0.0; + } + break; + case ir_unop_trunc: + for (unsigned c = 0; c < op[0]->type->components(); c++) { + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[c] = trunc(op[0]->value.d[c]); + else + data.f[c] = truncf(op[0]->value.f[c]); + } + break; + + case ir_unop_round_even: + for (unsigned c = 0; c < op[0]->type->components(); c++) { + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[c] = _mesa_roundeven(op[0]->value.d[c]); + else + data.f[c] = _mesa_roundevenf(op[0]->value.f[c]); + } + break; + + case ir_unop_ceil: + for (unsigned c = 0; c < op[0]->type->components(); c++) { + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[c] = ceil(op[0]->value.d[c]); + else + data.f[c] = ceilf(op[0]->value.f[c]); + } + break; + + case ir_unop_floor: + for (unsigned c = 0; c < op[0]->type->components(); c++) { + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[c] = floor(op[0]->value.d[c]); + else + data.f[c] = floorf(op[0]->value.f[c]); + } + break; + + case ir_unop_fract: + for (unsigned c = 0; c < op[0]->type->components(); c++) { + switch (this->type->base_type) { + case GLSL_TYPE_UINT: + data.u[c] = 0; + break; + case GLSL_TYPE_INT: + data.i[c] = 0; 
+ break; + case GLSL_TYPE_FLOAT: + data.f[c] = op[0]->value.f[c] - floor(op[0]->value.f[c]); + break; + case GLSL_TYPE_DOUBLE: + data.d[c] = op[0]->value.d[c] - floor(op[0]->value.d[c]); + break; + default: + assert(0); + } + } + break; + + case ir_unop_sin: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = sinf(op[0]->value.f[c]); + } + break; + + case ir_unop_cos: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = cosf(op[0]->value.f[c]); + } + break; + + case ir_unop_neg: + for (unsigned c = 0; c < op[0]->type->components(); c++) { + switch (this->type->base_type) { + case GLSL_TYPE_UINT: + data.u[c] = -((int) op[0]->value.u[c]); + break; + case GLSL_TYPE_INT: + data.i[c] = -op[0]->value.i[c]; + break; + case GLSL_TYPE_FLOAT: + data.f[c] = -op[0]->value.f[c]; + break; + case GLSL_TYPE_DOUBLE: + data.d[c] = -op[0]->value.d[c]; + break; + default: + assert(0); + } + } + break; + + case ir_unop_abs: + for (unsigned c = 0; c < op[0]->type->components(); c++) { + switch (this->type->base_type) { + case GLSL_TYPE_UINT: + data.u[c] = op[0]->value.u[c]; + break; + case GLSL_TYPE_INT: + data.i[c] = op[0]->value.i[c]; + if (data.i[c] < 0) + data.i[c] = -data.i[c]; + break; + case GLSL_TYPE_FLOAT: + data.f[c] = fabs(op[0]->value.f[c]); + break; + case GLSL_TYPE_DOUBLE: + data.d[c] = fabs(op[0]->value.d[c]); + break; + default: + assert(0); + } + } + break; + + case ir_unop_sign: + for (unsigned c = 0; c < op[0]->type->components(); c++) { + switch (this->type->base_type) { + case GLSL_TYPE_UINT: + data.u[c] = op[0]->value.i[c] > 0; + break; + case GLSL_TYPE_INT: + data.i[c] = (op[0]->value.i[c] > 0) - (op[0]->value.i[c] < 0); + break; + case GLSL_TYPE_FLOAT: + data.f[c] = float((op[0]->value.f[c] > 0)-(op[0]->value.f[c] < 0)); + break; + case GLSL_TYPE_DOUBLE: + data.d[c] = double((op[0]->value.d[c] > 
0)-(op[0]->value.d[c] < 0)); + break; + default: + assert(0); + } + } + break; + + case ir_unop_rcp: + for (unsigned c = 0; c < op[0]->type->components(); c++) { + switch (this->type->base_type) { + case GLSL_TYPE_UINT: + if (op[0]->value.u[c] != 0.0) + data.u[c] = 1 / op[0]->value.u[c]; + break; + case GLSL_TYPE_INT: + if (op[0]->value.i[c] != 0.0) + data.i[c] = 1 / op[0]->value.i[c]; + break; + case GLSL_TYPE_FLOAT: + if (op[0]->value.f[c] != 0.0) + data.f[c] = 1.0F / op[0]->value.f[c]; + break; + case GLSL_TYPE_DOUBLE: + if (op[0]->value.d[c] != 0.0) + data.d[c] = 1.0 / op[0]->value.d[c]; + break; + default: + assert(0); + } + } + break; + + case ir_unop_rsq: + for (unsigned c = 0; c < op[0]->type->components(); c++) { + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[c] = 1.0 / sqrt(op[0]->value.d[c]); + else + data.f[c] = 1.0F / sqrtf(op[0]->value.f[c]); + } + break; + + case ir_unop_sqrt: + for (unsigned c = 0; c < op[0]->type->components(); c++) { + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[c] = sqrt(op[0]->value.d[c]); + else + data.f[c] = sqrtf(op[0]->value.f[c]); + } + break; + + case ir_unop_exp: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = expf(op[0]->value.f[c]); + } + break; + + case ir_unop_exp2: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = exp2f(op[0]->value.f[c]); + } + break; + + case ir_unop_log: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = logf(op[0]->value.f[c]); + } + break; + + case ir_unop_log2: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = log2f(op[0]->value.f[c]); + } + break; + + case ir_unop_dFdx: + case ir_unop_dFdx_coarse: + case ir_unop_dFdx_fine: + case ir_unop_dFdy: + case 
ir_unop_dFdy_coarse: + case ir_unop_dFdy_fine: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = 0.0; + } + break; + + case ir_unop_pack_snorm_2x16: + assert(op[0]->type == glsl_type::vec2_type); + data.u[0] = pack_2x16(pack_snorm_1x16, + op[0]->value.f[0], + op[0]->value.f[1]); + break; + case ir_unop_pack_snorm_4x8: + assert(op[0]->type == glsl_type::vec4_type); + data.u[0] = pack_4x8(pack_snorm_1x8, + op[0]->value.f[0], + op[0]->value.f[1], + op[0]->value.f[2], + op[0]->value.f[3]); + break; + case ir_unop_unpack_snorm_2x16: + assert(op[0]->type == glsl_type::uint_type); + unpack_2x16(unpack_snorm_1x16, + op[0]->value.u[0], + &data.f[0], &data.f[1]); + break; + case ir_unop_unpack_snorm_4x8: + assert(op[0]->type == glsl_type::uint_type); + unpack_4x8(unpack_snorm_1x8, + op[0]->value.u[0], + &data.f[0], &data.f[1], &data.f[2], &data.f[3]); + break; + case ir_unop_pack_unorm_2x16: + assert(op[0]->type == glsl_type::vec2_type); + data.u[0] = pack_2x16(pack_unorm_1x16, + op[0]->value.f[0], + op[0]->value.f[1]); + break; + case ir_unop_pack_unorm_4x8: + assert(op[0]->type == glsl_type::vec4_type); + data.u[0] = pack_4x8(pack_unorm_1x8, + op[0]->value.f[0], + op[0]->value.f[1], + op[0]->value.f[2], + op[0]->value.f[3]); + break; + case ir_unop_unpack_unorm_2x16: + assert(op[0]->type == glsl_type::uint_type); + unpack_2x16(unpack_unorm_1x16, + op[0]->value.u[0], + &data.f[0], &data.f[1]); + break; + case ir_unop_unpack_unorm_4x8: + assert(op[0]->type == glsl_type::uint_type); + unpack_4x8(unpack_unorm_1x8, + op[0]->value.u[0], + &data.f[0], &data.f[1], &data.f[2], &data.f[3]); + break; + case ir_unop_pack_half_2x16: + assert(op[0]->type == glsl_type::vec2_type); + data.u[0] = pack_2x16(pack_half_1x16, + op[0]->value.f[0], + op[0]->value.f[1]); + break; + case ir_unop_unpack_half_2x16: + assert(op[0]->type == glsl_type::uint_type); + unpack_2x16(unpack_half_1x16, + op[0]->value.u[0], + 
&data.f[0], &data.f[1]); + break; + case ir_binop_pow: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = powf(op[0]->value.f[c], op[1]->value.f[c]); + } + break; + + case ir_binop_dot: + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[0] = dot_d(op[0], op[1]); + else + data.f[0] = dot_f(op[0], op[1]); + break; + + case ir_binop_min: + assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar); + for (unsigned c = 0, c0 = 0, c1 = 0; + c < components; + c0 += c0_inc, c1 += c1_inc, c++) { + + switch (op[0]->type->base_type) { + case GLSL_TYPE_UINT: + data.u[c] = MIN2(op[0]->value.u[c0], op[1]->value.u[c1]); + break; + case GLSL_TYPE_INT: + data.i[c] = MIN2(op[0]->value.i[c0], op[1]->value.i[c1]); + break; + case GLSL_TYPE_FLOAT: + data.f[c] = MIN2(op[0]->value.f[c0], op[1]->value.f[c1]); + break; + case GLSL_TYPE_DOUBLE: + data.d[c] = MIN2(op[0]->value.d[c0], op[1]->value.d[c1]); + break; + default: + assert(0); + } + } + + break; + case ir_binop_max: + assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar); + for (unsigned c = 0, c0 = 0, c1 = 0; + c < components; + c0 += c0_inc, c1 += c1_inc, c++) { + + switch (op[0]->type->base_type) { + case GLSL_TYPE_UINT: + data.u[c] = MAX2(op[0]->value.u[c0], op[1]->value.u[c1]); + break; + case GLSL_TYPE_INT: + data.i[c] = MAX2(op[0]->value.i[c0], op[1]->value.i[c1]); + break; + case GLSL_TYPE_FLOAT: + data.f[c] = MAX2(op[0]->value.f[c0], op[1]->value.f[c1]); + break; + case GLSL_TYPE_DOUBLE: + data.d[c] = MAX2(op[0]->value.d[c0], op[1]->value.d[c1]); + break; + default: + assert(0); + } + } + break; + + case ir_binop_add: + assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar); + for (unsigned c = 0, c0 = 0, c1 = 0; + c < components; + c0 += c0_inc, c1 += c1_inc, c++) { + + switch (op[0]->type->base_type) { + case GLSL_TYPE_UINT: + data.u[c] = op[0]->value.u[c0] + op[1]->value.u[c1]; + break; + case GLSL_TYPE_INT: + 
data.i[c] = op[0]->value.i[c0] + op[1]->value.i[c1]; + break; + case GLSL_TYPE_FLOAT: + data.f[c] = op[0]->value.f[c0] + op[1]->value.f[c1]; + break; + case GLSL_TYPE_DOUBLE: + data.d[c] = op[0]->value.d[c0] + op[1]->value.d[c1]; + break; + default: + assert(0); + } + } + + break; + case ir_binop_sub: + assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar); + for (unsigned c = 0, c0 = 0, c1 = 0; + c < components; + c0 += c0_inc, c1 += c1_inc, c++) { + + switch (op[0]->type->base_type) { + case GLSL_TYPE_UINT: + data.u[c] = op[0]->value.u[c0] - op[1]->value.u[c1]; + break; + case GLSL_TYPE_INT: + data.i[c] = op[0]->value.i[c0] - op[1]->value.i[c1]; + break; + case GLSL_TYPE_FLOAT: + data.f[c] = op[0]->value.f[c0] - op[1]->value.f[c1]; + break; + case GLSL_TYPE_DOUBLE: + data.d[c] = op[0]->value.d[c0] - op[1]->value.d[c1]; + break; + default: + assert(0); + } + } + + break; + case ir_binop_mul: + /* Check for equal types, or unequal types involving scalars */ + if ((op[0]->type == op[1]->type && !op[0]->type->is_matrix()) + || op0_scalar || op1_scalar) { + for (unsigned c = 0, c0 = 0, c1 = 0; + c < components; + c0 += c0_inc, c1 += c1_inc, c++) { + + switch (op[0]->type->base_type) { + case GLSL_TYPE_UINT: + data.u[c] = op[0]->value.u[c0] * op[1]->value.u[c1]; + break; + case GLSL_TYPE_INT: + data.i[c] = op[0]->value.i[c0] * op[1]->value.i[c1]; + break; + case GLSL_TYPE_FLOAT: + data.f[c] = op[0]->value.f[c0] * op[1]->value.f[c1]; + break; + case GLSL_TYPE_DOUBLE: + data.d[c] = op[0]->value.d[c0] * op[1]->value.d[c1]; + break; + default: + assert(0); + } + } + } else { + assert(op[0]->type->is_matrix() || op[1]->type->is_matrix()); + + /* Multiply an N-by-M matrix with an M-by-P matrix. Since either + * matrix can be a GLSL vector, either N or P can be 1. + * + * For vec*mat, the vector is treated as a row vector. This + * means the vector is a 1-row x M-column matrix. + * + * For mat*vec, the vector is treated as a column vector. 
Since + * matrix_columns is 1 for vectors, this just works. + */ + const unsigned n = op[0]->type->is_vector() + ? 1 : op[0]->type->vector_elements; + const unsigned m = op[1]->type->vector_elements; + const unsigned p = op[1]->type->matrix_columns; + for (unsigned j = 0; j < p; j++) { + for (unsigned i = 0; i < n; i++) { + for (unsigned k = 0; k < m; k++) { + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[i+n*j] += op[0]->value.d[i+n*k]*op[1]->value.d[k+m*j]; + else + data.f[i+n*j] += op[0]->value.f[i+n*k]*op[1]->value.f[k+m*j]; + } + } + } + } + + break; + case ir_binop_div: + /* FINISHME: Emit warning when division-by-zero is detected. */ + assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar); + for (unsigned c = 0, c0 = 0, c1 = 0; + c < components; + c0 += c0_inc, c1 += c1_inc, c++) { + + switch (op[0]->type->base_type) { + case GLSL_TYPE_UINT: + if (op[1]->value.u[c1] == 0) { + data.u[c] = 0; + } else { + data.u[c] = op[0]->value.u[c0] / op[1]->value.u[c1]; + } + break; + case GLSL_TYPE_INT: + if (op[1]->value.i[c1] == 0) { + data.i[c] = 0; + } else { + data.i[c] = op[0]->value.i[c0] / op[1]->value.i[c1]; + } + break; + case GLSL_TYPE_FLOAT: + data.f[c] = op[0]->value.f[c0] / op[1]->value.f[c1]; + break; + case GLSL_TYPE_DOUBLE: + data.d[c] = op[0]->value.d[c0] / op[1]->value.d[c1]; + break; + default: + assert(0); + } + } + + break; + case ir_binop_mod: + /* FINISHME: Emit warning when division-by-zero is detected. 
*/ + assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar); + for (unsigned c = 0, c0 = 0, c1 = 0; + c < components; + c0 += c0_inc, c1 += c1_inc, c++) { + + switch (op[0]->type->base_type) { + case GLSL_TYPE_UINT: + if (op[1]->value.u[c1] == 0) { + data.u[c] = 0; + } else { + data.u[c] = op[0]->value.u[c0] % op[1]->value.u[c1]; + } + break; + case GLSL_TYPE_INT: + if (op[1]->value.i[c1] == 0) { + data.i[c] = 0; + } else { + data.i[c] = op[0]->value.i[c0] % op[1]->value.i[c1]; + } + break; + case GLSL_TYPE_FLOAT: + /* We don't use fmod because it rounds toward zero; GLSL specifies + * the use of floor. + */ + data.f[c] = op[0]->value.f[c0] - op[1]->value.f[c1] + * floorf(op[0]->value.f[c0] / op[1]->value.f[c1]); + break; + case GLSL_TYPE_DOUBLE: + /* We don't use fmod because it rounds toward zero; GLSL specifies + * the use of floor. + */ + data.d[c] = op[0]->value.d[c0] - op[1]->value.d[c1] + * floor(op[0]->value.d[c0] / op[1]->value.d[c1]); + break; + default: + assert(0); + } + } + + break; + + case ir_binop_logic_and: + assert(op[0]->type->base_type == GLSL_TYPE_BOOL); + for (unsigned c = 0; c < op[0]->type->components(); c++) + data.b[c] = op[0]->value.b[c] && op[1]->value.b[c]; + break; + case ir_binop_logic_xor: + assert(op[0]->type->base_type == GLSL_TYPE_BOOL); + for (unsigned c = 0; c < op[0]->type->components(); c++) + data.b[c] = op[0]->value.b[c] ^ op[1]->value.b[c]; + break; + case ir_binop_logic_or: + assert(op[0]->type->base_type == GLSL_TYPE_BOOL); + for (unsigned c = 0; c < op[0]->type->components(); c++) + data.b[c] = op[0]->value.b[c] || op[1]->value.b[c]; + break; + + case ir_binop_less: + assert(op[0]->type == op[1]->type); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + switch (op[0]->type->base_type) { + case GLSL_TYPE_UINT: + data.b[c] = op[0]->value.u[c] < op[1]->value.u[c]; + break; + case GLSL_TYPE_INT: + data.b[c] = op[0]->value.i[c] < op[1]->value.i[c]; + break; + case GLSL_TYPE_FLOAT: + data.b[c] = 
op[0]->value.f[c] < op[1]->value.f[c]; + break; + case GLSL_TYPE_DOUBLE: + data.b[c] = op[0]->value.d[c] < op[1]->value.d[c]; + break; + default: + assert(0); + } + } + break; + case ir_binop_greater: + assert(op[0]->type == op[1]->type); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + switch (op[0]->type->base_type) { + case GLSL_TYPE_UINT: + data.b[c] = op[0]->value.u[c] > op[1]->value.u[c]; + break; + case GLSL_TYPE_INT: + data.b[c] = op[0]->value.i[c] > op[1]->value.i[c]; + break; + case GLSL_TYPE_FLOAT: + data.b[c] = op[0]->value.f[c] > op[1]->value.f[c]; + break; + case GLSL_TYPE_DOUBLE: + data.b[c] = op[0]->value.d[c] > op[1]->value.d[c]; + break; + default: + assert(0); + } + } + break; + case ir_binop_lequal: + assert(op[0]->type == op[1]->type); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + switch (op[0]->type->base_type) { + case GLSL_TYPE_UINT: + data.b[c] = op[0]->value.u[c] <= op[1]->value.u[c]; + break; + case GLSL_TYPE_INT: + data.b[c] = op[0]->value.i[c] <= op[1]->value.i[c]; + break; + case GLSL_TYPE_FLOAT: + data.b[c] = op[0]->value.f[c] <= op[1]->value.f[c]; + break; + case GLSL_TYPE_DOUBLE: + data.b[c] = op[0]->value.d[c] <= op[1]->value.d[c]; + break; + default: + assert(0); + } + } + break; + case ir_binop_gequal: + assert(op[0]->type == op[1]->type); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + switch (op[0]->type->base_type) { + case GLSL_TYPE_UINT: + data.b[c] = op[0]->value.u[c] >= op[1]->value.u[c]; + break; + case GLSL_TYPE_INT: + data.b[c] = op[0]->value.i[c] >= op[1]->value.i[c]; + break; + case GLSL_TYPE_FLOAT: + data.b[c] = op[0]->value.f[c] >= op[1]->value.f[c]; + break; + case GLSL_TYPE_DOUBLE: + data.b[c] = op[0]->value.d[c] >= op[1]->value.d[c]; + break; + default: + assert(0); + } + } + break; + case ir_binop_equal: + assert(op[0]->type == op[1]->type); + for (unsigned c = 0; c < components; c++) { + switch (op[0]->type->base_type) { + case GLSL_TYPE_UINT: + data.b[c] = 
op[0]->value.u[c] == op[1]->value.u[c]; + break; + case GLSL_TYPE_INT: + data.b[c] = op[0]->value.i[c] == op[1]->value.i[c]; + break; + case GLSL_TYPE_FLOAT: + data.b[c] = op[0]->value.f[c] == op[1]->value.f[c]; + break; + case GLSL_TYPE_BOOL: + data.b[c] = op[0]->value.b[c] == op[1]->value.b[c]; + break; + case GLSL_TYPE_DOUBLE: + data.b[c] = op[0]->value.d[c] == op[1]->value.d[c]; + break; + default: + assert(0); + } + } + break; + case ir_binop_nequal: + assert(op[0]->type == op[1]->type); + for (unsigned c = 0; c < components; c++) { + switch (op[0]->type->base_type) { + case GLSL_TYPE_UINT: + data.b[c] = op[0]->value.u[c] != op[1]->value.u[c]; + break; + case GLSL_TYPE_INT: + data.b[c] = op[0]->value.i[c] != op[1]->value.i[c]; + break; + case GLSL_TYPE_FLOAT: + data.b[c] = op[0]->value.f[c] != op[1]->value.f[c]; + break; + case GLSL_TYPE_BOOL: + data.b[c] = op[0]->value.b[c] != op[1]->value.b[c]; + break; + case GLSL_TYPE_DOUBLE: + data.b[c] = op[0]->value.d[c] != op[1]->value.d[c]; + break; + default: + assert(0); + } + } + break; + case ir_binop_all_equal: + data.b[0] = op[0]->has_value(op[1]); + break; + case ir_binop_any_nequal: + data.b[0] = !op[0]->has_value(op[1]); + break; + + case ir_binop_lshift: + for (unsigned c = 0, c0 = 0, c1 = 0; + c < components; + c0 += c0_inc, c1 += c1_inc, c++) { + + if (op[0]->type->base_type == GLSL_TYPE_INT && + op[1]->type->base_type == GLSL_TYPE_INT) { + data.i[c] = op[0]->value.i[c0] << op[1]->value.i[c1]; + + } else if (op[0]->type->base_type == GLSL_TYPE_INT && + op[1]->type->base_type == GLSL_TYPE_UINT) { + data.i[c] = op[0]->value.i[c0] << op[1]->value.u[c1]; + + } else if (op[0]->type->base_type == GLSL_TYPE_UINT && + op[1]->type->base_type == GLSL_TYPE_INT) { + data.u[c] = op[0]->value.u[c0] << op[1]->value.i[c1]; + + } else if (op[0]->type->base_type == GLSL_TYPE_UINT && + op[1]->type->base_type == GLSL_TYPE_UINT) { + data.u[c] = op[0]->value.u[c0] << op[1]->value.u[c1]; + } + } + break; + + case 
ir_binop_rshift: + for (unsigned c = 0, c0 = 0, c1 = 0; + c < components; + c0 += c0_inc, c1 += c1_inc, c++) { + + if (op[0]->type->base_type == GLSL_TYPE_INT && + op[1]->type->base_type == GLSL_TYPE_INT) { + data.i[c] = op[0]->value.i[c0] >> op[1]->value.i[c1]; + + } else if (op[0]->type->base_type == GLSL_TYPE_INT && + op[1]->type->base_type == GLSL_TYPE_UINT) { + data.i[c] = op[0]->value.i[c0] >> op[1]->value.u[c1]; + + } else if (op[0]->type->base_type == GLSL_TYPE_UINT && + op[1]->type->base_type == GLSL_TYPE_INT) { + data.u[c] = op[0]->value.u[c0] >> op[1]->value.i[c1]; + + } else if (op[0]->type->base_type == GLSL_TYPE_UINT && + op[1]->type->base_type == GLSL_TYPE_UINT) { + data.u[c] = op[0]->value.u[c0] >> op[1]->value.u[c1]; + } + } + break; + + case ir_binop_bit_and: + for (unsigned c = 0, c0 = 0, c1 = 0; + c < components; + c0 += c0_inc, c1 += c1_inc, c++) { + + switch (op[0]->type->base_type) { + case GLSL_TYPE_INT: + data.i[c] = op[0]->value.i[c0] & op[1]->value.i[c1]; + break; + case GLSL_TYPE_UINT: + data.u[c] = op[0]->value.u[c0] & op[1]->value.u[c1]; + break; + default: + assert(0); + } + } + break; + + case ir_binop_bit_or: + for (unsigned c = 0, c0 = 0, c1 = 0; + c < components; + c0 += c0_inc, c1 += c1_inc, c++) { + + switch (op[0]->type->base_type) { + case GLSL_TYPE_INT: + data.i[c] = op[0]->value.i[c0] | op[1]->value.i[c1]; + break; + case GLSL_TYPE_UINT: + data.u[c] = op[0]->value.u[c0] | op[1]->value.u[c1]; + break; + default: + assert(0); + } + } + break; + + case ir_binop_vector_extract: { + const int c = CLAMP(op[1]->value.i[0], 0, + (int) op[0]->type->vector_elements - 1); + + switch (op[0]->type->base_type) { + case GLSL_TYPE_UINT: + data.u[0] = op[0]->value.u[c]; + break; + case GLSL_TYPE_INT: + data.i[0] = op[0]->value.i[c]; + break; + case GLSL_TYPE_FLOAT: + data.f[0] = op[0]->value.f[c]; + break; + case GLSL_TYPE_DOUBLE: + data.d[0] = op[0]->value.d[c]; + break; + case GLSL_TYPE_BOOL: + data.b[0] = op[0]->value.b[c]; + break; + 
default: + assert(0); + } + break; + } + + case ir_binop_bit_xor: + for (unsigned c = 0, c0 = 0, c1 = 0; + c < components; + c0 += c0_inc, c1 += c1_inc, c++) { + + switch (op[0]->type->base_type) { + case GLSL_TYPE_INT: + data.i[c] = op[0]->value.i[c0] ^ op[1]->value.i[c1]; + break; + case GLSL_TYPE_UINT: + data.u[c] = op[0]->value.u[c0] ^ op[1]->value.u[c1]; + break; + default: + assert(0); + } + } + break; + + case ir_unop_bitfield_reverse: + /* http://graphics.stanford.edu/~seander/bithacks.html#BitReverseObvious */ + for (unsigned c = 0; c < components; c++) { + unsigned int v = op[0]->value.u[c]; // input bits to be reversed + unsigned int r = v; // r will be reversed bits of v; first get LSB of v + int s = sizeof(v) * CHAR_BIT - 1; // extra shift needed at end + + for (v >>= 1; v; v >>= 1) { + r <<= 1; + r |= v & 1; + s--; + } + r <<= s; // shift when v's highest bits are zero + + data.u[c] = r; + } + break; + + case ir_unop_bit_count: + for (unsigned c = 0; c < components; c++) { + unsigned count = 0; + unsigned v = op[0]->value.u[c]; + + for (; v; count++) { + v &= v - 1; + } + data.u[c] = count; + } + break; + + case ir_unop_find_msb: + for (unsigned c = 0; c < components; c++) { + int v = op[0]->value.i[c]; + + if (v == 0 || (op[0]->type->base_type == GLSL_TYPE_INT && v == -1)) + data.i[c] = -1; + else { + int count = 0; + unsigned top_bit = op[0]->type->base_type == GLSL_TYPE_UINT + ? 
0 : v & (1u << 31); + + while (((v & (1u << 31)) == top_bit) && count != 32) { + count++; + v <<= 1; + } + + data.i[c] = 31 - count; + } + } + break; + + case ir_unop_find_lsb: + for (unsigned c = 0; c < components; c++) { + if (op[0]->value.i[c] == 0) + data.i[c] = -1; + else { + unsigned pos = 0; + unsigned v = op[0]->value.u[c]; + + for (; !(v & 1); v >>= 1) { + pos++; + } + data.u[c] = pos; + } + } + break; + + case ir_unop_saturate: + for (unsigned c = 0; c < components; c++) { + data.f[c] = CLAMP(op[0]->value.f[c], 0.0f, 1.0f); + } + break; + case ir_unop_pack_double_2x32: { + /* XXX needs to be checked on big-endian */ + uint64_t temp; + temp = (uint64_t)op[0]->value.u[0] | ((uint64_t)op[0]->value.u[1] << 32); + data.d[0] = *(double *)&temp; + + break; + } + case ir_unop_unpack_double_2x32: + /* XXX needs to be checked on big-endian */ + data.u[0] = *(uint32_t *)&op[0]->value.d[0]; + data.u[1] = *((uint32_t *)&op[0]->value.d[0] + 1); + break; + + case ir_triop_bitfield_extract: { + for (unsigned c = 0; c < components; c++) { + int offset = op[1]->value.i[c]; + int bits = op[2]->value.i[c]; + + if (bits == 0) + data.u[c] = 0; + else if (offset < 0 || bits < 0) + data.u[c] = 0; /* Undefined, per spec. */ + else if (offset + bits > 32) + data.u[c] = 0; /* Undefined, per spec. */ + else { + if (op[0]->type->base_type == GLSL_TYPE_INT) { + /* int so that the right shift will sign-extend. */ + int value = op[0]->value.i[c]; + value <<= 32 - bits - offset; + value >>= 32 - bits; + data.i[c] = value; + } else { + unsigned value = op[0]->value.u[c]; + value <<= 32 - bits - offset; + value >>= 32 - bits; + data.u[c] = value; + } + } + } + break; + } + + case ir_binop_ldexp: + for (unsigned c = 0; c < components; c++) { + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) { + data.d[c] = ldexp(op[0]->value.d[c], op[1]->value.i[c]); + /* Flush subnormal values to zero. 
*/ + if (!isnormal(data.d[c])) + data.d[c] = copysign(0.0, op[0]->value.d[c]); + } else { + data.f[c] = ldexpf(op[0]->value.f[c], op[1]->value.i[c]); + /* Flush subnormal values to zero. */ + if (!isnormal(data.f[c])) + data.f[c] = copysignf(0.0f, op[0]->value.f[c]); + } + } + break; + + case ir_triop_fma: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT || + op[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(op[1]->type->base_type == GLSL_TYPE_FLOAT || + op[1]->type->base_type == GLSL_TYPE_DOUBLE); + assert(op[2]->type->base_type == GLSL_TYPE_FLOAT || + op[2]->type->base_type == GLSL_TYPE_DOUBLE); + + for (unsigned c = 0; c < components; c++) { + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[c] = op[0]->value.d[c] * op[1]->value.d[c] + + op[2]->value.d[c]; + else + data.f[c] = op[0]->value.f[c] * op[1]->value.f[c] + + op[2]->value.f[c]; + } + break; + + case ir_triop_lrp: { + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT || + op[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(op[1]->type->base_type == GLSL_TYPE_FLOAT || + op[1]->type->base_type == GLSL_TYPE_DOUBLE); + assert(op[2]->type->base_type == GLSL_TYPE_FLOAT || + op[2]->type->base_type == GLSL_TYPE_DOUBLE); + + unsigned c2_inc = op[2]->type->is_scalar() ? 0 : 1; + for (unsigned c = 0, c2 = 0; c < components; c2 += c2_inc, c++) { + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[c] = op[0]->value.d[c] * (1.0 - op[2]->value.d[c2]) + + (op[1]->value.d[c] * op[2]->value.d[c2]); + else + data.f[c] = op[0]->value.f[c] * (1.0f - op[2]->value.f[c2]) + + (op[1]->value.f[c] * op[2]->value.f[c2]); + } + break; + } + + case ir_triop_csel: + for (unsigned c = 0; c < components; c++) { + if (op[1]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[c] = op[0]->value.b[c] ? op[1]->value.d[c] + : op[2]->value.d[c]; + else + data.u[c] = op[0]->value.b[c] ? 
op[1]->value.u[c] + : op[2]->value.u[c]; + } + break; + + case ir_triop_vector_insert: { + const unsigned idx = op[2]->value.u[0]; + + memcpy(&data, &op[0]->value, sizeof(data)); + + switch (this->type->base_type) { + case GLSL_TYPE_INT: + data.i[idx] = op[1]->value.i[0]; + break; + case GLSL_TYPE_UINT: + data.u[idx] = op[1]->value.u[0]; + break; + case GLSL_TYPE_FLOAT: + data.f[idx] = op[1]->value.f[0]; + break; + case GLSL_TYPE_BOOL: + data.b[idx] = op[1]->value.b[0]; + break; + case GLSL_TYPE_DOUBLE: + data.d[idx] = op[1]->value.d[0]; + break; + default: + assert(!"Should not get here."); + break; + } + break; + } + + case ir_quadop_bitfield_insert: { + for (unsigned c = 0; c < components; c++) { + int offset = op[2]->value.i[c]; + int bits = op[3]->value.i[c]; + + if (bits == 0) + data.u[c] = op[0]->value.u[c]; + else if (offset < 0 || bits < 0) + data.u[c] = 0; /* Undefined, per spec. */ + else if (offset + bits > 32) + data.u[c] = 0; /* Undefined, per spec. */ + else { + unsigned insert_mask = ((1ull << bits) - 1) << offset; + + unsigned insert = op[1]->value.u[c]; + insert <<= offset; + insert &= insert_mask; + + unsigned base = op[0]->value.u[c]; + base &= ~insert_mask; + + data.u[c] = base | insert; + } + } + break; + } + + case ir_quadop_vector: + for (unsigned c = 0; c < this->type->vector_elements; c++) { + switch (this->type->base_type) { + case GLSL_TYPE_INT: + data.i[c] = op[c]->value.i[0]; + break; + case GLSL_TYPE_UINT: + data.u[c] = op[c]->value.u[0]; + break; + case GLSL_TYPE_FLOAT: + data.f[c] = op[c]->value.f[0]; + break; + case GLSL_TYPE_DOUBLE: + data.d[c] = op[c]->value.d[0]; + break; + default: + assert(0); + } + } + break; + + default: + /* FINISHME: Should handle all expression types. 
*/ + return NULL; + } + + return new(ctx) ir_constant(this->type, &data); +} + + +ir_constant * +ir_texture::constant_expression_value(struct hash_table *) +{ + /* texture lookups aren't constant expressions */ + return NULL; +} + + +ir_constant * +ir_swizzle::constant_expression_value(struct hash_table *variable_context) +{ + ir_constant *v = this->val->constant_expression_value(variable_context); + + if (v != NULL) { + ir_constant_data data = { { 0 } }; + + const unsigned swiz_idx[4] = { + this->mask.x, this->mask.y, this->mask.z, this->mask.w + }; + + for (unsigned i = 0; i < this->mask.num_components; i++) { + switch (v->type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: data.u[i] = v->value.u[swiz_idx[i]]; break; + case GLSL_TYPE_FLOAT: data.f[i] = v->value.f[swiz_idx[i]]; break; + case GLSL_TYPE_BOOL: data.b[i] = v->value.b[swiz_idx[i]]; break; + case GLSL_TYPE_DOUBLE:data.d[i] = v->value.d[swiz_idx[i]]; break; + default: assert(!"Should not get here."); break; + } + } + + void *ctx = ralloc_parent(this); + return new(ctx) ir_constant(this->type, &data); + } + return NULL; +} + + +ir_constant * +ir_dereference_variable::constant_expression_value(struct hash_table *variable_context) +{ + assert(var); + + /* Give priority to the context hashtable, if it exists */ + if (variable_context) { + ir_constant *value = (ir_constant *)hash_table_find(variable_context, var); + if(value) + return value; + } + + /* The constant_value of a uniform variable is its initializer, + * not the lifetime constant value of the uniform. 
+ */ + if (var->data.mode == ir_var_uniform) + return NULL; + + if (!var->constant_value) + return NULL; + + return var->constant_value->clone(ralloc_parent(var), NULL); +} + + +ir_constant * +ir_dereference_array::constant_expression_value(struct hash_table *variable_context) +{ + ir_constant *array = this->array->constant_expression_value(variable_context); + ir_constant *idx = this->array_index->constant_expression_value(variable_context); + + if ((array != NULL) && (idx != NULL)) { + void *ctx = ralloc_parent(this); + if (array->type->is_matrix()) { + /* Array access of a matrix results in a vector. + */ + const unsigned column = idx->value.u[0]; + + const glsl_type *const column_type = array->type->column_type(); + + /* Offset in the constant matrix to the first element of the column + * to be extracted. + */ + const unsigned mat_idx = column * column_type->vector_elements; + + ir_constant_data data = { { 0 } }; + + switch (column_type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + for (unsigned i = 0; i < column_type->vector_elements; i++) + data.u[i] = array->value.u[mat_idx + i]; + + break; + + case GLSL_TYPE_FLOAT: + for (unsigned i = 0; i < column_type->vector_elements; i++) + data.f[i] = array->value.f[mat_idx + i]; + + break; + + case GLSL_TYPE_DOUBLE: + for (unsigned i = 0; i < column_type->vector_elements; i++) + data.d[i] = array->value.d[mat_idx + i]; + + break; + + default: + assert(!"Should not get here."); + break; + } + + return new(ctx) ir_constant(column_type, &data); + } else if (array->type->is_vector()) { + const unsigned component = idx->value.u[0]; + + return new(ctx) ir_constant(array, component); + } else { + const unsigned index = idx->value.u[0]; + return array->get_array_element(index)->clone(ctx, NULL); + } + } + return NULL; +} + + +ir_constant * +ir_dereference_record::constant_expression_value(struct hash_table *) +{ + ir_constant *v = this->record->constant_expression_value(); + + return (v != NULL) ? 
v->get_record_field(this->field) : NULL; +} + + +ir_constant * +ir_assignment::constant_expression_value(struct hash_table *) +{ + /* FINISHME: Handle CEs involving assignment (return RHS) */ + return NULL; +} + + +ir_constant * +ir_constant::constant_expression_value(struct hash_table *) +{ + return this; +} + + +ir_constant * +ir_call::constant_expression_value(struct hash_table *variable_context) +{ + return this->callee->constant_expression_value(&this->actual_parameters, variable_context); +} + + +bool ir_function_signature::constant_expression_evaluate_expression_list(const struct exec_list &body, + struct hash_table *variable_context, + ir_constant **result) +{ + foreach_in_list(ir_instruction, inst, &body) { + switch(inst->ir_type) { + + /* (declare () type symbol) */ + case ir_type_variable: { + ir_variable *var = inst->as_variable(); + hash_table_insert(variable_context, ir_constant::zero(this, var->type), var); + break; + } + + /* (assign [condition] (write-mask) (ref) (value)) */ + case ir_type_assignment: { + ir_assignment *asg = inst->as_assignment(); + if (asg->condition) { + ir_constant *cond = asg->condition->constant_expression_value(variable_context); + if (!cond) + return false; + if (!cond->get_bool_component(0)) + break; + } + + ir_constant *store = NULL; + int offset = 0; + + if (!constant_referenced(asg->lhs, variable_context, store, offset)) + return false; + + ir_constant *value = asg->rhs->constant_expression_value(variable_context); + + if (!value) + return false; + + store->copy_masked_offset(value, offset, asg->write_mask); + break; + } + + /* (return (expression)) */ + case ir_type_return: + assert (result); + *result = inst->as_return()->value->constant_expression_value(variable_context); + return *result != NULL; + + /* (call name (ref) (params))*/ + case ir_type_call: { + ir_call *call = inst->as_call(); + + /* Just say no to void functions in constant expressions. We + * don't need them at that point. 
+ */ + + if (!call->return_deref) + return false; + + ir_constant *store = NULL; + int offset = 0; + + if (!constant_referenced(call->return_deref, variable_context, + store, offset)) + return false; + + ir_constant *value = call->constant_expression_value(variable_context); + + if(!value) + return false; + + store->copy_offset(value, offset); + break; + } + + /* (if condition (then-instructions) (else-instructions)) */ + case ir_type_if: { + ir_if *iif = inst->as_if(); + + ir_constant *cond = iif->condition->constant_expression_value(variable_context); + if (!cond || !cond->type->is_boolean()) + return false; + + exec_list &branch = cond->get_bool_component(0) ? iif->then_instructions : iif->else_instructions; + + *result = NULL; + if (!constant_expression_evaluate_expression_list(branch, variable_context, result)) + return false; + + /* If there was a return in the branch chosen, drop out now. */ + if (*result) + return true; + + break; + } + + /* Every other expression type, we drop out. */ + default: + return false; + } + } + + /* Reaching the end of the block is not an error condition */ + if (result) + *result = NULL; + + return true; +} + +ir_constant * +ir_function_signature::constant_expression_value(exec_list *actual_parameters, struct hash_table *variable_context) +{ + const glsl_type *type = this->return_type; + if (type == glsl_type::void_type) + return NULL; + + /* From the GLSL 1.20 spec, page 23: + * "Function calls to user-defined functions (non-built-in functions) + * cannot be used to form constant expressions." + */ + if (!this->is_builtin()) + return NULL; + + /* + * Of the builtin functions, only the texture lookups and the noise + * ones must not be used in constant expressions. They all include + * specific opcodes so they don't need to be special-cased at this + * point. + */ + + /* Initialize the table of dereferencable names with the function + * parameters. Verify their const-ness on the way. 
+ * + * We expect the correctness of the number of parameters to have + * been checked earlier. + */ + hash_table *deref_hash = hash_table_ctor(8, hash_table_pointer_hash, + hash_table_pointer_compare); + + /* If "origin" is non-NULL, then the function body is there. So we + * have to use the variable objects from the object with the body, + * but the parameter instanciation on the current object. + */ + const exec_node *parameter_info = origin ? origin->parameters.head : parameters.head; + + foreach_in_list(ir_rvalue, n, actual_parameters) { + ir_constant *constant = n->constant_expression_value(variable_context); + if (constant == NULL) { + hash_table_dtor(deref_hash); + return NULL; + } + + + ir_variable *var = (ir_variable *)parameter_info; + hash_table_insert(deref_hash, constant, var); + + parameter_info = parameter_info->next; + } + + ir_constant *result = NULL; + + /* Now run the builtin function until something non-constant + * happens or we get the result. + */ + if (constant_expression_evaluate_expression_list(origin ? 
origin->body : body, deref_hash, &result) && result) + result = result->clone(ralloc_parent(this), NULL); + + hash_table_dtor(deref_hash); + + return result; +} diff --git a/src/compiler/glsl/ir_equals.cpp b/src/compiler/glsl/ir_equals.cpp new file mode 100644 index 0000000..b86f4ea --- /dev/null +++ b/src/compiler/glsl/ir_equals.cpp @@ -0,0 +1,211 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "ir.h" + +/** + * Helper for checking equality when one instruction might be NULL, since you + * can't access a's vtable in that case. + */ +static bool +possibly_null_equals(const ir_instruction *a, const ir_instruction *b, + enum ir_node_type ignore) +{ + if (!a || !b) + return !a && !b; + + return a->equals(b, ignore); +} + +/** + * The base equality function: Return not equal for anything we don't know + * about. 
+ */ +bool +ir_instruction::equals(const ir_instruction *, enum ir_node_type) const +{ + return false; +} + +bool +ir_constant::equals(const ir_instruction *ir, enum ir_node_type) const +{ + const ir_constant *other = ir->as_constant(); + if (!other) + return false; + + if (type != other->type) + return false; + + for (unsigned i = 0; i < type->components(); i++) { + if (type->base_type == GLSL_TYPE_DOUBLE) { + if (value.d[i] != other->value.d[i]) + return false; + } else { + if (value.u[i] != other->value.u[i]) + return false; + } + } + + return true; +} + +bool +ir_dereference_variable::equals(const ir_instruction *ir, + enum ir_node_type) const +{ + const ir_dereference_variable *other = ir->as_dereference_variable(); + if (!other) + return false; + + return var == other->var; +} + +bool +ir_dereference_array::equals(const ir_instruction *ir, + enum ir_node_type ignore) const +{ + const ir_dereference_array *other = ir->as_dereference_array(); + if (!other) + return false; + + if (type != other->type) + return false; + + if (!array->equals(other->array, ignore)) + return false; + + if (!array_index->equals(other->array_index, ignore)) + return false; + + return true; +} + +bool +ir_swizzle::equals(const ir_instruction *ir, + enum ir_node_type ignore) const +{ + const ir_swizzle *other = ir->as_swizzle(); + if (!other) + return false; + + if (type != other->type) + return false; + + if (ignore != ir_type_swizzle) { + if (mask.x != other->mask.x || + mask.y != other->mask.y || + mask.z != other->mask.z || + mask.w != other->mask.w) { + return false; + } + } + + return val->equals(other->val, ignore); +} + +bool +ir_texture::equals(const ir_instruction *ir, enum ir_node_type ignore) const +{ + const ir_texture *other = ir->as_texture(); + if (!other) + return false; + + if (type != other->type) + return false; + + if (op != other->op) + return false; + + if (!possibly_null_equals(coordinate, other->coordinate, ignore)) + return false; + + if 
(!possibly_null_equals(projector, other->projector, ignore)) + return false; + + if (!possibly_null_equals(shadow_comparitor, other->shadow_comparitor, ignore)) + return false; + + if (!possibly_null_equals(offset, other->offset, ignore)) + return false; + + if (!sampler->equals(other->sampler, ignore)) + return false; + + switch (op) { + case ir_tex: + case ir_lod: + case ir_query_levels: + case ir_texture_samples: + case ir_samples_identical: + break; + case ir_txb: + if (!lod_info.bias->equals(other->lod_info.bias, ignore)) + return false; + break; + case ir_txl: + case ir_txf: + case ir_txs: + if (!lod_info.lod->equals(other->lod_info.lod, ignore)) + return false; + break; + case ir_txd: + if (!lod_info.grad.dPdx->equals(other->lod_info.grad.dPdx, ignore) || + !lod_info.grad.dPdy->equals(other->lod_info.grad.dPdy, ignore)) + return false; + break; + case ir_txf_ms: + if (!lod_info.sample_index->equals(other->lod_info.sample_index, ignore)) + return false; + break; + case ir_tg4: + if (!lod_info.component->equals(other->lod_info.component, ignore)) + return false; + break; + default: + assert(!"Unrecognized texture op"); + } + + return true; +} + +bool +ir_expression::equals(const ir_instruction *ir, enum ir_node_type ignore) const +{ + const ir_expression *other = ir->as_expression(); + if (!other) + return false; + + if (type != other->type) + return false; + + if (operation != other->operation) + return false; + + for (unsigned i = 0; i < get_num_operands(); i++) { + if (!operands[i]->equals(other->operands[i], ignore)) + return false; + } + + return true; +} diff --git a/src/compiler/glsl/ir_expression_flattening.cpp b/src/compiler/glsl/ir_expression_flattening.cpp new file mode 100644 index 0000000..c13ae81 --- /dev/null +++ b/src/compiler/glsl/ir_expression_flattening.cpp @@ -0,0 +1,86 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated 
documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file ir_expression_flattening.cpp + * + * Takes the leaves of expression trees and makes them dereferences of + * assignments of the leaves to temporaries, according to a predicate. + * + * This is used for breaking down matrix operations, where it's easier to + * create a temporary and work on each of its vector components individually. 
+ */ + +#include "ir.h" +#include "ir_rvalue_visitor.h" +#include "ir_expression_flattening.h" + +class ir_expression_flattening_visitor : public ir_rvalue_visitor { +public: + ir_expression_flattening_visitor(bool (*predicate)(ir_instruction *ir)) + { + this->predicate = predicate; + } + + virtual ~ir_expression_flattening_visitor() + { + /* empty */ + } + + void handle_rvalue(ir_rvalue **rvalue); + bool (*predicate)(ir_instruction *ir); +}; + +void +do_expression_flattening(exec_list *instructions, + bool (*predicate)(ir_instruction *ir)) +{ + ir_expression_flattening_visitor v(predicate); + + foreach_in_list(ir_instruction, ir, instructions) { + ir->accept(&v); + } +} + +void +ir_expression_flattening_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + ir_variable *var; + ir_assignment *assign; + ir_rvalue *ir = *rvalue; + + if (!ir || !this->predicate(ir)) + return; + + void *ctx = ralloc_parent(ir); + + var = new(ctx) ir_variable(ir->type, "flattening_tmp", ir_var_temporary); + base_ir->insert_before(var); + + assign = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(var), + ir, + NULL); + base_ir->insert_before(assign); + + *rvalue = new(ctx) ir_dereference_variable(var); +} diff --git a/src/compiler/glsl/ir_expression_flattening.h b/src/compiler/glsl/ir_expression_flattening.h new file mode 100644 index 0000000..2eda159 --- /dev/null +++ b/src/compiler/glsl/ir_expression_flattening.h @@ -0,0 +1,38 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice 
(including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + +/** + * \file ir_expression_flattening.h + * + * Takes the leaves of expression trees and makes them dereferences of + * assignments of the leaves to temporaries, according to a predicate. + * + * This is used for automatic function inlining, where we want to take + * an expression containing a call and move the call out to its own + * assignment so that we can inline it at the appropriate place in the + * instruction stream. 
+ */ + +void do_expression_flattening(exec_list *instructions, + bool (*predicate)(ir_instruction *ir)); diff --git a/src/compiler/glsl/ir_function.cpp b/src/compiler/glsl/ir_function.cpp new file mode 100644 index 0000000..0b4cb4b --- /dev/null +++ b/src/compiler/glsl/ir_function.cpp @@ -0,0 +1,404 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "compiler/glsl_types.h" +#include "ir.h" +#include "glsl_parser_extras.h" +#include "main/errors.h" + +typedef enum { + PARAMETER_LIST_NO_MATCH, + PARAMETER_LIST_EXACT_MATCH, + PARAMETER_LIST_INEXACT_MATCH /*< Match requires implicit conversion. */ +} parameter_list_match_t; + +/** + * \brief Check if two parameter lists match. + * + * \param list_a Parameters of the function definition. + * \param list_b Actual parameters passed to the function. 
+ * \see matching_signature() + */ +static parameter_list_match_t +parameter_lists_match(_mesa_glsl_parse_state *state, + const exec_list *list_a, const exec_list *list_b) +{ + const exec_node *node_a = list_a->head; + const exec_node *node_b = list_b->head; + + /* This is set to true if there is an inexact match requiring an implicit + * conversion. */ + bool inexact_match = false; + + for (/* empty */ + ; !node_a->is_tail_sentinel() + ; node_a = node_a->next, node_b = node_b->next) { + /* If all of the parameters from the other parameter list have been + * exhausted, the lists have different length and, by definition, + * do not match. + */ + if (node_b->is_tail_sentinel()) + return PARAMETER_LIST_NO_MATCH; + + + const ir_variable *const param = (ir_variable *) node_a; + const ir_rvalue *const actual = (ir_rvalue *) node_b; + + if (param->type == actual->type) + continue; + + /* Try to find an implicit conversion from actual to param. */ + inexact_match = true; + switch ((enum ir_variable_mode)(param->data.mode)) { + case ir_var_auto: + case ir_var_uniform: + case ir_var_shader_storage: + case ir_var_temporary: + /* These are all error conditions. It is invalid for a parameter to + * a function to be declared as auto (not in, out, or inout) or + * as uniform. + */ + assert(0); + return PARAMETER_LIST_NO_MATCH; + + case ir_var_const_in: + case ir_var_function_in: + if (!actual->type->can_implicitly_convert_to(param->type, state)) + return PARAMETER_LIST_NO_MATCH; + break; + + case ir_var_function_out: + if (!param->type->can_implicitly_convert_to(actual->type, state)) + return PARAMETER_LIST_NO_MATCH; + break; + + case ir_var_function_inout: + /* Since there are no bi-directional automatic conversions (e.g., + * there is int -> float but no float -> int), inout parameters must + * be exact matches. 
+ */ + return PARAMETER_LIST_NO_MATCH; + + default: + assert(false); + return PARAMETER_LIST_NO_MATCH; + } + } + + /* If all of the parameters from the other parameter list have been + * exhausted, the lists have different length and, by definition, do not + * match. + */ + if (!node_b->is_tail_sentinel()) + return PARAMETER_LIST_NO_MATCH; + + if (inexact_match) + return PARAMETER_LIST_INEXACT_MATCH; + else + return PARAMETER_LIST_EXACT_MATCH; +} + + +/* Classes of parameter match, sorted (mostly) best matches first. + * See is_better_parameter_match() below for the exceptions. + * */ +typedef enum { + PARAMETER_EXACT_MATCH, + PARAMETER_FLOAT_TO_DOUBLE, + PARAMETER_INT_TO_FLOAT, + PARAMETER_INT_TO_DOUBLE, + PARAMETER_OTHER_CONVERSION, +} parameter_match_t; + + +static parameter_match_t +get_parameter_match_type(const ir_variable *param, + const ir_rvalue *actual) +{ + const glsl_type *from_type; + const glsl_type *to_type; + + if (param->data.mode == ir_var_function_out) { + from_type = param->type; + to_type = actual->type; + } else { + from_type = actual->type; + to_type = param->type; + } + + if (from_type == to_type) + return PARAMETER_EXACT_MATCH; + + if (to_type->base_type == GLSL_TYPE_DOUBLE) { + if (from_type->base_type == GLSL_TYPE_FLOAT) + return PARAMETER_FLOAT_TO_DOUBLE; + return PARAMETER_INT_TO_DOUBLE; + } + + if (to_type->base_type == GLSL_TYPE_FLOAT) + return PARAMETER_INT_TO_FLOAT; + + /* int -> uint and any other oddball conversions */ + return PARAMETER_OTHER_CONVERSION; +} + + +static bool +is_better_parameter_match(parameter_match_t a_match, + parameter_match_t b_match) +{ + /* From section 6.1 of the GLSL 4.00 spec (and the ARB_gpu_shader5 spec): + * + * 1. An exact match is better than a match involving any implicit + * conversion. + * + * 2. A match involving an implicit conversion from float to double + * is better than match involving any other implicit conversion. + * + * [XXX: Not in GLSL 4.0: Only in ARB_gpu_shader5: + * 3. 
A match involving an implicit conversion from either int or uint + * to float is better than a match involving an implicit conversion + * from either int or uint to double.] + * + * If none of the rules above apply to a particular pair of conversions, + * neither conversion is considered better than the other. + * + * -- + * + * Notably, the int->uint conversion is *not* considered to be better + * or worse than int/uint->float or int/uint->double. + */ + + if (a_match >= PARAMETER_INT_TO_FLOAT && b_match == PARAMETER_OTHER_CONVERSION) + return false; + + return a_match < b_match; +} + + +static bool +is_best_inexact_overload(const exec_list *actual_parameters, + ir_function_signature **matches, + int num_matches, + ir_function_signature *sig) +{ + /* From section 6.1 of the GLSL 4.00 spec (and the ARB_gpu_shader5 spec): + * + * "A function definition A is considered a better + * match than function definition B if: + * + * * for at least one function argument, the conversion for that argument + * in A is better than the corresponding conversion in B; and + * + * * there is no function argument for which the conversion in B is better + * than the corresponding conversion in A. + * + * If a single function definition is considered a better match than every + * other matching function definition, it will be used. Otherwise, a + * semantic error occurs and the shader will fail to compile." 
+ */ + for (ir_function_signature **other = matches; + other < matches + num_matches; other++) { + if (*other == sig) + continue; + + const exec_node *node_a = sig->parameters.head; + const exec_node *node_b = (*other)->parameters.head; + const exec_node *node_p = actual_parameters->head; + + bool better_for_some_parameter = false; + + for (/* empty */ + ; !node_a->is_tail_sentinel() + ; node_a = node_a->next, + node_b = node_b->next, + node_p = node_p->next) { + parameter_match_t a_match = get_parameter_match_type( + (const ir_variable *)node_a, + (const ir_rvalue *)node_p); + parameter_match_t b_match = get_parameter_match_type( + (const ir_variable *)node_b, + (const ir_rvalue *)node_p); + + if (is_better_parameter_match(a_match, b_match)) + better_for_some_parameter = true; + + if (is_better_parameter_match(b_match, a_match)) + return false; /* B is better for this parameter */ + } + + if (!better_for_some_parameter) + return false; /* A must be better than B for some parameter */ + + } + + return true; +} + + +static ir_function_signature * +choose_best_inexact_overload(_mesa_glsl_parse_state *state, + const exec_list *actual_parameters, + ir_function_signature **matches, + int num_matches) +{ + if (num_matches == 0) + return NULL; + + if (num_matches == 1) + return *matches; + + /* Without GLSL 4.0 / ARB_gpu_shader5, there is no overload resolution + * among multiple inexact matches. Note that state may be NULL here if + * called from the linker; in that case we assume everything supported in + * any GLSL version is available. 
*/ + if (!state || state->is_version(400, 0) || state->ARB_gpu_shader5_enable) { + for (ir_function_signature **sig = matches; sig < matches + num_matches; sig++) { + if (is_best_inexact_overload(actual_parameters, matches, num_matches, *sig)) + return *sig; + } + } + + return NULL; /* no best candidate */ +} + + +ir_function_signature * +ir_function::matching_signature(_mesa_glsl_parse_state *state, + const exec_list *actual_parameters, + bool allow_builtins) +{ + bool is_exact; + return matching_signature(state, actual_parameters, allow_builtins, + &is_exact); +} + +ir_function_signature * +ir_function::matching_signature(_mesa_glsl_parse_state *state, + const exec_list *actual_parameters, + bool allow_builtins, + bool *is_exact) +{ + ir_function_signature **inexact_matches = NULL; + ir_function_signature **inexact_matches_temp; + ir_function_signature *match = NULL; + int num_inexact_matches = 0; + + /* From page 42 (page 49 of the PDF) of the GLSL 1.20 spec: + * + * "If an exact match is found, the other signatures are ignored, and + * the exact match is used. Otherwise, if no exact match is found, then + * the implicit conversions in Section 4.1.10 "Implicit Conversions" will + * be applied to the calling arguments if this can make their types match + * a signature. In this case, it is a semantic error if there are + * multiple ways to apply these conversions to the actual arguments of a + * call such that the call can be made to match multiple signatures." + */ + foreach_in_list(ir_function_signature, sig, &this->signatures) { + /* Skip over any built-ins that aren't available in this shader. 
*/ + if (sig->is_builtin() && (!allow_builtins || + !sig->is_builtin_available(state))) + continue; + + switch (parameter_lists_match(state, & sig->parameters, actual_parameters)) { + case PARAMETER_LIST_EXACT_MATCH: + *is_exact = true; + free(inexact_matches); + return sig; + case PARAMETER_LIST_INEXACT_MATCH: + inexact_matches_temp = (ir_function_signature **) + realloc(inexact_matches, + sizeof(*inexact_matches) * + (num_inexact_matches + 1)); + if (inexact_matches_temp == NULL) { + _mesa_error_no_memory(__func__); + free(inexact_matches); + return NULL; + } + inexact_matches = inexact_matches_temp; + inexact_matches[num_inexact_matches++] = sig; + continue; + case PARAMETER_LIST_NO_MATCH: + continue; + default: + assert(false); + return NULL; + } + } + + /* There is no exact match (we would have returned it by now). If there + * are multiple inexact matches, the call is ambiguous, which is an error. + * + * FINISHME: Report a decent error. Returning NULL will likely result in + * FINISHME: a "no matching signature" error; it should report that the + * FINISHME: call is ambiguous. But reporting errors from here is hard. + */ + *is_exact = false; + + match = choose_best_inexact_overload(state, actual_parameters, + inexact_matches, num_inexact_matches); + + free(inexact_matches); + return match; +} + + +static bool +parameter_lists_match_exact(const exec_list *list_a, const exec_list *list_b) +{ + const exec_node *node_a = list_a->head; + const exec_node *node_b = list_b->head; + + for (/* empty */ + ; !node_a->is_tail_sentinel() && !node_b->is_tail_sentinel() + ; node_a = node_a->next, node_b = node_b->next) { + ir_variable *a = (ir_variable *) node_a; + ir_variable *b = (ir_variable *) node_b; + + /* If the types of the parameters do not match, the parameters lists + * are different. + */ + if (a->type != b->type) + return false; + } + + /* Unless both lists are exhausted, they differ in length and, by + * definition, do not match. 
+ */ + return (node_a->is_tail_sentinel() == node_b->is_tail_sentinel()); +} + +ir_function_signature * +ir_function::exact_matching_signature(_mesa_glsl_parse_state *state, + const exec_list *actual_parameters) +{ + foreach_in_list(ir_function_signature, sig, &this->signatures) { + /* Skip over any built-ins that aren't available in this shader. */ + if (sig->is_builtin() && !sig->is_builtin_available(state)) + continue; + + if (parameter_lists_match_exact(&sig->parameters, actual_parameters)) + return sig; + } + return NULL; +} diff --git a/src/compiler/glsl/ir_function_can_inline.cpp b/src/compiler/glsl/ir_function_can_inline.cpp new file mode 100644 index 0000000..3b1d15f --- /dev/null +++ b/src/compiler/glsl/ir_function_can_inline.cpp @@ -0,0 +1,75 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +/** + * \file ir_function_can_inline.cpp + * + * Determines if we can inline a function call using ir_function_inlining.cpp. + * + * The primary restriction is that we can't return from the function other + * than as the last instruction. In lower_jumps.cpp, we can lower return + * statements not at the end of the function to other control flow in order to + * deal with this restriction. + */ + +#include "ir.h" + +class ir_function_can_inline_visitor : public ir_hierarchical_visitor { +public: + ir_function_can_inline_visitor() + { + this->num_returns = 0; + } + + virtual ir_visitor_status visit_enter(ir_return *); + + int num_returns; +}; + +ir_visitor_status +ir_function_can_inline_visitor::visit_enter(ir_return *ir) +{ + (void) ir; + this->num_returns++; + return visit_continue; +} + +bool +can_inline(ir_call *call) +{ + ir_function_can_inline_visitor v; + const ir_function_signature *callee = call->callee; + if (!callee->is_defined) + return false; + + v.run((exec_list *) &callee->body); + + /* If the function is empty (no last instruction) or does not end with a + * return statement, we need to count the implicit return. 
+ */ + ir_instruction *last = (ir_instruction *)callee->body.get_tail(); + if (last == NULL || !last->as_return()) + v.num_returns++; + + return v.num_returns == 1; +} diff --git a/src/compiler/glsl/ir_function_detect_recursion.cpp b/src/compiler/glsl/ir_function_detect_recursion.cpp new file mode 100644 index 0000000..b2334d2 --- /dev/null +++ b/src/compiler/glsl/ir_function_detect_recursion.cpp @@ -0,0 +1,358 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file ir_function_detect_recursion.cpp + * Determine whether a shader contains static recursion. + * + * Consider the (possibly disjoint) graph of function calls in a shader. If a + * program contains recursion, this graph will contain a cycle. If a function + * is part of a cycle, it will have a caller and it will have a callee (it + * calls another function). 
+ * + * To detect recursion, the function call graph is constructed. The graph is + * repeatedly reduced by removing any function that either has no callees + * (leaf functions) or has no caller. Eventually the only functions that + * remain will be the functions in the cycles. + * + * The GLSL spec is a bit wishy-washy about recursion. + * + * From page 39 (page 45 of the PDF) of the GLSL 1.10 spec: + * + * "Behavior is undefined if recursion is used. Recursion means having any + * function appearing more than once at any one time in the run-time stack + * of function calls. That is, a function may not call itself either + * directly or indirectly. Compilers may give diagnostic messages when + * this is detectable at compile time, but not all such cases can be + * detected at compile time." + * + * From page 79 (page 85 of the PDF): + * + * "22) Should recursion be supported? + * + * DISCUSSION: Probably not necessary, but another example of limiting + * the language based on how it would directly map to hardware. One + * thought is that recursion would benefit ray tracing shaders. On the + * other hand, many recursion operations can also be implemented with the + * user managing the recursion through arrays. RenderMan doesn't support + * recursion. This could be added at a later date, if it proved to be + * necessary. + * + * RESOLVED on September 10, 2002: Implementations are not required to + * support recursion. + * + * CLOSED on September 10, 2002." + * + * From page 79 (page 85 of the PDF): + * + * "56) Is it an error for an implementation to support recursion if the + * specification says recursion is not supported? + * + * ADDED on September 10, 2002. + * + * DISCUSSION: This issues is related to Issue (22). If we say that + * recursion (or some other piece of functionality) is not supported, is + * it an error for an implementation to support it? 
Perhaps the + * specification should remain silent on these kind of things so that they + * could be gracefully added later as an extension or as part of the + * standard. + * + * RESOLUTION: Languages, in general, have programs that are not + * well-formed in ways a compiler cannot detect. Portability is only + * ensured for well-formed programs. Detecting recursion is an example of + * this. The language will say a well-formed program may not recurse, but + * compilers are not forced to detect that recursion may happen. + * + * CLOSED: November 29, 2002." + * + * In GLSL 1.10 the behavior of recursion is undefined. Compilers don't have + * to reject shaders (at compile-time or link-time) that contain recursion. + * Instead they could work, or crash, or kill a kitten. + * + * From page 44 (page 50 of the PDF) of the GLSL 1.20 spec: + * + * "Recursion is not allowed, not even statically. Static recursion is + * present if the static function call graph of the program contains + * cycles." + * + * This language clears things up a bit, but it still leaves a lot of + * questions unanswered. + * + * - Is the error generated at compile-time or link-time? + * + * - Is it an error to have a recursive function that is never statically + * called by main or any function called directly or indirectly by main? + * Technically speaking, such a function is not in the "static function + * call graph of the program" at all. + * + * \bug + * If a shader has multiple cycles, this algorithm may erroneously complain + * about functions that aren't in any cycle, but are in the part of the call + * tree that connects them. For example, if the call graph consists of a + * cycle between A and B, and a cycle between D and E, and B also calls C + * which calls D, then this algorithm will report C as a function which "has + * static recursion" even though it is not part of any cycle. 
+ * + * A better algorithm for cycle detection that doesn't have this drawback can + * be found here: + * + * http://en.wikipedia.org/wiki/Tarjan%E2%80%99s_strongly_connected_components_algorithm + * + * \author Ian Romanick <ian.d.romanick@intel.com> + */ +#include "main/core.h" +#include "ir.h" +#include "glsl_parser_extras.h" +#include "linker.h" +#include "program/hash_table.h" +#include "program.h" + +namespace { + +struct call_node : public exec_node { + class function *func; +}; + +class function { +public: + function(ir_function_signature *sig) + : sig(sig) + { + /* empty */ + } + + DECLARE_RALLOC_CXX_OPERATORS(function) + + ir_function_signature *sig; + + /** List of functions called by this function. */ + exec_list callees; + + /** List of functions that call this function. */ + exec_list callers; +}; + +class has_recursion_visitor : public ir_hierarchical_visitor { +public: + has_recursion_visitor() + : current(NULL) + { + progress = false; + this->mem_ctx = ralloc_context(NULL); + this->function_hash = hash_table_ctor(0, hash_table_pointer_hash, + hash_table_pointer_compare); + } + + ~has_recursion_visitor() + { + hash_table_dtor(this->function_hash); + ralloc_free(this->mem_ctx); + } + + function *get_function(ir_function_signature *sig) + { + function *f = (function *) hash_table_find(this->function_hash, sig); + if (f == NULL) { + f = new(mem_ctx) function(sig); + hash_table_insert(this->function_hash, f, sig); + } + + return f; + } + + virtual ir_visitor_status visit_enter(ir_function_signature *sig) + { + this->current = this->get_function(sig); + return visit_continue; + } + + virtual ir_visitor_status visit_leave(ir_function_signature *sig) + { + (void) sig; + this->current = NULL; + return visit_continue; + } + + virtual ir_visitor_status visit_enter(ir_call *call) + { + /* At global scope this->current will be NULL. Since there is no way to + * call global scope, it can never be part of a cycle. 
Don't bother + * adding calls from global scope to the graph. + */ + if (this->current == NULL) + return visit_continue; + + function *const target = this->get_function(call->callee); + + /* Create a link from the caller to the callee. + */ + call_node *node = new(mem_ctx) call_node; + node->func = target; + this->current->callees.push_tail(node); + + /* Create a link from the callee to the caller. + */ + node = new(mem_ctx) call_node; + node->func = this->current; + target->callers.push_tail(node); + return visit_continue; + } + + function *current; + struct hash_table *function_hash; + void *mem_ctx; + bool progress; +}; + +} /* anonymous namespace */ + +static void +destroy_links(exec_list *list, function *f) +{ + foreach_in_list_safe(call_node, node, list) { + /* If this is the right function, remove it. Note that the loop cannot + * terminate now. There can be multiple links to a function if it is + * either called multiple times or calls multiple times. + */ + if (node->func == f) + node->remove(); + } +} + + +/** + * Remove a function if it has either no in or no out links + */ +static void +remove_unlinked_functions(const void *key, void *data, void *closure) +{ + has_recursion_visitor *visitor = (has_recursion_visitor *) closure; + function *f = (function *) data; + + if (f->callers.is_empty() || f->callees.is_empty()) { + while (!f->callers.is_empty()) { + struct call_node *n = (struct call_node *) f->callers.pop_head(); + destroy_links(& n->func->callees, f); + } + + while (!f->callees.is_empty()) { + struct call_node *n = (struct call_node *) f->callees.pop_head(); + destroy_links(& n->func->callers, f); + } + + hash_table_remove(visitor->function_hash, key); + visitor->progress = true; + } +} + + +static void +emit_errors_unlinked(const void *key, void *data, void *closure) +{ + struct _mesa_glsl_parse_state *state = + (struct _mesa_glsl_parse_state *) closure; + function *f = (function *) data; + YYLTYPE loc; + + (void) key; + + char *proto = 
prototype_string(f->sig->return_type, + f->sig->function_name(), + &f->sig->parameters); + + memset(&loc, 0, sizeof(loc)); + _mesa_glsl_error(&loc, state, + "function `%s' has static recursion", + proto); + ralloc_free(proto); +} + + +static void +emit_errors_linked(const void *key, void *data, void *closure) +{ + struct gl_shader_program *prog = + (struct gl_shader_program *) closure; + function *f = (function *) data; + + (void) key; + + char *proto = prototype_string(f->sig->return_type, + f->sig->function_name(), + &f->sig->parameters); + + linker_error(prog, "function `%s' has static recursion.\n", proto); + ralloc_free(proto); +} + + +void +detect_recursion_unlinked(struct _mesa_glsl_parse_state *state, + exec_list *instructions) +{ + has_recursion_visitor v; + + /* Collect all of the information about which functions call which other + * functions. + */ + v.run(instructions); + + /* Remove from the set all of the functions that either have no caller or + * call no other functions. Repeat until no functions are removed. + */ + do { + v.progress = false; + hash_table_call_foreach(v.function_hash, remove_unlinked_functions, & v); + } while (v.progress); + + + /* At this point any functions still in the hash must be part of a cycle. + */ + hash_table_call_foreach(v.function_hash, emit_errors_unlinked, state); +} + + +void +detect_recursion_linked(struct gl_shader_program *prog, + exec_list *instructions) +{ + has_recursion_visitor v; + + /* Collect all of the information about which functions call which other + * functions. + */ + v.run(instructions); + + /* Remove from the set all of the functions that either have no caller or + * call no other functions. Repeat until no functions are removed. + */ + do { + v.progress = false; + hash_table_call_foreach(v.function_hash, remove_unlinked_functions, & v); + } while (v.progress); + + + /* At this point any functions still in the hash must be part of a cycle. 
+ */ + hash_table_call_foreach(v.function_hash, emit_errors_linked, prog); +} diff --git a/src/compiler/glsl/ir_function_inlining.h b/src/compiler/glsl/ir_function_inlining.h new file mode 100644 index 0000000..6db011b --- /dev/null +++ b/src/compiler/glsl/ir_function_inlining.h @@ -0,0 +1,30 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file ir_function_inlining.h + * + * Replaces calls to functions with the body of the function. 
+ */ + +bool can_inline(ir_call *call); diff --git a/src/compiler/glsl/ir_hierarchical_visitor.cpp b/src/compiler/glsl/ir_hierarchical_visitor.cpp new file mode 100644 index 0000000..1d23a77 --- /dev/null +++ b/src/compiler/glsl/ir_hierarchical_visitor.cpp @@ -0,0 +1,383 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "ir.h" +#include "ir_hierarchical_visitor.h" + +ir_hierarchical_visitor::ir_hierarchical_visitor() +{ + this->base_ir = NULL; + this->callback_enter = NULL; + this->callback_leave = NULL; + this->data_enter = NULL; + this->data_leave = NULL; + this->in_assignee = false; +} + +ir_visitor_status +ir_hierarchical_visitor::visit(ir_rvalue *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit(ir_variable *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit(ir_constant *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit(ir_loop_jump *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit(ir_dereference_variable *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit(ir_barrier *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_loop *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_loop *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_function_signature *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + 
+ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_function_signature *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_function *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_function *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_expression *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_expression *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_texture *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_texture *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_swizzle *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_swizzle *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_dereference_array *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status 
+ir_hierarchical_visitor::visit_leave(ir_dereference_array *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_dereference_record *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_dereference_record *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_assignment *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_assignment *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_call *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_call *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_return *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_return *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_discard *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_discard *ir) +{ + if 
(this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_if *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_if *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_emit_vertex *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_emit_vertex *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_end_primitive *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_end_primitive *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +void +ir_hierarchical_visitor::run(exec_list *instructions) +{ + visit_list_elements(this, instructions); +} + + +void +visit_tree(ir_instruction *ir, + void (*callback_enter)(class ir_instruction *ir, void *data), + void *data_enter, + void (*callback_leave)(class ir_instruction *ir, void *data), + void *data_leave) +{ + ir_hierarchical_visitor v; + + v.callback_enter = callback_enter; + v.callback_leave = callback_leave; + v.data_enter = data_enter; + v.data_leave = data_leave; + + ir->accept(&v); +} diff --git a/src/compiler/glsl/ir_hierarchical_visitor.h b/src/compiler/glsl/ir_hierarchical_visitor.h new file mode 100644 index 0000000..28517b6 --- /dev/null +++ b/src/compiler/glsl/ir_hierarchical_visitor.h @@ -0,0 +1,209 @@ 
+/* -*- c++ -*- */ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#pragma once +#ifndef IR_HIERARCHICAL_VISITOR_H +#define IR_HIERARCHICAL_VISITOR_H + +/** + * Enumeration values returned by visit methods to guide processing + */ +enum ir_visitor_status { + visit_continue, /**< Continue visiting as normal. */ + visit_continue_with_parent, /**< Don't visit siblings, continue w/parent. */ + visit_stop /**< Stop visiting immediately. */ +}; + + +#ifdef __cplusplus +/** + * Base class of hierarchical visitors of IR instruction trees + * + * Hierarchical visitors differ from traditional visitors in a couple of + * important ways. Rather than having a single \c visit method for each + * subclass in the composite, there are three kinds of visit methods. + * Leaf-node classes have a traditional \c visit method. 
Internal-node + * classes have a \c visit_enter method, which is invoked just before + * processing child nodes, and a \c visit_leave method which is invoked just + * after processing child nodes. + * + * In addition, each visit method and the \c accept methods in the composite + * have a return value which guides the navigation. Any of the visit methods + * can choose to continue visiting the tree as normal (by returning \c + * visit_continue), terminate visiting any further nodes immediately (by + * returning \c visit_stop), or stop visiting sibling nodes (by returning \c + * visit_continue_with_parent). + * + * These two changes combine to allow nagivation of children to be implemented + * in the composite's \c accept method. The \c accept method for a leaf-node + * class will simply call the \c visit method, as usual, and pass its return + * value on. The \c accept method for internal-node classes will call the \c + * visit_enter method, call the \c accept method of each child node, and, + * finally, call the \c visit_leave method. If any of these return a value + * other that \c visit_continue, the correct action must be taken. + * + * The final benefit is that the hierarchical visitor base class need not be + * abstract. Default implementations of every \c visit, \c visit_enter, and + * \c visit_leave method can be provided. By default each of these methods + * simply returns \c visit_continue. This allows a significant reduction in + * derived class code. 
+ * + * For more information about hierarchical visitors, see: + * + * http://c2.com/cgi/wiki?HierarchicalVisitorPattern + * http://c2.com/cgi/wiki?HierarchicalVisitorDiscussion + */ + +class ir_hierarchical_visitor { +public: + ir_hierarchical_visitor(); + + /** + * \name Visit methods for leaf-node classes + */ + /*@{*/ + virtual ir_visitor_status visit(class ir_rvalue *); + virtual ir_visitor_status visit(class ir_variable *); + virtual ir_visitor_status visit(class ir_constant *); + virtual ir_visitor_status visit(class ir_loop_jump *); + virtual ir_visitor_status visit(class ir_barrier *); + + /** + * ir_dereference_variable isn't technically a leaf, but it is treated as a + * leaf here for a couple reasons. By not automatically visiting the one + * child ir_variable node from the ir_dereference_variable, ir_variable + * nodes can always be handled as variable declarations. Code that used + * non-hierarchical visitors had to set an "in a dereference" flag to + * determine how to handle an ir_variable. By forcing the visitor to + * handle the ir_variable within the ir_dereference_variable visitor, this + * kludge can be avoided. + * + * In addition, I can envision no use for having separate enter and leave + * methods. Anything that could be done in the enter and leave methods + * that couldn't just be done in the visit method. 
+ */ + virtual ir_visitor_status visit(class ir_dereference_variable *); + /*@}*/ + + /** + * \name Visit methods for internal-node classes + */ + /*@{*/ + virtual ir_visitor_status visit_enter(class ir_loop *); + virtual ir_visitor_status visit_leave(class ir_loop *); + virtual ir_visitor_status visit_enter(class ir_function_signature *); + virtual ir_visitor_status visit_leave(class ir_function_signature *); + virtual ir_visitor_status visit_enter(class ir_function *); + virtual ir_visitor_status visit_leave(class ir_function *); + virtual ir_visitor_status visit_enter(class ir_expression *); + virtual ir_visitor_status visit_leave(class ir_expression *); + virtual ir_visitor_status visit_enter(class ir_texture *); + virtual ir_visitor_status visit_leave(class ir_texture *); + virtual ir_visitor_status visit_enter(class ir_swizzle *); + virtual ir_visitor_status visit_leave(class ir_swizzle *); + virtual ir_visitor_status visit_enter(class ir_dereference_array *); + virtual ir_visitor_status visit_leave(class ir_dereference_array *); + virtual ir_visitor_status visit_enter(class ir_dereference_record *); + virtual ir_visitor_status visit_leave(class ir_dereference_record *); + virtual ir_visitor_status visit_enter(class ir_assignment *); + virtual ir_visitor_status visit_leave(class ir_assignment *); + virtual ir_visitor_status visit_enter(class ir_call *); + virtual ir_visitor_status visit_leave(class ir_call *); + virtual ir_visitor_status visit_enter(class ir_return *); + virtual ir_visitor_status visit_leave(class ir_return *); + virtual ir_visitor_status visit_enter(class ir_discard *); + virtual ir_visitor_status visit_leave(class ir_discard *); + virtual ir_visitor_status visit_enter(class ir_if *); + virtual ir_visitor_status visit_leave(class ir_if *); + virtual ir_visitor_status visit_enter(class ir_emit_vertex *); + virtual ir_visitor_status visit_leave(class ir_emit_vertex *); + virtual ir_visitor_status visit_enter(class ir_end_primitive *); + 
virtual ir_visitor_status visit_leave(class ir_end_primitive *); + /*@}*/ + + + /** + * Utility function to process a linked list of instructions with a visitor + */ + void run(struct exec_list *instructions); + + /* Some visitors may need to insert new variable declarations and + * assignments for portions of a subtree, which means they need a + * pointer to the current instruction in the stream, not just their + * node in the tree rooted at that instruction. + * + * This is implemented by visit_list_elements -- if the visitor is + * not called by it, nothing good will happen. + */ + class ir_instruction *base_ir; + + /** + * Callback function that is invoked on entry to each node visited. + * + * \warning + * Visitor classes derived from \c ir_hierarchical_visitor \b may \b not + * invoke this function. This can be used, for example, to cause the + * callback to be invoked on every node type except one. + */ + void (*callback_enter)(class ir_instruction *ir, void *data); + + /** + * Callback function that is invoked on exit of each node visited. + * + * \warning + * Visitor classes derived from \c ir_hierarchical_visitor \b may \b not + * invoke this function. This can be used, for example, to cause the + * callback to be invoked on every node type except one. + */ + void (*callback_leave)(class ir_instruction *ir, void *data); + + /** + * Extra data parameter passed to the per-node callback_enter function + */ + void *data_enter; + + /** + * Extra data parameter passed to the per-node callback_leave function + */ + void *data_leave; + + /** + * Currently in the LHS of an assignment? + * + * This is set and cleared by the \c ir_assignment::accept method. 
+ */ + bool in_assignee; +}; + +void visit_tree(ir_instruction *ir, + void (*callback_enter)(class ir_instruction *ir, void *data), + void *data_enter, + void (*callback_leave)(class ir_instruction *ir, void *data) = NULL, + void *data_leave = NULL); + +ir_visitor_status visit_list_elements(ir_hierarchical_visitor *v, exec_list *l, + bool statement_list = true); +#endif /* __cplusplus */ + +#endif /* IR_HIERARCHICAL_VISITOR_H */ diff --git a/src/compiler/glsl/ir_hv_accept.cpp b/src/compiler/glsl/ir_hv_accept.cpp new file mode 100644 index 0000000..213992a --- /dev/null +++ b/src/compiler/glsl/ir_hv_accept.cpp @@ -0,0 +1,439 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "ir.h" + +/** + * \file ir_hv_accept.cpp + * Implementations of all hierarchical visitor accept methods for IR + * instructions. 
+ */ + +/** + * Process a list of nodes using a hierarchical vistor. + * + * If statement_list is true (the default), this is a list of statements, so + * v->base_ir will be set to point to each statement just before iterating + * over it, and restored after iteration is complete. If statement_list is + * false, this is a list that appears inside a statement (e.g. a parameter + * list), so v->base_ir will be left alone. + * + * \warning + * This function will operate correctly if a node being processed is removed + * from the list. However, if nodes are added to the list after the node being + * processed, some of the added nodes may not be processed. + */ +ir_visitor_status +visit_list_elements(ir_hierarchical_visitor *v, exec_list *l, + bool statement_list) +{ + ir_instruction *prev_base_ir = v->base_ir; + + foreach_in_list_safe(ir_instruction, ir, l) { + if (statement_list) + v->base_ir = ir; + ir_visitor_status s = ir->accept(v); + + if (s != visit_continue) + return s; + } + if (statement_list) + v->base_ir = prev_base_ir; + + return visit_continue; +} + + +ir_visitor_status +ir_rvalue::accept(ir_hierarchical_visitor *v) +{ + return v->visit(this); +} + + +ir_visitor_status +ir_variable::accept(ir_hierarchical_visitor *v) +{ + return v->visit(this); +} + + +ir_visitor_status +ir_loop::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + s = visit_list_elements(v, &this->body_instructions); + if (s == visit_stop) + return s; + + return v->visit_leave(this); +} + + +ir_visitor_status +ir_loop_jump::accept(ir_hierarchical_visitor *v) +{ + return v->visit(this); +} + + +ir_visitor_status +ir_function_signature::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? 
visit_continue : s; + + s = visit_list_elements(v, &this->parameters); + if (s == visit_stop) + return s; + + s = visit_list_elements(v, &this->body); + return (s == visit_stop) ? s : v->visit_leave(this); +} + + +ir_visitor_status +ir_function::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + s = visit_list_elements(v, &this->signatures, false); + return (s == visit_stop) ? s : v->visit_leave(this); +} + + +ir_visitor_status +ir_expression::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + for (unsigned i = 0; i < this->get_num_operands(); i++) { + switch (this->operands[i]->accept(v)) { + case visit_continue: + break; + + case visit_continue_with_parent: + // I wish for Java's labeled break-statement here. + goto done; + + case visit_stop: + return s; + } + } + +done: + return v->visit_leave(this); +} + +ir_visitor_status +ir_texture::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + s = this->sampler->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + if (this->coordinate) { + s = this->coordinate->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + } + + if (this->projector) { + s = this->projector->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + } + + if (this->shadow_comparitor) { + s = this->shadow_comparitor->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? 
visit_continue : s; + } + + if (this->offset) { + s = this->offset->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + } + + switch (this->op) { + case ir_tex: + case ir_lod: + case ir_query_levels: + case ir_texture_samples: + case ir_samples_identical: + break; + case ir_txb: + s = this->lod_info.bias->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + break; + case ir_txl: + case ir_txf: + case ir_txs: + s = this->lod_info.lod->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + break; + case ir_txf_ms: + s = this->lod_info.sample_index->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + break; + case ir_txd: + s = this->lod_info.grad.dPdx->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + s = this->lod_info.grad.dPdy->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + break; + case ir_tg4: + s = this->lod_info.component->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + break; + } + + return (s == visit_stop) ? s : v->visit_leave(this); +} + + +ir_visitor_status +ir_swizzle::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + s = this->val->accept(v); + return (s == visit_stop) ? s : v->visit_leave(this); +} + + +ir_visitor_status +ir_dereference_variable::accept(ir_hierarchical_visitor *v) +{ + return v->visit(this); +} + + +ir_visitor_status +ir_dereference_array::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? 
visit_continue : s; + + /* The array index is not the target of the assignment, so clear the + * 'in_assignee' flag. Restore it after returning from the array index. + */ + const bool was_in_assignee = v->in_assignee; + v->in_assignee = false; + s = this->array_index->accept(v); + v->in_assignee = was_in_assignee; + + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + s = this->array->accept(v); + return (s == visit_stop) ? s : v->visit_leave(this); +} + + +ir_visitor_status +ir_dereference_record::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + s = this->record->accept(v); + return (s == visit_stop) ? s : v->visit_leave(this); +} + + +ir_visitor_status +ir_assignment::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + v->in_assignee = true; + s = this->lhs->accept(v); + v->in_assignee = false; + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + s = this->rhs->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + if (this->condition) + s = this->condition->accept(v); + + return (s == visit_stop) ? s : v->visit_leave(this); +} + + +ir_visitor_status +ir_constant::accept(ir_hierarchical_visitor *v) +{ + return v->visit(this); +} + + +ir_visitor_status +ir_call::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + if (this->return_deref != NULL) { + v->in_assignee = true; + s = this->return_deref->accept(v); + v->in_assignee = false; + if (s != visit_continue) + return (s == visit_continue_with_parent) ? 
visit_continue : s; + } + + s = visit_list_elements(v, &this->actual_parameters, false); + if (s == visit_stop) + return s; + + return v->visit_leave(this); +} + + +ir_visitor_status +ir_return::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + ir_rvalue *val = this->get_value(); + if (val) { + s = val->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + } + + return v->visit_leave(this); +} + + +ir_visitor_status +ir_discard::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + if (this->condition != NULL) { + s = this->condition->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + } + + return v->visit_leave(this); +} + + +ir_visitor_status +ir_if::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + s = this->condition->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + if (s != visit_continue_with_parent) { + s = visit_list_elements(v, &this->then_instructions); + if (s == visit_stop) + return s; + } + + if (s != visit_continue_with_parent) { + s = visit_list_elements(v, &this->else_instructions); + if (s == visit_stop) + return s; + } + + return v->visit_leave(this); +} + +ir_visitor_status +ir_emit_vertex::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + s = this->stream->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? 
visit_continue : s; + + return (s == visit_stop) ? s : v->visit_leave(this); +} + + +ir_visitor_status +ir_end_primitive::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + s = this->stream->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + return (s == visit_stop) ? s : v->visit_leave(this); +} + +ir_visitor_status +ir_barrier::accept(ir_hierarchical_visitor *v) +{ + return v->visit(this); +} diff --git a/src/compiler/glsl/ir_import_prototypes.cpp b/src/compiler/glsl/ir_import_prototypes.cpp new file mode 100644 index 0000000..b0429fb --- /dev/null +++ b/src/compiler/glsl/ir_import_prototypes.cpp @@ -0,0 +1,125 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +/** + * \file ir_import_prototypes.cpp + * Import function prototypes from one IR tree into another. + * + * \author Ian Romanick + */ +#include "ir.h" +#include "glsl_symbol_table.h" + +namespace { + +/** + * Visitor used to import function prototypes + * + * Normally the \c clone method of either \c ir_function or + * \c ir_function_signature could be used. However, we don't want a complete + * clone of the \c ir_function_signature. We want everything \b except the + * body of the function. + */ +class import_prototype_visitor : public ir_hierarchical_visitor { +public: + /** + */ + import_prototype_visitor(exec_list *list, glsl_symbol_table *symbols, + void *mem_ctx) + { + this->mem_ctx = mem_ctx; + this->list = list; + this->symbols = symbols; + this->function = NULL; + } + + virtual ir_visitor_status visit_enter(ir_function *ir) + { + assert(this->function == NULL); + + this->function = this->symbols->get_function(ir->name); + if (!this->function) { + this->function = new(this->mem_ctx) ir_function(ir->name); + + list->push_tail(this->function); + + /* Add the new function to the symbol table. + */ + this->symbols->add_function(this->function); + } + return visit_continue; + } + + virtual ir_visitor_status visit_leave(ir_function *ir) + { + (void) ir; + assert(this->function != NULL); + + this->function = NULL; + return visit_continue; + } + + ir_visitor_status visit_enter(ir_function_signature *ir) + { + assert(this->function != NULL); + + ir_function_signature *copy = ir->clone_prototype(mem_ctx, NULL); + + this->function->add_signature(copy); + + /* Do not process child nodes of the ir_function_signature. There can + * never be any nodes inside the ir_function_signature that we care + * about. Instead continue with the next sibling. 
+ */ + return visit_continue_with_parent; + } + +private: + exec_list *list; + ir_function *function; + glsl_symbol_table *symbols; + void *mem_ctx; +}; + +} /* anonymous namespace */ + +/** + * Import function prototypes from one IR tree into another + * + * \param source Source instruction stream containing functions whose + * prototypes are to be imported + * \param dest Destination instruction stream where new \c ir_function and + * \c ir_function_signature nodes will be stored + * \param symbols Symbol table where new functions will be stored + * \param mem_ctx ralloc memory context used for new allocations + */ +void +import_prototypes(const exec_list *source, exec_list *dest, + glsl_symbol_table *symbols, void *mem_ctx) +{ + import_prototype_visitor v(dest, symbols, mem_ctx); + + /* Making source be const is just extra documentation. + */ + v.run(const_cast<exec_list *>(source)); +} diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h new file mode 100644 index 0000000..be86f54 --- /dev/null +++ b/src/compiler/glsl/ir_optimization.h @@ -0,0 +1,147 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + +/** + * \file ir_optimization.h + * + * Prototypes for optimization passes to be called by the compiler and drivers. + */ + +/* Operations for lower_instructions() */ +#define SUB_TO_ADD_NEG 0x01 +#define DIV_TO_MUL_RCP 0x02 +#define EXP_TO_EXP2 0x04 +#define POW_TO_EXP2 0x08 +#define LOG_TO_LOG2 0x10 +#define MOD_TO_FLOOR 0x20 +#define INT_DIV_TO_MUL_RCP 0x40 +#define LDEXP_TO_ARITH 0x80 +#define CARRY_TO_ARITH 0x100 +#define BORROW_TO_ARITH 0x200 +#define SAT_TO_CLAMP 0x400 +#define DOPS_TO_DFRAC 0x800 +#define DFREXP_DLDEXP_TO_ARITH 0x1000 + +/** + * \see class lower_packing_builtins_visitor + */ +enum lower_packing_builtins_op { + LOWER_PACK_UNPACK_NONE = 0x0000, + + LOWER_PACK_SNORM_2x16 = 0x0001, + LOWER_UNPACK_SNORM_2x16 = 0x0002, + + LOWER_PACK_UNORM_2x16 = 0x0004, + LOWER_UNPACK_UNORM_2x16 = 0x0008, + + LOWER_PACK_HALF_2x16 = 0x0010, + LOWER_UNPACK_HALF_2x16 = 0x0020, + + LOWER_PACK_HALF_2x16_TO_SPLIT = 0x0040, + LOWER_UNPACK_HALF_2x16_TO_SPLIT = 0x0080, + + LOWER_PACK_SNORM_4x8 = 0x0100, + LOWER_UNPACK_SNORM_4x8 = 0x0200, + + LOWER_PACK_UNORM_4x8 = 0x0400, + LOWER_UNPACK_UNORM_4x8 = 0x0800, + + LOWER_PACK_USE_BFI = 0x1000, + LOWER_PACK_USE_BFE = 0x2000, +}; + +bool do_common_optimization(exec_list *ir, bool linked, + bool uniform_locations_assigned, + const struct gl_shader_compiler_options *options, + bool native_integers); + +bool do_rebalance_tree(exec_list *instructions); +bool do_algebraic(exec_list *instructions, bool native_integers, + const struct 
gl_shader_compiler_options *options); +bool opt_conditional_discard(exec_list *instructions); +bool do_constant_folding(exec_list *instructions); +bool do_constant_variable(exec_list *instructions); +bool do_constant_variable_unlinked(exec_list *instructions); +bool do_copy_propagation(exec_list *instructions); +bool do_copy_propagation_elements(exec_list *instructions); +bool do_constant_propagation(exec_list *instructions); +void do_dead_builtin_varyings(struct gl_context *ctx, + gl_shader *producer, gl_shader *consumer, + unsigned num_tfeedback_decls, + class tfeedback_decl *tfeedback_decls); +bool do_dead_code(exec_list *instructions, bool uniform_locations_assigned); +bool do_dead_code_local(exec_list *instructions); +bool do_dead_code_unlinked(exec_list *instructions); +bool do_dead_functions(exec_list *instructions); +bool opt_flip_matrices(exec_list *instructions); +bool do_function_inlining(exec_list *instructions); +bool do_lower_jumps(exec_list *instructions, bool pull_out_jumps = true, bool lower_sub_return = true, bool lower_main_return = false, bool lower_continue = false, bool lower_break = false); +bool do_lower_texture_projection(exec_list *instructions); +bool do_if_simplification(exec_list *instructions); +bool opt_flatten_nested_if_blocks(exec_list *instructions); +bool do_discard_simplification(exec_list *instructions); +bool lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth = 0); +bool do_mat_op_to_vec(exec_list *instructions); +bool do_minmax_prune(exec_list *instructions); +bool do_noop_swizzle(exec_list *instructions); +bool do_structure_splitting(exec_list *instructions); +bool do_swizzle_swizzle(exec_list *instructions); +bool do_vectorize(exec_list *instructions); +bool do_tree_grafting(exec_list *instructions); +bool do_vec_index_to_cond_assign(exec_list *instructions); +bool do_vec_index_to_swizzle(exec_list *instructions); +bool lower_discard(exec_list *instructions); +void lower_discard_flow(exec_list 
*instructions); +bool lower_instructions(exec_list *instructions, unsigned what_to_lower); +bool lower_noise(exec_list *instructions); +bool lower_variable_index_to_cond_assign(gl_shader_stage stage, + exec_list *instructions, bool lower_input, bool lower_output, + bool lower_temp, bool lower_uniform); +bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz); +bool lower_const_arrays_to_uniforms(exec_list *instructions); +bool lower_clip_distance(gl_shader *shader); +void lower_output_reads(unsigned stage, exec_list *instructions); +bool lower_packing_builtins(exec_list *instructions, int op_mask); +void lower_shared_reference(struct gl_shader *shader, unsigned *shared_size); +void lower_ubo_reference(struct gl_shader *shader); +void lower_packed_varyings(void *mem_ctx, + unsigned locations_used, ir_variable_mode mode, + unsigned gs_input_vertices, gl_shader *shader); +bool lower_vector_insert(exec_list *instructions, bool lower_nonconstant_index); +bool lower_vector_derefs(gl_shader *shader); +void lower_named_interface_blocks(void *mem_ctx, gl_shader *shader); +bool optimize_redundant_jumps(exec_list *instructions); +bool optimize_split_arrays(exec_list *instructions, bool linked); +bool lower_offset_arrays(exec_list *instructions); +void optimize_dead_builtin_variables(exec_list *instructions, + enum ir_variable_mode other); +bool lower_tess_level(gl_shader *shader); + +bool lower_vertex_id(gl_shader *shader); + +bool lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state); + +ir_rvalue * +compare_index_block(exec_list *instructions, ir_variable *index, + unsigned base, unsigned components, void *mem_ctx); diff --git a/src/compiler/glsl/ir_print_visitor.cpp b/src/compiler/glsl/ir_print_visitor.cpp new file mode 100644 index 0000000..960b23f --- /dev/null +++ b/src/compiler/glsl/ir_print_visitor.cpp @@ -0,0 +1,604 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any 
person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "ir_print_visitor.h" +#include "compiler/glsl_types.h" +#include "glsl_parser_extras.h" +#include "main/macros.h" +#include "util/hash_table.h" + +static void print_type(FILE *f, const glsl_type *t); + +void +ir_instruction::print(void) const +{ + this->fprint(stdout); +} + +void +ir_instruction::fprint(FILE *f) const +{ + ir_instruction *deconsted = const_cast<ir_instruction *>(this); + + ir_print_visitor v(f); + deconsted->accept(&v); +} + +extern "C" { +void +_mesa_print_ir(FILE *f, exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + if (state) { + for (unsigned i = 0; i < state->num_user_structures; i++) { + const glsl_type *const s = state->user_structures[i]; + + fprintf(f, "(structure (%s) (%s@%p) (%u) (\n", + s->name, s->name, (void *) s, s->length); + + for (unsigned j = 0; j < s->length; j++) { + fprintf(f, "\t(("); + print_type(f, s->fields.structure[j].type); + fprintf(f, ")(%s))\n", s->fields.structure[j].name); + } + + fprintf(f, ")\n"); + } + } + + fprintf(f, "(\n"); + foreach_in_list(ir_instruction, ir, instructions) { + ir->fprint(f); + if (ir->ir_type != ir_type_function) + fprintf(f, "\n"); + } + fprintf(f, ")\n"); +} + +void +fprint_ir(FILE *f, const void *instruction) +{ + const ir_instruction *ir = (const ir_instruction *)instruction; + ir->fprint(f); +} + +} /* extern "C" */ + +ir_print_visitor::ir_print_visitor(FILE *f) + : f(f) +{ + indentation = 0; + printable_names = + _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); + symbols = _mesa_symbol_table_ctor(); + mem_ctx = ralloc_context(NULL); +} + +ir_print_visitor::~ir_print_visitor() +{ + _mesa_hash_table_destroy(printable_names, NULL); + _mesa_symbol_table_dtor(symbols); + ralloc_free(mem_ctx); +} + +void ir_print_visitor::indent(void) +{ + for (int i = 0; i < indentation; i++) + fprintf(f, " "); +} + +const char * +ir_print_visitor::unique_name(ir_variable *var) +{ + /* var->name can be NULL in function prototypes when a type 
is given for a + * parameter but no name is given. In that case, just return an empty + * string. Don't worry about tracking the generated name in the printable + * names hash because this is the only scope where it can ever appear. + */ + if (var->name == NULL) { + static unsigned arg = 1; + return ralloc_asprintf(this->mem_ctx, "parameter@%u", arg++); + } + + /* Do we already have a name for this variable? */ + struct hash_entry * entry = + _mesa_hash_table_search(this->printable_names, var); + + if (entry != NULL) { + return (const char *) entry->data; + } + + /* If there's no conflict, just use the original name */ + const char* name = NULL; + if (_mesa_symbol_table_find_symbol(this->symbols, -1, var->name) == NULL) { + name = var->name; + } else { + static unsigned i = 1; + name = ralloc_asprintf(this->mem_ctx, "%s@%u", var->name, ++i); + } + _mesa_hash_table_insert(this->printable_names, var, (void *) name); + _mesa_symbol_table_add_symbol(this->symbols, -1, name, var); + return name; +} + +static void +print_type(FILE *f, const glsl_type *t) +{ + if (t->base_type == GLSL_TYPE_ARRAY) { + fprintf(f, "(array "); + print_type(f, t->fields.array); + fprintf(f, " %u)", t->length); + } else if ((t->base_type == GLSL_TYPE_STRUCT) + && !is_gl_identifier(t->name)) { + fprintf(f, "%s@%p", t->name, (void *) t); + } else { + fprintf(f, "%s", t->name); + } +} + +void ir_print_visitor::visit(ir_rvalue *) +{ + fprintf(f, "error"); +} + +void ir_print_visitor::visit(ir_variable *ir) +{ + fprintf(f, "(declare "); + + char loc[256] = {0}; + if (ir->data.location != -1) + snprintf(loc, sizeof(loc), "location=%i ", ir->data.location); + + const char *const cent = (ir->data.centroid) ? "centroid " : ""; + const char *const samp = (ir->data.sample) ? "sample " : ""; + const char *const patc = (ir->data.patch) ? "patch " : ""; + const char *const inv = (ir->data.invariant) ? 
"invariant " : ""; + const char *const mode[] = { "", "uniform ", "shader_storage ", + "shader_shared ", "shader_in ", "shader_out ", + "in ", "out ", "inout ", + "const_in ", "sys ", "temporary " }; + STATIC_ASSERT(ARRAY_SIZE(mode) == ir_var_mode_count); + const char *const stream [] = {"", "stream1 ", "stream2 ", "stream3 "}; + const char *const interp[] = { "", "smooth", "flat", "noperspective" }; + STATIC_ASSERT(ARRAY_SIZE(interp) == INTERP_QUALIFIER_COUNT); + + fprintf(f, "(%s%s%s%s%s%s%s%s) ", + loc, cent, samp, patc, inv, mode[ir->data.mode], + stream[ir->data.stream], + interp[ir->data.interpolation]); + + print_type(f, ir->type); + fprintf(f, " %s)", unique_name(ir)); +} + + +void ir_print_visitor::visit(ir_function_signature *ir) +{ + _mesa_symbol_table_push_scope(symbols); + fprintf(f, "(signature "); + indentation++; + + print_type(f, ir->return_type); + fprintf(f, "\n"); + indent(); + + fprintf(f, "(parameters\n"); + indentation++; + + foreach_in_list(ir_variable, inst, &ir->parameters) { + indent(); + inst->accept(this); + fprintf(f, "\n"); + } + indentation--; + + indent(); + fprintf(f, ")\n"); + + indent(); + + fprintf(f, "(\n"); + indentation++; + + foreach_in_list(ir_instruction, inst, &ir->body) { + indent(); + inst->accept(this); + fprintf(f, "\n"); + } + indentation--; + indent(); + fprintf(f, "))\n"); + indentation--; + _mesa_symbol_table_pop_scope(symbols); +} + + +void ir_print_visitor::visit(ir_function *ir) +{ + fprintf(f, "(%s function %s\n", ir->is_subroutine ? 
"subroutine" : "", ir->name); + indentation++; + foreach_in_list(ir_function_signature, sig, &ir->signatures) { + indent(); + sig->accept(this); + fprintf(f, "\n"); + } + indentation--; + indent(); + fprintf(f, ")\n\n"); +} + + +void ir_print_visitor::visit(ir_expression *ir) +{ + fprintf(f, "(expression "); + + print_type(f, ir->type); + + fprintf(f, " %s ", ir->operator_string()); + + for (unsigned i = 0; i < ir->get_num_operands(); i++) { + ir->operands[i]->accept(this); + } + + fprintf(f, ") "); +} + + +void ir_print_visitor::visit(ir_texture *ir) +{ + fprintf(f, "(%s ", ir->opcode_string()); + + if (ir->op == ir_samples_identical) { + ir->sampler->accept(this); + fprintf(f, " "); + ir->coordinate->accept(this); + fprintf(f, ")"); + return; + } + + print_type(f, ir->type); + fprintf(f, " "); + + ir->sampler->accept(this); + fprintf(f, " "); + + if (ir->op != ir_txs && ir->op != ir_query_levels && + ir->op != ir_texture_samples) { + ir->coordinate->accept(this); + + fprintf(f, " "); + + if (ir->offset != NULL) { + ir->offset->accept(this); + } else { + fprintf(f, "0"); + } + + fprintf(f, " "); + } + + if (ir->op != ir_txf && ir->op != ir_txf_ms && + ir->op != ir_txs && ir->op != ir_tg4 && + ir->op != ir_query_levels && ir->op != ir_texture_samples) { + if (ir->projector) + ir->projector->accept(this); + else + fprintf(f, "1"); + + if (ir->shadow_comparitor) { + fprintf(f, " "); + ir->shadow_comparitor->accept(this); + } else { + fprintf(f, " ()"); + } + } + + fprintf(f, " "); + switch (ir->op) + { + case ir_tex: + case ir_lod: + case ir_query_levels: + case ir_texture_samples: + break; + case ir_txb: + ir->lod_info.bias->accept(this); + break; + case ir_txl: + case ir_txf: + case ir_txs: + ir->lod_info.lod->accept(this); + break; + case ir_txf_ms: + ir->lod_info.sample_index->accept(this); + break; + case ir_txd: + fprintf(f, "("); + ir->lod_info.grad.dPdx->accept(this); + fprintf(f, " "); + ir->lod_info.grad.dPdy->accept(this); + fprintf(f, ")"); + break; + 
case ir_tg4: + ir->lod_info.component->accept(this); + break; + case ir_samples_identical: + unreachable(!"ir_samples_identical was already handled"); + }; + fprintf(f, ")"); +} + + +void ir_print_visitor::visit(ir_swizzle *ir) +{ + const unsigned swiz[4] = { + ir->mask.x, + ir->mask.y, + ir->mask.z, + ir->mask.w, + }; + + fprintf(f, "(swiz "); + for (unsigned i = 0; i < ir->mask.num_components; i++) { + fprintf(f, "%c", "xyzw"[swiz[i]]); + } + fprintf(f, " "); + ir->val->accept(this); + fprintf(f, ")"); +} + + +void ir_print_visitor::visit(ir_dereference_variable *ir) +{ + ir_variable *var = ir->variable_referenced(); + fprintf(f, "(var_ref %s) ", unique_name(var)); +} + + +void ir_print_visitor::visit(ir_dereference_array *ir) +{ + fprintf(f, "(array_ref "); + ir->array->accept(this); + ir->array_index->accept(this); + fprintf(f, ") "); +} + + +void ir_print_visitor::visit(ir_dereference_record *ir) +{ + fprintf(f, "(record_ref "); + ir->record->accept(this); + fprintf(f, " %s) ", ir->field); +} + + +void ir_print_visitor::visit(ir_assignment *ir) +{ + fprintf(f, "(assign "); + + if (ir->condition) + ir->condition->accept(this); + + char mask[5]; + unsigned j = 0; + + for (unsigned i = 0; i < 4; i++) { + if ((ir->write_mask & (1 << i)) != 0) { + mask[j] = "xyzw"[i]; + j++; + } + } + mask[j] = '\0'; + + fprintf(f, " (%s) ", mask); + + ir->lhs->accept(this); + + fprintf(f, " "); + + ir->rhs->accept(this); + fprintf(f, ") "); +} + + +void ir_print_visitor::visit(ir_constant *ir) +{ + fprintf(f, "(constant "); + print_type(f, ir->type); + fprintf(f, " ("); + + if (ir->type->is_array()) { + for (unsigned i = 0; i < ir->type->length; i++) + ir->get_array_element(i)->accept(this); + } else if (ir->type->is_record()) { + ir_constant *value = (ir_constant *) ir->components.get_head(); + for (unsigned i = 0; i < ir->type->length; i++) { + fprintf(f, "(%s ", ir->type->fields.structure[i].name); + value->accept(this); + fprintf(f, ")"); + + value = (ir_constant *) 
value->next; + } + } else { + for (unsigned i = 0; i < ir->type->components(); i++) { + if (i != 0) + fprintf(f, " "); + switch (ir->type->base_type) { + case GLSL_TYPE_UINT: fprintf(f, "%u", ir->value.u[i]); break; + case GLSL_TYPE_INT: fprintf(f, "%d", ir->value.i[i]); break; + case GLSL_TYPE_FLOAT: + if (ir->value.f[i] == 0.0f) + /* 0.0 == -0.0, so print with %f to get the proper sign. */ + fprintf(f, "%f", ir->value.f[i]); + else if (fabs(ir->value.f[i]) < 0.000001f) + fprintf(f, "%a", ir->value.f[i]); + else if (fabs(ir->value.f[i]) > 1000000.0f) + fprintf(f, "%e", ir->value.f[i]); + else + fprintf(f, "%f", ir->value.f[i]); + break; + case GLSL_TYPE_BOOL: fprintf(f, "%d", ir->value.b[i]); break; + case GLSL_TYPE_DOUBLE: + if (ir->value.d[i] == 0.0) + /* 0.0 == -0.0, so print with %f to get the proper sign. */ + fprintf(f, "%.1f", ir->value.d[i]); + else if (fabs(ir->value.d[i]) < 0.000001) + fprintf(f, "%a", ir->value.d[i]); + else if (fabs(ir->value.d[i]) > 1000000.0) + fprintf(f, "%e", ir->value.d[i]); + else + fprintf(f, "%f", ir->value.d[i]); + break; + default: assert(0); + } + } + } + fprintf(f, ")) "); +} + + +void +ir_print_visitor::visit(ir_call *ir) +{ + fprintf(f, "(call %s ", ir->callee_name()); + if (ir->return_deref) + ir->return_deref->accept(this); + fprintf(f, " ("); + foreach_in_list(ir_rvalue, param, &ir->actual_parameters) { + param->accept(this); + } + fprintf(f, "))\n"); +} + + +void +ir_print_visitor::visit(ir_return *ir) +{ + fprintf(f, "(return"); + + ir_rvalue *const value = ir->get_value(); + if (value) { + fprintf(f, " "); + value->accept(this); + } + + fprintf(f, ")"); +} + + +void +ir_print_visitor::visit(ir_discard *ir) +{ + fprintf(f, "(discard "); + + if (ir->condition != NULL) { + fprintf(f, " "); + ir->condition->accept(this); + } + + fprintf(f, ")"); +} + + +void +ir_print_visitor::visit(ir_if *ir) +{ + fprintf(f, "(if "); + ir->condition->accept(this); + + fprintf(f, "(\n"); + indentation++; + + 
foreach_in_list(ir_instruction, inst, &ir->then_instructions) { + indent(); + inst->accept(this); + fprintf(f, "\n"); + } + + indentation--; + indent(); + fprintf(f, ")\n"); + + indent(); + if (!ir->else_instructions.is_empty()) { + fprintf(f, "(\n"); + indentation++; + + foreach_in_list(ir_instruction, inst, &ir->else_instructions) { + indent(); + inst->accept(this); + fprintf(f, "\n"); + } + indentation--; + indent(); + fprintf(f, "))\n"); + } else { + fprintf(f, "())\n"); + } +} + + +void +ir_print_visitor::visit(ir_loop *ir) +{ + fprintf(f, "(loop (\n"); + indentation++; + + foreach_in_list(ir_instruction, inst, &ir->body_instructions) { + indent(); + inst->accept(this); + fprintf(f, "\n"); + } + indentation--; + indent(); + fprintf(f, "))\n"); +} + + +void +ir_print_visitor::visit(ir_loop_jump *ir) +{ + fprintf(f, "%s", ir->is_break() ? "break" : "continue"); +} + +void +ir_print_visitor::visit(ir_emit_vertex *ir) +{ + fprintf(f, "(emit-vertex "); + ir->stream->accept(this); + fprintf(f, ")\n"); +} + +void +ir_print_visitor::visit(ir_end_primitive *ir) +{ + fprintf(f, "(end-primitive "); + ir->stream->accept(this); + fprintf(f, ")\n"); +} + +void +ir_print_visitor::visit(ir_barrier *) +{ + fprintf(f, "(barrier)\n"); +} diff --git a/src/compiler/glsl/ir_print_visitor.h b/src/compiler/glsl/ir_print_visitor.h new file mode 100644 index 0000000..965e63a --- /dev/null +++ b/src/compiler/glsl/ir_print_visitor.h @@ -0,0 +1,96 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above 
copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#pragma once +#ifndef IR_PRINT_VISITOR_H +#define IR_PRINT_VISITOR_H + +#include "ir.h" +#include "ir_visitor.h" + +extern "C" { +#include "program/symbol_table.h" +} + +/** + * Abstract base class of visitors of IR instruction trees + */ +class ir_print_visitor : public ir_visitor { +public: + ir_print_visitor(FILE *f); + virtual ~ir_print_visitor(); + + void indent(void); + + /** + * \name Visit methods + * + * As typical for the visitor pattern, there must be one \c visit method for + * each concrete subclass of \c ir_instruction. Virtual base classes within + * the hierarchy should not have \c visit methods. 
+ */ + /*@{*/ + virtual void visit(ir_rvalue *); + virtual void visit(ir_variable *); + virtual void visit(ir_function_signature *); + virtual void visit(ir_function *); + virtual void visit(ir_expression *); + virtual void visit(ir_texture *); + virtual void visit(ir_swizzle *); + virtual void visit(ir_dereference_variable *); + virtual void visit(ir_dereference_array *); + virtual void visit(ir_dereference_record *); + virtual void visit(ir_assignment *); + virtual void visit(ir_constant *); + virtual void visit(ir_call *); + virtual void visit(ir_return *); + virtual void visit(ir_discard *); + virtual void visit(ir_if *); + virtual void visit(ir_loop *); + virtual void visit(ir_loop_jump *); + virtual void visit(ir_emit_vertex *); + virtual void visit(ir_end_primitive *); + virtual void visit(ir_barrier *); + /*@}*/ + +private: + /** + * Fetch/generate a unique name for ir_variable. + * + * GLSL IR permits multiple ir_variables to share the same name. This works + * fine until we try to print it, when we really need a unique one. + */ + const char *unique_name(ir_variable *var); + + /** A mapping from ir_variable * -> unique printable names. 
*/ + hash_table *printable_names; + _mesa_symbol_table *symbols; + + void *mem_ctx; + FILE *f; + + int indentation; +}; + +#endif /* IR_PRINT_VISITOR_H */ diff --git a/src/compiler/glsl/ir_reader.cpp b/src/compiler/glsl/ir_reader.cpp new file mode 100644 index 0000000..15315aa --- /dev/null +++ b/src/compiler/glsl/ir_reader.cpp @@ -0,0 +1,1167 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "ir_reader.h" +#include "glsl_parser_extras.h" +#include "compiler/glsl_types.h" +#include "s_expression.h" + +static const bool debug = false; + +namespace { + +class ir_reader { +public: + ir_reader(_mesa_glsl_parse_state *); + + void read(exec_list *instructions, const char *src, bool scan_for_protos); + +private: + void *mem_ctx; + _mesa_glsl_parse_state *state; + + void ir_read_error(s_expression *, const char *fmt, ...); + + const glsl_type *read_type(s_expression *); + + void scan_for_prototypes(exec_list *, s_expression *); + ir_function *read_function(s_expression *, bool skip_body); + void read_function_sig(ir_function *, s_expression *, bool skip_body); + + void read_instructions(exec_list *, s_expression *, ir_loop *); + ir_instruction *read_instruction(s_expression *, ir_loop *); + ir_variable *read_declaration(s_expression *); + ir_if *read_if(s_expression *, ir_loop *); + ir_loop *read_loop(s_expression *); + ir_call *read_call(s_expression *); + ir_return *read_return(s_expression *); + ir_rvalue *read_rvalue(s_expression *); + ir_assignment *read_assignment(s_expression *); + ir_expression *read_expression(s_expression *); + ir_swizzle *read_swizzle(s_expression *); + ir_constant *read_constant(s_expression *); + ir_texture *read_texture(s_expression *); + ir_emit_vertex *read_emit_vertex(s_expression *); + ir_end_primitive *read_end_primitive(s_expression *); + ir_barrier *read_barrier(s_expression *); + + ir_dereference *read_dereference(s_expression *); + ir_dereference_variable *read_var_ref(s_expression *); +}; + +} /* anonymous namespace */ + +ir_reader::ir_reader(_mesa_glsl_parse_state *state) : state(state) +{ + this->mem_ctx = state; +} + +void +_mesa_glsl_read_ir(_mesa_glsl_parse_state *state, exec_list *instructions, + const char *src, bool scan_for_protos) +{ + ir_reader r(state); + r.read(instructions, src, scan_for_protos); +} + +void +ir_reader::read(exec_list *instructions, const char *src, bool scan_for_protos) +{ 
+ void *sx_mem_ctx = ralloc_context(NULL); + s_expression *expr = s_expression::read_expression(sx_mem_ctx, src); + if (expr == NULL) { + ir_read_error(NULL, "couldn't parse S-Expression."); + return; + } + + if (scan_for_protos) { + scan_for_prototypes(instructions, expr); + if (state->error) + return; + } + + read_instructions(instructions, expr, NULL); + ralloc_free(sx_mem_ctx); + + if (debug) + validate_ir_tree(instructions); +} + +void +ir_reader::ir_read_error(s_expression *expr, const char *fmt, ...) +{ + va_list ap; + + state->error = true; + + if (state->current_function != NULL) + ralloc_asprintf_append(&state->info_log, "In function %s:\n", + state->current_function->function_name()); + ralloc_strcat(&state->info_log, "error: "); + + va_start(ap, fmt); + ralloc_vasprintf_append(&state->info_log, fmt, ap); + va_end(ap); + ralloc_strcat(&state->info_log, "\n"); + + if (expr != NULL) { + ralloc_strcat(&state->info_log, "...in this context:\n "); + expr->print(); + ralloc_strcat(&state->info_log, "\n\n"); + } +} + +const glsl_type * +ir_reader::read_type(s_expression *expr) +{ + s_expression *s_base_type; + s_int *s_size; + + s_pattern pat[] = { "array", s_base_type, s_size }; + if (MATCH(expr, pat)) { + const glsl_type *base_type = read_type(s_base_type); + if (base_type == NULL) { + ir_read_error(NULL, "when reading base type of array type"); + return NULL; + } + + return glsl_type::get_array_instance(base_type, s_size->value()); + } + + s_symbol *type_sym = SX_AS_SYMBOL(expr); + if (type_sym == NULL) { + ir_read_error(expr, "expected <type>"); + return NULL; + } + + const glsl_type *type = state->symbols->get_type(type_sym->value()); + if (type == NULL) + ir_read_error(expr, "invalid type: %s", type_sym->value()); + + return type; +} + + +void +ir_reader::scan_for_prototypes(exec_list *instructions, s_expression *expr) +{ + s_list *list = SX_AS_LIST(expr); + if (list == NULL) { + ir_read_error(expr, "Expected (<instruction> ...); found an atom."); + 
return; + } + + foreach_in_list(s_list, sub, &list->subexpressions) { + if (!sub->is_list()) + continue; // not a (function ...); ignore it. + + s_symbol *tag = SX_AS_SYMBOL(sub->subexpressions.get_head()); + if (tag == NULL || strcmp(tag->value(), "function") != 0) + continue; // not a (function ...); ignore it. + + ir_function *f = read_function(sub, true); + if (f == NULL) + return; + instructions->push_tail(f); + } +} + +ir_function * +ir_reader::read_function(s_expression *expr, bool skip_body) +{ + bool added = false; + s_symbol *name; + + s_pattern pat[] = { "function", name }; + if (!PARTIAL_MATCH(expr, pat)) { + ir_read_error(expr, "Expected (function <name> (signature ...) ...)"); + return NULL; + } + + ir_function *f = state->symbols->get_function(name->value()); + if (f == NULL) { + f = new(mem_ctx) ir_function(name->value()); + added = state->symbols->add_function(f); + assert(added); + } + + /* Skip over "function" tag and function name (which are guaranteed to be + * present by the above PARTIAL_MATCH call). + */ + exec_node *node = ((s_list *) expr)->subexpressions.head->next->next; + for (/* nothing */; !node->is_tail_sentinel(); node = node->next) { + s_expression *s_sig = (s_expression *) node; + read_function_sig(f, s_sig, skip_body); + } + return added ? f : NULL; +} + +static bool +always_available(const _mesa_glsl_parse_state *) +{ + return true; +} + +void +ir_reader::read_function_sig(ir_function *f, s_expression *expr, bool skip_body) +{ + s_expression *type_expr; + s_list *paramlist; + s_list *body_list; + + s_pattern pat[] = { "signature", type_expr, paramlist, body_list }; + if (!MATCH(expr, pat)) { + ir_read_error(expr, "Expected (signature <type> (parameters ...) 
" + "(<instruction> ...))"); + return; + } + + const glsl_type *return_type = read_type(type_expr); + if (return_type == NULL) + return; + + s_symbol *paramtag = SX_AS_SYMBOL(paramlist->subexpressions.get_head()); + if (paramtag == NULL || strcmp(paramtag->value(), "parameters") != 0) { + ir_read_error(paramlist, "Expected (parameters ...)"); + return; + } + + // Read the parameters list into a temporary place. + exec_list hir_parameters; + state->symbols->push_scope(); + + /* Skip over the "parameters" tag. */ + exec_node *node = paramlist->subexpressions.head->next; + for (/* nothing */; !node->is_tail_sentinel(); node = node->next) { + ir_variable *var = read_declaration((s_expression *) node); + if (var == NULL) + return; + + hir_parameters.push_tail(var); + } + + ir_function_signature *sig = + f->exact_matching_signature(state, &hir_parameters); + if (sig == NULL && skip_body) { + /* If scanning for prototypes, generate a new signature. */ + /* ir_reader doesn't know what languages support a given built-in, so + * just say that they're always available. For now, other mechanisms + * guarantee the right built-ins are available. + */ + sig = new(mem_ctx) ir_function_signature(return_type, always_available); + f->add_signature(sig); + } else if (sig != NULL) { + const char *badvar = sig->qualifiers_match(&hir_parameters); + if (badvar != NULL) { + ir_read_error(expr, "function `%s' parameter `%s' qualifiers " + "don't match prototype", f->name, badvar); + return; + } + + if (sig->return_type != return_type) { + ir_read_error(expr, "function `%s' return type doesn't " + "match prototype", f->name); + return; + } + } else { + /* No prototype for this body exists - skip it. 
*/ + state->symbols->pop_scope(); + return; + } + assert(sig != NULL); + + sig->replace_parameters(&hir_parameters); + + if (!skip_body && !body_list->subexpressions.is_empty()) { + if (sig->is_defined) { + ir_read_error(expr, "function %s redefined", f->name); + return; + } + state->current_function = sig; + read_instructions(&sig->body, body_list, NULL); + state->current_function = NULL; + sig->is_defined = true; + } + + state->symbols->pop_scope(); +} + +void +ir_reader::read_instructions(exec_list *instructions, s_expression *expr, + ir_loop *loop_ctx) +{ + // Read in a list of instructions + s_list *list = SX_AS_LIST(expr); + if (list == NULL) { + ir_read_error(expr, "Expected (<instruction> ...); found an atom."); + return; + } + + foreach_in_list(s_expression, sub, &list->subexpressions) { + ir_instruction *ir = read_instruction(sub, loop_ctx); + if (ir != NULL) { + /* Global variable declarations should be moved to the top, before + * any functions that might use them. Functions are added to the + * instruction stream when scanning for prototypes, so without this + * hack, they always appear before variable declarations. 
+ */ + if (state->current_function == NULL && ir->as_variable() != NULL) + instructions->push_head(ir); + else + instructions->push_tail(ir); + } + } +} + + +ir_instruction * +ir_reader::read_instruction(s_expression *expr, ir_loop *loop_ctx) +{ + s_symbol *symbol = SX_AS_SYMBOL(expr); + if (symbol != NULL) { + if (strcmp(symbol->value(), "break") == 0 && loop_ctx != NULL) + return new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_break); + if (strcmp(symbol->value(), "continue") == 0 && loop_ctx != NULL) + return new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_continue); + } + + s_list *list = SX_AS_LIST(expr); + if (list == NULL || list->subexpressions.is_empty()) { + ir_read_error(expr, "Invalid instruction.\n"); + return NULL; + } + + s_symbol *tag = SX_AS_SYMBOL(list->subexpressions.get_head()); + if (tag == NULL) { + ir_read_error(expr, "expected instruction tag"); + return NULL; + } + + ir_instruction *inst = NULL; + if (strcmp(tag->value(), "declare") == 0) { + inst = read_declaration(list); + } else if (strcmp(tag->value(), "assign") == 0) { + inst = read_assignment(list); + } else if (strcmp(tag->value(), "if") == 0) { + inst = read_if(list, loop_ctx); + } else if (strcmp(tag->value(), "loop") == 0) { + inst = read_loop(list); + } else if (strcmp(tag->value(), "call") == 0) { + inst = read_call(list); + } else if (strcmp(tag->value(), "return") == 0) { + inst = read_return(list); + } else if (strcmp(tag->value(), "function") == 0) { + inst = read_function(list, false); + } else if (strcmp(tag->value(), "emit-vertex") == 0) { + inst = read_emit_vertex(list); + } else if (strcmp(tag->value(), "end-primitive") == 0) { + inst = read_end_primitive(list); + } else if (strcmp(tag->value(), "barrier") == 0) { + inst = read_barrier(list); + } else { + inst = read_rvalue(list); + if (inst == NULL) + ir_read_error(NULL, "when reading instruction"); + } + return inst; +} + +ir_variable * +ir_reader::read_declaration(s_expression *expr) +{ + s_list *s_quals; + s_expression 
*s_type; + s_symbol *s_name; + + s_pattern pat[] = { "declare", s_quals, s_type, s_name }; + if (!MATCH(expr, pat)) { + ir_read_error(expr, "expected (declare (<qualifiers>) <type> <name>)"); + return NULL; + } + + const glsl_type *type = read_type(s_type); + if (type == NULL) + return NULL; + + ir_variable *var = new(mem_ctx) ir_variable(type, s_name->value(), + ir_var_auto); + + foreach_in_list(s_symbol, qualifier, &s_quals->subexpressions) { + if (!qualifier->is_symbol()) { + ir_read_error(expr, "qualifier list must contain only symbols"); + return NULL; + } + + // FINISHME: Check for duplicate/conflicting qualifiers. + if (strcmp(qualifier->value(), "centroid") == 0) { + var->data.centroid = 1; + } else if (strcmp(qualifier->value(), "sample") == 0) { + var->data.sample = 1; + } else if (strcmp(qualifier->value(), "patch") == 0) { + var->data.patch = 1; + } else if (strcmp(qualifier->value(), "invariant") == 0) { + var->data.invariant = 1; + } else if (strcmp(qualifier->value(), "uniform") == 0) { + var->data.mode = ir_var_uniform; + } else if (strcmp(qualifier->value(), "shader_storage") == 0) { + var->data.mode = ir_var_shader_storage; + } else if (strcmp(qualifier->value(), "auto") == 0) { + var->data.mode = ir_var_auto; + } else if (strcmp(qualifier->value(), "in") == 0) { + var->data.mode = ir_var_function_in; + } else if (strcmp(qualifier->value(), "shader_in") == 0) { + var->data.mode = ir_var_shader_in; + } else if (strcmp(qualifier->value(), "const_in") == 0) { + var->data.mode = ir_var_const_in; + } else if (strcmp(qualifier->value(), "out") == 0) { + var->data.mode = ir_var_function_out; + } else if (strcmp(qualifier->value(), "shader_out") == 0) { + var->data.mode = ir_var_shader_out; + } else if (strcmp(qualifier->value(), "inout") == 0) { + var->data.mode = ir_var_function_inout; + } else if (strcmp(qualifier->value(), "temporary") == 0) { + var->data.mode = ir_var_temporary; + } else if (strcmp(qualifier->value(), "stream1") == 0) { + 
var->data.stream = 1; + } else if (strcmp(qualifier->value(), "stream2") == 0) { + var->data.stream = 2; + } else if (strcmp(qualifier->value(), "stream3") == 0) { + var->data.stream = 3; + } else if (strcmp(qualifier->value(), "smooth") == 0) { + var->data.interpolation = INTERP_QUALIFIER_SMOOTH; + } else if (strcmp(qualifier->value(), "flat") == 0) { + var->data.interpolation = INTERP_QUALIFIER_FLAT; + } else if (strcmp(qualifier->value(), "noperspective") == 0) { + var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + } else { + ir_read_error(expr, "unknown qualifier: %s", qualifier->value()); + return NULL; + } + } + + // Add the variable to the symbol table + state->symbols->add_variable(var); + + return var; +} + + +ir_if * +ir_reader::read_if(s_expression *expr, ir_loop *loop_ctx) +{ + s_expression *s_cond; + s_expression *s_then; + s_expression *s_else; + + s_pattern pat[] = { "if", s_cond, s_then, s_else }; + if (!MATCH(expr, pat)) { + ir_read_error(expr, "expected (if <condition> (<then>...) 
(<else>...))"); + return NULL; + } + + ir_rvalue *condition = read_rvalue(s_cond); + if (condition == NULL) { + ir_read_error(NULL, "when reading condition of (if ...)"); + return NULL; + } + + ir_if *iff = new(mem_ctx) ir_if(condition); + + read_instructions(&iff->then_instructions, s_then, loop_ctx); + read_instructions(&iff->else_instructions, s_else, loop_ctx); + if (state->error) { + delete iff; + iff = NULL; + } + return iff; +} + + +ir_loop * +ir_reader::read_loop(s_expression *expr) +{ + s_expression *s_body; + + s_pattern loop_pat[] = { "loop", s_body }; + if (!MATCH(expr, loop_pat)) { + ir_read_error(expr, "expected (loop <body>)"); + return NULL; + } + + ir_loop *loop = new(mem_ctx) ir_loop; + + read_instructions(&loop->body_instructions, s_body, loop); + if (state->error) { + delete loop; + loop = NULL; + } + return loop; +} + + +ir_return * +ir_reader::read_return(s_expression *expr) +{ + s_expression *s_retval; + + s_pattern return_value_pat[] = { "return", s_retval}; + s_pattern return_void_pat[] = { "return" }; + if (MATCH(expr, return_value_pat)) { + ir_rvalue *retval = read_rvalue(s_retval); + if (retval == NULL) { + ir_read_error(NULL, "when reading return value"); + return NULL; + } + return new(mem_ctx) ir_return(retval); + } else if (MATCH(expr, return_void_pat)) { + return new(mem_ctx) ir_return; + } else { + ir_read_error(expr, "expected (return <rvalue>) or (return)"); + return NULL; + } +} + + +ir_rvalue * +ir_reader::read_rvalue(s_expression *expr) +{ + s_list *list = SX_AS_LIST(expr); + if (list == NULL || list->subexpressions.is_empty()) + return NULL; + + s_symbol *tag = SX_AS_SYMBOL(list->subexpressions.get_head()); + if (tag == NULL) { + ir_read_error(expr, "expected rvalue tag"); + return NULL; + } + + ir_rvalue *rvalue = read_dereference(list); + if (rvalue != NULL || state->error) + return rvalue; + else if (strcmp(tag->value(), "swiz") == 0) { + rvalue = read_swizzle(list); + } else if (strcmp(tag->value(), "expression") == 0) { 
+ rvalue = read_expression(list); + } else if (strcmp(tag->value(), "constant") == 0) { + rvalue = read_constant(list); + } else { + rvalue = read_texture(list); + if (rvalue == NULL && !state->error) + ir_read_error(expr, "unrecognized rvalue tag: %s", tag->value()); + } + + return rvalue; +} + +ir_assignment * +ir_reader::read_assignment(s_expression *expr) +{ + s_expression *cond_expr = NULL; + s_expression *lhs_expr, *rhs_expr; + s_list *mask_list; + + s_pattern pat4[] = { "assign", mask_list, lhs_expr, rhs_expr }; + s_pattern pat5[] = { "assign", cond_expr, mask_list, lhs_expr, rhs_expr }; + if (!MATCH(expr, pat4) && !MATCH(expr, pat5)) { + ir_read_error(expr, "expected (assign [<condition>] (<write mask>) " + "<lhs> <rhs>)"); + return NULL; + } + + ir_rvalue *condition = NULL; + if (cond_expr != NULL) { + condition = read_rvalue(cond_expr); + if (condition == NULL) { + ir_read_error(NULL, "when reading condition of assignment"); + return NULL; + } + } + + unsigned mask = 0; + + s_symbol *mask_symbol; + s_pattern mask_pat[] = { mask_symbol }; + if (MATCH(mask_list, mask_pat)) { + const char *mask_str = mask_symbol->value(); + unsigned mask_length = strlen(mask_str); + if (mask_length > 4) { + ir_read_error(expr, "invalid write mask: %s", mask_str); + return NULL; + } + + const unsigned idx_map[] = { 3, 0, 1, 2 }; /* w=bit 3, x=0, y=1, z=2 */ + + for (unsigned i = 0; i < mask_length; i++) { + if (mask_str[i] < 'w' || mask_str[i] > 'z') { + ir_read_error(expr, "write mask contains invalid character: %c", + mask_str[i]); + return NULL; + } + mask |= 1 << idx_map[mask_str[i] - 'w']; + } + } else if (!mask_list->subexpressions.is_empty()) { + ir_read_error(mask_list, "expected () or (<write mask>)"); + return NULL; + } + + ir_dereference *lhs = read_dereference(lhs_expr); + if (lhs == NULL) { + ir_read_error(NULL, "when reading left-hand side of assignment"); + return NULL; + } + + ir_rvalue *rhs = read_rvalue(rhs_expr); + if (rhs == NULL) { + ir_read_error(NULL, 
"when reading right-hand side of assignment"); + return NULL; + } + + if (mask == 0 && (lhs->type->is_vector() || lhs->type->is_scalar())) { + ir_read_error(expr, "non-zero write mask required."); + return NULL; + } + + return new(mem_ctx) ir_assignment(lhs, rhs, condition, mask); +} + +ir_call * +ir_reader::read_call(s_expression *expr) +{ + s_symbol *name; + s_list *params; + s_list *s_return = NULL; + + ir_dereference_variable *return_deref = NULL; + + s_pattern void_pat[] = { "call", name, params }; + s_pattern non_void_pat[] = { "call", name, s_return, params }; + if (MATCH(expr, non_void_pat)) { + return_deref = read_var_ref(s_return); + if (return_deref == NULL) { + ir_read_error(s_return, "when reading a call's return storage"); + return NULL; + } + } else if (!MATCH(expr, void_pat)) { + ir_read_error(expr, "expected (call <name> [<deref>] (<param> ...))"); + return NULL; + } + + exec_list parameters; + + foreach_in_list(s_expression, e, ¶ms->subexpressions) { + ir_rvalue *param = read_rvalue(e); + if (param == NULL) { + ir_read_error(e, "when reading parameter to function call"); + return NULL; + } + parameters.push_tail(param); + } + + ir_function *f = state->symbols->get_function(name->value()); + if (f == NULL) { + ir_read_error(expr, "found call to undefined function %s", + name->value()); + return NULL; + } + + ir_function_signature *callee = + f->matching_signature(state, ¶meters, true); + if (callee == NULL) { + ir_read_error(expr, "couldn't find matching signature for function " + "%s", name->value()); + return NULL; + } + + if (callee->return_type == glsl_type::void_type && return_deref) { + ir_read_error(expr, "call has return value storage but void type"); + return NULL; + } else if (callee->return_type != glsl_type::void_type && !return_deref) { + ir_read_error(expr, "call has non-void type but no return value storage"); + return NULL; + } + + return new(mem_ctx) ir_call(callee, return_deref, ¶meters); +} + +ir_expression * 
+ir_reader::read_expression(s_expression *expr) +{ + s_expression *s_type; + s_symbol *s_op; + s_expression *s_arg[4] = {NULL}; + + s_pattern pat[] = { "expression", s_type, s_op, s_arg[0] }; + if (!PARTIAL_MATCH(expr, pat)) { + ir_read_error(expr, "expected (expression <type> <operator> " + "<operand> [<operand>] [<operand>] [<operand>])"); + return NULL; + } + s_arg[1] = (s_expression *) s_arg[0]->next; // may be tail sentinel + s_arg[2] = (s_expression *) s_arg[1]->next; // may be tail sentinel or NULL + if (s_arg[2]) + s_arg[3] = (s_expression *) s_arg[2]->next; // may be tail sentinel or NULL + + const glsl_type *type = read_type(s_type); + if (type == NULL) + return NULL; + + /* Read the operator */ + ir_expression_operation op = ir_expression::get_operator(s_op->value()); + if (op == (ir_expression_operation) -1) { + ir_read_error(expr, "invalid operator: %s", s_op->value()); + return NULL; + } + + /* Skip "expression" <type> <operation> by subtracting 3. */ + int num_operands = (int) ((s_list *) expr)->subexpressions.length() - 3; + + int expected_operands = ir_expression::get_num_operands(op); + if (num_operands != expected_operands) { + ir_read_error(expr, "found %d expression operands, expected %d", + num_operands, expected_operands); + return NULL; + } + + ir_rvalue *arg[4] = {NULL}; + for (int i = 0; i < num_operands; i++) { + arg[i] = read_rvalue(s_arg[i]); + if (arg[i] == NULL) { + ir_read_error(NULL, "when reading operand #%d of %s", i, s_op->value()); + return NULL; + } + } + + return new(mem_ctx) ir_expression(op, type, arg[0], arg[1], arg[2], arg[3]); +} + +ir_swizzle * +ir_reader::read_swizzle(s_expression *expr) +{ + s_symbol *swiz; + s_expression *sub; + + s_pattern pat[] = { "swiz", swiz, sub }; + if (!MATCH(expr, pat)) { + ir_read_error(expr, "expected (swiz <swizzle> <rvalue>)"); + return NULL; + } + + if (strlen(swiz->value()) > 4) { + ir_read_error(expr, "expected a valid swizzle; found %s", swiz->value()); + return NULL; + } + + 
ir_rvalue *rvalue = read_rvalue(sub); + if (rvalue == NULL) + return NULL; + + ir_swizzle *ir = ir_swizzle::create(rvalue, swiz->value(), + rvalue->type->vector_elements); + if (ir == NULL) + ir_read_error(expr, "invalid swizzle"); + + return ir; +} + +ir_constant * +ir_reader::read_constant(s_expression *expr) +{ + s_expression *type_expr; + s_list *values; + + s_pattern pat[] = { "constant", type_expr, values }; + if (!MATCH(expr, pat)) { + ir_read_error(expr, "expected (constant <type> (...))"); + return NULL; + } + + const glsl_type *type = read_type(type_expr); + if (type == NULL) + return NULL; + + if (values == NULL) { + ir_read_error(expr, "expected (constant <type> (...))"); + return NULL; + } + + if (type->is_array()) { + unsigned elements_supplied = 0; + exec_list elements; + foreach_in_list(s_expression, elt, &values->subexpressions) { + ir_constant *ir_elt = read_constant(elt); + if (ir_elt == NULL) + return NULL; + elements.push_tail(ir_elt); + elements_supplied++; + } + + if (elements_supplied != type->length) { + ir_read_error(values, "expected exactly %u array elements, " + "given %u", type->length, elements_supplied); + return NULL; + } + return new(mem_ctx) ir_constant(type, &elements); + } + + ir_constant_data data = { { 0 } }; + + // Read in list of values (at most 16). 
+ unsigned k = 0; + foreach_in_list(s_expression, expr, &values->subexpressions) { + if (k >= 16) { + ir_read_error(values, "expected at most 16 numbers"); + return NULL; + } + + if (type->base_type == GLSL_TYPE_FLOAT) { + s_number *value = SX_AS_NUMBER(expr); + if (value == NULL) { + ir_read_error(values, "expected numbers"); + return NULL; + } + data.f[k] = value->fvalue(); + } else { + s_int *value = SX_AS_INT(expr); + if (value == NULL) { + ir_read_error(values, "expected integers"); + return NULL; + } + + switch (type->base_type) { + case GLSL_TYPE_UINT: { + data.u[k] = value->value(); + break; + } + case GLSL_TYPE_INT: { + data.i[k] = value->value(); + break; + } + case GLSL_TYPE_BOOL: { + data.b[k] = value->value(); + break; + } + default: + ir_read_error(values, "unsupported constant type"); + return NULL; + } + } + ++k; + } + if (k != type->components()) { + ir_read_error(values, "expected %u constant values, found %u", + type->components(), k); + return NULL; + } + + return new(mem_ctx) ir_constant(type, &data); +} + +ir_dereference_variable * +ir_reader::read_var_ref(s_expression *expr) +{ + s_symbol *s_var; + s_pattern var_pat[] = { "var_ref", s_var }; + + if (MATCH(expr, var_pat)) { + ir_variable *var = state->symbols->get_variable(s_var->value()); + if (var == NULL) { + ir_read_error(expr, "undeclared variable: %s", s_var->value()); + return NULL; + } + return new(mem_ctx) ir_dereference_variable(var); + } + return NULL; +} + +ir_dereference * +ir_reader::read_dereference(s_expression *expr) +{ + s_expression *s_subject; + s_expression *s_index; + s_symbol *s_field; + + s_pattern array_pat[] = { "array_ref", s_subject, s_index }; + s_pattern record_pat[] = { "record_ref", s_subject, s_field }; + + ir_dereference_variable *var_ref = read_var_ref(expr); + if (var_ref != NULL) { + return var_ref; + } else if (MATCH(expr, array_pat)) { + ir_rvalue *subject = read_rvalue(s_subject); + if (subject == NULL) { + ir_read_error(NULL, "when reading the subject 
of an array_ref"); + return NULL; + } + + ir_rvalue *idx = read_rvalue(s_index); + if (idx == NULL) { + ir_read_error(NULL, "when reading the index of an array_ref"); + return NULL; + } + return new(mem_ctx) ir_dereference_array(subject, idx); + } else if (MATCH(expr, record_pat)) { + ir_rvalue *subject = read_rvalue(s_subject); + if (subject == NULL) { + ir_read_error(NULL, "when reading the subject of a record_ref"); + return NULL; + } + return new(mem_ctx) ir_dereference_record(subject, s_field->value()); + } + return NULL; +} + +ir_texture * +ir_reader::read_texture(s_expression *expr) +{ + s_symbol *tag = NULL; + s_expression *s_type = NULL; + s_expression *s_sampler = NULL; + s_expression *s_coord = NULL; + s_expression *s_offset = NULL; + s_expression *s_proj = NULL; + s_list *s_shadow = NULL; + s_expression *s_lod = NULL; + s_expression *s_sample_index = NULL; + s_expression *s_component = NULL; + + ir_texture_opcode op = ir_tex; /* silence warning */ + + s_pattern tex_pattern[] = + { "tex", s_type, s_sampler, s_coord, s_offset, s_proj, s_shadow }; + s_pattern lod_pattern[] = + { "lod", s_type, s_sampler, s_coord }; + s_pattern txf_pattern[] = + { "txf", s_type, s_sampler, s_coord, s_offset, s_lod }; + s_pattern txf_ms_pattern[] = + { "txf_ms", s_type, s_sampler, s_coord, s_sample_index }; + s_pattern txs_pattern[] = + { "txs", s_type, s_sampler, s_lod }; + s_pattern tg4_pattern[] = + { "tg4", s_type, s_sampler, s_coord, s_offset, s_component }; + s_pattern query_levels_pattern[] = + { "query_levels", s_type, s_sampler }; + s_pattern texture_samples_pattern[] = + { "samples", s_type, s_sampler }; + s_pattern other_pattern[] = + { tag, s_type, s_sampler, s_coord, s_offset, s_proj, s_shadow, s_lod }; + + if (MATCH(expr, lod_pattern)) { + op = ir_lod; + } else if (MATCH(expr, tex_pattern)) { + op = ir_tex; + } else if (MATCH(expr, txf_pattern)) { + op = ir_txf; + } else if (MATCH(expr, txf_ms_pattern)) { + op = ir_txf_ms; + } else if (MATCH(expr, txs_pattern)) 
{ + op = ir_txs; + } else if (MATCH(expr, tg4_pattern)) { + op = ir_tg4; + } else if (MATCH(expr, query_levels_pattern)) { + op = ir_query_levels; + } else if (MATCH(expr, texture_samples_pattern)) { + op = ir_texture_samples; + } else if (MATCH(expr, other_pattern)) { + op = ir_texture::get_opcode(tag->value()); + if (op == (ir_texture_opcode) -1) + return NULL; + } else { + ir_read_error(NULL, "unexpected texture pattern %s", tag->value()); + return NULL; + } + + ir_texture *tex = new(mem_ctx) ir_texture(op); + + // Read return type + const glsl_type *type = read_type(s_type); + if (type == NULL) { + ir_read_error(NULL, "when reading type in (%s ...)", + tex->opcode_string()); + return NULL; + } + + // Read sampler (must be a deref) + ir_dereference *sampler = read_dereference(s_sampler); + if (sampler == NULL) { + ir_read_error(NULL, "when reading sampler in (%s ...)", + tex->opcode_string()); + return NULL; + } + tex->set_sampler(sampler, type); + + if (op != ir_txs) { + // Read coordinate (any rvalue) + tex->coordinate = read_rvalue(s_coord); + if (tex->coordinate == NULL) { + ir_read_error(NULL, "when reading coordinate in (%s ...)", + tex->opcode_string()); + return NULL; + } + + if (op != ir_txf_ms && op != ir_lod) { + // Read texel offset - either 0 or an rvalue. 
+ s_int *si_offset = SX_AS_INT(s_offset); + if (si_offset == NULL || si_offset->value() != 0) { + tex->offset = read_rvalue(s_offset); + if (tex->offset == NULL) { + ir_read_error(s_offset, "expected 0 or an expression"); + return NULL; + } + } + } + } + + if (op != ir_txf && op != ir_txf_ms && + op != ir_txs && op != ir_lod && op != ir_tg4 && + op != ir_query_levels && op != ir_texture_samples) { + s_int *proj_as_int = SX_AS_INT(s_proj); + if (proj_as_int && proj_as_int->value() == 1) { + tex->projector = NULL; + } else { + tex->projector = read_rvalue(s_proj); + if (tex->projector == NULL) { + ir_read_error(NULL, "when reading projective divide in (%s ..)", + tex->opcode_string()); + return NULL; + } + } + + if (s_shadow->subexpressions.is_empty()) { + tex->shadow_comparitor = NULL; + } else { + tex->shadow_comparitor = read_rvalue(s_shadow); + if (tex->shadow_comparitor == NULL) { + ir_read_error(NULL, "when reading shadow comparitor in (%s ..)", + tex->opcode_string()); + return NULL; + } + } + } + + switch (op) { + case ir_txb: + tex->lod_info.bias = read_rvalue(s_lod); + if (tex->lod_info.bias == NULL) { + ir_read_error(NULL, "when reading LOD bias in (txb ...)"); + return NULL; + } + break; + case ir_txl: + case ir_txf: + case ir_txs: + tex->lod_info.lod = read_rvalue(s_lod); + if (tex->lod_info.lod == NULL) { + ir_read_error(NULL, "when reading LOD in (%s ...)", + tex->opcode_string()); + return NULL; + } + break; + case ir_txf_ms: + tex->lod_info.sample_index = read_rvalue(s_sample_index); + if (tex->lod_info.sample_index == NULL) { + ir_read_error(NULL, "when reading sample_index in (txf_ms ...)"); + return NULL; + } + break; + case ir_txd: { + s_expression *s_dx, *s_dy; + s_pattern dxdy_pat[] = { s_dx, s_dy }; + if (!MATCH(s_lod, dxdy_pat)) { + ir_read_error(s_lod, "expected (dPdx dPdy) in (txd ...)"); + return NULL; + } + tex->lod_info.grad.dPdx = read_rvalue(s_dx); + if (tex->lod_info.grad.dPdx == NULL) { + ir_read_error(NULL, "when reading dPdx in 
(txd ...)"); + return NULL; + } + tex->lod_info.grad.dPdy = read_rvalue(s_dy); + if (tex->lod_info.grad.dPdy == NULL) { + ir_read_error(NULL, "when reading dPdy in (txd ...)"); + return NULL; + } + break; + } + case ir_tg4: + tex->lod_info.component = read_rvalue(s_component); + if (tex->lod_info.component == NULL) { + ir_read_error(NULL, "when reading component in (tg4 ...)"); + return NULL; + } + break; + default: + // tex and lod don't have any extra parameters. + break; + }; + return tex; +} + +ir_emit_vertex * +ir_reader::read_emit_vertex(s_expression *expr) +{ + s_expression *s_stream = NULL; + + s_pattern pat[] = { "emit-vertex", s_stream }; + + if (MATCH(expr, pat)) { + ir_rvalue *stream = read_dereference(s_stream); + if (stream == NULL) { + ir_read_error(NULL, "when reading stream info in emit-vertex"); + return NULL; + } + return new(mem_ctx) ir_emit_vertex(stream); + } + ir_read_error(NULL, "when reading emit-vertex"); + return NULL; +} + +ir_end_primitive * +ir_reader::read_end_primitive(s_expression *expr) +{ + s_expression *s_stream = NULL; + + s_pattern pat[] = { "end-primitive", s_stream }; + + if (MATCH(expr, pat)) { + ir_rvalue *stream = read_dereference(s_stream); + if (stream == NULL) { + ir_read_error(NULL, "when reading stream info in end-primitive"); + return NULL; + } + return new(mem_ctx) ir_end_primitive(stream); + } + ir_read_error(NULL, "when reading end-primitive"); + return NULL; +} + +ir_barrier * +ir_reader::read_barrier(s_expression *expr) +{ + s_pattern pat[] = { "barrier" }; + + if (MATCH(expr, pat)) { + return new(mem_ctx) ir_barrier(); + } + ir_read_error(NULL, "when reading barrier"); + return NULL; +} diff --git a/src/compiler/glsl/ir_reader.h b/src/compiler/glsl/ir_reader.h new file mode 100644 index 0000000..aef2ca2 --- /dev/null +++ b/src/compiler/glsl/ir_reader.h @@ -0,0 +1,34 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + 
* copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#pragma once +#ifndef IR_READER_H +#define IR_READER_H + +#include "ir.h" + +void _mesa_glsl_read_ir(_mesa_glsl_parse_state *state, exec_list *instructions, + const char *src, bool scan_for_prototypes); + +#endif /* IR_READER_H */ diff --git a/src/compiler/glsl/ir_rvalue_visitor.cpp b/src/compiler/glsl/ir_rvalue_visitor.cpp new file mode 100644 index 0000000..6ab6cf0 --- /dev/null +++ b/src/compiler/glsl/ir_rvalue_visitor.cpp @@ -0,0 +1,316 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file ir_rvalue_visitor.cpp + * + * Generic class to implement the common pattern we have of wanting to + * visit each ir_rvalue * and possibly change that node to a different + * class. 
+ */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_rvalue_visitor.h" +#include "compiler/glsl_types.h" + +ir_visitor_status +ir_rvalue_base_visitor::rvalue_visit(ir_expression *ir) +{ + unsigned int operand; + + for (operand = 0; operand < ir->get_num_operands(); operand++) { + handle_rvalue(&ir->operands[operand]); + } + + return visit_continue; +} + +ir_visitor_status +ir_rvalue_base_visitor::rvalue_visit(ir_texture *ir) +{ + handle_rvalue(&ir->coordinate); + handle_rvalue(&ir->projector); + handle_rvalue(&ir->shadow_comparitor); + handle_rvalue(&ir->offset); + + switch (ir->op) { + case ir_tex: + case ir_lod: + case ir_query_levels: + case ir_texture_samples: + case ir_samples_identical: + break; + case ir_txb: + handle_rvalue(&ir->lod_info.bias); + break; + case ir_txf: + case ir_txl: + case ir_txs: + handle_rvalue(&ir->lod_info.lod); + break; + case ir_txf_ms: + handle_rvalue(&ir->lod_info.sample_index); + break; + case ir_txd: + handle_rvalue(&ir->lod_info.grad.dPdx); + handle_rvalue(&ir->lod_info.grad.dPdy); + break; + case ir_tg4: + handle_rvalue(&ir->lod_info.component); + break; + } + + return visit_continue; +} + +ir_visitor_status +ir_rvalue_base_visitor::rvalue_visit(ir_swizzle *ir) +{ + handle_rvalue(&ir->val); + return visit_continue; +} + +ir_visitor_status +ir_rvalue_base_visitor::rvalue_visit(ir_dereference_array *ir) +{ + /* The array index is not the target of the assignment, so clear the + * 'in_assignee' flag. Restore it after returning from the array index. 
+ */ + const bool was_in_assignee = this->in_assignee; + this->in_assignee = false; + handle_rvalue(&ir->array_index); + this->in_assignee = was_in_assignee; + + handle_rvalue(&ir->array); + return visit_continue; +} + +ir_visitor_status +ir_rvalue_base_visitor::rvalue_visit(ir_dereference_record *ir) +{ + handle_rvalue(&ir->record); + return visit_continue; +} + +ir_visitor_status +ir_rvalue_base_visitor::rvalue_visit(ir_assignment *ir) +{ + handle_rvalue(&ir->rhs); + handle_rvalue(&ir->condition); + + return visit_continue; +} + +ir_visitor_status +ir_rvalue_base_visitor::rvalue_visit(ir_call *ir) +{ + foreach_in_list_safe(ir_rvalue, param, &ir->actual_parameters) { + ir_rvalue *new_param = param; + handle_rvalue(&new_param); + + if (new_param != param) { + param->replace_with(new_param); + } + } + return visit_continue; +} + +ir_visitor_status +ir_rvalue_base_visitor::rvalue_visit(ir_discard *ir) +{ + handle_rvalue(&ir->condition); + return visit_continue; +} + +ir_visitor_status +ir_rvalue_base_visitor::rvalue_visit(ir_return *ir) +{ + handle_rvalue(&ir->value);; + return visit_continue; +} + +ir_visitor_status +ir_rvalue_base_visitor::rvalue_visit(ir_if *ir) +{ + handle_rvalue(&ir->condition); + return visit_continue; +} + +ir_visitor_status +ir_rvalue_base_visitor::rvalue_visit(ir_emit_vertex *ir) +{ + handle_rvalue(&ir->stream); + return visit_continue; +} + +ir_visitor_status +ir_rvalue_base_visitor::rvalue_visit(ir_end_primitive *ir) +{ + handle_rvalue(&ir->stream); + return visit_continue; +} + +ir_visitor_status +ir_rvalue_visitor::visit_leave(ir_expression *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_visitor::visit_leave(ir_texture *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_visitor::visit_leave(ir_swizzle *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_visitor::visit_leave(ir_dereference_array *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status 
+ir_rvalue_visitor::visit_leave(ir_dereference_record *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_visitor::visit_leave(ir_assignment *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_visitor::visit_leave(ir_call *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_visitor::visit_leave(ir_discard *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_visitor::visit_leave(ir_return *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_visitor::visit_leave(ir_if *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_visitor::visit_leave(ir_emit_vertex *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_visitor::visit_leave(ir_end_primitive *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_enter_visitor::visit_enter(ir_expression *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_enter_visitor::visit_enter(ir_texture *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_enter_visitor::visit_enter(ir_swizzle *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_enter_visitor::visit_enter(ir_dereference_array *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_enter_visitor::visit_enter(ir_dereference_record *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_enter_visitor::visit_enter(ir_assignment *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_enter_visitor::visit_enter(ir_call *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_enter_visitor::visit_enter(ir_discard *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_enter_visitor::visit_enter(ir_return *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_enter_visitor::visit_enter(ir_if *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_enter_visitor::visit_enter(ir_emit_vertex *ir) +{ + return 
rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_enter_visitor::visit_enter(ir_end_primitive *ir) +{ + return rvalue_visit(ir); +} diff --git a/src/compiler/glsl/ir_rvalue_visitor.h b/src/compiler/glsl/ir_rvalue_visitor.h new file mode 100644 index 0000000..185c72a --- /dev/null +++ b/src/compiler/glsl/ir_rvalue_visitor.h @@ -0,0 +1,83 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file ir_rvalue_visitor.h + * + * Generic class to implement the common pattern we have of wanting to + * visit each ir_rvalue * and possibly change that node to a different + * class. Just implement handle_rvalue() and you will be called with + * a pointer to each rvalue in the tree. 
+ */ + +class ir_rvalue_base_visitor : public ir_hierarchical_visitor { +public: + ir_visitor_status rvalue_visit(ir_assignment *); + ir_visitor_status rvalue_visit(ir_call *); + ir_visitor_status rvalue_visit(ir_dereference_array *); + ir_visitor_status rvalue_visit(ir_dereference_record *); + ir_visitor_status rvalue_visit(ir_discard *); + ir_visitor_status rvalue_visit(ir_expression *); + ir_visitor_status rvalue_visit(ir_if *); + ir_visitor_status rvalue_visit(ir_return *); + ir_visitor_status rvalue_visit(ir_swizzle *); + ir_visitor_status rvalue_visit(ir_texture *); + ir_visitor_status rvalue_visit(ir_emit_vertex *); + ir_visitor_status rvalue_visit(ir_end_primitive *); + + virtual void handle_rvalue(ir_rvalue **rvalue) = 0; +}; + +class ir_rvalue_visitor : public ir_rvalue_base_visitor { +public: + + virtual ir_visitor_status visit_leave(ir_assignment *); + virtual ir_visitor_status visit_leave(ir_call *); + virtual ir_visitor_status visit_leave(ir_dereference_array *); + virtual ir_visitor_status visit_leave(ir_dereference_record *); + virtual ir_visitor_status visit_leave(ir_discard *); + virtual ir_visitor_status visit_leave(ir_expression *); + virtual ir_visitor_status visit_leave(ir_if *); + virtual ir_visitor_status visit_leave(ir_return *); + virtual ir_visitor_status visit_leave(ir_swizzle *); + virtual ir_visitor_status visit_leave(ir_texture *); + virtual ir_visitor_status visit_leave(ir_emit_vertex *); + virtual ir_visitor_status visit_leave(ir_end_primitive *); +}; + +class ir_rvalue_enter_visitor : public ir_rvalue_base_visitor { +public: + + virtual ir_visitor_status visit_enter(ir_assignment *); + virtual ir_visitor_status visit_enter(ir_call *); + virtual ir_visitor_status visit_enter(ir_dereference_array *); + virtual ir_visitor_status visit_enter(ir_dereference_record *); + virtual ir_visitor_status visit_enter(ir_discard *); + virtual ir_visitor_status visit_enter(ir_expression *); + virtual ir_visitor_status visit_enter(ir_if *); + 
virtual ir_visitor_status visit_enter(ir_return *); + virtual ir_visitor_status visit_enter(ir_swizzle *); + virtual ir_visitor_status visit_enter(ir_texture *); + virtual ir_visitor_status visit_enter(ir_emit_vertex *); + virtual ir_visitor_status visit_enter(ir_end_primitive *); +}; diff --git a/src/compiler/glsl/ir_set_program_inouts.cpp b/src/compiler/glsl/ir_set_program_inouts.cpp new file mode 100644 index 0000000..df06923 --- /dev/null +++ b/src/compiler/glsl/ir_set_program_inouts.cpp @@ -0,0 +1,453 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file ir_set_program_inouts.cpp + * + * Sets the InputsRead and OutputsWritten of Mesa programs. + * + * Additionally, for fragment shaders, sets the InterpQualifier array, the + * IsCentroid and IsSample bitfields, and the UsesDFdy flag. 
+ * + * Mesa programs (gl_program, not gl_shader_program) have a set of + * flags indicating which varyings are read and written. Computing + * which are actually read from some sort of backend code can be + * tricky when variable array indexing involved. So this pass + * provides support for setting InputsRead and OutputsWritten right + * from the GLSL IR. + */ + +#include "main/core.h" /* for struct gl_program */ +#include "ir.h" +#include "ir_visitor.h" +#include "compiler/glsl_types.h" + +namespace { + +class ir_set_program_inouts_visitor : public ir_hierarchical_visitor { +public: + ir_set_program_inouts_visitor(struct gl_program *prog, + gl_shader_stage shader_stage) + { + this->prog = prog; + this->shader_stage = shader_stage; + } + ~ir_set_program_inouts_visitor() + { + } + + virtual ir_visitor_status visit_enter(ir_dereference_array *); + virtual ir_visitor_status visit_enter(ir_function_signature *); + virtual ir_visitor_status visit_enter(ir_expression *); + virtual ir_visitor_status visit_enter(ir_discard *); + virtual ir_visitor_status visit_enter(ir_texture *); + virtual ir_visitor_status visit(ir_dereference_variable *); + +private: + void mark_whole_variable(ir_variable *var); + bool try_mark_partial_variable(ir_variable *var, ir_rvalue *index); + + struct gl_program *prog; + gl_shader_stage shader_stage; +}; + +} /* anonymous namespace */ + +static inline bool +is_shader_inout(ir_variable *var) +{ + return var->data.mode == ir_var_shader_in || + var->data.mode == ir_var_shader_out || + var->data.mode == ir_var_system_value; +} + +static void +mark(struct gl_program *prog, ir_variable *var, int offset, int len, + gl_shader_stage stage) +{ + /* As of GLSL 1.20, varyings can only be floats, floating-point + * vectors or matrices, or arrays of them. For Mesa programs using + * InputsRead/OutputsWritten, everything but matrices uses one + * slot, while matrices use a slot per column. 
Presumably + * something doing a more clever packing would use something other + * than InputsRead/OutputsWritten. + */ + + for (int i = 0; i < len; i++) { + int idx = var->data.location + var->data.index + offset + i; + bool is_patch_generic = var->data.patch && + idx != VARYING_SLOT_TESS_LEVEL_INNER && + idx != VARYING_SLOT_TESS_LEVEL_OUTER; + GLbitfield64 bitfield; + + if (is_patch_generic) { + assert(idx >= VARYING_SLOT_PATCH0 && idx < VARYING_SLOT_TESS_MAX); + bitfield = BITFIELD64_BIT(idx - VARYING_SLOT_PATCH0); + } + else { + assert(idx < VARYING_SLOT_MAX); + bitfield = BITFIELD64_BIT(idx); + } + + if (var->data.mode == ir_var_shader_in) { + if (is_patch_generic) + prog->PatchInputsRead |= bitfield; + else + prog->InputsRead |= bitfield; + + /* double inputs read is only for vertex inputs */ + if (stage == MESA_SHADER_VERTEX && + var->type->without_array()->is_dual_slot_double()) + prog->DoubleInputsRead |= bitfield; + + if (stage == MESA_SHADER_FRAGMENT) { + gl_fragment_program *fprog = (gl_fragment_program *) prog; + fprog->InterpQualifier[idx] = + (glsl_interp_qualifier) var->data.interpolation; + if (var->data.centroid) + fprog->IsCentroid |= bitfield; + if (var->data.sample) + fprog->IsSample |= bitfield; + } + } else if (var->data.mode == ir_var_system_value) { + prog->SystemValuesRead |= bitfield; + } else { + assert(var->data.mode == ir_var_shader_out); + if (is_patch_generic) + prog->PatchOutputsWritten |= bitfield; + else + prog->OutputsWritten |= bitfield; + } + } +} + +/** + * Mark an entire variable as used. Caller must ensure that the variable + * represents a shader input or output. 
+ */ +void +ir_set_program_inouts_visitor::mark_whole_variable(ir_variable *var) +{ + const glsl_type *type = var->type; + bool vertex_input = false; + if (this->shader_stage == MESA_SHADER_GEOMETRY && + var->data.mode == ir_var_shader_in && type->is_array()) { + type = type->fields.array; + } + + if (this->shader_stage == MESA_SHADER_TESS_CTRL && + var->data.mode == ir_var_shader_in) { + assert(type->is_array()); + type = type->fields.array; + } + + if (this->shader_stage == MESA_SHADER_TESS_CTRL && + var->data.mode == ir_var_shader_out && !var->data.patch) { + assert(type->is_array()); + type = type->fields.array; + } + + if (this->shader_stage == MESA_SHADER_TESS_EVAL && + var->data.mode == ir_var_shader_in && !var->data.patch) { + assert(type->is_array()); + type = type->fields.array; + } + + if (this->shader_stage == MESA_SHADER_VERTEX && + var->data.mode == ir_var_shader_in) + vertex_input = true; + + mark(this->prog, var, 0, type->count_attribute_slots(vertex_input), + this->shader_stage); +} + +/* Default handler: Mark all the locations in the variable as used. */ +ir_visitor_status +ir_set_program_inouts_visitor::visit(ir_dereference_variable *ir) +{ + if (!is_shader_inout(ir->var)) + return visit_continue; + + mark_whole_variable(ir->var); + + return visit_continue; +} + +/** + * Try to mark a portion of the given variable as used. Caller must ensure + * that the variable represents a shader input or output which can be indexed + * into in array fashion (an array or matrix). For the purpose of geometry + * shader inputs (which are always arrays*), this means that the array element + * must be something that can be indexed into in array fashion. + * + * *Except gl_PrimitiveIDIn, as noted below. + * + * For tessellation control shaders all inputs and non-patch outputs are + * arrays. For tessellation evaluation shaders non-patch inputs are arrays. 
+ * + * If the index can't be interpreted as a constant, or some other problem + * occurs, then nothing will be marked and false will be returned. + */ +bool +ir_set_program_inouts_visitor::try_mark_partial_variable(ir_variable *var, + ir_rvalue *index) +{ + const glsl_type *type = var->type; + + if (this->shader_stage == MESA_SHADER_GEOMETRY && + var->data.mode == ir_var_shader_in) { + /* The only geometry shader input that is not an array is + * gl_PrimitiveIDIn, and in that case, this code will never be reached, + * because gl_PrimitiveIDIn can't be indexed into in array fashion. + */ + assert(type->is_array()); + type = type->fields.array; + } + + if (this->shader_stage == MESA_SHADER_TESS_CTRL && + var->data.mode == ir_var_shader_in) { + assert(type->is_array()); + type = type->fields.array; + } + + if (this->shader_stage == MESA_SHADER_TESS_CTRL && + var->data.mode == ir_var_shader_out && !var->data.patch) { + assert(type->is_array()); + type = type->fields.array; + } + + if (this->shader_stage == MESA_SHADER_TESS_EVAL && + var->data.mode == ir_var_shader_in && !var->data.patch) { + assert(type->is_array()); + type = type->fields.array; + } + + /* TODO: implement proper arrays of arrays support + * for now let the caller mark whole variable as used. + */ + if (type->is_array() && type->fields.array->is_array()) + return false; + + /* The code below only handles: + * + * - Indexing into matrices + * - Indexing into arrays of (matrices, vectors, or scalars) + * + * All other possibilities are either prohibited by GLSL (vertex inputs and + * fragment outputs can't be structs) or should have been eliminated by + * lowering passes (do_vec_index_to_swizzle() gets rid of indexing into + * vectors, and lower_packed_varyings() gets rid of structs that occur in + * varyings). 
+ */ + if (!(type->is_matrix() || + (type->is_array() && + (type->fields.array->is_numeric() || + type->fields.array->is_boolean())))) { + assert(!"Unexpected indexing in ir_set_program_inouts"); + + /* For safety in release builds, in case we ever encounter unexpected + * indexing, give up and let the caller mark the whole variable as used. + */ + return false; + } + + ir_constant *index_as_constant = index->as_constant(); + if (!index_as_constant) + return false; + + unsigned elem_width; + unsigned num_elems; + if (type->is_array()) { + num_elems = type->length; + if (type->fields.array->is_matrix()) + elem_width = type->fields.array->matrix_columns; + else + elem_width = 1; + } else { + num_elems = type->matrix_columns; + elem_width = 1; + } + + if (index_as_constant->value.u[0] >= num_elems) { + /* Constant index outside the bounds of the matrix/array. This could + * arise as a result of constant folding of a legal GLSL program. + * + * Even though the spec says that indexing outside the bounds of a + * matrix/array results in undefined behaviour, we don't want to pass + * out-of-range values to mark() (since this could result in slots that + * don't exist being marked as used), so just let the caller mark the + * whole variable as used. 
+       */
+      return false;
+   }
+
+   /* double element width for double types that takes two slots */
+   if (this->shader_stage != MESA_SHADER_VERTEX ||
+       var->data.mode != ir_var_shader_in) {
+      if (type->without_array()->is_dual_slot_double())
+         elem_width *= 2;
+   }
+
+   mark(this->prog, var, index_as_constant->value.u[0] * elem_width,
+        elem_width, this->shader_stage);
+   return true;
+}
+/* True if var's outermost array index selects a vertex in this stage. */
+static bool
+is_multiple_vertices(gl_shader_stage stage, ir_variable *var)
+{
+   if (var->data.patch)
+      return false;
+
+   if (var->data.mode == ir_var_shader_in)
+      return stage == MESA_SHADER_GEOMETRY ||
+         stage == MESA_SHADER_TESS_CTRL ||
+         stage == MESA_SHADER_TESS_EVAL;
+   if (var->data.mode == ir_var_shader_out)
+      return stage == MESA_SHADER_TESS_CTRL;
+
+   return false;
+}
+/* Record which part of a shader in/out foo[i] or foo[i][j] accesses. */
+ir_visitor_status
+ir_set_program_inouts_visitor::visit_enter(ir_dereference_array *ir)
+{
+   /* Note: for geometry shader inputs, lower_named_interface_blocks may
+    * create 2D arrays, so we need to be able to handle those.  2D arrays
+    * shouldn't be able to crop up for any other reason.
+    */
+   if (ir_dereference_array * const inner_array =
+       ir->array->as_dereference_array()) {
+      /*          ir => foo[i][j]
+       * inner_array => foo[i]
+       */
+      if (ir_dereference_variable * const deref_var =
+          inner_array->array->as_dereference_variable()) {
+         if (is_multiple_vertices(this->shader_stage, deref_var->var)) {
+            /* foo is a geometry/tessellation input or a TCS output, so i is
+             * the vertex, and j the part of the variable we're accessing.
+             */
+            if (try_mark_partial_variable(deref_var->var, ir->array_index))
+            {
+               /* We've now taken care of foo and j, but i might contain a
+                * subexpression that accesses shader inputs.  So manually
+                * visit i and then continue with the parent.
+                */
+               inner_array->array_index->accept(this);
+               return visit_continue_with_parent;
+            }
+         }
+      }
+   } else if (ir_dereference_variable * const deref_var =
+              ir->array->as_dereference_variable()) {
+      /* ir => foo[i], where foo is a variable. */
+      if (is_multiple_vertices(this->shader_stage, deref_var->var)) {
+         /* foo is a geometry/tessellation input or a TCS output, so i is
+          * the vertex, and we're accessing the entire variable.
+          */
+         mark_whole_variable(deref_var->var);
+         /* We've now taken care of foo, but i might contain a subexpression
+          * that accesses shader inputs.  So manually visit i and then
+          * continue with the parent.
+          */
+         ir->array_index->accept(this);
+         return visit_continue_with_parent;
+      } else if (is_shader_inout(deref_var->var)) {
+         /* foo is a shader input/output, but not a per-vertex one,
+          * so i is the part of the variable we're accessing.
+          */
+         if (try_mark_partial_variable(deref_var->var, ir->array_index))
+            return visit_continue_with_parent;
+      }
+   }
+
+   /* The expression is something we don't recognize.  Just visit its
+    * subexpressions.
+    */
+   return visit_continue;
+}
+/* Visit only the signature's body, skipping its parameter list. */
+ir_visitor_status
+ir_set_program_inouts_visitor::visit_enter(ir_function_signature *ir)
+{
+   /* We don't want to descend into the function parameters and
+    * consider them as shader inputs or outputs.
+    */
+   visit_list_elements(this, &ir->body);
+   return visit_continue_with_parent;
+}
+/* Flag fragment programs that use any of the dFdy opcode variants. */
+ir_visitor_status
+ir_set_program_inouts_visitor::visit_enter(ir_expression *ir)
+{
+   if (this->shader_stage == MESA_SHADER_FRAGMENT &&
+       (ir->operation == ir_unop_dFdy ||
+        ir->operation == ir_unop_dFdy_coarse ||
+        ir->operation == ir_unop_dFdy_fine)) {
+      gl_fragment_program *fprog = (gl_fragment_program *) prog;
+      fprog->UsesDFdy = true;
+   }
+   return visit_continue;
+}
+/* Record in UsesKill that the fragment program contains a discard. */
+ir_visitor_status
+ir_set_program_inouts_visitor::visit_enter(ir_discard *)
+{
+   /* discards are only allowed in fragment shaders. */
+   assert(this->shader_stage == MESA_SHADER_FRAGMENT);
+
+   gl_fragment_program *fprog = (gl_fragment_program *) prog;
+   fprog->UsesKill = true;
+
+   return visit_continue;
+}
+/* Record use of the ir_tg4 texture opcode in prog->UsesGather. */
+ir_visitor_status
+ir_set_program_inouts_visitor::visit_enter(ir_texture *ir)
+{
+   if (ir->op == ir_tg4)
+      prog->UsesGather = true;
+   return visit_continue;
+}
+/* Clear prog's usage fields, then walk instructions to recompute them. */
+void
+do_set_program_inouts(exec_list *instructions, struct gl_program *prog,
+                      gl_shader_stage shader_stage)
+{
+   ir_set_program_inouts_visitor v(prog, shader_stage);
+
+   prog->InputsRead = 0;
+   prog->OutputsWritten = 0;
+   prog->PatchInputsRead = 0;
+   prog->PatchOutputsWritten = 0;
+   prog->SystemValuesRead = 0; /* NOTE(review): UsesGather is not reset here */
+   if (shader_stage == MESA_SHADER_FRAGMENT) {
+      gl_fragment_program *fprog = (gl_fragment_program *) prog;
+      memset(fprog->InterpQualifier, 0, sizeof(fprog->InterpQualifier));
+      fprog->IsCentroid = 0;
+      fprog->IsSample = 0;
+      fprog->UsesDFdy = false;
+      fprog->UsesKill = false;
+   }
+   visit_list_elements(&v, instructions);
+}
diff --git a/src/compiler/glsl/ir_uniform.h b/src/compiler/glsl/ir_uniform.h
new file mode 100644
index 0000000..1854279
--- /dev/null
+++ b/src/compiler/glsl/ir_uniform.h
@@ -0,0 +1,216 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+#ifndef IR_UNIFORM_H
+#define IR_UNIFORM_H
+
+
+/* stdbool.h is necessary because this file is included in both C and C++ code.
+ */
+#include <stdbool.h>
+
+#include "program/prog_parameter.h"  /* For union gl_constant_value. */
+
+/**
+ * Used by GL_ARB_explicit_uniform_location extension code in the linker
+ * and glUniform* functions to identify inactive explicit uniform locations.
+ */
+#define INACTIVE_UNIFORM_EXPLICIT_LOCATION ((gl_uniform_storage *) -1)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/** Base format in which a driver's copy of uniform data is stored. */
+enum PACKED gl_uniform_driver_format {
+   uniform_native = 0,          /**< Store data in the native format. */
+   uniform_int_float,           /**< Store integer data as floats. */
+};
+/** Describes one driver-side storage area for a uniform's data. */
+struct gl_uniform_driver_storage {
+   /**
+    * Number of bytes from one array element to the next.
+    */
+   uint8_t element_stride;
+
+   /**
+    * Number of bytes from one vector in a matrix to the next.
+    */
+   uint8_t vector_stride;
+
+   /**
+    * Base format of the stored data.
+    */
+   enum gl_uniform_driver_format format;
+
+   /**
+    * Pointer to the base of the data.
+    */
+   void *data;
+};
+/** Per-shader-stage index and usage flag of an opaque uniform. */
+struct gl_opaque_uniform_index {
+   /**
+    * Base opaque uniform index
+    *
+    * If \c gl_uniform_storage::type is an opaque type, this
+    * represents its uniform index.  If \c
+    * gl_uniform_storage::array_elements is not zero, the array will
+    * use opaque uniform indices \c index through \c index + \c
+    * gl_uniform_storage::array_elements - 1, inclusive.
+    *
+    * Note that the index may be different in each shader stage.
+    */
+   uint8_t index;
+
+   /**
+    * Whether this opaque uniform is used in this shader stage.
+    */
+   bool active;
+};
+/** Description of one uniform (or shader storage buffer variable). */
+struct gl_uniform_storage {
+   char *name;
+   /** Type of the data stored in this uniform.
+    *
+    * In the case of an array, it's the type of a single array element.
+    */
+   const struct glsl_type *type;
+
+   /**
+    * The number of elements in this uniform.
+    *
+    * For non-arrays, this is always 0.  For arrays, the value is the size of
+    * the array.
+    */
+   unsigned array_elements;
+
+   /**
+    * Has this uniform ever been set?
+    */
+   bool initialized;
+
+   struct gl_opaque_uniform_index opaque[MESA_SHADER_STAGES];
+
+   /**
+    * Storage used by the driver for the uniform
+    */
+   unsigned num_driver_storage;
+   struct gl_uniform_driver_storage *driver_storage;
+
+   /**
+    * Storage used by Mesa for the uniform
+    *
+    * This form of the uniform is used by Mesa's implementation of \c
+    * glGetUniform.  It can also be used by drivers to obtain the value of the
+    * uniform if the \c ::driver_storage interface is not used.
+    */
+   union gl_constant_value *storage;
+
+   /** Fields for GL_ARB_uniform_buffer_object
+    * @{
+    */
+
+   /**
+    * GL_UNIFORM_BLOCK_INDEX: index of the uniform block containing
+    * the uniform, or -1 for the default uniform block.  Note that the
+    * index is into the linked program's UniformBlocks[] array, not
+    * the linked shader's.
+    */
+   int block_index;
+
+   /** GL_UNIFORM_OFFSET: byte offset within the uniform block, or -1. */
+   int offset;
+
+   /**
+    * GL_UNIFORM_MATRIX_STRIDE: byte stride between columns or rows of
+    * a matrix.  Set to 0 for non-matrices in UBOs, or -1 for uniforms
+    * in the default uniform block.
+    */
+   int matrix_stride;
+
+   /**
+    * GL_UNIFORM_ARRAY_STRIDE: byte stride between elements of the
+    * array.  Set to zero for non-arrays in UBOs, or -1 for uniforms
+    * in the default uniform block.
+    */
+   int array_stride;
+
+   /** GL_UNIFORM_ROW_MAJOR: true iff it's a row-major matrix in a UBO */
+   bool row_major;
+
+   /** @} */
+
+   /**
+    * This is a compiler-generated uniform that should not be advertised
+    * via the API.
+    */
+   bool hidden;
+
+   /**
+    * This is a built-in uniform that should not be modified through any GL API.
+    */
+   bool builtin;
+
+   /**
+    * This is a shader storage buffer variable, not a uniform.
+    */
+   bool is_shader_storage;
+
+   /**
+    * Index within gl_shader_program::AtomicBuffers[] of the atomic
+    * counter buffer this uniform is stored in, or -1 if this is not
+    * an atomic counter.
+    */
+   int atomic_buffer_index;
+
+   /**
+    * The 'base location' for this uniform in the uniform remap table.  For
+    * arrays this is the first element in the array.
+    * For subroutines this is in the shader subroutine uniform remap table.
+    */
+   unsigned remap_location;
+
+   /**
+    * The number of compatible subroutines with this subroutine uniform.
+    */
+   unsigned num_compatible_subroutines;
+
+   /**
+    * A single integer identifying the number of active array elements of
+    * the top-level shader storage block member (GL_TOP_LEVEL_ARRAY_SIZE).
+    */
+   unsigned top_level_array_size;
+
+   /**
+    * A single integer identifying the stride between array elements of the
+    * top-level shader storage block member (GL_TOP_LEVEL_ARRAY_STRIDE).
+    */
+   unsigned top_level_array_stride;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* IR_UNIFORM_H */
diff --git a/src/compiler/glsl/ir_validate.cpp b/src/compiler/glsl/ir_validate.cpp
new file mode 100644
index 0000000..cad7069
--- /dev/null
+++ b/src/compiler/glsl/ir_validate.cpp
@@ -0,0 +1,930 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ir_validate.cpp
+ *
+ * Attempts to verify that various invariants of the IR tree are true.
+ *
+ * In particular, at the moment it makes sure that no single
+ * ir_instruction node except for ir_variable appears multiple times
+ * in the ir tree.  ir_variable does appear multiple times: Once as a
+ * declaration in an exec_list, and multiple times as the endpoint of
+ * a dereference chain.
+ */
+
+#include "ir.h"
+#include "ir_hierarchical_visitor.h"
+#include "util/hash_table.h"
+#include "util/set.h"
+#include "compiler/glsl_types.h"
+
+namespace {
+/** Hierarchical visitor that aborts on the first IR invariant violation. */
+class ir_validate : public ir_hierarchical_visitor {
+public:
+   ir_validate()
+   {
+      this->ir_set = _mesa_set_create(NULL, _mesa_hash_pointer,
+                                      _mesa_key_pointer_equal);
+
+      this->current_function = NULL;
+
+      this->callback_enter = ir_validate::validate_ir;
+      this->data_enter = ir_set; /* handed to validate_ir as its data arg */
+   }
+
+   ~ir_validate()
+   {
+      _mesa_set_destroy(this->ir_set, NULL);
+   }
+
+   virtual ir_visitor_status visit(ir_variable *v);
+   virtual ir_visitor_status visit(ir_dereference_variable *ir);
+
+   virtual ir_visitor_status visit_enter(ir_discard *ir);
+   virtual ir_visitor_status visit_enter(ir_if *ir);
+
+   virtual ir_visitor_status visit_enter(ir_function *ir);
+   virtual ir_visitor_status visit_leave(ir_function *ir);
+   virtual ir_visitor_status visit_enter(ir_function_signature *ir);
+
+   virtual ir_visitor_status visit_leave(ir_expression *ir);
+   virtual ir_visitor_status visit_leave(ir_swizzle *ir);
+
+   virtual ir_visitor_status visit_enter(class ir_dereference_array *);
+
+   virtual ir_visitor_status visit_enter(ir_assignment *ir);
+   virtual ir_visitor_status visit_enter(ir_call *ir);
+
+   static void validate_ir(ir_instruction *ir, void *data);
+
+   ir_function *current_function; /* function being visited, else NULL */
+
+   struct set *ir_set; /* pointer set; visit(ir_variable) adds variables */
+};
+
+} /* anonymous namespace */
+/* A dereference must name a variable that is already in ir_set. */
+ir_visitor_status
+ir_validate::visit(ir_dereference_variable *ir)
+{
+   if ((ir->var == NULL) || (ir->var->as_variable() == NULL)) {
+      printf("ir_dereference_variable @ %p does not specify a variable %p\n",
+             (void *) ir, (void *) ir->var);
+      abort();
+   }
+
+   if (_mesa_set_search(ir_set, ir->var) == NULL) {
+      printf("ir_dereference_variable @ %p specifies undeclared variable "
+             "`%s' @ %p\n",
+             (void *) ir, ir->var->name, (void *) ir->var);
+      abort();
+   }
+
+   this->validate_ir(ir, this->data_enter);
+
+   return visit_continue;
+}
+/* Indexed value: array, matrix or vector.  Index: a scalar integer. */
+ir_visitor_status
+ir_validate::visit_enter(class ir_dereference_array *ir)
+{
+   if (!ir->array->type->is_array() && !ir->array->type->is_matrix() &&
+       !ir->array->type->is_vector()) {
+      printf("ir_dereference_array @ %p does not specify an array, a vector "
+             "or a matrix\n",
+             (void *) ir);
+      ir->print();
+      printf("\n");
+      abort();
+   }
+
+   if (!ir->array_index->type->is_scalar()) {
+      printf("ir_dereference_array @ %p does not have scalar index: %s\n",
+             (void *) ir, ir->array_index->type->name);
+      abort();
+   }
+
+   if (!ir->array_index->type->is_integer()) {
+      printf("ir_dereference_array @ %p does not have integer index: %s\n",
+             (void *) ir, ir->array_index->type->name);
+      abort();
+   }
+
+   return visit_continue;
+}
+/* A discard's condition, when present, must have type bool. */
+ir_visitor_status
+ir_validate::visit_enter(ir_discard *ir)
+{
+   if (ir->condition && ir->condition->type != glsl_type::bool_type) {
+      printf("ir_discard condition %s type instead of bool.\n",
+             ir->condition->type->name);
+      ir->print();
+      printf("\n");
+      abort();
+   }
+
+   return visit_continue;
+}
+/* An if's condition must have type bool (it is dereferenced unchecked). */
+ir_visitor_status
+ir_validate::visit_enter(ir_if *ir)
+{
+   if (ir->condition->type != glsl_type::bool_type) {
+      printf("ir_if condition %s type instead of bool.\n",
+             ir->condition->type->name);
+      ir->print();
+      printf("\n");
+      abort();
+   }
+
+   return visit_continue;
+}
+
+/* Reject nested functions, track the current one, check its signatures. */
+ir_visitor_status
+ir_validate::visit_enter(ir_function *ir)
+{
+   /* Function definitions cannot be nested.
+    */
+   if (this->current_function != NULL) {
+      printf("Function definition nested inside another function "
+             "definition:\n");
+      printf("%s %p inside %s %p\n",
+             ir->name, (void *) ir,
+             this->current_function->name, (void *) this->current_function);
+      abort();
+   }
+
+   /* Store the current function hierarchy being traversed.  This is used
+    * by the function signature visitor to ensure that the signatures are
+    * linked with the correct functions.
+    */
+   this->current_function = ir;
+
+   this->validate_ir(ir, this->data_enter);
+
+   /* Verify that all of the things stored in the list of signatures are,
+    * in fact, function signatures.
+    */
+   foreach_in_list(ir_instruction, sig, &ir->signatures) {
+      if (sig->ir_type != ir_type_function_signature) {
+         printf("Non-signature in signature list of function `%s'\n",
+                ir->name);
+         abort();
+      }
+   }
+
+   return visit_continue;
+}
+/* Leaving a function: check name ownership, clear the nesting tracker. */
+ir_visitor_status
+ir_validate::visit_leave(ir_function *ir)
+{
+   assert(ralloc_parent(ir->name) == ir);
+
+   this->current_function = NULL;
+   return visit_continue;
+}
+/* A signature must sit inside its own function and have a return type. */
+ir_visitor_status
+ir_validate::visit_enter(ir_function_signature *ir)
+{
+   if (this->current_function != ir->function()) {
+      printf("Function signature nested inside wrong function "
+             "definition:\n");
+      printf("%p inside %s %p instead of %s %p\n",
+             (void *) ir, /* NOTE(review): current_function may be NULL below */
+             this->current_function->name, (void *) this->current_function,
+             ir->function_name(), (void *) ir->function());
+      abort();
+   }
+
+   if (ir->return_type == NULL) {
+      printf("Function signature %p for function %s has NULL return type.\n",
+             (void *) ir, ir->function_name());
+      abort();
+   }
+
+   this->validate_ir(ir, this->data_enter);
+
+   return visit_continue;
+}
+/* Assert the operand and result type rules of each expression opcode. */
+ir_visitor_status
+ir_validate::visit_leave(ir_expression *ir)
+{
+   switch (ir->operation) {
+   case ir_unop_bit_not:
+      assert(ir->operands[0]->type == ir->type);
+      break;
+   case ir_unop_logic_not:
+      assert(ir->type->base_type == GLSL_TYPE_BOOL);
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
+      break;
+
+   case ir_unop_neg:
+   case ir_unop_abs:
+   case ir_unop_sign:
+   case ir_unop_rcp:
+   case ir_unop_rsq:
+   case ir_unop_sqrt:
+      assert(ir->type == ir->operands[0]->type);
+      break;
+
+   case ir_unop_exp:
+   case ir_unop_log:
+   case ir_unop_exp2:
+   case ir_unop_log2:
+   case ir_unop_saturate:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+      assert(ir->type == ir->operands[0]->type);
+      break;
+
+   case ir_unop_f2i:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+      assert(ir->type->base_type == GLSL_TYPE_INT);
+      break;
+   case ir_unop_f2u:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+      assert(ir->type->base_type == GLSL_TYPE_UINT);
+      break;
+   case ir_unop_i2f:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
+      assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+      break;
+   case ir_unop_f2b:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+      assert(ir->type->base_type == GLSL_TYPE_BOOL);
+      break;
+   case ir_unop_b2f:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
+      assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+      break;
+   case ir_unop_i2b:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
+      assert(ir->type->base_type == GLSL_TYPE_BOOL);
+      break;
+   case ir_unop_b2i:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
+      assert(ir->type->base_type == GLSL_TYPE_INT);
+      break;
+   case ir_unop_u2f:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
+      assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+      break;
+   case ir_unop_i2u:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
+      assert(ir->type->base_type == GLSL_TYPE_UINT);
+      break;
+   case ir_unop_u2i:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
+      assert(ir->type->base_type == GLSL_TYPE_INT);
+      break;
+   case ir_unop_bitcast_i2f:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
+      assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+      break;
+   case ir_unop_bitcast_f2i:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+      assert(ir->type->base_type == GLSL_TYPE_INT);
+      break;
+   case ir_unop_bitcast_u2f:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
+      assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+      break;
+   case ir_unop_bitcast_f2u:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+      assert(ir->type->base_type == GLSL_TYPE_UINT);
+      break;
+
+   case ir_unop_trunc:
+   case ir_unop_round_even:
+   case ir_unop_ceil:
+   case ir_unop_floor:
+   case ir_unop_fract:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+             ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(ir->operands[0]->type == ir->type);
+      break;
+   case ir_unop_sin:
+   case ir_unop_cos:
+   case ir_unop_dFdx:
+   case ir_unop_dFdx_coarse:
+   case ir_unop_dFdx_fine:
+   case ir_unop_dFdy:
+   case ir_unop_dFdy_coarse:
+   case ir_unop_dFdy_fine:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+      assert(ir->operands[0]->type == ir->type);
+      break;
+
+   case ir_unop_pack_snorm_2x16:
+   case ir_unop_pack_unorm_2x16:
+   case ir_unop_pack_half_2x16:
+      assert(ir->type == glsl_type::uint_type);
+      assert(ir->operands[0]->type == glsl_type::vec2_type);
+      break;
+
+   case ir_unop_pack_snorm_4x8:
+   case ir_unop_pack_unorm_4x8:
+      assert(ir->type == glsl_type::uint_type);
+      assert(ir->operands[0]->type == glsl_type::vec4_type);
+      break;
+
+   case ir_unop_pack_double_2x32:
+      assert(ir->type == glsl_type::double_type);
+      assert(ir->operands[0]->type == glsl_type::uvec2_type);
+      break;
+
+   case ir_unop_unpack_snorm_2x16:
+   case ir_unop_unpack_unorm_2x16:
+   case ir_unop_unpack_half_2x16:
+      assert(ir->type == glsl_type::vec2_type);
+      assert(ir->operands[0]->type == glsl_type::uint_type);
+      break;
+
+   case ir_unop_unpack_snorm_4x8:
+   case ir_unop_unpack_unorm_4x8:
+      assert(ir->type == glsl_type::vec4_type);
+      assert(ir->operands[0]->type == glsl_type::uint_type);
+      break;
+
+   case ir_unop_unpack_half_2x16_split_x:
+   case ir_unop_unpack_half_2x16_split_y:
+      assert(ir->type == glsl_type::float_type);
+      assert(ir->operands[0]->type == glsl_type::uint_type);
+      break;
+
+   case ir_unop_unpack_double_2x32:
+      assert(ir->type == glsl_type::uvec2_type);
+      assert(ir->operands[0]->type == glsl_type::double_type);
+      break;
+
+   case ir_unop_bitfield_reverse:
+      assert(ir->operands[0]->type == ir->type);
+      assert(ir->type->is_integer());
+      break;
+
+   case ir_unop_bit_count:
+   case ir_unop_find_msb:
+   case ir_unop_find_lsb:
+      assert(ir->operands[0]->type->vector_elements == ir->type->vector_elements);
+      assert(ir->operands[0]->type->is_integer());
+      assert(ir->type->base_type == GLSL_TYPE_INT);
+      break;
+
+   case ir_unop_noise:
+      /* XXX what can we assert here? */
+      break;
+
+   case ir_unop_interpolate_at_centroid:
+      assert(ir->operands[0]->type == ir->type);
+      assert(ir->operands[0]->type->is_float());
+      break;
+
+   case ir_unop_get_buffer_size:
+      assert(ir->type == glsl_type::int_type);
+      assert(ir->operands[0]->type == glsl_type::uint_type);
+      break;
+
+   case ir_unop_ssbo_unsized_array_length:
+      assert(ir->type == glsl_type::int_type);
+      assert(ir->operands[0]->type->is_array());
+      assert(ir->operands[0]->type->is_unsized_array());
+      break;
+
+   case ir_unop_d2f:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+      break;
+   case ir_unop_f2d:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+      assert(ir->type->base_type == GLSL_TYPE_DOUBLE);
+      break;
+   case ir_unop_d2i:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(ir->type->base_type == GLSL_TYPE_INT);
+      break;
+   case ir_unop_i2d:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
+      assert(ir->type->base_type == GLSL_TYPE_DOUBLE);
+      break;
+   case ir_unop_d2u:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(ir->type->base_type == GLSL_TYPE_UINT);
+      break;
+   case ir_unop_u2d:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
+      assert(ir->type->base_type == GLSL_TYPE_DOUBLE);
+      break;
+   case ir_unop_d2b:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(ir->type->base_type == GLSL_TYPE_BOOL);
+      break;
+
+   case ir_unop_frexp_sig: /* NOTE(review): double result for float input? */
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+             ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(ir->type->base_type == GLSL_TYPE_DOUBLE);
+      break;
+   case ir_unop_frexp_exp:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+             ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(ir->type->base_type == GLSL_TYPE_INT);
+      break;
+   case ir_unop_subroutine_to_int:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_SUBROUTINE);
+      assert(ir->type->base_type == GLSL_TYPE_INT);
+      break;
+   case ir_binop_add:
+   case ir_binop_sub:
+   case ir_binop_mul:
+   case ir_binop_div:
+   case ir_binop_mod:
+   case ir_binop_min:
+   case ir_binop_max:
+   case ir_binop_pow:
+      assert(ir->operands[0]->type->base_type ==
+             ir->operands[1]->type->base_type);
+
+      if (ir->operands[0]->type->is_scalar())
+         assert(ir->operands[1]->type == ir->type);
+      else if (ir->operands[1]->type->is_scalar())
+         assert(ir->operands[0]->type == ir->type);
+      else if (ir->operands[0]->type->is_vector() &&
+               ir->operands[1]->type->is_vector()) {
+         assert(ir->operands[0]->type == ir->operands[1]->type);
+         assert(ir->operands[0]->type == ir->type);
+      }
+      break;
+
+   case ir_binop_imul_high:
+      assert(ir->type == ir->operands[0]->type);
+      assert(ir->type == ir->operands[1]->type);
+      assert(ir->type->is_integer());
+      break;
+
+   case ir_binop_carry:
+   case ir_binop_borrow:
+      assert(ir->type == ir->operands[0]->type);
+      assert(ir->type == ir->operands[1]->type);
+      assert(ir->type->base_type == GLSL_TYPE_UINT);
+      break;
+
+   case ir_binop_less:
+   case ir_binop_greater:
+   case ir_binop_lequal:
+   case ir_binop_gequal:
+   case ir_binop_equal:
+   case ir_binop_nequal:
+      /* The semantics of the IR operators differ from the GLSL <, >, <=, >=,
+       * ==, and != operators.  The IR operators perform a component-wise
+       * comparison on scalar or vector types and return a boolean scalar or
+       * vector type of the same size.
+       */
+      assert(ir->type->base_type == GLSL_TYPE_BOOL);
+      assert(ir->operands[0]->type == ir->operands[1]->type);
+      assert(ir->operands[0]->type->is_vector()
+             || ir->operands[0]->type->is_scalar());
+      assert(ir->operands[0]->type->vector_elements
+             == ir->type->vector_elements);
+      break;
+
+   case ir_binop_all_equal:
+   case ir_binop_any_nequal:
+      /* GLSL == and != operate on scalars, vectors, matrices and arrays, and
+       * return a scalar boolean.  The IR matches that.
+       */
+      assert(ir->type == glsl_type::bool_type);
+      assert(ir->operands[0]->type == ir->operands[1]->type);
+      break;
+
+   case ir_binop_lshift:
+   case ir_binop_rshift:
+      assert(ir->operands[0]->type->is_integer() &&
+             ir->operands[1]->type->is_integer());
+      if (ir->operands[0]->type->is_scalar()) {
+         assert(ir->operands[1]->type->is_scalar());
+      }
+      if (ir->operands[0]->type->is_vector() &&
+          ir->operands[1]->type->is_vector()) {
+         assert(ir->operands[0]->type->components() ==
+                ir->operands[1]->type->components());
+      }
+      assert(ir->type == ir->operands[0]->type);
+      break;
+
+   case ir_binop_bit_and:
+   case ir_binop_bit_xor:
+   case ir_binop_bit_or:
+      assert(ir->operands[0]->type->base_type ==
+             ir->operands[1]->type->base_type);
+      assert(ir->type->is_integer());
+      if (ir->operands[0]->type->is_vector() &&
+          ir->operands[1]->type->is_vector()) {
+         assert(ir->operands[0]->type->vector_elements ==
+                ir->operands[1]->type->vector_elements);
+      }
+      break;
+
+   case ir_binop_logic_and:
+   case ir_binop_logic_xor:
+   case ir_binop_logic_or:
+      assert(ir->type->base_type == GLSL_TYPE_BOOL);
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
+      assert(ir->operands[1]->type->base_type == GLSL_TYPE_BOOL);
+      break;
+
+   case ir_binop_dot:
+      assert(ir->type == glsl_type::float_type ||
+             ir->type == glsl_type::double_type);
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+             ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(ir->operands[0]->type->is_vector());
+      assert(ir->operands[0]->type == ir->operands[1]->type);
+      break;
+
+   case ir_binop_pack_half_2x16_split:
+      assert(ir->type == glsl_type::uint_type);
+      assert(ir->operands[0]->type == glsl_type::float_type);
+      assert(ir->operands[1]->type == glsl_type::float_type);
+      break;
+
+   case ir_binop_ubo_load:
+      assert(ir->operands[0]->type == glsl_type::uint_type);
+
+      assert(ir->operands[1]->type == glsl_type::uint_type);
+      break;
+
+   case ir_binop_ldexp:
+      assert(ir->operands[0]->type == ir->type);
+      assert(ir->operands[0]->type->is_float() ||
+             ir->operands[0]->type->is_double());
+      assert(ir->operands[1]->type->base_type == GLSL_TYPE_INT);
+      assert(ir->operands[0]->type->components() ==
+             ir->operands[1]->type->components());
+      break;
+
+   case ir_binop_vector_extract:
+      assert(ir->operands[0]->type->is_vector());
+      assert(ir->operands[1]->type->is_scalar()
+             && ir->operands[1]->type->is_integer());
+      break;
+
+   case ir_binop_interpolate_at_offset:
+      assert(ir->operands[0]->type == ir->type);
+      assert(ir->operands[0]->type->is_float());
+      assert(ir->operands[1]->type->components() == 2);
+      assert(ir->operands[1]->type->is_float());
+      break;
+
+   case ir_binop_interpolate_at_sample:
+      assert(ir->operands[0]->type == ir->type);
+      assert(ir->operands[0]->type->is_float());
+      assert(ir->operands[1]->type == glsl_type::int_type);
+      break;
+
+   case ir_triop_fma:
+      assert(ir->type->base_type == GLSL_TYPE_FLOAT ||
+             ir->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(ir->type == ir->operands[0]->type);
+      assert(ir->type == ir->operands[1]->type);
+      assert(ir->type == ir->operands[2]->type);
+      break;
+
+   case ir_triop_lrp:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+             ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(ir->operands[0]->type == ir->operands[1]->type);
+      assert(ir->operands[2]->type == ir->operands[0]->type ||
+             ir->operands[2]->type == glsl_type::float_type ||
+             ir->operands[2]->type == glsl_type::double_type);
+      break;
+
+   case ir_triop_csel:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
+      assert(ir->type->vector_elements == ir->operands[0]->type->vector_elements);
+      assert(ir->type == ir->operands[1]->type);
+      assert(ir->type == ir->operands[2]->type);
+      break;
+
+   case ir_triop_bitfield_extract:
+      assert(ir->type->is_integer());
+      assert(ir->operands[0]->type == ir->type);
+      assert(ir->operands[1]->type == ir->type);
+      assert(ir->operands[2]->type == ir->type);
+      break;
+
+   case ir_triop_vector_insert:
+      assert(ir->operands[0]->type->is_vector());
+      assert(ir->operands[1]->type->is_scalar());
+      assert(ir->operands[0]->type->base_type == ir->operands[1]->type->base_type);
+      assert(ir->operands[2]->type->is_scalar()
+             && ir->operands[2]->type->is_integer());
+      assert(ir->type == ir->operands[0]->type);
+      break;
+
+   case ir_quadop_bitfield_insert:
+      assert(ir->type->is_integer());
+      assert(ir->operands[0]->type == ir->type);
+      assert(ir->operands[1]->type == ir->type);
+      assert(ir->operands[2]->type == ir->type);
+      assert(ir->operands[3]->type == ir->type);
+      break;
+
+   case ir_quadop_vector:
+      /* The vector operator collects some number of scalars and generates a
+       * vector from them.
+       *
+       *  - All of the operands must be scalar.
+       *  - Number of operands must match the size of the resulting vector.
+       *  - Base type of the operands must match the base type of the result.
+       */
+      assert(ir->type->is_vector());
+      switch (ir->type->vector_elements) {
+      case 2:
+         assert(ir->operands[0]->type->is_scalar());
+         assert(ir->operands[0]->type->base_type == ir->type->base_type);
+         assert(ir->operands[1]->type->is_scalar());
+         assert(ir->operands[1]->type->base_type == ir->type->base_type);
+         assert(ir->operands[2] == NULL);
+         assert(ir->operands[3] == NULL);
+         break;
+      case 3:
+         assert(ir->operands[0]->type->is_scalar());
+         assert(ir->operands[0]->type->base_type == ir->type->base_type);
+         assert(ir->operands[1]->type->is_scalar());
+         assert(ir->operands[1]->type->base_type == ir->type->base_type);
+         assert(ir->operands[2]->type->is_scalar());
+         assert(ir->operands[2]->type->base_type == ir->type->base_type);
+         assert(ir->operands[3] == NULL);
+         break;
+      case 4:
+         assert(ir->operands[0]->type->is_scalar());
+         assert(ir->operands[0]->type->base_type == ir->type->base_type);
+         assert(ir->operands[1]->type->is_scalar());
+         assert(ir->operands[1]->type->base_type == ir->type->base_type);
+         assert(ir->operands[2]->type->is_scalar());
+         assert(ir->operands[2]->type->base_type == ir->type->base_type);
+         assert(ir->operands[3]->type->is_scalar());
+         assert(ir->operands[3]->type->base_type == ir->type->base_type);
+         break;
+      default:
+         /* The is_vector assertion above should prevent execution from ever
+          * getting here.
+          */
+         assert(!"Should not get here.");
+         break;
+      }
+   }
+
+   return visit_continue;
+}
+/* Every channel the swizzle selects must exist in the source value. */
+ir_visitor_status
+ir_validate::visit_leave(ir_swizzle *ir)
+{
+   unsigned int chans[4] = {ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w};
+
+   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
+      if (chans[i] >= ir->val->type->vector_elements) {
+         printf("ir_swizzle @ %p specifies a channel not present "
+                "in the value.\n", (void *) ir);
+         ir->print();
+         abort();
+      }
+   }
+
+   return visit_continue;
+}
+/* Add the variable to ir_set, then sanity-check its recorded metadata. */
+ir_visitor_status
+ir_validate::visit(ir_variable *ir)
+{
+   /* An ir_variable is the one thing that can (and will) appear multiple times
+    * in an IR tree.  It is added to the pointer set so that it can be used
+    * in the ir_dereference_variable handler to ensure that a variable is
+    * declared before it is dereferenced.
+    */
+   if (ir->name && ir->is_name_ralloced())
+      assert(ralloc_parent(ir->name) == ir);
+
+   _mesa_set_add(ir_set, ir);
+
+   /* If a variable is an array, verify that the maximum array index is in
+    * bounds.  There was once an error in AST-to-HIR conversion that set this
+    * to be out of bounds.
+    */
+   if (ir->type->array_size() > 0) {
+      if (ir->data.max_array_access >= ir->type->length) {
+         printf("ir_variable has maximum access out of bounds (%d vs %d)\n",
+                ir->data.max_array_access, ir->type->length - 1);
+         ir->print();
+         abort();
+      }
+   }
+
+   /* If a variable is an interface block (or an array of interface blocks),
+    * verify that the maximum array index for each interface member is in
+    * bounds.
+    */
+   if (ir->is_interface_instance()) {
+      const glsl_struct_field *fields =
+         ir->get_interface_type()->fields.structure;
+      for (unsigned i = 0; i < ir->get_interface_type()->length; i++) {
+         if (fields[i].type->array_size() > 0) {
+            const unsigned *const max_ifc_array_access =
+               ir->get_max_ifc_array_access();
+
+            assert(max_ifc_array_access != NULL);
+            /* NOTE(review): %d with unsigned; prints length, not length-1 */
+            if (max_ifc_array_access[i] >= fields[i].type->length) {
+               printf("ir_variable has maximum access out of bounds for "
+                      "field %s (%d vs %d)\n", fields[i].name,
+                      max_ifc_array_access[i], fields[i].type->length);
+               ir->print();
+               abort();
+            }
+         }
+      }
+   }
+   /* A constant initializer is only legal when has_initializer is set. */
+   if (ir->constant_initializer != NULL && !ir->data.has_initializer) {
+      printf("ir_variable didn't have an initializer, but has a constant "
+             "initializer value.\n");
+      ir->print();
+      abort();
+   }
+   /* Uniforms that is_gl_identifier() accepts must carry state slots. */
+   if (ir->data.mode == ir_var_uniform
+       && is_gl_identifier(ir->name)
+       && ir->get_state_slots() == NULL) {
+      printf("built-in uniform has no state\n");
+      ir->print();
+      abort();
+   }
+
+   return visit_continue;
+}
+/* Scalar/vector stores need a non-zero write mask matching the RHS width. */
+ir_visitor_status
+ir_validate::visit_enter(ir_assignment *ir)
+{
+   const ir_dereference *const lhs = ir->lhs;
+   if (lhs->type->is_scalar() || lhs->type->is_vector()) {
+      if (ir->write_mask == 0) {
+         printf("Assignment LHS is %s, but write mask is 0:\n",
+                lhs->type->is_scalar() ? "scalar" : "vector");
+         ir->print();
+         abort();
+      }
+      /* Count the enabled channels among the low four write-mask bits. */
+      int lhs_components = 0;
+      for (int i = 0; i < 4; i++) {
+         if (ir->write_mask & (1 << i))
+            lhs_components++;
+      }
+
+      if (lhs_components != ir->rhs->type->vector_elements) {
+         printf("Assignment count of LHS write mask channels enabled not\n"
+                "matching RHS vector size (%d LHS, %d RHS).\n",
+                lhs_components, ir->rhs->type->vector_elements);
+         ir->print();
+         abort();
+      }
+   }
+
+   this->validate_ir(ir, this->data_enter);
+
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_validate::visit_enter(ir_call *ir)
+{
+   ir_function_signature *const callee = ir->callee;
+
+   if (callee->ir_type != ir_type_function_signature) {
+      printf("IR called by ir_call is not ir_function_signature!\n");
+      abort();
+   }
+
+   if (ir->return_deref) {
+      if (ir->return_deref->type != callee->return_type) {
+         printf("callee type %s does not match return storage type %s\n",
+                callee->return_type->name, ir->return_deref->type->name);
+         abort();
+      }
+   } else if (callee->return_type != glsl_type::void_type) {
+      printf("ir_call has non-void callee but no return storage\n");
+      abort();
+   }
+
+   const exec_node *formal_param_node = callee->parameters.head;
+   const exec_node *actual_param_node = ir->actual_parameters.head;
+   while (true) {
+      if (formal_param_node->is_tail_sentinel()
+          != actual_param_node->is_tail_sentinel()) {
+         printf("ir_call has the wrong number of parameters:\n");
+         goto dump_ir;
+      }
+      if (formal_param_node->is_tail_sentinel()) {
+         break;
+      }
+      const ir_variable *formal_param
+         = (const ir_variable *) formal_param_node;
+      const ir_rvalue *actual_param
+         = (const ir_rvalue *) actual_param_node;
+      if (formal_param->type != actual_param->type) {
+         printf("ir_call parameter type mismatch:\n");
+         goto dump_ir;
+      }
+      if (formal_param->data.mode == ir_var_function_out
+          || formal_param->data.mode == ir_var_function_inout) {
+         if (!actual_param->is_lvalue()) {
+            printf("ir_call out/inout parameters must be lvalues:\n");
+            goto dump_ir;
+ } + } + formal_param_node = formal_param_node->next; + actual_param_node = actual_param_node->next; + } + + return visit_continue; + +dump_ir: + ir->print(); + printf("callee:\n"); + callee->print(); + abort(); + return visit_stop; +} + +void +ir_validate::validate_ir(ir_instruction *ir, void *data) +{ + struct set *ir_set = (struct set *) data; + + if (_mesa_set_search(ir_set, ir)) { + printf("Instruction node present twice in ir tree:\n"); + ir->print(); + printf("\n"); + abort(); + } + _mesa_set_add(ir_set, ir); +} + +void +check_node_type(ir_instruction *ir, void *data) +{ + (void) data; + + if (ir->ir_type >= ir_type_max) { + printf("Instruction node with unset type\n"); + ir->print(); printf("\n"); + } + ir_rvalue *value = ir->as_rvalue(); + if (value != NULL) + assert(value->type != glsl_type::error_type); +} + +void +validate_ir_tree(exec_list *instructions) +{ + /* We shouldn't have any reason to validate IR in a release build, + * and it's half composed of assert()s anyway which wouldn't do + * anything. 
+ */ +#ifdef DEBUG + ir_validate v; + + v.run(instructions); + + foreach_in_list(ir_instruction, ir, instructions) { + visit_tree(ir, check_node_type, NULL); + } +#endif +} diff --git a/src/compiler/glsl/ir_variable_refcount.cpp b/src/compiler/glsl/ir_variable_refcount.cpp new file mode 100644 index 0000000..8306be1 --- /dev/null +++ b/src/compiler/glsl/ir_variable_refcount.cpp @@ -0,0 +1,153 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file ir_variable_refcount.cpp + * + * Provides a visitor which produces a list of variables referenced, + * how many times they were referenced and assigned, and whether they + * were defined in the scope. 
+ */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_variable_refcount.h" +#include "compiler/glsl_types.h" +#include "util/hash_table.h" + +ir_variable_refcount_visitor::ir_variable_refcount_visitor() +{ + this->mem_ctx = ralloc_context(NULL); + this->ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); +} + +static void +free_entry(struct hash_entry *entry) +{ + ir_variable_refcount_entry *ivre = (ir_variable_refcount_entry *) entry->data; + + /* Free assignment list */ + exec_node *n; + while ((n = ivre->assign_list.pop_head()) != NULL) { + struct assignment_entry *assignment_entry = + exec_node_data(struct assignment_entry, n, link); + free(assignment_entry); + } + + delete ivre; +} + +ir_variable_refcount_visitor::~ir_variable_refcount_visitor() +{ + ralloc_free(this->mem_ctx); + _mesa_hash_table_destroy(this->ht, free_entry); +} + +// constructor +ir_variable_refcount_entry::ir_variable_refcount_entry(ir_variable *var) +{ + this->var = var; + assigned_count = 0; + declaration = false; + referenced_count = 0; +} + + +ir_variable_refcount_entry * +ir_variable_refcount_visitor::get_variable_entry(ir_variable *var) +{ + assert(var); + + struct hash_entry *e = _mesa_hash_table_search(this->ht, var); + if (e) + return (ir_variable_refcount_entry *)e->data; + + ir_variable_refcount_entry *entry = new ir_variable_refcount_entry(var); + assert(entry->referenced_count == 0); + _mesa_hash_table_insert(this->ht, var, entry); + + return entry; +} + + +ir_visitor_status +ir_variable_refcount_visitor::visit(ir_variable *ir) +{ + ir_variable_refcount_entry *entry = this->get_variable_entry(ir); + if (entry) + entry->declaration = true; + + return visit_continue; +} + + +ir_visitor_status +ir_variable_refcount_visitor::visit(ir_dereference_variable *ir) +{ + ir_variable *const var = ir->variable_referenced(); + ir_variable_refcount_entry *entry = this->get_variable_entry(var); + + if (entry) + entry->referenced_count++; + + return 
visit_continue; +} + + +ir_visitor_status +ir_variable_refcount_visitor::visit_enter(ir_function_signature *ir) +{ + /* We don't want to descend into the function parameters and + * dead-code eliminate them, so just accept the body here. + */ + visit_list_elements(this, &ir->body); + return visit_continue_with_parent; +} + + +ir_visitor_status +ir_variable_refcount_visitor::visit_leave(ir_assignment *ir) +{ + ir_variable_refcount_entry *entry; + entry = this->get_variable_entry(ir->lhs->variable_referenced()); + if (entry) { + entry->assigned_count++; + + /* Build a list for dead code optimisation. Don't add assignment if it + * was declared out of scope (outside the instruction stream). Also don't + * bother adding any more to the list if there are more references than + * assignments as this means the variable is used and won't be optimised + * out. + */ + assert(entry->referenced_count >= entry->assigned_count); + if (entry->referenced_count == entry->assigned_count) { + struct assignment_entry *assignment_entry = + (struct assignment_entry *)calloc(1, sizeof(*assignment_entry)); + assignment_entry->assign = ir; + entry->assign_list.push_head(&assignment_entry->link); + } + } + + return visit_continue; +} diff --git a/src/compiler/glsl/ir_variable_refcount.h b/src/compiler/glsl/ir_variable_refcount.h new file mode 100644 index 0000000..08a11c0 --- /dev/null +++ b/src/compiler/glsl/ir_variable_refcount.h @@ -0,0 +1,80 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file ir_variable_refcount.h + * + * Provides a visitor which produces a list of variables referenced, + * how many times they were referenced and assigned, and whether they + * were defined in the scope. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "compiler/glsl_types.h" + +struct assignment_entry { + exec_node link; + ir_assignment *assign; +}; + +class ir_variable_refcount_entry +{ +public: + ir_variable_refcount_entry(ir_variable *var); + + ir_variable *var; /* The key: the variable's pointer. */ + + /** + * List of assignments to the variable, if any. + * This is intended to be used for dead code optimisation and may + * not be a complete list. + */ + exec_list assign_list; + + /** Number of times the variable is referenced, including assignments. */ + unsigned referenced_count; + + /** Number of times the variable is assigned. 
*/ + unsigned assigned_count; + + bool declaration; /* If the variable had a decl in the instruction stream */ +}; + +class ir_variable_refcount_visitor : public ir_hierarchical_visitor { +public: + ir_variable_refcount_visitor(void); + ~ir_variable_refcount_visitor(void); + + virtual ir_visitor_status visit(ir_variable *); + virtual ir_visitor_status visit(ir_dereference_variable *); + + virtual ir_visitor_status visit_enter(ir_function_signature *); + virtual ir_visitor_status visit_leave(ir_assignment *); + + ir_variable_refcount_entry *get_variable_entry(ir_variable *var); + + struct hash_table *ht; + + void *mem_ctx; +}; diff --git a/src/compiler/glsl/ir_visitor.h b/src/compiler/glsl/ir_visitor.h new file mode 100644 index 0000000..7c38481 --- /dev/null +++ b/src/compiler/glsl/ir_visitor.h @@ -0,0 +1,93 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
class ir_rvalue;
class ir_variable;
class ir_function_signature;
class ir_function;
class ir_expression;
class ir_texture;
class ir_swizzle;
class ir_dereference_variable;
class ir_dereference_array;
class ir_dereference_record;
class ir_assignment;
class ir_constant;
class ir_call;
class ir_return;
class ir_discard;
class ir_if;
class ir_loop;
class ir_loop_jump;
class ir_emit_vertex;
class ir_end_primitive;
class ir_barrier;

/**
 * Abstract base class of visitors of IR instruction trees
 *
 * Every concrete instruction class has a pure virtual \c visit overload that
 * a concrete visitor has to implement.  The only overload with a default
 * body is the one taking \c ir_rvalue, which asserts because it is not meant
 * to be reached.
 */
class ir_visitor {
public:
   virtual ~ir_visitor() {}

   /**
    * \name Visit methods
    *
    * As typical for the visitor pattern, there must be one \c visit method for
    * each concrete subclass of \c ir_instruction.  Virtual base classes within
    * the hierarchy should not have \c visit methods.
    */
   /*@{*/
   virtual void visit(ir_rvalue *)
   {
      assert(!"unhandled error_type");
   }

   virtual void visit(ir_variable *) = 0;
   virtual void visit(ir_function_signature *) = 0;
   virtual void visit(ir_function *) = 0;
   virtual void visit(ir_expression *) = 0;
   virtual void visit(ir_texture *) = 0;
   virtual void visit(ir_swizzle *) = 0;
   virtual void visit(ir_dereference_variable *) = 0;
   virtual void visit(ir_dereference_array *) = 0;
   virtual void visit(ir_dereference_record *) = 0;
   virtual void visit(ir_assignment *) = 0;
   virtual void visit(ir_constant *) = 0;
   virtual void visit(ir_call *) = 0;
   virtual void visit(ir_return *) = 0;
   virtual void visit(ir_discard *) = 0;
   virtual void visit(ir_if *) = 0;
   virtual void visit(ir_loop *) = 0;
   virtual void visit(ir_loop_jump *) = 0;
   virtual void visit(ir_emit_vertex *) = 0;
   virtual void visit(ir_end_primitive *) = 0;
   virtual void visit(ir_barrier *) = 0;
   /*@}*/
};
: public ir_visitor { +public: + virtual void visit(class ir_variable *) {} + virtual void visit(class ir_expression *) {} + virtual void visit(class ir_texture *) {} + virtual void visit(class ir_swizzle *) {} + virtual void visit(class ir_dereference_variable *) {} + virtual void visit(class ir_dereference_array *) {} + virtual void visit(class ir_dereference_record *) {} + virtual void visit(class ir_assignment *) {} + virtual void visit(class ir_constant *) {} + virtual void visit(class ir_call *) {} + virtual void visit(class ir_emit_vertex *) {} + virtual void visit(class ir_end_primitive *) {} + virtual void visit(class ir_barrier *) {} +}; +#endif /* __cplusplus */ + +#endif /* IR_VISITOR_H */ diff --git a/src/compiler/glsl/link_atomics.cpp b/src/compiler/glsl/link_atomics.cpp new file mode 100644 index 0000000..277d473 --- /dev/null +++ b/src/compiler/glsl/link_atomics.cpp @@ -0,0 +1,346 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "glsl_parser_extras.h" +#include "ir.h" +#include "ir_uniform.h" +#include "linker.h" +#include "program/hash_table.h" +#include "main/macros.h" + +namespace { + /* + * Atomic counter as seen by the program. + */ + struct active_atomic_counter { + unsigned uniform_loc; + ir_variable *var; + }; + + /* + * Atomic counter buffer referenced by the program. There is a one + * to one correspondence between these and the objects that can be + * queried using glGetActiveAtomicCounterBufferiv(). + */ + struct active_atomic_buffer { + active_atomic_buffer() + : counters(0), num_counters(0), stage_references(), size(0) + {} + + ~active_atomic_buffer() + { + free(counters); + } + + void push_back(unsigned uniform_loc, ir_variable *var) + { + active_atomic_counter *new_counters; + + new_counters = (active_atomic_counter *) + realloc(counters, sizeof(active_atomic_counter) * + (num_counters + 1)); + + if (new_counters == NULL) { + _mesa_error_no_memory(__func__); + return; + } + + counters = new_counters; + counters[num_counters].uniform_loc = uniform_loc; + counters[num_counters].var = var; + num_counters++; + } + + active_atomic_counter *counters; + unsigned num_counters; + unsigned stage_references[MESA_SHADER_STAGES]; + unsigned size; + }; + + int + cmp_actives(const void *a, const void *b) + { + const active_atomic_counter *const first = (active_atomic_counter *) a; + const active_atomic_counter *const second = (active_atomic_counter *) b; + + return int(first->var->data.offset) - int(second->var->data.offset); + } + + bool + check_atomic_counters_overlap(const ir_variable *x, const ir_variable *y) + { + return ((x->data.offset >= y->data.offset && + x->data.offset < y->data.offset + 
y->type->atomic_size()) || + (y->data.offset >= x->data.offset && + y->data.offset < x->data.offset + x->type->atomic_size())); + } + + void + process_atomic_variable(const glsl_type *t, struct gl_shader_program *prog, + unsigned *uniform_loc, ir_variable *var, + active_atomic_buffer *const buffers, + unsigned *num_buffers, int *offset, + const unsigned shader_stage) + { + /* FIXME: Arrays of arrays get counted separately. For example: + * x1[3][3][2] = 9 counters + * x2[3][2] = 3 counters + * x3[2] = 1 counter + * + * However this code marks all the counters as active even when they + * might not be used. + */ + if (t->is_array() && t->fields.array->is_array()) { + for (unsigned i = 0; i < t->length; i++) { + process_atomic_variable(t->fields.array, prog, uniform_loc, + var, buffers, num_buffers, offset, + shader_stage); + } + } else { + active_atomic_buffer *buf = &buffers[var->data.binding]; + gl_uniform_storage *const storage = + &prog->UniformStorage[*uniform_loc]; + + /* If this is the first time the buffer is used, increment + * the counter of buffers used. 
+ */ + if (buf->size == 0) + (*num_buffers)++; + + buf->push_back(*uniform_loc, var); + + buf->stage_references[shader_stage]++; + buf->size = MAX2(buf->size, *offset + t->atomic_size()); + + storage->offset = *offset; + *offset += t->atomic_size(); + + (*uniform_loc)++; + } + } + + active_atomic_buffer * + find_active_atomic_counters(struct gl_context *ctx, + struct gl_shader_program *prog, + unsigned *num_buffers) + { + active_atomic_buffer *const buffers = + new active_atomic_buffer[ctx->Const.MaxAtomicBufferBindings]; + + *num_buffers = 0; + + for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) { + struct gl_shader *sh = prog->_LinkedShaders[i]; + if (sh == NULL) + continue; + + foreach_in_list(ir_instruction, node, sh->ir) { + ir_variable *var = node->as_variable(); + + if (var && var->type->contains_atomic()) { + int offset = var->data.offset; + unsigned uniform_loc = var->data.location; + process_atomic_variable(var->type, prog, &uniform_loc, + var, buffers, num_buffers, &offset, i); + } + } + } + + for (unsigned i = 0; i < ctx->Const.MaxAtomicBufferBindings; i++) { + if (buffers[i].size == 0) + continue; + + qsort(buffers[i].counters, buffers[i].num_counters, + sizeof(active_atomic_counter), + cmp_actives); + + for (unsigned j = 1; j < buffers[i].num_counters; j++) { + /* If an overlapping counter found, it must be a reference to the + * same counter from a different shader stage. 
+ */ + if (check_atomic_counters_overlap(buffers[i].counters[j-1].var, + buffers[i].counters[j].var) + && strcmp(buffers[i].counters[j-1].var->name, + buffers[i].counters[j].var->name) != 0) { + linker_error(prog, "Atomic counter %s declared at offset %d " + "which is already in use.", + buffers[i].counters[j].var->name, + buffers[i].counters[j].var->data.offset); + } + } + } + return buffers; + } +} + +void +link_assign_atomic_counter_resources(struct gl_context *ctx, + struct gl_shader_program *prog) +{ + unsigned num_buffers; + unsigned num_atomic_buffers[MESA_SHADER_STAGES] = {}; + active_atomic_buffer *abs = + find_active_atomic_counters(ctx, prog, &num_buffers); + + prog->AtomicBuffers = rzalloc_array(prog, gl_active_atomic_buffer, + num_buffers); + prog->NumAtomicBuffers = num_buffers; + + unsigned i = 0; + for (unsigned binding = 0; + binding < ctx->Const.MaxAtomicBufferBindings; + binding++) { + + /* If the binding was not used, skip. + */ + if (abs[binding].size == 0) + continue; + + active_atomic_buffer &ab = abs[binding]; + gl_active_atomic_buffer &mab = prog->AtomicBuffers[i]; + + /* Assign buffer-specific fields. */ + mab.Binding = binding; + mab.MinimumSize = ab.size; + mab.Uniforms = rzalloc_array(prog->AtomicBuffers, GLuint, + ab.num_counters); + mab.NumUniforms = ab.num_counters; + + /* Assign counter-specific fields. */ + for (unsigned j = 0; j < ab.num_counters; j++) { + ir_variable *const var = ab.counters[j].var; + gl_uniform_storage *const storage = + &prog->UniformStorage[ab.counters[j].uniform_loc]; + + mab.Uniforms[j] = ab.counters[j].uniform_loc; + if (!var->data.explicit_binding) + var->data.binding = i; + + storage->atomic_buffer_index = i; + storage->offset = var->data.offset; + storage->array_stride = (var->type->is_array() ? + var->type->without_array()->atomic_size() : 0); + if (!var->type->is_matrix()) + storage->matrix_stride = 0; + } + + /* Assign stage-specific fields. 
*/ + for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) { + if (ab.stage_references[j]) { + mab.StageReferences[j] = GL_TRUE; + num_atomic_buffers[j]++; + } else { + mab.StageReferences[j] = GL_FALSE; + } + } + + i++; + } + + /* Store a list pointers to atomic buffers per stage and store the index + * to the intra-stage buffer list in uniform storage. + */ + for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) { + if (prog->_LinkedShaders[j] && num_atomic_buffers[j] > 0) { + prog->_LinkedShaders[j]->NumAtomicBuffers = num_atomic_buffers[j]; + prog->_LinkedShaders[j]->AtomicBuffers = + rzalloc_array(prog, gl_active_atomic_buffer *, + num_atomic_buffers[j]); + + unsigned intra_stage_idx = 0; + for (unsigned i = 0; i < num_buffers; i++) { + struct gl_active_atomic_buffer *atomic_buffer = + &prog->AtomicBuffers[i]; + if (atomic_buffer->StageReferences[j]) { + prog->_LinkedShaders[j]->AtomicBuffers[intra_stage_idx] = + atomic_buffer; + + for (unsigned u = 0; u < atomic_buffer->NumUniforms; u++) { + prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].index = + intra_stage_idx; + prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].active = + true; + } + + intra_stage_idx++; + } + } + } + } + + delete [] abs; + assert(i == num_buffers); +} + +void +link_check_atomic_counter_resources(struct gl_context *ctx, + struct gl_shader_program *prog) +{ + unsigned num_buffers; + active_atomic_buffer *const abs = + find_active_atomic_counters(ctx, prog, &num_buffers); + unsigned atomic_counters[MESA_SHADER_STAGES] = {}; + unsigned atomic_buffers[MESA_SHADER_STAGES] = {}; + unsigned total_atomic_counters = 0; + unsigned total_atomic_buffers = 0; + + /* Sum the required resources. Note that this counts buffers and + * counters referenced by several shader stages multiple times + * against the combined limit -- That's the behavior the spec + * requires. 
+ */ + for (unsigned i = 0; i < ctx->Const.MaxAtomicBufferBindings; i++) { + if (abs[i].size == 0) + continue; + + for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) { + const unsigned n = abs[i].stage_references[j]; + + if (n) { + atomic_counters[j] += n; + total_atomic_counters += n; + atomic_buffers[j]++; + total_atomic_buffers++; + } + } + } + + /* Check that they are within the supported limits. */ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + if (atomic_counters[i] > ctx->Const.Program[i].MaxAtomicCounters) + linker_error(prog, "Too many %s shader atomic counters", + _mesa_shader_stage_to_string(i)); + + if (atomic_buffers[i] > ctx->Const.Program[i].MaxAtomicBuffers) + linker_error(prog, "Too many %s shader atomic counter buffers", + _mesa_shader_stage_to_string(i)); + } + + if (total_atomic_counters > ctx->Const.MaxCombinedAtomicCounters) + linker_error(prog, "Too many combined atomic counters"); + + if (total_atomic_buffers > ctx->Const.MaxCombinedAtomicBuffers) + linker_error(prog, "Too many combined atomic buffers"); + + delete [] abs; +} diff --git a/src/compiler/glsl/link_functions.cpp b/src/compiler/glsl/link_functions.cpp new file mode 100644 index 0000000..537f4dc --- /dev/null +++ b/src/compiler/glsl/link_functions.cpp @@ -0,0 +1,348 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "main/core.h" +#include "glsl_symbol_table.h" +#include "glsl_parser_extras.h" +#include "ir.h" +#include "program.h" +#include "program/hash_table.h" +#include "linker.h" + +static ir_function_signature * +find_matching_signature(const char *name, const exec_list *actual_parameters, + gl_shader **shader_list, unsigned num_shaders, + bool use_builtin); + +namespace { + +class call_link_visitor : public ir_hierarchical_visitor { +public: + call_link_visitor(gl_shader_program *prog, gl_shader *linked, + gl_shader **shader_list, unsigned num_shaders) + { + this->prog = prog; + this->shader_list = shader_list; + this->num_shaders = num_shaders; + this->success = true; + this->linked = linked; + + this->locals = hash_table_ctor(0, hash_table_pointer_hash, + hash_table_pointer_compare); + } + + ~call_link_visitor() + { + hash_table_dtor(this->locals); + } + + virtual ir_visitor_status visit(ir_variable *ir) + { + hash_table_insert(locals, ir, ir); + return visit_continue; + } + + virtual ir_visitor_status visit_enter(ir_call *ir) + { + /* If ir is an ir_call from a function that was imported from another + * shader callee will point to an ir_function_signature in the original + * shader. In this case the function signature MUST NOT BE MODIFIED. + * Doing so will modify the original shader. This may prevent that + * shader from being linkable in other programs. 
+ */ + const ir_function_signature *const callee = ir->callee; + assert(callee != NULL); + const char *const name = callee->function_name(); + + /* Determine if the requested function signature already exists in the + * final linked shader. If it does, use it as the target of the call. + */ + ir_function_signature *sig = + find_matching_signature(name, &callee->parameters, &linked, 1, + ir->use_builtin); + if (sig != NULL) { + ir->callee = sig; + return visit_continue; + } + + /* Try to find the signature in one of the other shaders that is being + * linked. If it's not found there, return an error. + */ + sig = find_matching_signature(name, &ir->actual_parameters, shader_list, + num_shaders, ir->use_builtin); + if (sig == NULL) { + /* FINISHME: Log the full signature of unresolved function. + */ + linker_error(this->prog, "unresolved reference to function `%s'\n", + name); + this->success = false; + return visit_stop; + } + + /* Find the prototype information in the linked shader. Generate any + * details that may be missing. + */ + ir_function *f = linked->symbols->get_function(name); + if (f == NULL) { + f = new(linked) ir_function(name); + + /* Add the new function to the linked IR. Put it at the end + * so that it comes after any global variable declarations + * that it refers to. + */ + linked->symbols->add_function(f); + linked->ir->push_tail(f); + } + + ir_function_signature *linked_sig = + f->exact_matching_signature(NULL, &callee->parameters); + if ((linked_sig == NULL) + || ((linked_sig != NULL) + && (linked_sig->is_builtin() != ir->use_builtin))) { + linked_sig = new(linked) ir_function_signature(callee->return_type); + f->add_signature(linked_sig); + } + + /* At this point linked_sig and called may be the same. If ir is an + * ir_call from linked then linked_sig and callee will be + * ir_function_signatures that have no definitions (is_defined is false). 
+ */ + assert(!linked_sig->is_defined); + assert(linked_sig->body.is_empty()); + + /* Create an in-place clone of the function definition. This multistep + * process introduces some complexity here, but it has some advantages. + * The parameter list and the and function body are cloned separately. + * The clone of the parameter list is used to prime the hashtable used + * to replace variable references in the cloned body. + * + * The big advantage is that the ir_function_signature does not change. + * This means that we don't have to process the rest of the IR tree to + * patch ir_call nodes. In addition, there is no way to remove or + * replace signature stored in a function. One could easily be added, + * but this avoids the need. + */ + struct hash_table *ht = hash_table_ctor(0, hash_table_pointer_hash, + hash_table_pointer_compare); + exec_list formal_parameters; + foreach_in_list(const ir_instruction, original, &sig->parameters) { + assert(const_cast<ir_instruction *>(original)->as_variable()); + + ir_instruction *copy = original->clone(linked, ht); + formal_parameters.push_tail(copy); + } + + linked_sig->replace_parameters(&formal_parameters); + + linked_sig->is_intrinsic = sig->is_intrinsic; + + if (sig->is_defined) { + foreach_in_list(const ir_instruction, original, &sig->body) { + ir_instruction *copy = original->clone(linked, ht); + linked_sig->body.push_tail(copy); + } + + linked_sig->is_defined = true; + } + + hash_table_dtor(ht); + + /* Patch references inside the function to things outside the function + * (i.e., function calls and global variables). + */ + linked_sig->accept(this); + + ir->callee = linked_sig; + + return visit_continue; + } + + virtual ir_visitor_status visit_leave(ir_call *ir) + { + /* Traverse list of function parameters, and for array parameters + * propagate max_array_access. Otherwise arrays that are only referenced + * from inside functions via function parameters will be incorrectly + * optimized. 
This will lead to incorrect code being generated (or worse). + * Do it when leaving the node so the children would propagate their + * array accesses first. + */ + + const exec_node *formal_param_node = ir->callee->parameters.get_head(); + if (formal_param_node) { + const exec_node *actual_param_node = ir->actual_parameters.get_head(); + while (!actual_param_node->is_tail_sentinel()) { + ir_variable *formal_param = (ir_variable *) formal_param_node; + ir_rvalue *actual_param = (ir_rvalue *) actual_param_node; + + formal_param_node = formal_param_node->get_next(); + actual_param_node = actual_param_node->get_next(); + + if (formal_param->type->is_array()) { + ir_dereference_variable *deref = actual_param->as_dereference_variable(); + if (deref && deref->var && deref->var->type->is_array()) { + deref->var->data.max_array_access = + MAX2(formal_param->data.max_array_access, + deref->var->data.max_array_access); + } + } + } + } + return visit_continue; + } + + virtual ir_visitor_status visit(ir_dereference_variable *ir) + { + if (hash_table_find(locals, ir->var) == NULL) { + /* The non-function variable must be a global, so try to find the + * variable in the shader's symbol table. If the variable is not + * found, then it's a global that *MUST* be defined in the original + * shader. + */ + ir_variable *var = linked->symbols->get_variable(ir->var->name); + if (var == NULL) { + /* Clone the ir_variable that the dereference already has and add + * it to the linked shader. + */ + var = ir->var->clone(linked, NULL); + linked->symbols->add_variable(var); + linked->ir->push_head(var); + } else { + if (var->type->is_array()) { + /* It is possible to have a global array declared in multiple + * shaders without a size. The array is implicitly sized by + * the maximal access to it in *any* shader. Because of this, + * we need to track the maximal access to the array as linking + * pulls more functions in that access the array. 
+ */ + var->data.max_array_access = + MAX2(var->data.max_array_access, + ir->var->data.max_array_access); + + if (var->type->length == 0 && ir->var->type->length != 0) + var->type = ir->var->type; + } + if (var->is_interface_instance()) { + /* Similarly, we need implicit sizes of arrays within interface + * blocks to be sized by the maximal access in *any* shader. + */ + unsigned *const linked_max_ifc_array_access = + var->get_max_ifc_array_access(); + unsigned *const ir_max_ifc_array_access = + ir->var->get_max_ifc_array_access(); + + assert(linked_max_ifc_array_access != NULL); + assert(ir_max_ifc_array_access != NULL); + + for (unsigned i = 0; i < var->get_interface_type()->length; + i++) { + linked_max_ifc_array_access[i] = + MAX2(linked_max_ifc_array_access[i], + ir_max_ifc_array_access[i]); + } + } + } + + ir->var = var; + } + + return visit_continue; + } + + /** Was function linking successful? */ + bool success; + +private: + /** + * Shader program being linked + * + * This is only used for logging error messages. + */ + gl_shader_program *prog; + + /** List of shaders available for linking. */ + gl_shader **shader_list; + + /** Number of shaders available for linking. */ + unsigned num_shaders; + + /** + * Final linked shader + * + * This is used two ways. It is used to find global variables in the + * linked shader that are accessed by the function. It is also used to add + * global variables from the shader where the function originated. + */ + gl_shader *linked; + + /** + * Table of variables local to the function. 
+ */ + hash_table *locals; +}; + +} /* anonymous namespace */ + +/** + * Searches a list of shaders for a particular function definition + */ +ir_function_signature * +find_matching_signature(const char *name, const exec_list *actual_parameters, + gl_shader **shader_list, unsigned num_shaders, + bool use_builtin) +{ + for (unsigned i = 0; i < num_shaders; i++) { + ir_function *const f = shader_list[i]->symbols->get_function(name); + + if (f == NULL) + continue; + + ir_function_signature *sig = + f->matching_signature(NULL, actual_parameters, use_builtin); + + if ((sig == NULL) || + (!sig->is_defined && !sig->is_intrinsic)) + continue; + + /* If this function expects to bind to a built-in function and the + * signature that we found isn't a built-in, keep looking. Also keep + * looking if we expect a non-built-in but found a built-in. + */ + if (use_builtin != sig->is_builtin()) + continue; + + return sig; + } + + return NULL; +} + + +bool +link_function_calls(gl_shader_program *prog, gl_shader *main, + gl_shader **shader_list, unsigned num_shaders) +{ + call_link_visitor v(prog, main, shader_list, num_shaders); + + v.run(main->ir); + return v.success; +} diff --git a/src/compiler/glsl/link_interface_blocks.cpp b/src/compiler/glsl/link_interface_blocks.cpp new file mode 100644 index 0000000..64c30fe --- /dev/null +++ b/src/compiler/glsl/link_interface_blocks.cpp @@ -0,0 +1,357 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * 
paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file link_interface_blocks.cpp + * Linker support for GLSL's interface blocks. + */ + +#include "ir.h" +#include "glsl_symbol_table.h" +#include "linker.h" +#include "main/macros.h" +#include "util/hash_table.h" + + +namespace { + +/** + * Check if two interfaces match, according to intrastage interface matching + * rules. If they do, and the first interface uses an unsized array, it will + * be updated to reflect the array size declared in the second interface. + */ +bool +intrastage_match(ir_variable *a, + ir_variable *b, + struct gl_shader_program *prog) +{ + /* Types must match. */ + if (a->get_interface_type() != b->get_interface_type()) { + /* Exception: if both the interface blocks are implicitly declared, + * don't force their types to match. They might mismatch due to the two + * shaders using different GLSL versions, and that's ok. + */ + if (a->data.how_declared != ir_var_declared_implicitly || + b->data.how_declared != ir_var_declared_implicitly) + return false; + } + + /* Presence/absence of interface names must match. */ + if (a->is_interface_instance() != b->is_interface_instance()) + return false; + + /* For uniforms, instance names need not match. For shader ins/outs, + * it's not clear from the spec whether they need to match, but + * Mesa's implementation relies on them matching. 
+ */ + if (a->is_interface_instance() && b->data.mode != ir_var_uniform && + b->data.mode != ir_var_shader_storage && + strcmp(a->name, b->name) != 0) { + return false; + } + + /* If a block is an array then it must match across the shader. + * Unsized arrays are also processed and matched agaist sized arrays. + */ + if (b->type != a->type && + (b->is_interface_instance() || a->is_interface_instance()) && + !validate_intrastage_arrays(prog, b, a)) + return false; + + return true; +} + + +/** + * Check if two interfaces match, according to interstage (in/out) interface + * matching rules. + * + * If \c extra_array_level is true, the consumer interface is required to be + * an array and the producer interface is required to be a non-array. + * This is used for tessellation control and geometry shader consumers. + */ +bool +interstage_match(ir_variable *producer, + ir_variable *consumer, + bool extra_array_level) +{ + /* Unsized arrays should not occur during interstage linking. They + * should have all been assigned a size by link_intrastage_shaders. + */ + assert(!consumer->type->is_unsized_array()); + assert(!producer->type->is_unsized_array()); + + /* Types must match. */ + if (consumer->get_interface_type() != producer->get_interface_type()) { + /* Exception: if both the interface blocks are implicitly declared, + * don't force their types to match. They might mismatch due to the two + * shaders using different GLSL versions, and that's ok. + */ + if (consumer->data.how_declared != ir_var_declared_implicitly || + producer->data.how_declared != ir_var_declared_implicitly) + return false; + } + + /* Ignore outermost array if geom shader */ + const glsl_type *consumer_instance_type; + if (extra_array_level) { + consumer_instance_type = consumer->type->fields.array; + } else { + consumer_instance_type = consumer->type; + } + + /* If a block is an array then it must match across shaders. 
+ * Since unsized arrays have been ruled out, we can check this by just + * making sure the types are equal. + */ + if ((consumer->is_interface_instance() && + consumer_instance_type->is_array()) || + (producer->is_interface_instance() && + producer->type->is_array())) { + if (consumer_instance_type != producer->type) + return false; + } + + return true; +} + + +/** + * This class keeps track of a mapping from an interface block name to the + * necessary information about that interface block to determine whether to + * generate a link error. + * + * Note: this class is expected to be short lived, so it doesn't make copies + * of the strings it references; it simply borrows the pointers from the + * ir_variable class. + */ +class interface_block_definitions +{ +public: + interface_block_definitions() + : mem_ctx(ralloc_context(NULL)), + ht(_mesa_hash_table_create(NULL, _mesa_key_hash_string, + _mesa_key_string_equal)) + { + } + + ~interface_block_definitions() + { + ralloc_free(mem_ctx); + _mesa_hash_table_destroy(ht, NULL); + } + + /** + * Lookup the interface definition. Return NULL if none is found. + */ + ir_variable *lookup(ir_variable *var) + { + if (var->data.explicit_location && + var->data.location >= VARYING_SLOT_VAR0) { + char location_str[11]; + snprintf(location_str, 11, "%d", var->data.location); + + const struct hash_entry *entry = + _mesa_hash_table_search(ht, location_str); + return entry ? (ir_variable *) entry->data : NULL; + } else { + const struct hash_entry *entry = + _mesa_hash_table_search(ht, var->get_interface_type()->name); + return entry ? (ir_variable *) entry->data : NULL; + } + } + + /** + * Add a new interface definition. + */ + void store(ir_variable *var) + { + if (var->data.explicit_location && + var->data.location >= VARYING_SLOT_VAR0) { + /* If explicit location is given then lookup the variable by location. + * We turn the location into a string and use this as the hash key + * rather than the name. 
Note: We allocate enough space for a 32-bit + * unsigned location value which is overkill but future proof. + */ + char location_str[11]; + snprintf(location_str, 11, "%d", var->data.location); + _mesa_hash_table_insert(ht, ralloc_strdup(mem_ctx, location_str), var); + } else { + _mesa_hash_table_insert(ht, var->get_interface_type()->name, var); + } + } + +private: + /** + * Ralloc context for data structures allocated by this class. + */ + void *mem_ctx; + + /** + * Hash table mapping interface block name to an \c + * ir_variable. + */ + hash_table *ht; +}; + + +}; /* anonymous namespace */ + + +void +validate_intrastage_interface_blocks(struct gl_shader_program *prog, + const gl_shader **shader_list, + unsigned num_shaders) +{ + interface_block_definitions in_interfaces; + interface_block_definitions out_interfaces; + interface_block_definitions uniform_interfaces; + interface_block_definitions buffer_interfaces; + + for (unsigned int i = 0; i < num_shaders; i++) { + if (shader_list[i] == NULL) + continue; + + foreach_in_list(ir_instruction, node, shader_list[i]->ir) { + ir_variable *var = node->as_variable(); + if (!var) + continue; + + const glsl_type *iface_type = var->get_interface_type(); + + if (iface_type == NULL) + continue; + + interface_block_definitions *definitions; + switch (var->data.mode) { + case ir_var_shader_in: + definitions = &in_interfaces; + break; + case ir_var_shader_out: + definitions = &out_interfaces; + break; + case ir_var_uniform: + definitions = &uniform_interfaces; + break; + case ir_var_shader_storage: + definitions = &buffer_interfaces; + break; + default: + /* Only in, out, and uniform interfaces are legal, so we should + * never get here. + */ + assert(!"illegal interface type"); + continue; + } + + ir_variable *prev_def = definitions->lookup(var); + if (prev_def == NULL) { + /* This is the first time we've seen the interface, so save + * it into the appropriate data structure. 
+ */ + definitions->store(var); + } else if (!intrastage_match(prev_def, var, prog)) { + linker_error(prog, "definitions of interface block `%s' do not" + " match\n", iface_type->name); + return; + } + } + } +} + +void +validate_interstage_inout_blocks(struct gl_shader_program *prog, + const gl_shader *producer, + const gl_shader *consumer) +{ + interface_block_definitions definitions; + /* VS -> GS, VS -> TCS, VS -> TES, TES -> GS */ + const bool extra_array_level = (producer->Stage == MESA_SHADER_VERTEX && + consumer->Stage != MESA_SHADER_FRAGMENT) || + consumer->Stage == MESA_SHADER_GEOMETRY; + + /* Add input interfaces from the consumer to the symbol table. */ + foreach_in_list(ir_instruction, node, consumer->ir) { + ir_variable *var = node->as_variable(); + if (!var || !var->get_interface_type() || var->data.mode != ir_var_shader_in) + continue; + + definitions.store(var); + } + + /* Verify that the producer's output interfaces match. */ + foreach_in_list(ir_instruction, node, producer->ir) { + ir_variable *var = node->as_variable(); + if (!var || !var->get_interface_type() || var->data.mode != ir_var_shader_out) + continue; + + ir_variable *consumer_def = definitions.lookup(var); + + /* The consumer doesn't use this output block. Ignore it. 
*/ + if (consumer_def == NULL) + continue; + + if (!interstage_match(var, consumer_def, extra_array_level)) { + linker_error(prog, "definitions of interface block `%s' do not " + "match\n", var->get_interface_type()->name); + return; + } + } +} + + +void +validate_interstage_uniform_blocks(struct gl_shader_program *prog, + gl_shader **stages, int num_stages) +{ + interface_block_definitions definitions; + + for (int i = 0; i < num_stages; i++) { + if (stages[i] == NULL) + continue; + + const gl_shader *stage = stages[i]; + foreach_in_list(ir_instruction, node, stage->ir) { + ir_variable *var = node->as_variable(); + if (!var || !var->get_interface_type() || + (var->data.mode != ir_var_uniform && + var->data.mode != ir_var_shader_storage)) + continue; + + ir_variable *old_def = definitions.lookup(var); + if (old_def == NULL) { + definitions.store(var); + } else { + /* Interstage uniform matching rules are the same as intrastage + * uniform matchin rules (for uniforms, it is as though all + * shaders are in the same shader stage). 
+ */ + if (!intrastage_match(old_def, var, prog)) { + linker_error(prog, "definitions of interface block `%s' do not " + "match\n", var->get_interface_type()->name); + return; + } + } + } + } +} diff --git a/src/compiler/glsl/link_uniform_block_active_visitor.cpp b/src/compiler/glsl/link_uniform_block_active_visitor.cpp new file mode 100644 index 0000000..54fea70 --- /dev/null +++ b/src/compiler/glsl/link_uniform_block_active_visitor.cpp @@ -0,0 +1,296 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "link_uniform_block_active_visitor.h" +#include "program.h" + +static link_uniform_block_active * +process_block(void *mem_ctx, struct hash_table *ht, ir_variable *var) +{ + const hash_entry *const existing_block = + _mesa_hash_table_search(ht, var->get_interface_type()->name); + + const glsl_type *const block_type = var->is_interface_instance() + ? 
var->type : var->get_interface_type(); + + + /* If a block with this block-name has not previously been seen, add it. + * If a block with this block-name has been seen, it must be identical to + * the block currently being examined. + */ + if (existing_block == NULL) { + link_uniform_block_active *const b = + rzalloc(mem_ctx, struct link_uniform_block_active); + + b->type = block_type; + b->has_instance_name = var->is_interface_instance(); + b->is_shader_storage = var->data.mode == ir_var_shader_storage; + + if (var->data.explicit_binding) { + b->has_binding = true; + b->binding = var->data.binding; + } else { + b->has_binding = false; + b->binding = 0; + } + + _mesa_hash_table_insert(ht, var->get_interface_type()->name, (void *) b); + return b; + } else { + link_uniform_block_active *const b = + (link_uniform_block_active *) existing_block->data; + + if (b->type != block_type + || b->has_instance_name != var->is_interface_instance()) + return NULL; + else + return b; + } + + assert(!"Should not get here."); + return NULL; +} + +/* For arrays of arrays this function will give us a middle ground between + * detecting inactive uniform blocks and structuring them in a way that makes + * it easy to calculate the offset for indirect indexing. + * + * For example given the shader: + * + * uniform ArraysOfArraysBlock + * { + * vec4 a; + * } i[3][4][5]; + * + * void main() + * { + * vec4 b = i[0][1][1].a; + * gl_Position = i[2][2][3].a + b; + * } + * + * There are only 2 active blocks above but for the sake of indirect indexing + * and not over complicating the code we will end up with a count of 8. 
+ * Here each dimension has 2 different indices counted so we end up with 2*2*2 + */ +static struct uniform_block_array_elements ** +process_arrays(void *mem_ctx, ir_dereference_array *ir, + struct link_uniform_block_active *block) +{ + if (ir) { + struct uniform_block_array_elements **ub_array_ptr = + process_arrays(mem_ctx, ir->array->as_dereference_array(), block); + if (*ub_array_ptr == NULL) { + *ub_array_ptr = rzalloc(mem_ctx, struct uniform_block_array_elements); + (*ub_array_ptr)->ir = ir; + } + + struct uniform_block_array_elements *ub_array = *ub_array_ptr; + ir_constant *c = ir->array_index->as_constant(); + if (c) { + /* Index is a constant, so mark just that element used, + * if not already. + */ + const unsigned idx = c->get_uint_component(0); + + unsigned i; + for (i = 0; i < ub_array->num_array_elements; i++) { + if (ub_array->array_elements[i] == idx) + break; + } + + assert(i <= ub_array->num_array_elements); + + if (i == ub_array->num_array_elements) { + ub_array->array_elements = reralloc(mem_ctx, + ub_array->array_elements, + unsigned, + ub_array->num_array_elements + 1); + + ub_array->array_elements[ub_array->num_array_elements] = idx; + + ub_array->num_array_elements++; + } + } else { + /* The array index is not a constant, + * so mark the entire array used. 
+ */ + assert(ir->array->type->is_array()); + if (ub_array->num_array_elements < ir->array->type->length) { + ub_array->num_array_elements = ir->array->type->length; + ub_array->array_elements = reralloc(mem_ctx, + ub_array->array_elements, + unsigned, + ub_array->num_array_elements); + + for (unsigned i = 0; i < ub_array->num_array_elements; i++) { + ub_array->array_elements[i] = i; + } + } + } + return &ub_array->array; + } else { + return &block->array; + } +} + +ir_visitor_status +link_uniform_block_active_visitor::visit(ir_variable *var) +{ + if (!var->is_in_buffer_block()) + return visit_continue; + + /* Section 2.11.6 (Uniform Variables) of the OpenGL ES 3.0.3 spec says: + * + * "All members of a named uniform block declared with a shared or + * std140 layout qualifier are considered active, even if they are not + * referenced in any shader in the program. The uniform block itself is + * also considered active, even if no member of the block is + * referenced." + */ + if (var->get_interface_type()->interface_packing == + GLSL_INTERFACE_PACKING_PACKED) + return visit_continue; + + /* Process the block. Bail if there was an error. + */ + link_uniform_block_active *const b = + process_block(this->mem_ctx, this->ht, var); + if (b == NULL) { + linker_error(this->prog, + "uniform block `%s' has mismatching definitions", + var->get_interface_type()->name); + this->success = false; + return visit_stop; + } + + assert(b->array == NULL); + assert(b->type != NULL); + assert(!b->type->is_array() || b->has_instance_name); + + /* For uniform block arrays declared with a shared or std140 layout + * qualifier, mark all its instances as used. 
+ */ + const glsl_type *type = b->type; + struct uniform_block_array_elements **ub_array = &b->array; + while (type->is_array()) { + assert(b->type->length > 0); + + *ub_array = rzalloc(this->mem_ctx, struct uniform_block_array_elements); + (*ub_array)->num_array_elements = type->length; + (*ub_array)->array_elements = reralloc(this->mem_ctx, + (*ub_array)->array_elements, + unsigned, + (*ub_array)->num_array_elements); + + for (unsigned i = 0; i < (*ub_array)->num_array_elements; i++) { + (*ub_array)->array_elements[i] = i; + } + ub_array = &(*ub_array)->array; + type = type->fields.array; + } + + return visit_continue; +} + +ir_visitor_status +link_uniform_block_active_visitor::visit_enter(ir_dereference_array *ir) +{ + /* cycle through arrays of arrays */ + ir_dereference_array *base_ir = ir; + while (base_ir->array->ir_type == ir_type_dereference_array) + base_ir = base_ir->array->as_dereference_array(); + + ir_dereference_variable *const d = + base_ir->array->as_dereference_variable(); + ir_variable *const var = (d == NULL) ? NULL : d->var; + + /* If the r-value being dereferenced is not a variable (e.g., a field of a + * structure) or is not a uniform block instance, continue. + * + * WARNING: It is not enough for the variable to be part of uniform block. + * It must represent the entire block. Arrays (or matrices) inside blocks + * that lack an instance name are handled by the ir_dereference_variable + * function. + */ + if (var == NULL + || !var->is_in_buffer_block() + || !var->is_interface_instance()) + return visit_continue; + + /* Process the block. Bail if there was an error. + */ + link_uniform_block_active *const b = + process_block(this->mem_ctx, this->ht, var); + if (b == NULL) { + linker_error(prog, + "uniform block `%s' has mismatching definitions", + var->get_interface_type()->name); + this->success = false; + return visit_stop; + } + + /* Block arrays must be declared with an instance name. 
+ */ + assert(b->has_instance_name); + assert(b->type != NULL); + + /* If the block array was declared with a shared or + * std140 layout qualifier, all its instances have been already marked + * as used in link_uniform_block_active_visitor::visit(ir_variable *). + */ + if (var->get_interface_type()->interface_packing == + GLSL_INTERFACE_PACKING_PACKED) { + b->var = var; + process_arrays(this->mem_ctx, ir, b); + } + + return visit_continue_with_parent; +} + +ir_visitor_status +link_uniform_block_active_visitor::visit(ir_dereference_variable *ir) +{ + ir_variable *var = ir->var; + + if (!var->is_in_buffer_block()) + return visit_continue; + + assert(!var->is_interface_instance() || !var->type->is_array()); + + /* Process the block. Bail if there was an error. + */ + link_uniform_block_active *const b = + process_block(this->mem_ctx, this->ht, var); + if (b == NULL) { + linker_error(this->prog, + "uniform block `%s' has mismatching definitions", + var->get_interface_type()->name); + this->success = false; + return visit_stop; + } + + assert(b->array == NULL); + assert(b->type != NULL); + + return visit_continue; +} diff --git a/src/compiler/glsl/link_uniform_block_active_visitor.h b/src/compiler/glsl/link_uniform_block_active_visitor.h new file mode 100644 index 0000000..afb52c1 --- /dev/null +++ b/src/compiler/glsl/link_uniform_block_active_visitor.h @@ -0,0 +1,74 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be 
included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#pragma once +#ifndef LINK_UNIFORM_BLOCK_ACTIVE_VISITOR_H +#define LINK_UNIFORM_BLOCK_ACTIVE_VISITOR_H + +#include "ir.h" +#include "util/hash_table.h" + +struct uniform_block_array_elements { + unsigned *array_elements; + unsigned num_array_elements; + + ir_dereference_array *ir; + + struct uniform_block_array_elements *array; +}; + +struct link_uniform_block_active { + const glsl_type *type; + ir_variable *var; + + struct uniform_block_array_elements *array; + + unsigned binding; + + bool has_instance_name; + bool has_binding; + bool is_shader_storage; +}; + +class link_uniform_block_active_visitor : public ir_hierarchical_visitor { +public: + link_uniform_block_active_visitor(void *mem_ctx, struct hash_table *ht, + struct gl_shader_program *prog) + : success(true), prog(prog), ht(ht), mem_ctx(mem_ctx) + { + /* empty */ + } + + virtual ir_visitor_status visit_enter(ir_dereference_array *); + virtual ir_visitor_status visit(ir_dereference_variable *); + virtual ir_visitor_status visit(ir_variable *); + + bool success; + +private: + struct gl_shader_program *prog; + struct hash_table *ht; + void *mem_ctx; +}; + +#endif /* LINK_UNIFORM_BLOCK_ACTIVE_VISITOR_H */ diff --git a/src/compiler/glsl/link_uniform_blocks.cpp b/src/compiler/glsl/link_uniform_blocks.cpp new file mode 100644 index 0000000..7d75576 --- /dev/null +++ b/src/compiler/glsl/link_uniform_blocks.cpp @@ -0,0 +1,472 @@ +/* + * Copyright © 2012 
Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "main/core.h" +#include "ir.h" +#include "linker.h" +#include "ir_uniform.h" +#include "link_uniform_block_active_visitor.h" +#include "util/hash_table.h" +#include "program.h" + +namespace { + +class ubo_visitor : public program_resource_visitor { +public: + ubo_visitor(void *mem_ctx, gl_uniform_buffer_variable *variables, + unsigned num_variables) + : index(0), offset(0), buffer_size(0), variables(variables), + num_variables(num_variables), mem_ctx(mem_ctx), is_array_instance(false) + { + /* empty */ + } + + void process(const glsl_type *type, const char *name) + { + this->offset = 0; + this->buffer_size = 0; + this->is_array_instance = strchr(name, ']') != NULL; + this->program_resource_visitor::process(type, name); + } + + unsigned index; + unsigned offset; + unsigned buffer_size; + gl_uniform_buffer_variable *variables; + unsigned num_variables; + void *mem_ctx; + bool is_array_instance; + +private: + virtual void visit_field(const glsl_type *type, const char *name, + bool row_major) + { + (void) type; + (void) name; + (void) row_major; + assert(!"Should not get here."); + } + + virtual void enter_record(const glsl_type *type, const char *, + bool row_major, const unsigned packing) { + assert(type->is_record()); + if (packing == GLSL_INTERFACE_PACKING_STD430) + this->offset = glsl_align( + this->offset, type->std430_base_alignment(row_major)); + else + this->offset = glsl_align( + this->offset, type->std140_base_alignment(row_major)); + } + + virtual void leave_record(const glsl_type *type, const char *, + bool row_major, const unsigned packing) { + assert(type->is_record()); + + /* If this is the last field of a structure, apply rule #9. The + * GL_ARB_uniform_buffer_object spec says: + * + * "The structure may have padding at the end; the base offset of + * the member following the sub-structure is rounded up to the next + * multiple of the base alignment of the structure." 
+ */ + if (packing == GLSL_INTERFACE_PACKING_STD430) + this->offset = glsl_align( + this->offset, type->std430_base_alignment(row_major)); + else + this->offset = glsl_align( + this->offset, type->std140_base_alignment(row_major)); + } + + virtual void visit_field(const glsl_type *type, const char *name, + bool row_major, const glsl_type *, + const unsigned packing, + bool last_field) + { + assert(this->index < this->num_variables); + + gl_uniform_buffer_variable *v = &this->variables[this->index++]; + + v->Name = ralloc_strdup(mem_ctx, name); + v->Type = type; + v->RowMajor = type->without_array()->is_matrix() && row_major; + + if (this->is_array_instance) { + v->IndexName = ralloc_strdup(mem_ctx, name); + + char *open_bracket = strchr(v->IndexName, '['); + assert(open_bracket != NULL); + + char *close_bracket = strchr(open_bracket, '.') - 1; + assert(close_bracket != NULL); + + /* Length of the tail without the ']' but with the NUL. + */ + unsigned len = strlen(close_bracket + 1) + 1; + + memmove(open_bracket, close_bracket + 1, len); + } else { + v->IndexName = v->Name; + } + + unsigned alignment = 0; + unsigned size = 0; + + /* From ARB_program_interface_query: + * + * "If the final member of an active shader storage block is array + * with no declared size, the minimum buffer size is computed + * assuming the array was declared as an array with one element." + * + * For that reason, we use the base type of the unsized array to calculate + * its size. We don't need to check if the unsized array is the last member + * of a shader storage block (that check was already done by the parser). 
+ */ + const glsl_type *type_for_size = type; + if (type->is_unsized_array()) { + assert(last_field); + type_for_size = type->without_array(); + } + + if (packing == GLSL_INTERFACE_PACKING_STD430) { + alignment = type->std430_base_alignment(v->RowMajor); + size = type_for_size->std430_size(v->RowMajor); + } else { + alignment = type->std140_base_alignment(v->RowMajor); + size = type_for_size->std140_size(v->RowMajor); + } + + this->offset = glsl_align(this->offset, alignment); + v->Offset = this->offset; + + this->offset += size; + + /* From the GL_ARB_uniform_buffer_object spec: + * + * "For uniform blocks laid out according to [std140] rules, the + * minimum buffer object size returned by the + * UNIFORM_BLOCK_DATA_SIZE query is derived by taking the offset of + * the last basic machine unit consumed by the last uniform of the + * uniform block (including any end-of-array or end-of-structure + * padding), adding one, and rounding up to the next multiple of + * the base alignment required for a vec4." 
+ */ + this->buffer_size = glsl_align(this->offset, 16); + } +}; + +class count_block_size : public program_resource_visitor { +public: + count_block_size() : num_active_uniforms(0) + { + /* empty */ + } + + unsigned num_active_uniforms; + +private: + virtual void visit_field(const glsl_type *type, const char *name, + bool row_major) + { + (void) type; + (void) name; + (void) row_major; + this->num_active_uniforms++; + } +}; + +} /* anonymous namespace */ + +struct block { + const glsl_type *type; + bool has_instance_name; +}; + +static void +process_block_array(struct uniform_block_array_elements *ub_array, char **name, + size_t name_length, gl_uniform_block *blocks, + ubo_visitor *parcel, gl_uniform_buffer_variable *variables, + const struct link_uniform_block_active *const b, + unsigned *block_index, unsigned *binding_offset, + struct gl_context *ctx, struct gl_shader_program *prog) +{ + if (ub_array) { + for (unsigned j = 0; j < ub_array->num_array_elements; j++) { + size_t new_length = name_length; + + /* Append the subscript to the current variable name */ + ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", + ub_array->array_elements[j]); + + process_block_array(ub_array->array, name, new_length, blocks, + parcel, variables, b, block_index, + binding_offset, ctx, prog); + } + } else { + unsigned i = *block_index; + const glsl_type *type = b->type->without_array(); + + blocks[i].Name = ralloc_strdup(blocks, *name); + blocks[i].Uniforms = &variables[(*parcel).index]; + + /* The GL_ARB_shading_language_420pack spec says: + * + * "If the binding identifier is used with a uniform block + * instanced as an array then the first element of the array + * takes the specified block binding and each subsequent + * element takes the next consecutive uniform block binding + * point." + */ + blocks[i].Binding = (b->has_binding) ? 
b->binding + *binding_offset : 0; + + blocks[i].UniformBufferSize = 0; + blocks[i]._Packing = gl_uniform_block_packing(type->interface_packing); + + parcel->process(type, blocks[i].Name); + + blocks[i].UniformBufferSize = parcel->buffer_size; + + /* Check SSBO size is lower than maximum supported size for SSBO */ + if (b->is_shader_storage && + parcel->buffer_size > ctx->Const.MaxShaderStorageBlockSize) { + linker_error(prog, "shader storage block `%s' has size %d, " + "which is larger than than the maximum allowed (%d)", + b->type->name, + parcel->buffer_size, + ctx->Const.MaxShaderStorageBlockSize); + } + blocks[i].NumUniforms = + (unsigned)(ptrdiff_t)(&variables[parcel->index] - blocks[i].Uniforms); + blocks[i].IsShaderStorage = b->is_shader_storage; + + *block_index = *block_index + 1; + *binding_offset = *binding_offset + 1; + } +} + +/* This function resizes the array types of the block so that later we can use + * this new size to correctly calculate the offest for indirect indexing. + */ +static const glsl_type * +resize_block_array(const glsl_type *type, + struct uniform_block_array_elements *ub_array) +{ + if (type->is_array()) { + struct uniform_block_array_elements *child_array = + type->fields.array->is_array() ? ub_array->array : NULL; + const glsl_type *new_child_type = + resize_block_array(type->fields.array, child_array); + + const glsl_type *new_type = + glsl_type::get_array_instance(new_child_type, + ub_array->num_array_elements); + ub_array->ir->array->type = new_type; + return new_type; + } else { + return type; + } +} + +unsigned +link_uniform_blocks(void *mem_ctx, + struct gl_context *ctx, + struct gl_shader_program *prog, + struct gl_shader **shader_list, + unsigned num_shaders, + struct gl_uniform_block **blocks_ret) +{ + /* This hash table will track all of the uniform blocks that have been + * encountered. Since blocks with the same block-name must be the same, + * the hash is organized by block-name. 
+ */ + struct hash_table *block_hash = + _mesa_hash_table_create(mem_ctx, _mesa_key_hash_string, + _mesa_key_string_equal); + + if (block_hash == NULL) { + _mesa_error_no_memory(__func__); + linker_error(prog, "out of memory\n"); + return 0; + } + + /* Determine which uniform blocks are active. + */ + link_uniform_block_active_visitor v(mem_ctx, block_hash, prog); + for (unsigned i = 0; i < num_shaders; i++) { + visit_list_elements(&v, shader_list[i]->ir); + } + + /* Count the number of active uniform blocks. Count the total number of + * active slots in those uniform blocks. + */ + unsigned num_blocks = 0; + unsigned num_variables = 0; + count_block_size block_size; + struct hash_entry *entry; + + hash_table_foreach (block_hash, entry) { + struct link_uniform_block_active *const b = + (struct link_uniform_block_active *) entry->data; + + assert((b->array != NULL) == b->type->is_array()); + + if (b->array != NULL && + (b->type->without_array()->interface_packing == + GLSL_INTERFACE_PACKING_PACKED)) { + b->type = resize_block_array(b->type, b->array); + b->var->type = b->type; + } + + block_size.num_active_uniforms = 0; + block_size.process(b->type->without_array(), ""); + + if (b->array != NULL) { + unsigned aoa_size = b->type->arrays_of_arrays_size(); + num_blocks += aoa_size; + num_variables += aoa_size * block_size.num_active_uniforms; + } else { + num_blocks++; + num_variables += block_size.num_active_uniforms; + } + + } + + if (num_blocks == 0) { + assert(num_variables == 0); + _mesa_hash_table_destroy(block_hash, NULL); + return 0; + } + + assert(num_variables != 0); + + /* Allocate storage to hold all of the informatation related to uniform + * blocks that can be queried through the API. 
+ */ + gl_uniform_block *blocks = + ralloc_array(mem_ctx, gl_uniform_block, num_blocks); + gl_uniform_buffer_variable *variables = + ralloc_array(blocks, gl_uniform_buffer_variable, num_variables); + + /* Add each variable from each uniform block to the API tracking + * structures. + */ + unsigned i = 0; + ubo_visitor parcel(blocks, variables, num_variables); + + STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_STD140) + == unsigned(ubo_packing_std140)); + STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_SHARED) + == unsigned(ubo_packing_shared)); + STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_PACKED) + == unsigned(ubo_packing_packed)); + STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_STD430) + == unsigned(ubo_packing_std430)); + + hash_table_foreach (block_hash, entry) { + const struct link_uniform_block_active *const b = + (const struct link_uniform_block_active *) entry->data; + const glsl_type *block_type = b->type; + + if (b->array != NULL) { + unsigned binding_offset = 0; + char *name = ralloc_strdup(NULL, block_type->without_array()->name); + size_t name_length = strlen(name); + + assert(b->has_instance_name); + process_block_array(b->array, &name, name_length, blocks, &parcel, + variables, b, &i, &binding_offset, ctx, prog); + ralloc_free(name); + } else { + blocks[i].Name = ralloc_strdup(blocks, block_type->name); + blocks[i].Uniforms = &variables[parcel.index]; + blocks[i].Binding = (b->has_binding) ? b->binding : 0; + blocks[i].UniformBufferSize = 0; + blocks[i]._Packing = + gl_uniform_block_packing(block_type->interface_packing); + + parcel.process(block_type, + b->has_instance_name ? 
block_type->name : ""); + + blocks[i].UniformBufferSize = parcel.buffer_size; + + /* Check SSBO size is lower than maximum supported size for SSBO */ + if (b->is_shader_storage && + parcel.buffer_size > ctx->Const.MaxShaderStorageBlockSize) { + linker_error(prog, "shader storage block `%s' has size %d, " + "which is larger than than the maximum allowed (%d)", + block_type->name, + parcel.buffer_size, + ctx->Const.MaxShaderStorageBlockSize); + } + blocks[i].NumUniforms = + (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms); + + blocks[i].IsShaderStorage = b->is_shader_storage; + + i++; + } + } + + assert(parcel.index == num_variables); + + _mesa_hash_table_destroy(block_hash, NULL); + + *blocks_ret = blocks; + return num_blocks; +} + +bool +link_uniform_blocks_are_compatible(const gl_uniform_block *a, + const gl_uniform_block *b) +{ + assert(strcmp(a->Name, b->Name) == 0); + + /* Page 35 (page 42 of the PDF) in section 4.3.7 of the GLSL 1.50 spec says: + * + * "Matched block names within an interface (as defined above) must + * match in terms of having the same number of declarations with the + * same sequence of types and the same sequence of member names, as + * well as having the same member-wise layout qualification....if a + * matching block is declared as an array, then the array sizes must + * also match... Any mismatch will generate a link error." + * + * Arrays are not yet supported, so there is no check for that. 
+ */ + if (a->NumUniforms != b->NumUniforms) + return false; + + if (a->_Packing != b->_Packing) + return false; + + for (unsigned i = 0; i < a->NumUniforms; i++) { + if (strcmp(a->Uniforms[i].Name, b->Uniforms[i].Name) != 0) + return false; + + if (a->Uniforms[i].Type != b->Uniforms[i].Type) + return false; + + if (a->Uniforms[i].RowMajor != b->Uniforms[i].RowMajor) + return false; + } + + return true; +} diff --git a/src/compiler/glsl/link_uniform_initializers.cpp b/src/compiler/glsl/link_uniform_initializers.cpp new file mode 100644 index 0000000..58d21e5 --- /dev/null +++ b/src/compiler/glsl/link_uniform_initializers.cpp @@ -0,0 +1,355 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "main/core.h" +#include "ir.h" +#include "linker.h" +#include "ir_uniform.h" + +/* These functions are put in a "private" namespace instead of being marked + * static so that the unit tests can access them. See + * http://code.google.com/p/googletest/wiki/AdvancedGuide#Testing_Private_Code + */ +namespace linker { + +gl_uniform_storage * +get_storage(gl_uniform_storage *storage, unsigned num_storage, + const char *name) +{ + for (unsigned int i = 0; i < num_storage; i++) { + if (strcmp(name, storage[i].name) == 0) + return &storage[i]; + } + + return NULL; +} + +static unsigned +get_uniform_block_index(const gl_shader_program *shProg, + const char *uniformBlockName) +{ + for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) { + if (!strcmp(shProg->BufferInterfaceBlocks[i].Name, uniformBlockName)) + return i; + } + + return GL_INVALID_INDEX; +} + +void +copy_constant_to_storage(union gl_constant_value *storage, + const ir_constant *val, + const enum glsl_base_type base_type, + const unsigned int elements, + unsigned int boolean_true) +{ + for (unsigned int i = 0; i < elements; i++) { + switch (base_type) { + case GLSL_TYPE_UINT: + storage[i].u = val->value.u[i]; + break; + case GLSL_TYPE_INT: + case GLSL_TYPE_SAMPLER: + storage[i].i = val->value.i[i]; + break; + case GLSL_TYPE_FLOAT: + storage[i].f = val->value.f[i]; + break; + case GLSL_TYPE_DOUBLE: + /* XXX need to check on big-endian */ + storage[i * 2].u = *(uint32_t *)&val->value.d[i]; + storage[i * 2 + 1].u = *(((uint32_t *)&val->value.d[i]) + 1); + break; + case GLSL_TYPE_BOOL: + storage[i].b = val->value.b[i] ? boolean_true : 0; + break; + case GLSL_TYPE_ARRAY: + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_ATOMIC_UINT: + case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_VOID: + case GLSL_TYPE_SUBROUTINE: + case GLSL_TYPE_ERROR: + /* All other types should have already been filtered by other + * paths in the caller. 
+ */ + assert(!"Should not get here."); + break; + } + } +} + +/** + * Initialize an opaque uniform from the value of an explicit binding + * qualifier specified in the shader. Atomic counters are different because + * they have no storage and should be handled elsewhere. + */ +void +set_opaque_binding(void *mem_ctx, gl_shader_program *prog, + const glsl_type *type, const char *name, int *binding) +{ + + if (type->is_array() && type->fields.array->is_array()) { + const glsl_type *const element_type = type->fields.array; + + for (unsigned int i = 0; i < type->length; i++) { + const char *element_name = ralloc_asprintf(mem_ctx, "%s[%d]", name, i); + + set_opaque_binding(mem_ctx, prog, element_type, + element_name, binding); + } + } else { + struct gl_uniform_storage *const storage = + get_storage(prog->UniformStorage, prog->NumUniformStorage, name); + + if (storage == NULL) { + assert(storage != NULL); + return; + } + + const unsigned elements = MAX2(storage->array_elements, 1); + + /* Section 4.4.4 (Opaque-Uniform Layout Qualifiers) of the GLSL 4.20 spec + * says: + * + * "If the binding identifier is used with an array, the first element + * of the array takes the specified unit and each subsequent element + * takes the next consecutive unit." 
+ */ + for (unsigned int i = 0; i < elements; i++) { + storage->storage[i].i = (*binding)++; + } + + for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) { + gl_shader *shader = prog->_LinkedShaders[sh]; + + if (shader) { + if (storage->type->base_type == GLSL_TYPE_SAMPLER && + storage->opaque[sh].active) { + for (unsigned i = 0; i < elements; i++) { + const unsigned index = storage->opaque[sh].index + i; + shader->SamplerUnits[index] = storage->storage[i].i; + } + + } else if (storage->type->base_type == GLSL_TYPE_IMAGE && + storage->opaque[sh].active) { + for (unsigned i = 0; i < elements; i++) { + const unsigned index = storage->opaque[sh].index + i; + shader->ImageUnits[index] = storage->storage[i].i; + } + } + } + } + + storage->initialized = true; + } +} + +void +set_block_binding(gl_shader_program *prog, const char *block_name, int binding) +{ + const unsigned block_index = get_uniform_block_index(prog, block_name); + + if (block_index == GL_INVALID_INDEX) { + assert(block_index != GL_INVALID_INDEX); + return; + } + + /* This is a field of a UBO. val is the binding index. 
*/ + for (int i = 0; i < MESA_SHADER_STAGES; i++) { + int stage_index = prog->InterfaceBlockStageIndex[i][block_index]; + + if (stage_index != -1) { + struct gl_shader *sh = prog->_LinkedShaders[i]; + sh->BufferInterfaceBlocks[stage_index].Binding = binding; + } + } +} + +void +set_uniform_initializer(void *mem_ctx, gl_shader_program *prog, + const char *name, const glsl_type *type, + ir_constant *val, unsigned int boolean_true) +{ + const glsl_type *t_without_array = type->without_array(); + if (type->is_record()) { + ir_constant *field_constant; + + field_constant = (ir_constant *)val->components.get_head(); + + for (unsigned int i = 0; i < type->length; i++) { + const glsl_type *field_type = type->fields.structure[i].type; + const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name, + type->fields.structure[i].name); + set_uniform_initializer(mem_ctx, prog, field_name, + field_type, field_constant, boolean_true); + field_constant = (ir_constant *)field_constant->next; + } + return; + } else if (t_without_array->is_record() || + (type->is_array() && type->fields.array->is_array())) { + const glsl_type *const element_type = type->fields.array; + + for (unsigned int i = 0; i < type->length; i++) { + const char *element_name = ralloc_asprintf(mem_ctx, "%s[%d]", name, i); + + set_uniform_initializer(mem_ctx, prog, element_name, + element_type, val->array_elements[i], + boolean_true); + } + return; + } + + struct gl_uniform_storage *const storage = + get_storage(prog->UniformStorage, + prog->NumUniformStorage, + name); + if (storage == NULL) { + assert(storage != NULL); + return; + } + + if (val->type->is_array()) { + const enum glsl_base_type base_type = + val->array_elements[0]->type->base_type; + const unsigned int elements = val->array_elements[0]->type->components(); + unsigned int idx = 0; + unsigned dmul = (base_type == GLSL_TYPE_DOUBLE) ? 
2 : 1; + + assert(val->type->length >= storage->array_elements); + for (unsigned int i = 0; i < storage->array_elements; i++) { + copy_constant_to_storage(& storage->storage[idx], + val->array_elements[i], + base_type, + elements, + boolean_true); + + idx += elements * dmul; + } + } else { + copy_constant_to_storage(storage->storage, + val, + val->type->base_type, + val->type->components(), + boolean_true); + + if (storage->type->is_sampler()) { + for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) { + gl_shader *shader = prog->_LinkedShaders[sh]; + + if (shader && storage->opaque[sh].active) { + unsigned index = storage->opaque[sh].index; + + shader->SamplerUnits[index] = storage->storage[0].i; + } + } + } + } + + storage->initialized = true; +} +} + +void +link_set_uniform_initializers(struct gl_shader_program *prog, + unsigned int boolean_true) +{ + void *mem_ctx = NULL; + + for (unsigned int i = 0; i < MESA_SHADER_STAGES; i++) { + struct gl_shader *shader = prog->_LinkedShaders[i]; + + if (shader == NULL) + continue; + + foreach_in_list(ir_instruction, node, shader->ir) { + ir_variable *const var = node->as_variable(); + + if (!var || (var->data.mode != ir_var_uniform && + var->data.mode != ir_var_shader_storage)) + continue; + + if (!mem_ctx) + mem_ctx = ralloc_context(NULL); + + if (var->data.explicit_binding) { + const glsl_type *const type = var->type; + + if (type->without_array()->is_sampler() || + type->without_array()->is_image()) { + int binding = var->data.binding; + linker::set_opaque_binding(mem_ctx, prog, var->type, + var->name, &binding); + } else if (var->is_in_buffer_block()) { + const glsl_type *const iface_type = var->get_interface_type(); + + /* If the variable is an array and it is an interface instance, + * we need to set the binding for each array element. Just + * checking that the variable is an array is not sufficient. + * The variable could be an array element of a uniform block + * that lacks an instance name. 
For example: + * + * uniform U { + * float f[4]; + * }; + * + * In this case "f" would pass is_in_buffer_block (above) and + * type->is_array(), but it will fail is_interface_instance(). + */ + if (var->is_interface_instance() && var->type->is_array()) { + for (unsigned i = 0; i < var->type->length; i++) { + const char *name = + ralloc_asprintf(mem_ctx, "%s[%u]", iface_type->name, i); + + /* Section 4.4.3 (Uniform Block Layout Qualifiers) of the + * GLSL 4.20 spec says: + * + * "If the binding identifier is used with a uniform + * block instanced as an array then the first element + * of the array takes the specified block binding and + * each subsequent element takes the next consecutive + * uniform block binding point." + */ + linker::set_block_binding(prog, name, + var->data.binding + i); + } + } else { + linker::set_block_binding(prog, iface_type->name, + var->data.binding); + } + } else if (type->contains_atomic()) { + /* we don't actually need to do anything. */ + } else { + assert(!"Explicit binding not on a sampler, UBO or atomic."); + } + } else if (var->constant_initializer) { + linker::set_uniform_initializer(mem_ctx, prog, var->name, + var->type, var->constant_initializer, + boolean_true); + } + } + } + + ralloc_free(mem_ctx); +} diff --git a/src/compiler/glsl/link_uniforms.cpp b/src/compiler/glsl/link_uniforms.cpp new file mode 100644 index 0000000..33b2d4c --- /dev/null +++ b/src/compiler/glsl/link_uniforms.cpp @@ -0,0 +1,1330 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright 
notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "main/core.h" +#include "ir.h" +#include "linker.h" +#include "ir_uniform.h" +#include "glsl_symbol_table.h" +#include "program/hash_table.h" +#include "program.h" +#include "util/hash_table.h" + +/** + * \file link_uniforms.cpp + * Assign locations for GLSL uniforms. + * + * \author Ian Romanick <ian.d.romanick@intel.com> + */ + +/** + * Used by linker to indicate uniforms that have no location set. 
+ */ +#define UNMAPPED_UNIFORM_LOC ~0u + +/** + * Count the backing storage requirements for a type + */ +static unsigned +values_for_type(const glsl_type *type) +{ + if (type->is_sampler()) { + return 1; + } else if (type->is_array() && type->fields.array->is_sampler()) { + return type->array_size(); + } else { + return type->component_slots(); + } +} + +void +program_resource_visitor::process(const glsl_type *type, const char *name) +{ + assert(type->without_array()->is_record() + || type->without_array()->is_interface()); + + unsigned record_array_count = 1; + char *name_copy = ralloc_strdup(NULL, name); + unsigned packing = type->interface_packing; + + recursion(type, &name_copy, strlen(name), false, NULL, packing, false, + record_array_count); + ralloc_free(name_copy); +} + +void +program_resource_visitor::process(ir_variable *var) +{ + unsigned record_array_count = 1; + const glsl_type *t = var->type; + const glsl_type *t_without_array = var->type->without_array(); + const bool row_major = + var->data.matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR; + + const unsigned packing = var->get_interface_type() ? + var->get_interface_type()->interface_packing : + var->type->interface_packing; + + /* false is always passed for the row_major parameter to the other + * processing functions because no information is available to do + * otherwise. See the warning in linker.h. + */ + + /* Only strdup the name if we actually will need to modify it. */ + if (var->data.from_named_ifc_block_array) { + /* lower_named_interface_blocks created this variable by lowering an + * interface block array to an array variable. 
For example if the + * original source code was: + * + * out Blk { vec4 bar } foo[3]; + * + * Then the variable is now: + * + * out vec4 bar[3]; + * + * We need to visit each array element using the names constructed like + * so: + * + * Blk[0].bar + * Blk[1].bar + * Blk[2].bar + */ + assert(t->is_array()); + const glsl_type *ifc_type = var->get_interface_type(); + char *name = ralloc_strdup(NULL, ifc_type->name); + size_t name_length = strlen(name); + for (unsigned i = 0; i < t->length; i++) { + size_t new_length = name_length; + ralloc_asprintf_rewrite_tail(&name, &new_length, "[%u].%s", i, + var->name); + /* Note: row_major is only meaningful for uniform blocks, and + * lowering is only applied to non-uniform interface blocks, so we + * can safely pass false for row_major. + */ + recursion(var->type, &name, new_length, row_major, NULL, packing, + false, record_array_count); + } + ralloc_free(name); + } else if (var->data.from_named_ifc_block_nonarray) { + /* lower_named_interface_blocks created this variable by lowering a + * named interface block (non-array) to an ordinary variable. For + * example if the original source code was: + * + * out Blk { vec4 bar } foo; + * + * Then the variable is now: + * + * out vec4 bar; + * + * We need to visit this variable using the name: + * + * Blk.bar + */ + const glsl_type *ifc_type = var->get_interface_type(); + char *name = ralloc_asprintf(NULL, "%s.%s", ifc_type->name, var->name); + /* Note: row_major is only meaningful for uniform blocks, and lowering + * is only applied to non-uniform interface blocks, so we can safely + * pass false for row_major. 
+ */ + recursion(var->type, &name, strlen(name), row_major, NULL, packing, + false, record_array_count); + ralloc_free(name); + } else if (t_without_array->is_record() || + (t->is_array() && t->fields.array->is_array())) { + char *name = ralloc_strdup(NULL, var->name); + recursion(var->type, &name, strlen(name), row_major, NULL, packing, + false, record_array_count); + ralloc_free(name); + } else if (t_without_array->is_interface()) { + char *name = ralloc_strdup(NULL, t_without_array->name); + recursion(var->type, &name, strlen(name), row_major, NULL, packing, + false, record_array_count); + ralloc_free(name); + } else { + this->set_record_array_count(record_array_count); + this->visit_field(t, var->name, row_major, NULL, packing, false); + } +} + +void +program_resource_visitor::recursion(const glsl_type *t, char **name, + size_t name_length, bool row_major, + const glsl_type *record_type, + const unsigned packing, + bool last_field, + unsigned record_array_count) +{ + /* Records need to have each field processed individually. + * + * Arrays of records need to have each array element processed + * individually, then each field of the resulting array elements processed + * individually. + */ + if (t->is_record() || t->is_interface()) { + if (record_type == NULL && t->is_record()) + record_type = t; + + if (t->is_record()) + this->enter_record(t, *name, row_major, packing); + + for (unsigned i = 0; i < t->length; i++) { + const char *field = t->fields.structure[i].name; + size_t new_length = name_length; + + if (t->fields.structure[i].type->is_record()) + this->visit_field(&t->fields.structure[i]); + + /* Append '.field' to the current variable name. */ + if (name_length == 0) { + ralloc_asprintf_rewrite_tail(name, &new_length, "%s", field); + } else { + ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field); + } + + /* The layout of structures at the top level of the block is set + * during parsing. 
For matrices contained in multiple levels of + * structures in the block, the inner structures have no layout. + * These cases must potentially inherit the layout from the outer + * levels. + */ + bool field_row_major = row_major; + const enum glsl_matrix_layout matrix_layout = + glsl_matrix_layout(t->fields.structure[i].matrix_layout); + if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) { + field_row_major = true; + } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) { + field_row_major = false; + } + + recursion(t->fields.structure[i].type, name, new_length, + field_row_major, + record_type, + packing, + (i + 1) == t->length, record_array_count); + + /* Only the first leaf-field of the record gets called with the + * record type pointer. + */ + record_type = NULL; + } + + if (t->is_record()) { + (*name)[name_length] = '\0'; + this->leave_record(t, *name, row_major, packing); + } + } else if (t->without_array()->is_record() || + t->without_array()->is_interface() || + (t->is_array() && t->fields.array->is_array())) { + if (record_type == NULL && t->fields.array->is_record()) + record_type = t->fields.array; + + unsigned length = t->length; + /* Shader storage block unsized arrays: add subscript [0] to variable + * names */ + if (t->is_unsized_array()) + length = 1; + + record_array_count *= length; + + for (unsigned i = 0; i < length; i++) { + size_t new_length = name_length; + + /* Append the subscript to the current variable name */ + ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i); + + recursion(t->fields.array, name, new_length, row_major, + record_type, + packing, + (i + 1) == t->length, record_array_count); + + /* Only the first leaf-field of the record gets called with the + * record type pointer. 
+ */ + record_type = NULL; + } + } else { + this->set_record_array_count(record_array_count); + this->visit_field(t, *name, row_major, record_type, packing, last_field); + } +} + +void +program_resource_visitor::visit_field(const glsl_type *type, const char *name, + bool row_major, + const glsl_type *, + const unsigned, + bool /* last_field */) +{ + visit_field(type, name, row_major); +} + +void +program_resource_visitor::visit_field(const glsl_struct_field *field) +{ + (void) field; + /* empty */ +} + +void +program_resource_visitor::enter_record(const glsl_type *, const char *, bool, + const unsigned) +{ +} + +void +program_resource_visitor::leave_record(const glsl_type *, const char *, bool, + const unsigned) +{ +} + +void +program_resource_visitor::set_record_array_count(unsigned) +{ +} + +namespace { + +/** + * Class to help calculate the storage requirements for a set of uniforms + * + * As uniforms are added to the active set the number of active uniforms and + * the storage requirements for those uniforms are accumulated. The active + * uniforms are added to the hash table supplied to the constructor. + * + * If the same uniform is added multiple times (i.e., once for each shader + * target), it will only be accounted once. 
+ */ +class count_uniform_size : public program_resource_visitor { +public: + count_uniform_size(struct string_to_uint_map *map, + struct string_to_uint_map *hidden_map) + : num_active_uniforms(0), num_hidden_uniforms(0), num_values(0), + num_shader_samplers(0), num_shader_images(0), + num_shader_uniform_components(0), num_shader_subroutines(0), + is_ubo_var(false), is_shader_storage(false), map(map), + hidden_map(hidden_map) + { + /* empty */ + } + + void start_shader() + { + this->num_shader_samplers = 0; + this->num_shader_images = 0; + this->num_shader_uniform_components = 0; + this->num_shader_subroutines = 0; + } + + void process(ir_variable *var) + { + this->current_var = var; + this->is_ubo_var = var->is_in_buffer_block(); + this->is_shader_storage = var->is_in_shader_storage_block(); + if (var->is_interface_instance()) + program_resource_visitor::process(var->get_interface_type(), + var->get_interface_type()->name); + else + program_resource_visitor::process(var); + } + + /** + * Total number of active uniforms counted + */ + unsigned num_active_uniforms; + + unsigned num_hidden_uniforms; + + /** + * Number of data values required to back the storage for the active uniforms + */ + unsigned num_values; + + /** + * Number of samplers used + */ + unsigned num_shader_samplers; + + /** + * Number of images used + */ + unsigned num_shader_images; + + /** + * Number of uniforms used in the current shader + */ + unsigned num_shader_uniform_components; + + /** + * Number of subroutine uniforms used + */ + unsigned num_shader_subroutines; + + bool is_ubo_var; + bool is_shader_storage; + + struct string_to_uint_map *map; + +private: + virtual void visit_field(const glsl_type *type, const char *name, + bool row_major) + { + assert(!type->without_array()->is_record()); + assert(!type->without_array()->is_interface()); + assert(!(type->is_array() && type->fields.array->is_array())); + + (void) row_major; + + /* Count the number of samplers regardless of whether the 
uniform is + * already in the hash table. The hash table prevents adding the same + * uniform for multiple shader targets, but in this case we want to + * count it for each shader target. + */ + const unsigned values = values_for_type(type); + if (type->contains_subroutine()) { + this->num_shader_subroutines += values; + } else if (type->contains_sampler()) { + this->num_shader_samplers += values; + } else if (type->contains_image()) { + this->num_shader_images += values; + + /* As drivers are likely to represent image uniforms as + * scalar indices, count them against the limit of uniform + * components in the default block. The spec allows image + * uniforms to use up no more than one scalar slot. + */ + if(!is_shader_storage) + this->num_shader_uniform_components += values; + } else { + /* Accumulate the total number of uniform slots used by this shader. + * Note that samplers do not count against this limit because they + * don't use any storage on current hardware. + */ + if (!is_ubo_var && !is_shader_storage) + this->num_shader_uniform_components += values; + } + + /* If the uniform is already in the map, there's nothing more to do. + */ + unsigned id; + if (this->map->get(id, name)) + return; + + if (this->current_var->data.how_declared == ir_var_hidden) { + this->hidden_map->put(this->num_hidden_uniforms, name); + this->num_hidden_uniforms++; + } else { + this->map->put(this->num_active_uniforms-this->num_hidden_uniforms, + name); + } + + /* Each leaf uniform occupies one entry in the list of active + * uniforms. + */ + this->num_active_uniforms++; + this->num_values += values; + } + + struct string_to_uint_map *hidden_map; + + /** + * Current variable being processed. + */ + ir_variable *current_var; +}; + +} /* anonymous namespace */ + +/** + * Class to help parcel out pieces of backing storage to uniforms + * + * Each uniform processed has some range of the \c gl_constant_value + * structures associated with it. 
The association is done by finding + * the uniform in the \c string_to_uint_map and using the value from + * the map to connect that slot in the \c gl_uniform_storage table + * with the next available slot in the \c gl_constant_value array. + * + * \warning + * This class assumes that every uniform that will be processed is + * already in the \c string_to_uint_map. In addition, it assumes that + * the \c gl_uniform_storage and \c gl_constant_value arrays are "big + * enough." + */ +class parcel_out_uniform_storage : public program_resource_visitor { +public: + parcel_out_uniform_storage(struct string_to_uint_map *map, + struct gl_uniform_storage *uniforms, + union gl_constant_value *values) + : map(map), uniforms(uniforms), values(values) + { + } + + void start_shader(gl_shader_stage shader_type) + { + assert(shader_type < MESA_SHADER_STAGES); + this->shader_type = shader_type; + + this->shader_samplers_used = 0; + this->shader_shadow_samplers = 0; + this->next_sampler = 0; + this->next_image = 0; + this->next_subroutine = 0; + this->record_array_count = 1; + memset(this->targets, 0, sizeof(this->targets)); + } + + void set_and_process(struct gl_shader_program *prog, + ir_variable *var) + { + current_var = var; + field_counter = 0; + this->record_next_sampler = new string_to_uint_map; + + ubo_block_index = -1; + if (var->is_in_buffer_block()) { + if (var->is_interface_instance() && var->type->is_array()) { + unsigned l = strlen(var->get_interface_type()->name); + + for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) { + if (strncmp(var->get_interface_type()->name, + prog->BufferInterfaceBlocks[i].Name, + l) == 0 + && prog->BufferInterfaceBlocks[i].Name[l] == '[') { + ubo_block_index = i; + break; + } + } + } else { + for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) { + if (strcmp(var->get_interface_type()->name, + prog->BufferInterfaceBlocks[i].Name) == 0) { + ubo_block_index = i; + break; + } + } + } + assert(ubo_block_index != -1); + + /* 
Uniform blocks that were specified with an instance name must be + * handled a little bit differently. The name of the variable is the + * name used to reference the uniform block instead of being the name + * of a variable within the block. Therefore, searching for the name + * within the block will fail. + */ + if (var->is_interface_instance()) { + ubo_byte_offset = 0; + process(var->get_interface_type(), + var->get_interface_type()->name); + } else { + const struct gl_uniform_block *const block = + &prog->BufferInterfaceBlocks[ubo_block_index]; + + assert(var->data.location != -1); + + const struct gl_uniform_buffer_variable *const ubo_var = + &block->Uniforms[var->data.location]; + + ubo_byte_offset = ubo_var->Offset; + process(var); + } + } else { + /* Store any explicit location and reset data location so we can + * reuse this variable for storing the uniform slot number. + */ + this->explicit_location = current_var->data.location; + current_var->data.location = -1; + + process(var); + } + delete this->record_next_sampler; + } + + int ubo_block_index; + int ubo_byte_offset; + gl_shader_stage shader_type; + +private: + void handle_samplers(const glsl_type *base_type, + struct gl_uniform_storage *uniform, const char *name) + { + if (base_type->is_sampler()) { + uniform->opaque[shader_type].active = true; + + /* Handle multiple samplers inside struct arrays */ + if (this->record_array_count > 1) { + unsigned inner_array_size = MAX2(1, uniform->array_elements); + char *name_copy = ralloc_strdup(NULL, name); + + /* Remove all array subscripts from the sampler name */ + char *str_start; + const char *str_end; + while((str_start = strchr(name_copy, '[')) && + (str_end = strchr(name_copy, ']'))) { + /* Shift the text that follows ']' down, including its NUL + * terminator. Measuring from str_end itself would count the + * ']' as well and touch one byte past the end of name_copy. + */ + memmove(str_start, str_end + 1, 1 + strlen(str_end + 1)); + } + + unsigned index = 0; + if (this->record_next_sampler->get(index, name_copy)) { + /* In this case, we've already seen this uniform so we just use + * the next sampler index recorded the last time we visited. 
+ */ + uniform->opaque[shader_type].index = index; + index = inner_array_size + uniform->opaque[shader_type].index; + this->record_next_sampler->put(index, name_copy); + + ralloc_free(name_copy); + /* Return as everything else has already been initialised in a + * previous pass. + */ + return; + } else { + /* We've never seen this uniform before so we need to allocate + * enough indices to store it. + * + * Nested struct arrays behave like arrays of arrays so we need + * to increase the index by the total number of elements of the + * sampler in case there is more than one sampler inside the + * structs. This allows the offset to be easily calculated for + * indirect indexing. + */ + uniform->opaque[shader_type].index = this->next_sampler; + this->next_sampler += + inner_array_size * this->record_array_count; + + /* Store the next index for future passes over the struct array + */ + index = uniform->opaque[shader_type].index + inner_array_size; + this->record_next_sampler->put(index, name_copy); + ralloc_free(name_copy); + } + } else { + /* Increment the sampler by 1 for non-arrays and by the number of + * array elements for arrays. + */ + uniform->opaque[shader_type].index = this->next_sampler; + this->next_sampler += MAX2(1, uniform->array_elements); + } + + const gl_texture_index target = base_type->sampler_index(); + const unsigned shadow = base_type->sampler_shadow; + for (unsigned i = uniform->opaque[shader_type].index; + i < MIN2(this->next_sampler, MAX_SAMPLERS); + i++) { + this->targets[i] = target; + this->shader_samplers_used |= 1U << i; + this->shader_shadow_samplers |= shadow << i; + } + } + } + + void handle_images(const glsl_type *base_type, + struct gl_uniform_storage *uniform) + { + if (base_type->is_image()) { + uniform->opaque[shader_type].index = this->next_image; + uniform->opaque[shader_type].active = true; + + /* Increment the image index by 1 for non-arrays and by the + * number of array elements for arrays. 
+ */ + this->next_image += MAX2(1, uniform->array_elements); + + } + } + + void handle_subroutines(const glsl_type *base_type, + struct gl_uniform_storage *uniform) + { + if (base_type->is_subroutine()) { + uniform->opaque[shader_type].index = this->next_subroutine; + uniform->opaque[shader_type].active = true; + + /* Increment the subroutine index by 1 for non-arrays and by the + * number of array elements for arrays. + */ + this->next_subroutine += MAX2(1, uniform->array_elements); + + } + } + + virtual void set_record_array_count(unsigned record_array_count) + { + this->record_array_count = record_array_count; + } + + virtual void visit_field(const glsl_type *type, const char *name, + bool row_major) + { + (void) type; + (void) name; + (void) row_major; + assert(!"Should not get here."); + } + + virtual void enter_record(const glsl_type *type, const char *, + bool row_major, const unsigned packing) { + assert(type->is_record()); + if (this->ubo_block_index == -1) + return; + if (packing == GLSL_INTERFACE_PACKING_STD430) + this->ubo_byte_offset = glsl_align( + this->ubo_byte_offset, type->std430_base_alignment(row_major)); + else + this->ubo_byte_offset = glsl_align( + this->ubo_byte_offset, type->std140_base_alignment(row_major)); + } + + virtual void leave_record(const glsl_type *type, const char *, + bool row_major, const unsigned packing) { + assert(type->is_record()); + if (this->ubo_block_index == -1) + return; + if (packing == GLSL_INTERFACE_PACKING_STD430) + this->ubo_byte_offset = glsl_align( + this->ubo_byte_offset, type->std430_base_alignment(row_major)); + else + this->ubo_byte_offset = glsl_align( + this->ubo_byte_offset, type->std140_base_alignment(row_major)); + } + + virtual void visit_field(const glsl_type *type, const char *name, + bool row_major, const glsl_type *record_type, + const unsigned packing, + bool /* last_field */) + { + assert(!type->without_array()->is_record()); + assert(!type->without_array()->is_interface()); + 
assert(!(type->is_array() && type->fields.array->is_array())); + + unsigned id; + bool found = this->map->get(id, name); + assert(found); + + if (!found) + return; + + const glsl_type *base_type; + if (type->is_array()) { + this->uniforms[id].array_elements = type->length; + base_type = type->fields.array; + } else { + this->uniforms[id].array_elements = 0; + base_type = type; + } + + /* Initialise opaque data */ + this->uniforms[id].opaque[shader_type].index = ~0; + this->uniforms[id].opaque[shader_type].active = false; + + /* This assigns uniform indices to sampler and image uniforms. */ + handle_samplers(base_type, &this->uniforms[id], name); + handle_images(base_type, &this->uniforms[id]); + handle_subroutines(base_type, &this->uniforms[id]); + + /* For array of arrays or struct arrays the base location may have + * already been set so don't set it again. + */ + if (ubo_block_index == -1 && current_var->data.location == -1) { + current_var->data.location = id; + } + + /* If there is already storage associated with this uniform or if the + * uniform is set as builtin, it means that it was set while processing + * an earlier shader stage. For example, we may be processing the + * uniform in the fragment shader, but the uniform was already processed + * in the vertex shader. + */ + if (this->uniforms[id].storage != NULL || this->uniforms[id].builtin) { + return; + } + + /* Assign explicit locations. */ + if (current_var->data.explicit_location) { + /* Set sequential locations for struct fields. 
*/ + if (current_var->type->without_array()->is_record() || + current_var->type->is_array_of_arrays()) { + const unsigned entries = MAX2(1, this->uniforms[id].array_elements); + this->uniforms[id].remap_location = + this->explicit_location + field_counter; + field_counter += entries; + } else { + this->uniforms[id].remap_location = this->explicit_location; + } + } else { + /* Initialize to UNMAPPED_UNIFORM_LOC to indicate that no location is set */ + this->uniforms[id].remap_location = UNMAPPED_UNIFORM_LOC; + } + + this->uniforms[id].name = ralloc_strdup(this->uniforms, name); + this->uniforms[id].type = base_type; + this->uniforms[id].initialized = 0; + this->uniforms[id].num_driver_storage = 0; + this->uniforms[id].driver_storage = NULL; + this->uniforms[id].atomic_buffer_index = -1; + this->uniforms[id].hidden = + current_var->data.how_declared == ir_var_hidden; + this->uniforms[id].builtin = is_gl_identifier(name); + + /* Do not assign storage if the uniform is builtin */ + if (!this->uniforms[id].builtin) + this->uniforms[id].storage = this->values; + + this->uniforms[id].is_shader_storage = + current_var->is_in_shader_storage_block(); + + if (this->ubo_block_index != -1) { + this->uniforms[id].block_index = this->ubo_block_index; + + unsigned alignment = type->std140_base_alignment(row_major); + if (packing == GLSL_INTERFACE_PACKING_STD430) + alignment = type->std430_base_alignment(row_major); + this->ubo_byte_offset = glsl_align(this->ubo_byte_offset, alignment); + this->uniforms[id].offset = this->ubo_byte_offset; + if (packing == GLSL_INTERFACE_PACKING_STD430) + this->ubo_byte_offset += type->std430_size(row_major); + else + this->ubo_byte_offset += type->std140_size(row_major); + + if (type->is_array()) { + if (packing == GLSL_INTERFACE_PACKING_STD430) + this->uniforms[id].array_stride = + type->without_array()->std430_array_stride(row_major); + else + this->uniforms[id].array_stride = + glsl_align(type->without_array()->std140_size(row_major), + 16); + } else { + 
this->uniforms[id].array_stride = 0; + } + + if (type->without_array()->is_matrix()) { + const glsl_type *matrix = type->without_array(); + const unsigned N = matrix->base_type == GLSL_TYPE_DOUBLE ? 8 : 4; + const unsigned items = + row_major ? matrix->matrix_columns : matrix->vector_elements; + + assert(items <= 4); + if (packing == GLSL_INTERFACE_PACKING_STD430) + this->uniforms[id].matrix_stride = items < 3 ? items * N : + glsl_align(items * N, 16); + else + this->uniforms[id].matrix_stride = glsl_align(items * N, 16); + this->uniforms[id].row_major = row_major; + } else { + this->uniforms[id].matrix_stride = 0; + this->uniforms[id].row_major = false; + } + } else { + this->uniforms[id].block_index = -1; + this->uniforms[id].offset = -1; + this->uniforms[id].array_stride = -1; + this->uniforms[id].matrix_stride = -1; + this->uniforms[id].row_major = false; + } + + this->values += values_for_type(type); + } + + struct string_to_uint_map *map; + + struct gl_uniform_storage *uniforms; + unsigned next_sampler; + unsigned next_image; + unsigned next_subroutine; + + /** + * Field counter is used to take care that uniform structures + * with explicit locations get sequential locations. + */ + unsigned field_counter; + + /** + * Current variable being processed. + */ + ir_variable *current_var; + + /* Used to store the explicit location from current_var so that we can + * reuse the location field for storing the uniform slot id. + */ + int explicit_location; + + /* Stores total struct array elements including nested structs */ + unsigned record_array_count; + + /* Map for temporarily storing next sampler index when handling samplers in + * struct arrays. + */ + struct string_to_uint_map *record_next_sampler; + +public: + union gl_constant_value *values; + + gl_texture_index targets[MAX_SAMPLERS]; + + /** + * Mask of samplers used by the current shader stage. + */ + unsigned shader_samplers_used; + + /** + * Mask of samplers used by the current shader stage for shadows. 
+ */ + unsigned shader_shadow_samplers; +}; + +/** + * Merges a uniform block into an array of uniform blocks that may or + * may not already contain a copy of it. + * + * Returns the index of the new block in the array. + */ +int +link_cross_validate_uniform_block(void *mem_ctx, + struct gl_uniform_block **linked_blocks, + unsigned int *num_linked_blocks, + struct gl_uniform_block *new_block) +{ + for (unsigned int i = 0; i < *num_linked_blocks; i++) { + struct gl_uniform_block *old_block = &(*linked_blocks)[i]; + + if (strcmp(old_block->Name, new_block->Name) == 0) + return link_uniform_blocks_are_compatible(old_block, new_block) + ? i : -1; + } + + *linked_blocks = reralloc(mem_ctx, *linked_blocks, + struct gl_uniform_block, + *num_linked_blocks + 1); + int linked_block_index = (*num_linked_blocks)++; + struct gl_uniform_block *linked_block = &(*linked_blocks)[linked_block_index]; + + memcpy(linked_block, new_block, sizeof(*new_block)); + linked_block->Uniforms = ralloc_array(*linked_blocks, + struct gl_uniform_buffer_variable, + linked_block->NumUniforms); + + memcpy(linked_block->Uniforms, + new_block->Uniforms, + sizeof(*linked_block->Uniforms) * linked_block->NumUniforms); + + for (unsigned int i = 0; i < linked_block->NumUniforms; i++) { + struct gl_uniform_buffer_variable *ubo_var = + &linked_block->Uniforms[i]; + + if (ubo_var->Name == ubo_var->IndexName) { + ubo_var->Name = ralloc_strdup(*linked_blocks, ubo_var->Name); + ubo_var->IndexName = ubo_var->Name; + } else { + ubo_var->Name = ralloc_strdup(*linked_blocks, ubo_var->Name); + ubo_var->IndexName = ralloc_strdup(*linked_blocks, ubo_var->IndexName); + } + } + + return linked_block_index; +} + +/** + * Walks the IR and update the references to uniform blocks in the + * ir_variables to point at linked shader's list (previously, they + * would point at the uniform block list in one of the pre-linked + * shaders). 
+ */ +static void +link_update_uniform_buffer_variables(struct gl_shader *shader) +{ + foreach_in_list(ir_instruction, node, shader->ir) { + ir_variable *const var = node->as_variable(); + + if ((var == NULL) || !var->is_in_buffer_block()) + continue; + + assert(var->data.mode == ir_var_uniform || + var->data.mode == ir_var_shader_storage); + + if (var->is_interface_instance()) { + var->data.location = 0; + continue; + } + + bool found = false; + char sentinel = '\0'; + + if (var->type->is_record()) { + sentinel = '.'; + } else if (var->type->is_array() && (var->type->fields.array->is_array() + || var->type->without_array()->is_record())) { + sentinel = '['; + } + + const unsigned l = strlen(var->name); + for (unsigned i = 0; i < shader->NumBufferInterfaceBlocks; i++) { + for (unsigned j = 0; j < shader->BufferInterfaceBlocks[i].NumUniforms; j++) { + if (sentinel) { + const char *begin = shader->BufferInterfaceBlocks[i].Uniforms[j].Name; + const char *end = strchr(begin, sentinel); + + if (end == NULL) + continue; + + if ((ptrdiff_t) l != (end - begin)) + continue; + + if (strncmp(var->name, begin, l) == 0) { + found = true; + var->data.location = j; + break; + } + } else if (!strcmp(var->name, + shader->BufferInterfaceBlocks[i].Uniforms[j].Name)) { + found = true; + var->data.location = j; + break; + } + } + if (found) + break; + } + assert(found); + } +} + +static void +link_set_image_access_qualifiers(struct gl_shader_program *prog, + gl_shader *sh, unsigned shader_stage, + ir_variable *var, const glsl_type *type, + char **name, size_t name_length) +{ + /* Handle arrays of arrays */ + if (type->is_array() && type->fields.array->is_array()) { + for (unsigned i = 0; i < type->length; i++) { + size_t new_length = name_length; + + /* Append the subscript to the current variable name */ + ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i); + + link_set_image_access_qualifiers(prog, sh, shader_stage, var, + type->fields.array, name, + new_length); + } + } else 
{ + unsigned id = 0; + bool found = prog->UniformHash->get(id, *name); + assert(found); + (void) found; + const gl_uniform_storage *storage = &prog->UniformStorage[id]; + const unsigned index = storage->opaque[shader_stage].index; + const GLenum access = (var->data.image_read_only ? GL_READ_ONLY : + var->data.image_write_only ? GL_WRITE_ONLY : + GL_READ_WRITE); + + for (unsigned j = 0; j < MAX2(1, storage->array_elements); ++j) + sh->ImageAccess[index + j] = access; + } +} + +/** + * Combine the hidden uniform hash map with the uniform hash map so that the + * hidden uniforms will be given indices at the end of the uniform storage + * array. + * + * Used as the callback handed to the hidden map's iterate() call; \c closure + * is the \c count_uniform_size whose map receives the entry, stored at + * (num_active_uniforms - num_hidden_uniforms) + \c hidden_id. + */ +static void +assign_hidden_uniform_slot_id(const char *name, unsigned hidden_id, + void *closure) +{ + count_uniform_size *uniform_size = (count_uniform_size *) closure; + unsigned hidden_uniform_start = uniform_size->num_active_uniforms - + uniform_size->num_hidden_uniforms; + + uniform_size->map->put(hidden_uniform_start + hidden_id, name); +} + +void +link_assign_uniform_locations(struct gl_shader_program *prog, + unsigned int boolean_true) +{ + ralloc_free(prog->UniformStorage); + prog->UniformStorage = NULL; + prog->NumUniformStorage = 0; + + if (prog->UniformHash != NULL) { + prog->UniformHash->clear(); + } else { + prog->UniformHash = new string_to_uint_map; + } + + /* First pass: Count the uniform resources used by the user-defined + * uniforms. While this happens, each active uniform will have an index + * assigned to it. + * + * Note: this is *NOT* the index that is returned to the application by + * glGetUniformLocation. + */ + struct string_to_uint_map *hiddenUniforms = new string_to_uint_map; + count_uniform_size uniform_size(prog->UniformHash, hiddenUniforms); + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + struct gl_shader *sh = prog->_LinkedShaders[i]; + + if (sh == NULL) + continue; + + /* Uniforms that lack an initializer in the shader code have an initial + * value of zero. 
This includes sampler uniforms. + * + * Page 24 (page 30 of the PDF) of the GLSL 1.20 spec says: + * + * "The link time initial value is either the value of the variable's + * initializer, if present, or 0 if no initializer is present. Sampler + * types cannot have initializers." + */ + memset(sh->SamplerUnits, 0, sizeof(sh->SamplerUnits)); + memset(sh->ImageUnits, 0, sizeof(sh->ImageUnits)); + + link_update_uniform_buffer_variables(sh); + + /* Reset various per-shader target counts. + */ + uniform_size.start_shader(); + + foreach_in_list(ir_instruction, node, sh->ir) { + ir_variable *const var = node->as_variable(); + + if ((var == NULL) || (var->data.mode != ir_var_uniform && + var->data.mode != ir_var_shader_storage)) + continue; + + uniform_size.process(var); + } + + sh->num_samplers = uniform_size.num_shader_samplers; + sh->NumImages = uniform_size.num_shader_images; + sh->num_uniform_components = uniform_size.num_shader_uniform_components; + sh->num_combined_uniform_components = sh->num_uniform_components; + + for (unsigned i = 0; i < sh->NumBufferInterfaceBlocks; i++) { + if (!sh->BufferInterfaceBlocks[i].IsShaderStorage) { + sh->num_combined_uniform_components += + sh->BufferInterfaceBlocks[i].UniformBufferSize / 4; + } + } + } + + const unsigned num_uniforms = uniform_size.num_active_uniforms; + const unsigned num_data_slots = uniform_size.num_values; + const unsigned hidden_uniforms = uniform_size.num_hidden_uniforms; + + /* assign hidden uniforms a slot id */ + hiddenUniforms->iterate(assign_hidden_uniform_slot_id, &uniform_size); + delete hiddenUniforms; + + /* On the outside chance that there were no uniforms, bail out. 
+ */ + if (num_uniforms == 0) + return; + + struct gl_uniform_storage *uniforms = + rzalloc_array(prog, struct gl_uniform_storage, num_uniforms); + union gl_constant_value *data = + rzalloc_array(uniforms, union gl_constant_value, num_data_slots); +#ifndef NDEBUG + union gl_constant_value *data_end = &data[num_data_slots]; +#endif + + parcel_out_uniform_storage parcel(prog->UniformHash, uniforms, data); + + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + if (prog->_LinkedShaders[i] == NULL) + continue; + + parcel.start_shader((gl_shader_stage)i); + + foreach_in_list(ir_instruction, node, prog->_LinkedShaders[i]->ir) { + ir_variable *const var = node->as_variable(); + + if ((var == NULL) || (var->data.mode != ir_var_uniform && + var->data.mode != ir_var_shader_storage)) + continue; + + parcel.set_and_process(prog, var); + } + + prog->_LinkedShaders[i]->active_samplers = parcel.shader_samplers_used; + prog->_LinkedShaders[i]->shadow_samplers = parcel.shader_shadow_samplers; + + STATIC_ASSERT(sizeof(prog->_LinkedShaders[i]->SamplerTargets) == + sizeof(parcel.targets)); + memcpy(prog->_LinkedShaders[i]->SamplerTargets, parcel.targets, + sizeof(prog->_LinkedShaders[i]->SamplerTargets)); + } + + /* Reserve all the explicit locations of the active uniforms. */ + for (unsigned i = 0; i < num_uniforms; i++) { + if (uniforms[i].type->is_subroutine() || + uniforms[i].is_shader_storage) + continue; + + if (uniforms[i].remap_location != UNMAPPED_UNIFORM_LOC) { + /* How many new entries for this uniform? */ + const unsigned entries = MAX2(1, uniforms[i].array_elements); + + /* Set remap table entries point to correct gl_uniform_storage. */ + for (unsigned j = 0; j < entries; j++) { + unsigned element_loc = uniforms[i].remap_location + j; + assert(prog->UniformRemapTable[element_loc] == + INACTIVE_UNIFORM_EXPLICIT_LOCATION); + prog->UniformRemapTable[element_loc] = &uniforms[i]; + } + } + } + + /* Reserve locations for rest of the uniforms. 
*/ + for (unsigned i = 0; i < num_uniforms; i++) { + + if (uniforms[i].type->is_subroutine() || + uniforms[i].is_shader_storage) + continue; + + /* Built-in uniforms should not get any location. */ + if (uniforms[i].builtin) + continue; + + /* Explicit ones have been set already. */ + if (uniforms[i].remap_location != UNMAPPED_UNIFORM_LOC) + continue; + + /* how many new entries for this uniform? */ + const unsigned entries = MAX2(1, uniforms[i].array_elements); + + /* resize remap table to fit new entries */ + prog->UniformRemapTable = + reralloc(prog, + prog->UniformRemapTable, + gl_uniform_storage *, + prog->NumUniformRemapTable + entries); + + /* set pointers for this uniform */ + for (unsigned j = 0; j < entries; j++) + prog->UniformRemapTable[prog->NumUniformRemapTable+j] = &uniforms[i]; + + /* set the base location in remap table for the uniform */ + uniforms[i].remap_location = prog->NumUniformRemapTable; + + prog->NumUniformRemapTable += entries; + } + + /* Reserve all the explicit locations of the active subroutine uniforms. */ + for (unsigned i = 0; i < num_uniforms; i++) { + if (!uniforms[i].type->is_subroutine()) + continue; + + if (uniforms[i].remap_location == UNMAPPED_UNIFORM_LOC) + continue; + + for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) { + struct gl_shader *sh = prog->_LinkedShaders[j]; + if (!sh) + continue; + + if (!uniforms[i].opaque[j].active) + continue; + + /* How many new entries for this uniform? */ + const unsigned entries = MAX2(1, uniforms[i].array_elements); + + /* Set remap table entries point to correct gl_uniform_storage. 
*/ + for (unsigned k = 0; k < entries; k++) { + unsigned element_loc = uniforms[i].remap_location + k; + assert(sh->SubroutineUniformRemapTable[element_loc] == + INACTIVE_UNIFORM_EXPLICIT_LOCATION); + sh->SubroutineUniformRemapTable[element_loc] = &uniforms[i]; + } + } + } + + /* reserve subroutine locations */ + for (unsigned i = 0; i < num_uniforms; i++) { + + if (!uniforms[i].type->is_subroutine()) + continue; + const unsigned entries = MAX2(1, uniforms[i].array_elements); + + if (uniforms[i].remap_location != UNMAPPED_UNIFORM_LOC) + continue; + for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) { + struct gl_shader *sh = prog->_LinkedShaders[j]; + if (!sh) + continue; + + if (!uniforms[i].opaque[j].active) + continue; + + sh->SubroutineUniformRemapTable = + reralloc(sh, + sh->SubroutineUniformRemapTable, + gl_uniform_storage *, + sh->NumSubroutineUniformRemapTable + entries); + + for (unsigned k = 0; k < entries; k++) + sh->SubroutineUniformRemapTable[sh->NumSubroutineUniformRemapTable + k] = &uniforms[i]; + uniforms[i].remap_location = sh->NumSubroutineUniformRemapTable; + sh->NumSubroutineUniformRemapTable += entries; + } + } + +#ifndef NDEBUG + for (unsigned i = 0; i < num_uniforms; i++) { + assert(uniforms[i].storage != NULL || uniforms[i].builtin); + } + + assert(parcel.values == data_end); +#endif + + prog->NumUniformStorage = num_uniforms; + prog->NumHiddenUniforms = hidden_uniforms; + prog->UniformStorage = uniforms; + + /** + * Scan the program for image uniforms and store image unit access + * information into the gl_shader data structure. 
+ */ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + gl_shader *sh = prog->_LinkedShaders[i]; + + if (sh == NULL) + continue; + + foreach_in_list(ir_instruction, node, sh->ir) { + ir_variable *var = node->as_variable(); + + if (var && var->data.mode == ir_var_uniform && + var->type->contains_image()) { + char *name_copy = ralloc_strdup(NULL, var->name); + link_set_image_access_qualifiers(prog, sh, i, var, var->type, + &name_copy, strlen(var->name)); + ralloc_free(name_copy); + } + } + } + + link_set_uniform_initializers(prog, boolean_true); + + return; +} diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp new file mode 100644 index 0000000..264b69c --- /dev/null +++ b/src/compiler/glsl/link_varyings.cpp @@ -0,0 +1,1888 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +/** + * \file link_varyings.cpp + * + * Linker functions related specifically to linking varyings between shader + * stages. + */ + + +#include "main/mtypes.h" +#include "glsl_symbol_table.h" +#include "glsl_parser_extras.h" +#include "ir_optimization.h" +#include "linker.h" +#include "link_varyings.h" +#include "main/macros.h" +#include "program/hash_table.h" +#include "program.h" + + +/** + * Get the varying type stripped of the outermost array if we're processing + * a stage whose varyings are arrays indexed by a vertex number (such as + * geometry shader inputs). + */ +static const glsl_type * +get_varying_type(const ir_variable *var, gl_shader_stage stage) +{ + const glsl_type *type = var->type; + + if (!var->data.patch && + ((var->data.mode == ir_var_shader_out && + stage == MESA_SHADER_TESS_CTRL) || + (var->data.mode == ir_var_shader_in && + (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL || + stage == MESA_SHADER_GEOMETRY)))) { + assert(type->is_array()); + type = type->fields.array; + } + + return type; +} + +/** + * Validate the types and qualifiers of an output from one stage against the + * matching input to another stage. + */ +static void +cross_validate_types_and_qualifiers(struct gl_shader_program *prog, + const ir_variable *input, + const ir_variable *output, + gl_shader_stage consumer_stage, + gl_shader_stage producer_stage) +{ + /* Check that the types match between stages. + */ + const glsl_type *type_to_match = input->type; + + /* VS -> GS, VS -> TCS, VS -> TES, TES -> GS */ + const bool extra_array_level = (producer_stage == MESA_SHADER_VERTEX && + consumer_stage != MESA_SHADER_FRAGMENT) || + consumer_stage == MESA_SHADER_GEOMETRY; + if (extra_array_level) { + assert(type_to_match->is_array()); + type_to_match = type_to_match->fields.array; + } + + if (type_to_match != output->type) { + /* There is a bit of a special case for gl_TexCoord. This + * built-in is unsized by default. 
Applications that access + * it with a variable index must redeclare it with a size. There is some + * language in the GLSL spec that implies the fragment shader + * and vertex shader do not have to agree on this size. Other + * drivers behave this way, and one or two applications seem to + * rely on it. + * + * Neither declaration needs to be modified here because the array + * sizes are fixed later when update_array_sizes is called. + * + * From page 48 (page 54 of the PDF) of the GLSL 1.10 spec: + * + * "Unlike user-defined varying variables, the built-in + * varying variables don't have a strict one-to-one + * correspondence between the vertex language and the + * fragment language." + */ + if (!output->type->is_array() || !is_gl_identifier(output->name)) { + linker_error(prog, + "%s shader output `%s' declared as type `%s', " + "but %s shader input declared as type `%s'\n", + _mesa_shader_stage_to_string(producer_stage), + output->name, + output->type->name, + _mesa_shader_stage_to_string(consumer_stage), + input->type->name); + return; + } + } + + /* Check that all of the qualifiers match between stages. + */ + if (input->data.centroid != output->data.centroid) { + linker_error(prog, + "%s shader output `%s' %s centroid qualifier, " + "but %s shader input %s centroid qualifier\n", + _mesa_shader_stage_to_string(producer_stage), + output->name, + (output->data.centroid) ? "has" : "lacks", + _mesa_shader_stage_to_string(consumer_stage), + (input->data.centroid) ? "has" : "lacks"); + return; + } + + if (input->data.sample != output->data.sample) { + linker_error(prog, + "%s shader output `%s' %s sample qualifier, " + "but %s shader input %s sample qualifier\n", + _mesa_shader_stage_to_string(producer_stage), + output->name, + (output->data.sample) ? "has" : "lacks", + _mesa_shader_stage_to_string(consumer_stage), + (input->data.sample) ? 
"has" : "lacks"); + return; + } + + if (input->data.patch != output->data.patch) { + linker_error(prog, + "%s shader output `%s' %s patch qualifier, " + "but %s shader input %s patch qualifier\n", + _mesa_shader_stage_to_string(producer_stage), + output->name, + (output->data.patch) ? "has" : "lacks", + _mesa_shader_stage_to_string(consumer_stage), + (input->data.patch) ? "has" : "lacks"); + return; + } + + if (!prog->IsES && input->data.invariant != output->data.invariant) { + linker_error(prog, + "%s shader output `%s' %s invariant qualifier, " + "but %s shader input %s invariant qualifier\n", + _mesa_shader_stage_to_string(producer_stage), + output->name, + (output->data.invariant) ? "has" : "lacks", + _mesa_shader_stage_to_string(consumer_stage), + (input->data.invariant) ? "has" : "lacks"); + return; + } + + /* GLSL >= 4.40 removes text requiring interpolation qualifiers + * to match cross stage, they must only match within the same stage. + * + * From page 84 (page 90 of the PDF) of the GLSL 4.40 spec: + * + * "It is a link-time error if, within the same stage, the interpolation + * qualifiers of variables of the same name do not match. 
+ * + */ + if (input->data.interpolation != output->data.interpolation && + prog->Version < 440) { + linker_error(prog, + "%s shader output `%s' specifies %s " + "interpolation qualifier, " + "but %s shader input specifies %s " + "interpolation qualifier\n", + _mesa_shader_stage_to_string(producer_stage), + output->name, + interpolation_string(output->data.interpolation), + _mesa_shader_stage_to_string(consumer_stage), + interpolation_string(input->data.interpolation)); + return; + } +} + +/** + * Validate front and back color outputs against single color input + */ +static void +cross_validate_front_and_back_color(struct gl_shader_program *prog, + const ir_variable *input, + const ir_variable *front_color, + const ir_variable *back_color, + gl_shader_stage consumer_stage, + gl_shader_stage producer_stage) +{ + if (front_color != NULL && front_color->data.assigned) + cross_validate_types_and_qualifiers(prog, input, front_color, + consumer_stage, producer_stage); + + if (back_color != NULL && back_color->data.assigned) + cross_validate_types_and_qualifiers(prog, input, back_color, + consumer_stage, producer_stage); +} + +/** + * Validate that outputs from one stage match inputs of another + */ +void +cross_validate_outputs_to_inputs(struct gl_shader_program *prog, + gl_shader *producer, gl_shader *consumer) +{ + glsl_symbol_table parameters; + ir_variable *explicit_locations[MAX_VARYING] = { NULL, }; + + /* Find all shader outputs in the "producer" stage. + */ + foreach_in_list(ir_instruction, node, producer->ir) { + ir_variable *const var = node->as_variable(); + + if ((var == NULL) || (var->data.mode != ir_var_shader_out)) + continue; + + if (!var->data.explicit_location + || var->data.location < VARYING_SLOT_VAR0) + parameters.add_variable(var); + else { + /* User-defined varyings with explicit locations are handled + * differently because they do not need to have matching names. 
+ */ + const unsigned idx = var->data.location - VARYING_SLOT_VAR0; + + if (explicit_locations[idx] != NULL) { + linker_error(prog, + "%s shader has multiple outputs explicitly " + "assigned to location %d\n", + _mesa_shader_stage_to_string(producer->Stage), + idx); + return; + } + + explicit_locations[idx] = var; + } + } + + + /* Find all shader inputs in the "consumer" stage. Any variables that have + * matching outputs already in the symbol table must have the same type and + * qualifiers. + * + * Exception: if the consumer is the geometry shader, then the inputs + * should be arrays and the type of the array element should match the type + * of the corresponding producer output. + */ + foreach_in_list(ir_instruction, node, consumer->ir) { + ir_variable *const input = node->as_variable(); + + if ((input == NULL) || (input->data.mode != ir_var_shader_in)) + continue; + + if (strcmp(input->name, "gl_Color") == 0 && input->data.used) { + const ir_variable *const front_color = + parameters.get_variable("gl_FrontColor"); + + const ir_variable *const back_color = + parameters.get_variable("gl_BackColor"); + + cross_validate_front_and_back_color(prog, input, + front_color, back_color, + consumer->Stage, producer->Stage); + } else if (strcmp(input->name, "gl_SecondaryColor") == 0 && input->data.used) { + const ir_variable *const front_color = + parameters.get_variable("gl_FrontSecondaryColor"); + + const ir_variable *const back_color = + parameters.get_variable("gl_BackSecondaryColor"); + + cross_validate_front_and_back_color(prog, input, + front_color, back_color, + consumer->Stage, producer->Stage); + } else { + /* The rules for connecting inputs and outputs change in the presence + * of explicit locations. In this case, we no longer care about the + * names of the variables. Instead, we care only about the + * explicitly assigned location. 
+ */ + ir_variable *output = NULL; + if (input->data.explicit_location + && input->data.location >= VARYING_SLOT_VAR0) { + output = explicit_locations[input->data.location - VARYING_SLOT_VAR0]; + + if (output == NULL) { + linker_error(prog, + "%s shader input `%s' with explicit location " + "has no matching output\n", + _mesa_shader_stage_to_string(consumer->Stage), + input->name); + } + } else { + output = parameters.get_variable(input->name); + } + + if (output != NULL) { + cross_validate_types_and_qualifiers(prog, input, output, + consumer->Stage, producer->Stage); + } else { + /* Check for input vars with unmatched output vars in prev stage + * taking into account that interface blocks could have a matching + * output but with different name, so we ignore them. + */ + assert(!input->data.assigned); + if (input->data.used && !input->get_interface_type() && + !input->data.explicit_location && !prog->SeparateShader) + linker_error(prog, + "%s shader input `%s' " + "has no matching output in the previous stage\n", + _mesa_shader_stage_to_string(consumer->Stage), + input->name); + } + } + } +} + +/** + * Demote shader inputs and outputs that are not used in other stages, and + * remove them via dead code elimination. + */ +void +remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object, + gl_shader *sh, + enum ir_variable_mode mode) +{ + if (is_separate_shader_object) + return; + + foreach_in_list(ir_instruction, node, sh->ir) { + ir_variable *const var = node->as_variable(); + + if ((var == NULL) || (var->data.mode != int(mode))) + continue; + + /* A shader 'in' or 'out' variable is only really an input or output if + * its value is used by other shader stages. This will cause the + * variable to have a location assigned. + */ + if (var->data.is_unmatched_generic_inout) { + assert(var->data.mode != ir_var_temporary); + var->data.mode = ir_var_auto; + } + } + + /* Eliminate code that is now dead due to unused inputs/outputs being + * demoted. 
+ */ + while (do_dead_code(sh->ir, false)) + ; + +} + +/** + * Initialize this object based on a string that was passed to + * glTransformFeedbackVaryings. + * + * If the input is mal-formed, this call still succeeds, but it sets + * this->var_name to a mal-formed input, so tfeedback_decl::find_output_var() + * will fail to find any matching variable. + */ +void +tfeedback_decl::init(struct gl_context *ctx, const void *mem_ctx, + const char *input) +{ + /* We don't have to be pedantic about what is a valid GLSL variable name, + * because any variable with an invalid name can't exist in the IR anyway. + */ + + this->location = -1; + this->orig_name = input; + this->lowered_builtin_array_variable = none; + this->skip_components = 0; + this->next_buffer_separator = false; + this->matched_candidate = NULL; + this->stream_id = 0; + + if (ctx->Extensions.ARB_transform_feedback3) { + /* Parse gl_NextBuffer. */ + if (strcmp(input, "gl_NextBuffer") == 0) { + this->next_buffer_separator = true; + return; + } + + /* Parse gl_SkipComponents. */ + if (strcmp(input, "gl_SkipComponents1") == 0) + this->skip_components = 1; + else if (strcmp(input, "gl_SkipComponents2") == 0) + this->skip_components = 2; + else if (strcmp(input, "gl_SkipComponents3") == 0) + this->skip_components = 3; + else if (strcmp(input, "gl_SkipComponents4") == 0) + this->skip_components = 4; + + if (this->skip_components) + return; + } + + /* Parse a declaration. 
*/ + const char *base_name_end; + long subscript = parse_program_resource_name(input, &base_name_end); + this->var_name = ralloc_strndup(mem_ctx, input, base_name_end - input); + if (this->var_name == NULL) { + _mesa_error_no_memory(__func__); + return; + } + + if (subscript >= 0) { + this->array_subscript = subscript; + this->is_subscripted = true; + } else { + this->is_subscripted = false; + } + + /* For drivers that lower gl_ClipDistance to gl_ClipDistanceMESA, this + * class must behave specially to account for the fact that gl_ClipDistance + * is converted from a float[8] to a vec4[2]. + */ + if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerClipDistance && + strcmp(this->var_name, "gl_ClipDistance") == 0) { + this->lowered_builtin_array_variable = clip_distance; + } + + if (ctx->Const.LowerTessLevel && + (strcmp(this->var_name, "gl_TessLevelOuter") == 0)) + this->lowered_builtin_array_variable = tess_level_outer; + if (ctx->Const.LowerTessLevel && + (strcmp(this->var_name, "gl_TessLevelInner") == 0)) + this->lowered_builtin_array_variable = tess_level_inner; +} + + +/** + * Determine whether two tfeedback_decl objects refer to the same variable and + * array index (if applicable). + */ +bool +tfeedback_decl::is_same(const tfeedback_decl &x, const tfeedback_decl &y) +{ + assert(x.is_varying() && y.is_varying()); + + if (strcmp(x.var_name, y.var_name) != 0) + return false; + if (x.is_subscripted != y.is_subscripted) + return false; + if (x.is_subscripted && x.array_subscript != y.array_subscript) + return false; + return true; +} + + +/** + * Assign a location and stream ID for this tfeedback_decl object based on the + * transform feedback candidate found by find_candidate. + * + * If an error occurs, the error is reported through linker_error() and false + * is returned. 
+ */ +bool +tfeedback_decl::assign_location(struct gl_context *ctx, + struct gl_shader_program *prog) +{ + assert(this->is_varying()); + + unsigned fine_location + = this->matched_candidate->toplevel_var->data.location * 4 + + this->matched_candidate->toplevel_var->data.location_frac + + this->matched_candidate->offset; + + if (this->matched_candidate->type->is_array()) { + /* Array variable */ + const unsigned matrix_cols = + this->matched_candidate->type->fields.array->matrix_columns; + const unsigned vector_elements = + this->matched_candidate->type->fields.array->vector_elements; + const unsigned dmul = + this->matched_candidate->type->fields.array->is_double() ? 2 : 1; + unsigned actual_array_size; + switch (this->lowered_builtin_array_variable) { + case clip_distance: + actual_array_size = prog->LastClipDistanceArraySize; + break; + case tess_level_outer: + actual_array_size = 4; + break; + case tess_level_inner: + actual_array_size = 2; + break; + case none: + default: + actual_array_size = this->matched_candidate->type->array_size(); + break; + } + + if (this->is_subscripted) { + /* Check array bounds. */ + if (this->array_subscript >= actual_array_size) { + linker_error(prog, "Transform feedback varying %s has index " + "%i, but the array size is %u.", + this->orig_name, this->array_subscript, + actual_array_size); + return false; + } + unsigned array_elem_size = this->lowered_builtin_array_variable ? 
+ 1 : vector_elements * matrix_cols * dmul; + fine_location += array_elem_size * this->array_subscript; + this->size = 1; + } else { + this->size = actual_array_size; + } + this->vector_elements = vector_elements; + this->matrix_columns = matrix_cols; + if (this->lowered_builtin_array_variable) + this->type = GL_FLOAT; + else + this->type = this->matched_candidate->type->fields.array->gl_type; + } else { + /* Regular variable (scalar, vector, or matrix) */ + if (this->is_subscripted) { + linker_error(prog, "Transform feedback varying %s requested, " + "but %s is not an array.", + this->orig_name, this->var_name); + return false; + } + this->size = 1; + this->vector_elements = this->matched_candidate->type->vector_elements; + this->matrix_columns = this->matched_candidate->type->matrix_columns; + this->type = this->matched_candidate->type->gl_type; + } + this->location = fine_location / 4; + this->location_frac = fine_location % 4; + + /* From GL_EXT_transform_feedback: + * A program will fail to link if: + * + * * the total number of components to capture in any varying + * variable in <varyings> is greater than the constant + * MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT and the + * buffer mode is SEPARATE_ATTRIBS_EXT; + */ + if (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS && + this->num_components() > + ctx->Const.MaxTransformFeedbackSeparateComponents) { + linker_error(prog, "Transform feedback varying %s exceeds " + "MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS.", + this->orig_name); + return false; + } + + /* Only transform feedback varyings can be assigned to non-zero streams, + * so assign the stream id here. 
+ */ + this->stream_id = this->matched_candidate->toplevel_var->data.stream; + + return true; +} + + +unsigned +tfeedback_decl::get_num_outputs() const +{ + if (!this->is_varying()) { + return 0; + } + return (this->num_components() + this->location_frac + 3)/4; +} + + +/** + * Update gl_transform_feedback_info to reflect this tfeedback_decl. + * + * If an error occurs, the error is reported through linker_error() and false + * is returned. + */ +bool +tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog, + struct gl_transform_feedback_info *info, + unsigned buffer, const unsigned max_outputs) const +{ + assert(!this->next_buffer_separator); + + /* Handle gl_SkipComponents. */ + if (this->skip_components) { + info->BufferStride[buffer] += this->skip_components; + return true; + } + + /* From GL_EXT_transform_feedback: + * A program will fail to link if: + * + * * the total number of components to capture is greater than + * the constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT + * and the buffer mode is INTERLEAVED_ATTRIBS_EXT. 
+ */ + if (prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS && + info->BufferStride[buffer] + this->num_components() > + ctx->Const.MaxTransformFeedbackInterleavedComponents) { + linker_error(prog, "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS " + "limit has been exceeded."); + return false; + } + + unsigned location = this->location; + unsigned location_frac = this->location_frac; + unsigned num_components = this->num_components(); + while (num_components > 0) { + unsigned output_size = MIN2(num_components, 4 - location_frac); + assert(info->NumOutputs < max_outputs); + info->Outputs[info->NumOutputs].ComponentOffset = location_frac; + info->Outputs[info->NumOutputs].OutputRegister = location; + info->Outputs[info->NumOutputs].NumComponents = output_size; + info->Outputs[info->NumOutputs].StreamId = stream_id; + info->Outputs[info->NumOutputs].OutputBuffer = buffer; + info->Outputs[info->NumOutputs].DstOffset = info->BufferStride[buffer]; + ++info->NumOutputs; + info->BufferStride[buffer] += output_size; + info->BufferStream[buffer] = this->stream_id; + num_components -= output_size; + location++; + location_frac = 0; + } + + info->Varyings[info->NumVarying].Name = ralloc_strdup(prog, this->orig_name); + info->Varyings[info->NumVarying].Type = this->type; + info->Varyings[info->NumVarying].Size = this->size; + info->NumVarying++; + + return true; +} + + +const tfeedback_candidate * +tfeedback_decl::find_candidate(gl_shader_program *prog, + hash_table *tfeedback_candidates) +{ + const char *name = this->var_name; + switch (this->lowered_builtin_array_variable) { + case none: + name = this->var_name; + break; + case clip_distance: + name = "gl_ClipDistanceMESA"; + break; + case tess_level_outer: + name = "gl_TessLevelOuterMESA"; + break; + case tess_level_inner: + name = "gl_TessLevelInnerMESA"; + break; + } + this->matched_candidate = (const tfeedback_candidate *) + hash_table_find(tfeedback_candidates, name); + if (!this->matched_candidate) { + /* 
From GL_EXT_transform_feedback: + * A program will fail to link if: + * + * * any variable name specified in the <varyings> array is not + * declared as an output in the geometry shader (if present) or + * the vertex shader (if no geometry shader is present); + */ + linker_error(prog, "Transform feedback varying %s undeclared.", + this->orig_name); + } + return this->matched_candidate; +} + + +/** + * Parse all the transform feedback declarations that were passed to + * glTransformFeedbackVaryings() and store them in tfeedback_decl objects. + * + * If an error occurs, the error is reported through linker_error() and false + * is returned. + */ +bool +parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog, + const void *mem_ctx, unsigned num_names, + char **varying_names, tfeedback_decl *decls) +{ + for (unsigned i = 0; i < num_names; ++i) { + decls[i].init(ctx, mem_ctx, varying_names[i]); + + if (!decls[i].is_varying()) + continue; + + /* From GL_EXT_transform_feedback: + * A program will fail to link if: + * + * * any two entries in the <varyings> array specify the same varying + * variable; + * + * We interpret this to mean "any two entries in the <varyings> array + * specify the same varying variable and array index", since transform + * feedback of arrays would be useless otherwise. + */ + for (unsigned j = 0; j < i; ++j) { + if (!decls[j].is_varying()) + continue; + + if (tfeedback_decl::is_same(decls[i], decls[j])) { + linker_error(prog, "Transform feedback varying %s specified " + "more than once.", varying_names[i]); + return false; + } + } + } + return true; +} + + +/** + * Store transform feedback location assignments into + * prog->LinkedTransformFeedback based on the data stored in tfeedback_decls. + * + * If an error occurs, the error is reported through linker_error() and false + * is returned. 
+ */ +bool +store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, + unsigned num_tfeedback_decls, + tfeedback_decl *tfeedback_decls) +{ + bool separate_attribs_mode = + prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS; + + ralloc_free(prog->LinkedTransformFeedback.Varyings); + ralloc_free(prog->LinkedTransformFeedback.Outputs); + + memset(&prog->LinkedTransformFeedback, 0, + sizeof(prog->LinkedTransformFeedback)); + + prog->LinkedTransformFeedback.Varyings = + rzalloc_array(prog, + struct gl_transform_feedback_varying_info, + num_tfeedback_decls); + + unsigned num_outputs = 0; + for (unsigned i = 0; i < num_tfeedback_decls; ++i) + num_outputs += tfeedback_decls[i].get_num_outputs(); + + prog->LinkedTransformFeedback.Outputs = + rzalloc_array(prog, + struct gl_transform_feedback_output, + num_outputs); + + unsigned num_buffers = 0; + + if (separate_attribs_mode) { + /* GL_SEPARATE_ATTRIBS */ + for (unsigned i = 0; i < num_tfeedback_decls; ++i) { + if (!tfeedback_decls[i].store(ctx, prog, &prog->LinkedTransformFeedback, + num_buffers, num_outputs)) + return false; + + num_buffers++; + } + } + else { + /* GL_INVERLEAVED_ATTRIBS */ + int buffer_stream_id = -1; + for (unsigned i = 0; i < num_tfeedback_decls; ++i) { + if (tfeedback_decls[i].is_next_buffer_separator()) { + num_buffers++; + buffer_stream_id = -1; + continue; + } else if (buffer_stream_id == -1) { + /* First varying writing to this buffer: remember its stream */ + buffer_stream_id = (int) tfeedback_decls[i].get_stream_id(); + } else if (buffer_stream_id != + (int) tfeedback_decls[i].get_stream_id()) { + /* Varying writes to the same buffer from a different stream */ + linker_error(prog, + "Transform feedback can't capture varyings belonging " + "to different vertex streams in a single buffer. 
" + "Varying %s writes to buffer from stream %u, other " + "varyings in the same buffer write from stream %u.", + tfeedback_decls[i].name(), + tfeedback_decls[i].get_stream_id(), + buffer_stream_id); + return false; + } + + if (!tfeedback_decls[i].store(ctx, prog, + &prog->LinkedTransformFeedback, + num_buffers, num_outputs)) + return false; + } + num_buffers++; + } + + assert(prog->LinkedTransformFeedback.NumOutputs == num_outputs); + + prog->LinkedTransformFeedback.NumBuffers = num_buffers; + return true; +} + +namespace { + +/** + * Data structure recording the relationship between outputs of one shader + * stage (the "producer") and inputs of another (the "consumer"). + */ +class varying_matches +{ +public: + varying_matches(bool disable_varying_packing, + gl_shader_stage producer_stage, + gl_shader_stage consumer_stage); + ~varying_matches(); + void record(ir_variable *producer_var, ir_variable *consumer_var); + unsigned assign_locations(struct gl_shader_program *prog, + uint64_t reserved_slots, bool separate_shader); + void store_locations() const; + +private: + /** + * If true, this driver disables varying packing, so all varyings need to + * be aligned on slot boundaries, and take up a number of slots equal to + * their number of matrix columns times their array size. + */ + const bool disable_varying_packing; + + /** + * Enum representing the order in which varyings are packed within a + * packing class. + * + * Currently we pack vec4's first, then vec2's, then scalar values, then + * vec3's. This order ensures that the only vectors that are at risk of + * having to be "double parked" (split between two adjacent varying slots) + * are the vec3's. 
+ */ + enum packing_order_enum { + PACKING_ORDER_VEC4, + PACKING_ORDER_VEC2, + PACKING_ORDER_SCALAR, + PACKING_ORDER_VEC3, + }; + + static unsigned compute_packing_class(const ir_variable *var); + static packing_order_enum compute_packing_order(const ir_variable *var); + static int match_comparator(const void *x_generic, const void *y_generic); + + /** + * Structure recording the relationship between a single producer output + * and a single consumer input. + */ + struct match { + /** + * Packing class for this varying, computed by compute_packing_class(). + */ + unsigned packing_class; + + /** + * Packing order for this varying, computed by compute_packing_order(). + */ + packing_order_enum packing_order; + unsigned num_components; + + /** + * The output variable in the producer stage. + */ + ir_variable *producer_var; + + /** + * The input variable in the consumer stage. + */ + ir_variable *consumer_var; + + /** + * The location which has been assigned for this varying. This is + * expressed in multiples of a float, with the first generic varying + * (i.e. the one referred to by VARYING_SLOT_VAR0) represented by the + * value 0. + */ + unsigned generic_location; + } *matches; + + /** + * The number of elements in the \c matches array that are currently in + * use. + */ + unsigned num_matches; + + /** + * The number of elements that were set aside for the \c matches array when + * it was allocated. + */ + unsigned matches_capacity; + + gl_shader_stage producer_stage; + gl_shader_stage consumer_stage; +}; + +} /* anonymous namespace */ + +varying_matches::varying_matches(bool disable_varying_packing, + gl_shader_stage producer_stage, + gl_shader_stage consumer_stage) + : disable_varying_packing(disable_varying_packing), + producer_stage(producer_stage), + consumer_stage(consumer_stage) +{ + /* Note: this initial capacity is rather arbitrarily chosen to be large + * enough for many cases without wasting an unreasonable amount of space. 
+ * varying_matches::record() will resize the array if there are more than + * this number of varyings. + */ + this->matches_capacity = 8; + this->matches = (match *) + malloc(sizeof(*this->matches) * this->matches_capacity); + this->num_matches = 0; +} + + +varying_matches::~varying_matches() +{ + free(this->matches); +} + + +/** + * Record the given producer/consumer variable pair in the list of variables + * that should later be assigned locations. + * + * It is permissible for \c consumer_var to be NULL (this happens if a + * variable is output by the producer and consumed by transform feedback, but + * not consumed by the consumer). + * + * If \c producer_var has already been paired up with a consumer_var, or + * producer_var is part of fixed pipeline functionality (and hence already has + * a location assigned), this function has no effect. + * + * Note: as a side effect this function may change the interpolation type of + * \c producer_var, but only when the change couldn't possibly affect + * rendering. + */ +void +varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var) +{ + assert(producer_var != NULL || consumer_var != NULL); + + if ((producer_var && (!producer_var->data.is_unmatched_generic_inout || + producer_var->data.explicit_location)) || + (consumer_var && (!consumer_var->data.is_unmatched_generic_inout || + consumer_var->data.explicit_location))) { + /* Either a location already exists for this variable (since it is part + * of fixed functionality), or it has already been recorded as part of a + * previous match. + */ + return; + } + + if ((consumer_var == NULL && producer_var->type->contains_integer()) || + (consumer_stage != -1 && consumer_stage != MESA_SHADER_FRAGMENT)) { + /* Since this varying is not being consumed by the fragment shader, its + * interpolation type varying cannot possibly affect rendering. + * Also, this variable is non-flat and is (or contains) an integer. 
+ * If the consumer stage is unknown, don't modify the interpolation + * type as it could affect rendering later with separate shaders. + * + * lower_packed_varyings requires all integer varyings to flat, + * regardless of where they appear. We can trivially satisfy that + * requirement by changing the interpolation type to flat here. + */ + if (producer_var) { + producer_var->data.centroid = false; + producer_var->data.sample = false; + producer_var->data.interpolation = INTERP_QUALIFIER_FLAT; + } + + if (consumer_var) { + consumer_var->data.centroid = false; + consumer_var->data.sample = false; + consumer_var->data.interpolation = INTERP_QUALIFIER_FLAT; + } + } + + if (this->num_matches == this->matches_capacity) { + this->matches_capacity *= 2; + this->matches = (match *) + realloc(this->matches, + sizeof(*this->matches) * this->matches_capacity); + } + + const ir_variable *const var = (producer_var != NULL) + ? producer_var : consumer_var; + const gl_shader_stage stage = (producer_var != NULL) + ? producer_stage : consumer_stage; + const glsl_type *type = get_varying_type(var, stage); + + this->matches[this->num_matches].packing_class + = this->compute_packing_class(var); + this->matches[this->num_matches].packing_order + = this->compute_packing_order(var); + if (this->disable_varying_packing) { + unsigned slots = type->count_attribute_slots(false); + this->matches[this->num_matches].num_components = slots * 4; + } else { + this->matches[this->num_matches].num_components + = type->component_slots(); + } + this->matches[this->num_matches].producer_var = producer_var; + this->matches[this->num_matches].consumer_var = consumer_var; + this->num_matches++; + if (producer_var) + producer_var->data.is_unmatched_generic_inout = 0; + if (consumer_var) + consumer_var->data.is_unmatched_generic_inout = 0; +} + + +/** + * Choose locations for all of the variable matches that were previously + * passed to varying_matches::record(). 
+ */ +unsigned +varying_matches::assign_locations(struct gl_shader_program *prog, + uint64_t reserved_slots, + bool separate_shader) +{ + /* We disable varying sorting for separate shader programs for the + * following reasons: + * + * 1/ All programs must sort the code in the same order to guarantee the + * interface matching. However varying_matches::record() will change the + * interpolation qualifier of some stages. + * + * 2/ GLSL version 4.50 removes the matching constrain on the interpolation + * qualifier. + * + * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.40 spec: + * + * "The type and presence of interpolation qualifiers of variables with + * the same name declared in all linked shaders for the same cross-stage + * interface must match, otherwise the link command will fail. + * + * When comparing an output from one stage to an input of a subsequent + * stage, the input and output don't match if their interpolation + * qualifiers (or lack thereof) are not the same." + * + * "It is a link-time error if, within the same stage, the interpolation + * qualifiers of variables of the same name do not match." + */ + if (!separate_shader) { + /* Sort varying matches into an order that makes them easy to pack. 
*/ + qsort(this->matches, this->num_matches, sizeof(*this->matches), + &varying_matches::match_comparator); + } + + unsigned generic_location = 0; + unsigned generic_patch_location = MAX_VARYING*4; + + for (unsigned i = 0; i < this->num_matches; i++) { + unsigned *location = &generic_location; + + const ir_variable *var; + const glsl_type *type; + bool is_vertex_input = false; + if (matches[i].consumer_var) { + var = matches[i].consumer_var; + type = get_varying_type(var, consumer_stage); + if (consumer_stage == MESA_SHADER_VERTEX) + is_vertex_input = true; + } else { + var = matches[i].producer_var; + type = get_varying_type(var, producer_stage); + } + + if (var->data.patch) + location = &generic_patch_location; + + /* Advance to the next slot if this varying has a different packing + * class than the previous one, and we're not already on a slot + * boundary. + */ + if (i > 0 && + this->matches[i - 1].packing_class + != this->matches[i].packing_class) { + *location = ALIGN(*location, 4); + } + + unsigned num_elements = type->count_attribute_slots(is_vertex_input); + unsigned slot_end = this->disable_varying_packing ? 4 : + type->without_array()->vector_elements; + slot_end += *location - 1; + + /* FIXME: We could be smarter in the below code and loop back over + * trying to fill any locations that we skipped because we couldn't pack + * the varying between an explicit location. For now just let the user + * hit the linking error if we run out of room and suggest they use + * explicit locations. + */ + for (unsigned j = 0; j < num_elements; j++) { + while ((slot_end < MAX_VARYING * 4u) && + ((reserved_slots & (UINT64_C(1) << *location / 4u) || + (reserved_slots & (UINT64_C(1) << slot_end / 4u))))) { + + *location = ALIGN(*location + 1, 4); + slot_end = *location; + + /* reset the counter and try again */ + j = 0; + } + + /* Increase the slot to make sure there is enough room for next + * array element. 
+ */ + if (this->disable_varying_packing) + slot_end += 4; + else + slot_end += type->without_array()->vector_elements; + } + + if (!var->data.patch && *location >= MAX_VARYING * 4u) { + linker_error(prog, "insufficient contiguous locations available for " + "%s it is possible an array or struct could not be " + "packed between varyings with explicit locations. Try " + "using an explicit location for arrays and structs.", + var->name); + } + + this->matches[i].generic_location = *location; + + *location += this->matches[i].num_components; + } + + return (generic_location + 3) / 4; +} + + +/** + * Update the producer and consumer shaders to reflect the locations + * assignments that were made by varying_matches::assign_locations(). + */ +void +varying_matches::store_locations() const +{ + for (unsigned i = 0; i < this->num_matches; i++) { + ir_variable *producer_var = this->matches[i].producer_var; + ir_variable *consumer_var = this->matches[i].consumer_var; + unsigned generic_location = this->matches[i].generic_location; + unsigned slot = generic_location / 4; + unsigned offset = generic_location % 4; + + if (producer_var) { + producer_var->data.location = VARYING_SLOT_VAR0 + slot; + producer_var->data.location_frac = offset; + } + + if (consumer_var) { + assert(consumer_var->data.location == -1); + consumer_var->data.location = VARYING_SLOT_VAR0 + slot; + consumer_var->data.location_frac = offset; + } + } +} + + +/** + * Compute the "packing class" of the given varying. This is an unsigned + * integer with the property that two variables in the same packing class can + * be safely backed into the same vec4. + */ +unsigned +varying_matches::compute_packing_class(const ir_variable *var) +{ + /* Without help from the back-end, there is no way to pack together + * variables with different interpolation types, because + * lower_packed_varyings must choose exactly one interpolation type for + * each packed varying it creates. 
+ * + * However, we can safely pack together floats, ints, and uints, because: + * + * - varyings of base type "int" and "uint" must use the "flat" + * interpolation type, which can only occur in GLSL 1.30 and above. + * + * - On platforms that support GLSL 1.30 and above, lower_packed_varyings + * can store flat floats as ints without losing any information (using + * the ir_unop_bitcast_* opcodes). + * + * Therefore, the packing class depends only on the interpolation type. + */ + unsigned packing_class = var->data.centroid | (var->data.sample << 1) | + (var->data.patch << 2); + packing_class *= 4; + packing_class += var->data.interpolation; + return packing_class; +} + + +/** + * Compute the "packing order" of the given varying. This is a sort key we + * use to determine when to attempt to pack the given varying relative to + * other varyings in the same packing class. + */ +varying_matches::packing_order_enum +varying_matches::compute_packing_order(const ir_variable *var) +{ + const glsl_type *element_type = var->type; + + while (element_type->base_type == GLSL_TYPE_ARRAY) { + element_type = element_type->fields.array; + } + + switch (element_type->component_slots() % 4) { + case 1: return PACKING_ORDER_SCALAR; + case 2: return PACKING_ORDER_VEC2; + case 3: return PACKING_ORDER_VEC3; + case 0: return PACKING_ORDER_VEC4; + default: + assert(!"Unexpected value of vector_elements"); + return PACKING_ORDER_VEC4; + } +} + + +/** + * Comparison function passed to qsort() to sort varyings by packing_class and + * then by packing_order. 
+ */ +int +varying_matches::match_comparator(const void *x_generic, const void *y_generic) +{ + const match *x = (const match *) x_generic; + const match *y = (const match *) y_generic; + + if (x->packing_class != y->packing_class) + return x->packing_class - y->packing_class; + return x->packing_order - y->packing_order; +} + + +/** + * Is the given variable a varying variable to be counted against the + * limit in ctx->Const.MaxVarying? + * This includes variables such as texcoords, colors and generic + * varyings, but excludes variables such as gl_FrontFacing and gl_FragCoord. + */ +static bool +var_counts_against_varying_limit(gl_shader_stage stage, const ir_variable *var) +{ + /* Only fragment shaders will take a varying variable as an input */ + if (stage == MESA_SHADER_FRAGMENT && + var->data.mode == ir_var_shader_in) { + switch (var->data.location) { + case VARYING_SLOT_POS: + case VARYING_SLOT_FACE: + case VARYING_SLOT_PNTC: + return false; + default: + return true; + } + } + return false; +} + + +/** + * Visitor class that generates tfeedback_candidate structs describing all + * possible targets of transform feedback. + * + * tfeedback_candidate structs are stored in the hash table + * tfeedback_candidates, which is passed to the constructor. This hash table + * maps varying names to instances of the tfeedback_candidate struct. 
+ */ +class tfeedback_candidate_generator : public program_resource_visitor +{ +public: + tfeedback_candidate_generator(void *mem_ctx, + hash_table *tfeedback_candidates) + : mem_ctx(mem_ctx), + tfeedback_candidates(tfeedback_candidates), + toplevel_var(NULL), + varying_floats(0) + { + } + + void process(ir_variable *var) + { + /* All named varying interface blocks should be flattened by now */ + assert(!var->is_interface_instance()); + + this->toplevel_var = var; + this->varying_floats = 0; + program_resource_visitor::process(var); + } + +private: + virtual void visit_field(const glsl_type *type, const char *name, + bool row_major) + { + assert(!type->without_array()->is_record()); + assert(!type->without_array()->is_interface()); + + (void) row_major; + + tfeedback_candidate *candidate + = rzalloc(this->mem_ctx, tfeedback_candidate); + candidate->toplevel_var = this->toplevel_var; + candidate->type = type; + candidate->offset = this->varying_floats; + hash_table_insert(this->tfeedback_candidates, candidate, + ralloc_strdup(this->mem_ctx, name)); + this->varying_floats += type->component_slots(); + } + + /** + * Memory context used to allocate hash table keys and values. + */ + void * const mem_ctx; + + /** + * Hash table in which tfeedback_candidate objects should be stored. + */ + hash_table * const tfeedback_candidates; + + /** + * Pointer to the toplevel variable that is being traversed. + */ + ir_variable *toplevel_var; + + /** + * Total number of varying floats that have been visited so far. This is + * used to determine the offset to each varying within the toplevel + * variable. 
+ */ + unsigned varying_floats; +}; + + +namespace linker { + +bool +populate_consumer_input_sets(void *mem_ctx, exec_list *ir, + hash_table *consumer_inputs, + hash_table *consumer_interface_inputs, + ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX]) +{ + memset(consumer_inputs_with_locations, + 0, + sizeof(consumer_inputs_with_locations[0]) * VARYING_SLOT_TESS_MAX); + + foreach_in_list(ir_instruction, node, ir) { + ir_variable *const input_var = node->as_variable(); + + if ((input_var != NULL) && (input_var->data.mode == ir_var_shader_in)) { + if (input_var->type->is_interface()) + return false; + + if (input_var->data.explicit_location) { + /* assign_varying_locations only cares about finding the + * ir_variable at the start of a contiguous location block. + * + * - For !producer, consumer_inputs_with_locations isn't used. + * + * - For !consumer, consumer_inputs_with_locations is empty. + * + * For consumer && producer, if you were trying to set some + * ir_variable to the middle of a location block on the other side + * of producer/consumer, cross_validate_outputs_to_inputs() should + * be link-erroring due to either type mismatch or location + * overlaps. If the variables do match up, then they've got a + * matching data.location and you only looked at + * consumer_inputs_with_locations[var->data.location], not any + * following entries for the array/structure. 
+ */ + consumer_inputs_with_locations[input_var->data.location] = + input_var; + } else if (input_var->get_interface_type() != NULL) { + char *const iface_field_name = + ralloc_asprintf(mem_ctx, "%s.%s", + input_var->get_interface_type()->name, + input_var->name); + hash_table_insert(consumer_interface_inputs, input_var, + iface_field_name); + } else { + hash_table_insert(consumer_inputs, input_var, + ralloc_strdup(mem_ctx, input_var->name)); + } + } + } + + return true; +} + +/** + * Find a variable from the consumer that "matches" the specified variable + * + * This function only finds inputs with names that match. There is no + * validation (here) that the types, etc. are compatible. + */ +ir_variable * +get_matching_input(void *mem_ctx, + const ir_variable *output_var, + hash_table *consumer_inputs, + hash_table *consumer_interface_inputs, + ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX]) +{ + ir_variable *input_var; + + if (output_var->data.explicit_location) { + input_var = consumer_inputs_with_locations[output_var->data.location]; + } else if (output_var->get_interface_type() != NULL) { + char *const iface_field_name = + ralloc_asprintf(mem_ctx, "%s.%s", + output_var->get_interface_type()->name, + output_var->name); + input_var = + (ir_variable *) hash_table_find(consumer_interface_inputs, + iface_field_name); + } else { + input_var = + (ir_variable *) hash_table_find(consumer_inputs, output_var->name); + } + + return (input_var == NULL || input_var->data.mode != ir_var_shader_in) + ? 
NULL : input_var; +} + +} + +static int +io_variable_cmp(const void *_a, const void *_b) +{ + const ir_variable *const a = *(const ir_variable **) _a; + const ir_variable *const b = *(const ir_variable **) _b; + + if (a->data.explicit_location && b->data.explicit_location) + return b->data.location - a->data.location; + + if (a->data.explicit_location && !b->data.explicit_location) + return 1; + + if (!a->data.explicit_location && b->data.explicit_location) + return -1; + + return -strcmp(a->name, b->name); +} + +/** + * Sort the shader IO variables into canonical order + */ +static void +canonicalize_shader_io(exec_list *ir, enum ir_variable_mode io_mode) +{ + ir_variable *var_table[MAX_PROGRAM_OUTPUTS * 4]; + unsigned num_variables = 0; + + foreach_in_list(ir_instruction, node, ir) { + ir_variable *const var = node->as_variable(); + + if (var == NULL || var->data.mode != io_mode) + continue; + + /* If we have already encountered more I/O variables that could + * successfully link, bail. + */ + if (num_variables == ARRAY_SIZE(var_table)) + return; + + var_table[num_variables++] = var; + } + + if (num_variables == 0) + return; + + /* Sort the list in reverse order (io_variable_cmp handles this). Later + * we're going to push the variables on to the IR list as a stack, so we + * want the last variable (in canonical order) to be first in the list. + */ + qsort(var_table, num_variables, sizeof(var_table[0]), io_variable_cmp); + + /* Remove the variable from it's current location in the IR, and put it at + * the front. + */ + for (unsigned i = 0; i < num_variables; i++) { + var_table[i]->remove(); + ir->push_head(var_table[i]); + } +} + +/** + * Generate a bitfield map of the explicit locations for shader varyings. + * + * In theory a 32 bits value will be enough but a 64 bits value is future proof. 
+ */ +uint64_t +reserved_varying_slot(struct gl_shader *stage, ir_variable_mode io_mode) +{ + assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out); + assert(MAX_VARYING <= 64); /* avoid an overflow of the returned value */ + + uint64_t slots = 0; + int var_slot; + + if (!stage) + return slots; + + foreach_in_list(ir_instruction, node, stage->ir) { + ir_variable *const var = node->as_variable(); + + if (var == NULL || var->data.mode != io_mode || + !var->data.explicit_location || + var->data.location < VARYING_SLOT_VAR0) + continue; + + var_slot = var->data.location - VARYING_SLOT_VAR0; + + unsigned num_elements = get_varying_type(var, stage->Stage) + ->count_attribute_slots(stage->Stage == MESA_SHADER_VERTEX); + for (unsigned i = 0; i < num_elements; i++) { + if (var_slot >= 0 && var_slot < MAX_VARYING) + slots |= UINT64_C(1) << var_slot; + var_slot += 1; + } + } + + return slots; +} + + +/** + * Assign locations for all variables that are produced in one pipeline stage + * (the "producer") and consumed in the next stage (the "consumer"). + * + * Variables produced by the producer may also be consumed by transform + * feedback. + * + * \param num_tfeedback_decls is the number of declarations indicating + * variables that may be consumed by transform feedback. + * + * \param tfeedback_decls is a pointer to an array of tfeedback_decl objects + * representing the result of parsing the strings passed to + * glTransformFeedbackVaryings(). assign_location() will be called for + * each of these objects that matches one of the outputs of the + * producer. + * + * When num_tfeedback_decls is nonzero, it is permissible for the consumer to + * be NULL. In this case, varying locations are assigned solely based on the + * requirements of transform feedback. 
+ */ +bool +assign_varying_locations(struct gl_context *ctx, + void *mem_ctx, + struct gl_shader_program *prog, + gl_shader *producer, gl_shader *consumer, + unsigned num_tfeedback_decls, + tfeedback_decl *tfeedback_decls) +{ + if (ctx->Const.DisableVaryingPacking) { + /* Transform feedback code assumes varyings are packed, so if the driver + * has disabled varying packing, make sure it does not support transform + * feedback. + */ + assert(!ctx->Extensions.EXT_transform_feedback); + } + + /* Tessellation shaders treat inputs and outputs as shared memory and can + * access inputs and outputs of other invocations. + * Therefore, they can't be lowered to temps easily (and definitely not + * efficiently). + */ + bool disable_varying_packing = + ctx->Const.DisableVaryingPacking || + (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) || + (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) || + (producer && producer->Stage == MESA_SHADER_TESS_CTRL); + + varying_matches matches(disable_varying_packing, + producer ? producer->Stage : (gl_shader_stage)-1, + consumer ? consumer->Stage : (gl_shader_stage)-1); + hash_table *tfeedback_candidates + = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare); + hash_table *consumer_inputs + = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare); + hash_table *consumer_interface_inputs + = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare); + ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = { + NULL, + }; + + unsigned consumer_vertices = 0; + if (consumer && consumer->Stage == MESA_SHADER_GEOMETRY) + consumer_vertices = prog->Geom.VerticesIn; + + /* Operate in a total of four passes. + * + * 1. Sort inputs / outputs into a canonical order. This is necessary so + * that inputs / outputs of separable shaders will be assigned + * predictable locations regardless of the order in which declarations + * appeared in the shader source. + * + * 2. 
Assign locations for any matching inputs and outputs. + * + * 3. Mark output variables in the producer that do not have locations as + * not being outputs. This lets the optimizer eliminate them. + * + * 4. Mark input variables in the consumer that do not have locations as + * not being inputs. This lets the optimizer eliminate them. + */ + if (consumer) + canonicalize_shader_io(consumer->ir, ir_var_shader_in); + + if (producer) + canonicalize_shader_io(producer->ir, ir_var_shader_out); + + if (consumer + && !linker::populate_consumer_input_sets(mem_ctx, + consumer->ir, + consumer_inputs, + consumer_interface_inputs, + consumer_inputs_with_locations)) { + assert(!"populate_consumer_input_sets failed"); + hash_table_dtor(tfeedback_candidates); + hash_table_dtor(consumer_inputs); + hash_table_dtor(consumer_interface_inputs); + return false; + } + + if (producer) { + foreach_in_list(ir_instruction, node, producer->ir) { + ir_variable *const output_var = node->as_variable(); + + if ((output_var == NULL) || + (output_var->data.mode != ir_var_shader_out)) + continue; + + /* Only geometry shaders can use non-zero streams */ + assert(output_var->data.stream == 0 || + (output_var->data.stream < MAX_VERTEX_STREAMS && + producer->Stage == MESA_SHADER_GEOMETRY)); + + tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates); + g.process(output_var); + + ir_variable *const input_var = + linker::get_matching_input(mem_ctx, output_var, consumer_inputs, + consumer_interface_inputs, + consumer_inputs_with_locations); + + /* If a matching input variable was found, add this ouptut (and the + * input) to the set. If this is a separable program and there is no + * consumer stage, add the output. + * + * Always add TCS outputs. They are shared by all invocations + * within a patch and can be used as shared memory. 
+ */ + if (input_var || (prog->SeparateShader && consumer == NULL) || + producer->Type == GL_TESS_CONTROL_SHADER) { + matches.record(output_var, input_var); + } + + /* Only stream 0 outputs can be consumed in the next stage */ + if (input_var && output_var->data.stream != 0) { + linker_error(prog, "output %s is assigned to stream=%d but " + "is linked to an input, which requires stream=0", + output_var->name, output_var->data.stream); + return false; + } + } + } else { + /* If there's no producer stage, then this must be a separable program. + * For example, we may have a program that has just a fragment shader. + * Later this program will be used with some arbitrary vertex (or + * geometry) shader program. This means that locations must be assigned + * for all the inputs. + */ + foreach_in_list(ir_instruction, node, consumer->ir) { + ir_variable *const input_var = node->as_variable(); + + if ((input_var == NULL) || + (input_var->data.mode != ir_var_shader_in)) + continue; + + matches.record(NULL, input_var); + } + } + + for (unsigned i = 0; i < num_tfeedback_decls; ++i) { + if (!tfeedback_decls[i].is_varying()) + continue; + + const tfeedback_candidate *matched_candidate + = tfeedback_decls[i].find_candidate(prog, tfeedback_candidates); + + if (matched_candidate == NULL) { + hash_table_dtor(tfeedback_candidates); + hash_table_dtor(consumer_inputs); + hash_table_dtor(consumer_interface_inputs); + return false; + } + + if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) + matches.record(matched_candidate->toplevel_var, NULL); + } + + const uint64_t reserved_slots = + reserved_varying_slot(producer, ir_var_shader_out) | + reserved_varying_slot(consumer, ir_var_shader_in); + + const unsigned slots_used = matches.assign_locations(prog, reserved_slots, + prog->SeparateShader); + matches.store_locations(); + + for (unsigned i = 0; i < num_tfeedback_decls; ++i) { + if (!tfeedback_decls[i].is_varying()) + continue; + + if 
(!tfeedback_decls[i].assign_location(ctx, prog)) { + hash_table_dtor(tfeedback_candidates); + hash_table_dtor(consumer_inputs); + hash_table_dtor(consumer_interface_inputs); + return false; + } + } + + hash_table_dtor(tfeedback_candidates); + hash_table_dtor(consumer_inputs); + hash_table_dtor(consumer_interface_inputs); + + if (consumer && producer) { + foreach_in_list(ir_instruction, node, consumer->ir) { + ir_variable *const var = node->as_variable(); + + if (var && var->data.mode == ir_var_shader_in && + var->data.is_unmatched_generic_inout) { + if (prog->IsES) { + /* + * On Page 91 (Page 97 of the PDF) of the GLSL ES 1.0 spec: + * + * If the vertex shader declares but doesn't write to a + * varying and the fragment shader declares and reads it, + * is this an error? + * + * RESOLUTION: No. + */ + linker_warning(prog, "%s shader varying %s not written " + "by %s shader\n.", + _mesa_shader_stage_to_string(consumer->Stage), + var->name, + _mesa_shader_stage_to_string(producer->Stage)); + } else if (prog->Version <= 120) { + /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec: + * + * Only those varying variables used (i.e. read) in + * the fragment shader executable must be written to + * by the vertex shader executable; declaring + * superfluous varying variables in a vertex shader is + * permissible. + * + * We interpret this text as meaning that the VS must + * write the variable for the FS to read it. See + * "glsl1-varying read but not written" in piglit. + */ + linker_error(prog, "%s shader varying %s not written " + "by %s shader\n.", + _mesa_shader_stage_to_string(consumer->Stage), + var->name, + _mesa_shader_stage_to_string(producer->Stage)); + } + } + } + + /* Now that validation is done its safe to remove unused varyings. As + * we have both a producer and consumer its safe to remove unused + * varyings even if the program is a SSO because the stages are being + * linked together i.e. we have a multi-stage SSO. 
+ */ + remove_unused_shader_inputs_and_outputs(false, producer, + ir_var_shader_out); + remove_unused_shader_inputs_and_outputs(false, consumer, + ir_var_shader_in); + } + + if (!disable_varying_packing) { + if (producer) { + lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out, + 0, producer); + } + if (consumer) { + lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in, + consumer_vertices, consumer); + } + } + + return true; +} + +bool +check_against_output_limit(struct gl_context *ctx, + struct gl_shader_program *prog, + gl_shader *producer) +{ + unsigned output_vectors = 0; + + foreach_in_list(ir_instruction, node, producer->ir) { + ir_variable *const var = node->as_variable(); + + if (var && var->data.mode == ir_var_shader_out && + var_counts_against_varying_limit(producer->Stage, var)) { + /* outputs for fragment shader can't be doubles */ + output_vectors += var->type->count_attribute_slots(false); + } + } + + assert(producer->Stage != MESA_SHADER_FRAGMENT); + unsigned max_output_components = + ctx->Const.Program[producer->Stage].MaxOutputComponents; + + const unsigned output_components = output_vectors * 4; + if (output_components > max_output_components) { + if (ctx->API == API_OPENGLES2 || prog->IsES) + linker_error(prog, "%s shader uses too many output vectors " + "(%u > %u)\n", + _mesa_shader_stage_to_string(producer->Stage), + output_vectors, + max_output_components / 4); + else + linker_error(prog, "%s shader uses too many output components " + "(%u > %u)\n", + _mesa_shader_stage_to_string(producer->Stage), + output_components, + max_output_components); + + return false; + } + + return true; +} + +bool +check_against_input_limit(struct gl_context *ctx, + struct gl_shader_program *prog, + gl_shader *consumer) +{ + unsigned input_vectors = 0; + + foreach_in_list(ir_instruction, node, consumer->ir) { + ir_variable *const var = node->as_variable(); + + if (var && var->data.mode == ir_var_shader_in && + 
var_counts_against_varying_limit(consumer->Stage, var)) { + /* vertex inputs aren't varying counted */ + input_vectors += var->type->count_attribute_slots(false); + } + } + + assert(consumer->Stage != MESA_SHADER_VERTEX); + unsigned max_input_components = + ctx->Const.Program[consumer->Stage].MaxInputComponents; + + const unsigned input_components = input_vectors * 4; + if (input_components > max_input_components) { + if (ctx->API == API_OPENGLES2 || prog->IsES) + linker_error(prog, "%s shader uses too many input vectors " + "(%u > %u)\n", + _mesa_shader_stage_to_string(consumer->Stage), + input_vectors, + max_input_components / 4); + else + linker_error(prog, "%s shader uses too many input components " + "(%u > %u)\n", + _mesa_shader_stage_to_string(consumer->Stage), + input_components, + max_input_components); + + return false; + } + + return true; +} diff --git a/src/compiler/glsl/link_varyings.h b/src/compiler/glsl/link_varyings.h new file mode 100644 index 0000000..b2812614 --- /dev/null +++ b/src/compiler/glsl/link_varyings.h @@ -0,0 +1,299 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
/**
 * Data structure describing a varying which is available for use in transform
 * feedback.
 *
 * For example, if the vertex shader contains:
 *
 *     struct S {
 *       vec4 foo;
 *       float[3] bar;
 *     };
 *
 *     varying S[2] v;
 *
 * Then there would be tfeedback_candidate objects corresponding to the
 * following varyings:
 *
 *     v[0].foo
 *     v[0].bar
 *     v[1].foo
 *     v[1].bar
 *
 * Instances are created by the linker's candidate generator, which stores
 * them in a hash table keyed by the varying name.
 */
struct tfeedback_candidate
{
   /**
    * Toplevel variable containing this varying.  In the above example, this
    * would point to the declaration of the varying v.
    */
   ir_variable *toplevel_var;

   /**
    * Type of this varying.  In the above example, this would point to the
    * glsl_type for "vec4" or "float[3]".
    */
   const glsl_type *type;

   /**
    * Offset within the toplevel variable where this varying occurs (counted
    * in multiples of the size of a float).  It is the sum of the component
    * slots of the fields of the same toplevel variable visited before this
    * one.
    */
   unsigned offset;
};
/**
 * Data structure tracking information about a transform feedback declaration
 * during linking.
 *
 * One object corresponds to one string supplied to
 * glTransformFeedbackVaryings().  Besides real varyings, a declaration can
 * be a buffer separator or a skip-components entry (see is_varying()).
 */
class tfeedback_decl
{
public:
   void init(struct gl_context *ctx, const void *mem_ctx, const char *input);
   static bool is_same(const tfeedback_decl &x, const tfeedback_decl &y);
   bool assign_location(struct gl_context *ctx,
                        struct gl_shader_program *prog);
   unsigned get_num_outputs() const;
   bool store(struct gl_context *ctx, struct gl_shader_program *prog,
              struct gl_transform_feedback_info *info, unsigned buffer,
              const unsigned max_outputs) const;
   const tfeedback_candidate *find_candidate(gl_shader_program *prog,
                                             hash_table *tfeedback_candidates);

   /** Whether this declaration is the gl_NextBuffer separator. */
   bool is_next_buffer_separator() const
   {
      return this->next_buffer_separator;
   }

   /**
    * Whether this declaration names an actual varying, i.e. it is neither a
    * buffer separator nor a skip-components entry.
    */
   bool is_varying() const
   {
      return !this->next_buffer_separator && !this->skip_components;
   }

   /** The name exactly as it was supplied by the application. */
   const char *name() const
   {
      return this->orig_name;
   }

   /** Stream this varying is assigned to. */
   unsigned get_stream_id() const
   {
      return this->stream_id;
   }

   /**
    * The total number of varying components taken up by this variable.  Only
    * valid if assign_location() has been called.
    */
   unsigned num_components() const
   {
      /* A lowered built-in array contributes exactly `size` components. */
      if (this->lowered_builtin_array_variable)
         return this->size;
      else
         /* Double-precision types count as two components per element. */
         return this->vector_elements * this->matrix_columns * this->size *
            (this->is_double() ? 2 : 1);
   }

   /** Location assigned by the linker (see \c location). */
   unsigned get_location() const {
      return this->location;
   }

private:

   /** Whether \c type is one of the double-precision GL types. */
   bool is_double() const
   {
      switch (this->type) {
      case GL_DOUBLE:
      case GL_DOUBLE_VEC2:
      case GL_DOUBLE_VEC3:
      case GL_DOUBLE_VEC4:
      case GL_DOUBLE_MAT2:
      case GL_DOUBLE_MAT2x3:
      case GL_DOUBLE_MAT2x4:
      case GL_DOUBLE_MAT3:
      case GL_DOUBLE_MAT3x2:
      case GL_DOUBLE_MAT3x4:
      case GL_DOUBLE_MAT4:
      case GL_DOUBLE_MAT4x2:
      case GL_DOUBLE_MAT4x3:
         return true;
      default:
         return false;
      }
   }

   /**
    * The name that was supplied to glTransformFeedbackVaryings.  Used for
    * error reporting and glGetTransformFeedbackVarying().
    */
   const char *orig_name;

   /**
    * The name of the variable, parsed from orig_name.
    */
   const char *var_name;

   /**
    * True if the declaration in orig_name represents an array.
    */
   bool is_subscripted;

   /**
    * If is_subscripted is true, the subscript that was specified in orig_name.
    */
   unsigned array_subscript;

   /**
    * Non-zero if the variable is gl_ClipDistance, gl_TessLevelOuter or
    * gl_TessLevelInner and the driver lowers it to gl_*MESA.
    */
   enum {
      none,
      clip_distance,
      tess_level_outer,
      tess_level_inner,
   } lowered_builtin_array_variable;

   /**
    * The vertex shader output location that the linker assigned for this
    * variable.  -1 if a location hasn't been assigned yet.
    */
   int location;

   /**
    * If non-zero, then this variable may be packed along with other variables
    * into a single varying slot, so this offset should be applied when
    * accessing components.  For example, an offset of 1 means that the x
    * component of this variable is actually stored in component y of the
    * location specified by \c location.
    *
    * Only valid if location != -1.
    */
   unsigned location_frac;

   /**
    * If location != -1, the number of vector elements in this variable, or 1
    * if this variable is a scalar.
    */
   unsigned vector_elements;

   /**
    * If location != -1, the number of matrix columns in this variable, or 1
    * if this variable is not a matrix.
    */
   unsigned matrix_columns;

   /** Type of the varying returned by glGetTransformFeedbackVarying() */
   GLenum type;

   /**
    * If location != -1, the size that should be returned by
    * glGetTransformFeedbackVarying().
    */
   unsigned size;

   /**
    * How many components to skip. If non-zero, this is
    * gl_SkipComponents{1,2,3,4} from ARB_transform_feedback3.
    */
   unsigned skip_components;

   /**
    * Whether this is gl_NextBuffer from ARB_transform_feedback3.
    */
   bool next_buffer_separator;

   /**
    * If find_candidate() has been called, pointer to the tfeedback_candidate
    * data structure that was found.  Otherwise NULL.
    */
   const tfeedback_candidate *matched_candidate;

   /**
    * StreamId assigned to this varying (defaults to 0). Can only be set to
    * values other than 0 in geometry shaders that use the stream layout
    * modifier. Accepted values must be in the range [0, MAX_VERTEX_STREAMS-1].
    */
   unsigned stream_id;
};
whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file linker.cpp + * GLSL linker implementation + * + * Given a set of shaders that are to be linked to generate a final program, + * there are three distinct stages. + * + * In the first stage shaders are partitioned into groups based on the shader + * type. All shaders of a particular type (e.g., vertex shaders) are linked + * together. + * + * - Undefined references in each shader are resolved to definitions in + * another shader. + * - Types and qualifiers of uniforms, outputs, and global variables defined + * in multiple shaders with the same name are verified to be the same. + * - Initializers for uniforms and global variables defined + * in multiple shaders with the same name are verified to be the same. + * + * The result, in the terminology of the GLSL spec, is a set of shader + * executables for each processing unit. + * + * After the first stage is complete, a series of semantic checks are performed + * on each of the shader executables. + * + * - Each shader executable must define a \c main function. + * - Each vertex shader executable must write to \c gl_Position. + * - Each fragment shader executable must write to either \c gl_FragData or + * \c gl_FragColor. 
+ * + * In the final stage individual shader executables are linked to create a + * complete executable. + * + * - Types of uniforms defined in multiple shader stages with the same name + * are verified to be the same. + * - Initializers for uniforms defined in multiple shader stages with the + * same name are verified to be the same. + * - Types and qualifiers of outputs defined in one stage are verified to + * be the same as the types and qualifiers of inputs defined with the same + * name in a later stage. + * + * \author Ian Romanick <ian.d.romanick@intel.com> + */ + +#include <ctype.h> +#include "util/strndup.h" +#include "main/core.h" +#include "glsl_symbol_table.h" +#include "glsl_parser_extras.h" +#include "ir.h" +#include "program.h" +#include "program/hash_table.h" +#include "linker.h" +#include "link_varyings.h" +#include "ir_optimization.h" +#include "ir_rvalue_visitor.h" +#include "ir_uniform.h" + +#include "main/shaderobj.h" +#include "main/enums.h" + + +void linker_error(gl_shader_program *, const char *, ...); + +namespace { + +/** + * Visitor that determines whether or not a variable is ever written. 
+ */ +class find_assignment_visitor : public ir_hierarchical_visitor { +public: + find_assignment_visitor(const char *name) + : name(name), found(false) + { + /* empty */ + } + + virtual ir_visitor_status visit_enter(ir_assignment *ir) + { + ir_variable *const var = ir->lhs->variable_referenced(); + + if (strcmp(name, var->name) == 0) { + found = true; + return visit_stop; + } + + return visit_continue_with_parent; + } + + virtual ir_visitor_status visit_enter(ir_call *ir) + { + foreach_two_lists(formal_node, &ir->callee->parameters, + actual_node, &ir->actual_parameters) { + ir_rvalue *param_rval = (ir_rvalue *) actual_node; + ir_variable *sig_param = (ir_variable *) formal_node; + /* An actual parameter bound to an out or inout formal counts as a write. */ + if (sig_param->data.mode == ir_var_function_out || + sig_param->data.mode == ir_var_function_inout) { + ir_variable *var = param_rval->variable_referenced(); + if (var && strcmp(name, var->name) == 0) { + found = true; + return visit_stop; + } + } + } + /* The variable that receives the call's return value counts as well. */ + if (ir->return_deref != NULL) { + ir_variable *const var = ir->return_deref->variable_referenced(); + + if (strcmp(name, var->name) == 0) { + found = true; + return visit_stop; + } + } + + return visit_continue_with_parent; + } + + bool variable_found() + { + return found; + } + +private: + const char *name; /**< Find writes to a variable with this name. */ + bool found; /**< Was a write to the variable found? */ +}; + + +/** + * Visitor that determines whether or not a variable is ever read. + * + * Every ir_dereference_variable node that is visited and whose variable has + * the requested name counts as a match; the walk stops at the first one. + */ +class find_deref_visitor : public ir_hierarchical_visitor { +public: + find_deref_visitor(const char *name) + : name(name), found(false) + { + /* empty */ + } + + virtual ir_visitor_status visit(ir_dereference_variable *ir) + { + if (strcmp(this->name, ir->var->name) == 0) { + this->found = true; + return visit_stop; + } + + return visit_continue; + } + + bool variable_found() const + { + return this->found; + } + +private: + const char *name; /**< Find dereferences of a variable with this name. 
*/ + bool found; /**< Was a dereference of the variable found? */ +}; + +/** + * Visitor that resizes every array-typed shader input to \c num_vertices + * elements and retypes the dereferences that refer to it. + * + * A link error is reported (and the variable is left unchanged) when the + * array was declared with a different non-zero size, or when its + * max_array_access is not below \c num_vertices. + */ +class geom_array_resize_visitor : public ir_hierarchical_visitor { +public: + unsigned num_vertices; + gl_shader_program *prog; + + geom_array_resize_visitor(unsigned num_vertices, gl_shader_program *prog) + { + this->num_vertices = num_vertices; + this->prog = prog; + } + + virtual ~geom_array_resize_visitor() + { + /* empty */ + } + + virtual ir_visitor_status visit(ir_variable *var) + { + if (!var->type->is_array() || var->data.mode != ir_var_shader_in) + return visit_continue; + + unsigned size = var->type->length; + + /* Generate a link error if the shader has declared this array with an + * incorrect size. A length of zero is not treated as a mismatch. + */ + if (size && size != this->num_vertices) { + linker_error(this->prog, "size of array %s declared as %u, " + "but number of input vertices is %u\n", + var->name, size, this->num_vertices); + return visit_continue; + } + + /* Generate a link error if the shader attempts to access an input + * array using an index too large for its actual size assigned at link + * time. + */ + if (var->data.max_array_access >= this->num_vertices) { + linker_error(this->prog, "geometry shader accesses element %i of " + "%s, but only %i input vertices\n", + var->data.max_array_access, var->name, this->num_vertices); + return visit_continue; + } + + var->type = glsl_type::get_array_instance(var->type->fields.array, + this->num_vertices); + var->data.max_array_access = this->num_vertices - 1; + + return visit_continue; + } + + /* Dereferences of input variables need to be updated so that their type + * matches the newly assigned type of the variable they are accessing. */ + virtual ir_visitor_status visit(ir_dereference_variable *ir) + { + ir->type = ir->var->type; + return visit_continue; + } + + /* Dereferences of 2D input arrays need to be updated so that their type + * matches the newly assigned type of the array they are accessing. 
*/ + virtual ir_visitor_status visit_leave(ir_dereference_array *ir) + { + const glsl_type *const vt = ir->array->type; + if (vt->is_array()) + ir->type = vt->fields.array; + return visit_continue; + } +}; +/** + * Visitor that resizes every array-typed shader input that is not marked + * \c patch to \c num_vertices elements and retypes the dereferences that + * refer to it. No size or access checks are performed by this visitor. + */ +class tess_eval_array_resize_visitor : public ir_hierarchical_visitor { +public: + unsigned num_vertices; + gl_shader_program *prog; + + tess_eval_array_resize_visitor(unsigned num_vertices, gl_shader_program *prog) + { + this->num_vertices = num_vertices; + this->prog = prog; + } + + virtual ~tess_eval_array_resize_visitor() + { + /* empty */ + } + + virtual ir_visitor_status visit(ir_variable *var) + { + if (!var->type->is_array() || var->data.mode != ir_var_shader_in || var->data.patch) + return visit_continue; + /* Give the input num_vertices elements and set max_array_access to the last index. */ + var->type = glsl_type::get_array_instance(var->type->fields.array, + this->num_vertices); + var->data.max_array_access = this->num_vertices - 1; + + return visit_continue; + } + + /* Dereferences of input variables need to be updated so that their type + * matches the newly assigned type of the variable they are accessing. */ + virtual ir_visitor_status visit(ir_dereference_variable *ir) + { + ir->type = ir->var->type; + return visit_continue; + } + + /* Dereferences of 2D input arrays need to be updated so that their type + * matches the newly assigned type of the array they are accessing. 
*/ + virtual ir_visitor_status visit_leave(ir_dereference_array *ir) + { + const glsl_type *const vt = ir->array->type; + if (vt->is_array()) + ir->type = vt->fields.array; + return visit_continue; + } +}; +/** + * Visitor that reports a link error for any call to the builtin barrier() + * that is made outside of main, after a return statement, or inside an + * if or loop. + * + * The visitor tracks three pieces of state while walking the IR: whether + * the function named "main" is being visited, whether an ir_return has + * been left in the current function, and how many ir_if / ir_loop nodes + * are currently open. + */ +class barrier_use_visitor : public ir_hierarchical_visitor { +public: + barrier_use_visitor(gl_shader_program *prog) + : prog(prog), in_main(false), after_return(false), control_flow(0) + { + } + + virtual ~barrier_use_visitor() + { + /* empty */ + } + + virtual ir_visitor_status visit_enter(ir_function *ir) + { + if (strcmp(ir->name, "main") == 0) + in_main = true; + + return visit_continue; + } + + virtual ir_visitor_status visit_leave(ir_function *) + { + in_main = false; + after_return = false; + return visit_continue; + } + + virtual ir_visitor_status visit_leave(ir_return *) + { + after_return = true; + return visit_continue; + } + + virtual ir_visitor_status visit_enter(ir_if *) + { + ++control_flow; + return visit_continue; + } + + virtual ir_visitor_status visit_leave(ir_if *) + { + --control_flow; + return visit_continue; + } + + virtual ir_visitor_status visit_enter(ir_loop *) + { + ++control_flow; + return visit_continue; + } + + virtual ir_visitor_status visit_leave(ir_loop *) + { + --control_flow; + return visit_continue; + } + + /* FINISHME: `switch` is not expressed at the IR level -- it's already + * been lowered to a mess of `if`s. We'll correctly disallow any use of + * barrier() in a conditional path within the switch, but not in a path + * which is always hit. 
+ */ + + virtual ir_visitor_status visit_enter(ir_call *ir) + { + if (ir->use_builtin && strcmp(ir->callee_name(), "barrier") == 0) { + /* Use of barrier(); determine if it is legal. The walk stops at the + * first violation, so at most one error is reported per run. + */ + if (!in_main) { + linker_error(prog, "Builtin barrier() may only be used in main"); + return visit_stop; + } + + if (after_return) { + linker_error(prog, "Builtin barrier() may not be used after return"); + return visit_stop; + } + + if (control_flow != 0) { + linker_error(prog, "Builtin barrier() may not be used inside control flow"); + return visit_stop; + } + } + return visit_continue; + } + +private: + gl_shader_program *prog; + bool in_main, after_return; + int control_flow; +}; + +/** + * Visitor that validates the stream ids used by a (geometry) shader when it + * emits vertices or ends primitives. + * + * The walk stops at the first stream id that is negative or greater than + * \c max_allowed and records it, together with whether it came from an + * ir_emit_vertex or an ir_end_primitive node. The visitor also records + * whether any non-zero stream is used and whether End{Stream}Primitive is + * ever called. + */ +class find_emit_vertex_visitor : public ir_hierarchical_visitor { +public: + find_emit_vertex_visitor(int max_allowed) + : max_stream_allowed(max_allowed), + invalid_stream_id(0), + invalid_stream_id_from_emit_vertex(false), + end_primitive_found(false), + uses_non_zero_stream(false) + { + /* empty */ + } + + virtual ir_visitor_status visit_leave(ir_emit_vertex *ir) + { + int stream_id = ir->stream_id(); + + if (stream_id < 0) { + invalid_stream_id = stream_id; + invalid_stream_id_from_emit_vertex = true; + return visit_stop; + } + + if (stream_id > max_stream_allowed) { + invalid_stream_id = stream_id; + invalid_stream_id_from_emit_vertex = true; + return visit_stop; + } + + if (stream_id != 0) + uses_non_zero_stream = true; + + return visit_continue; + } + + virtual ir_visitor_status visit_leave(ir_end_primitive *ir) + { + end_primitive_found = true; + + int stream_id = ir->stream_id(); + + if (stream_id < 0) { + invalid_stream_id = stream_id; + invalid_stream_id_from_emit_vertex = false; + return visit_stop; + } + + if (stream_id > max_stream_allowed) { + invalid_stream_id = stream_id; + invalid_stream_id_from_emit_vertex = 
false; + return visit_stop; + } + + if (stream_id != 0) + uses_non_zero_stream = true; + + return visit_continue; + } + + bool error() + { + return invalid_stream_id != 0; + } + + const char *error_func() + { + return invalid_stream_id_from_emit_vertex ? + "EmitStreamVertex" : "EndStreamPrimitive"; + } + + int error_stream() + { + return invalid_stream_id; + } + + bool uses_streams() + { + return uses_non_zero_stream; + } + + bool uses_end_primitive() + { + return end_primitive_found; + } + +private: + int max_stream_allowed; + int invalid_stream_id; + bool invalid_stream_id_from_emit_vertex; + bool end_primitive_found; + bool uses_non_zero_stream; +}; + +/* Visitor that finds array dereferences of variables whose type contains a + * sampler and checks whether any of them uses an index that is not a + * constant expression. The walk stops at the first such dereference. + */ +class dynamic_sampler_array_indexing_visitor : public ir_hierarchical_visitor +{ +public: + dynamic_sampler_array_indexing_visitor() : + dynamic_sampler_array_indexing(false) + { + } + + ir_visitor_status visit_enter(ir_dereference_array *ir) + { + if (!ir->variable_referenced()) + return visit_continue; + + if (!ir->variable_referenced()->type->contains_sampler()) + return visit_continue; + + if (!ir->array_index->constant_expression_value()) { + dynamic_sampler_array_indexing = true; + return visit_stop; + } + return visit_continue; + } + + bool uses_dynamic_sampler_array_indexing() + { + return dynamic_sampler_array_indexing; + } + +private: + bool dynamic_sampler_array_indexing; +}; + +} /* anonymous namespace */ +/** + * Record a link error: append "error: " followed by the formatted message + * to \c prog->InfoLog and set \c prog->LinkStatus to false. + */ +void +linker_error(gl_shader_program *prog, const char *fmt, ...) +{ + va_list ap; + + ralloc_strcat(&prog->InfoLog, "error: "); + va_start(ap, fmt); + ralloc_vasprintf_append(&prog->InfoLog, fmt, ap); + va_end(ap); + + prog->LinkStatus = false; +} + +/** + * Record a link warning: append "warning: " followed by the formatted + * message to \c prog->InfoLog. Unlike linker_error(), the link status is + * left untouched. + */ +void +linker_warning(gl_shader_program *prog, const char *fmt, ...) 
+{ + va_list ap; + + ralloc_strcat(&prog->InfoLog, "warning: "); + va_start(ap, fmt); + ralloc_vasprintf_append(&prog->InfoLog, fmt, ap); + va_end(ap); + +} + + +/** + * Given a string identifying a program resource, break it into a base name + * and an optional array index in square brackets. + * + * If an array index is present, \c out_base_name_end is set to point to the + * "[" that precedes the array index, and the array index itself is returned + * as a long. + * + * If no array index is present (or if the array index is negative or + * mal-formed), \c out_base_name_end, is set to point to the null terminator + * at the end of the input string, and -1 is returned. + * + * Only the final array index is parsed; if the string contains other array + * indices (or structure field accesses), they are left in the base name. + * + * No attempt is made to check that the base name is properly formed; + * typically the caller will look up the base name in a hash table, so + * ill-formed base names simply turn into hash table lookup failures. + */ +long +parse_program_resource_name(const GLchar *name, + const GLchar **out_base_name_end) +{ + /* Section 7.3.1 ("Program Interfaces") of the OpenGL 4.3 spec says: + * + * "When an integer array element or block instance number is part of + * the name string, it will be specified in decimal form without a "+" + * or "-" sign or any extra leading zeroes. Additionally, the name + * string will not include white space anywhere in the string." + */ + + const size_t len = strlen(name); + *out_base_name_end = name + len; + + if (len == 0 || name[len-1] != ']') + return -1; + + /* Walk backwards over the string looking for a non-digit character. This + * had better be the opening bracket for an array index. + * + * Initially, i specifies the location of the ']'. Since the string may + * contain only the ']' charcater, walk backwards very carefully. 
+ */ + unsigned i; + for (i = len - 1; (i > 0) && isdigit(name[i-1]); --i) + /* empty */ ; + + if ((i == 0) || name[i-1] != '[') + return -1; + + long array_index = strtol(&name[i], NULL, 10); + if (array_index < 0) + return -1; + + /* Check for leading zero */ + if (name[i] == '0' && name[i+1] != ']') + return -1; + + *out_base_name_end = name + (i - 1); + return array_index; +} + + +void +link_invalidate_variable_locations(exec_list *ir) +{ + foreach_in_list(ir_instruction, node, ir) { + ir_variable *const var = node->as_variable(); + + if (var == NULL) + continue; + + /* Only assign locations for variables that lack an explicit location. + * Explicit locations are set for all built-in variables, generic vertex + * shader inputs (via layout(location=...)), and generic fragment shader + * outputs (also via layout(location=...)). + */ + if (!var->data.explicit_location) { + var->data.location = -1; + var->data.location_frac = 0; + } + + /* ir_variable::is_unmatched_generic_inout is used by the linker while + * connecting outputs from one stage to inputs of the next stage. + */ + if (var->data.explicit_location && + var->data.location < VARYING_SLOT_VAR0) { + var->data.is_unmatched_generic_inout = 0; + } else { + var->data.is_unmatched_generic_inout = 1; + } + } +} + + +/** + * Set clip_distance_array_size based on the given shader. + * + * Also check for errors based on incorrect usage of gl_ClipVertex and + * gl_ClipDistance. + * + * Return false if an error was reported. + */ +static void +analyze_clip_usage(struct gl_shader_program *prog, + struct gl_shader *shader, + GLuint *clip_distance_array_size) +{ + *clip_distance_array_size = 0; + + if (!prog->IsES && prog->Version >= 130) { + /* From section 7.1 (Vertex Shader Special Variables) of the + * GLSL 1.30 spec: + * + * "It is an error for a shader to statically write both + * gl_ClipVertex and gl_ClipDistance." 
+ * + * This does not apply to GLSL ES shaders, since GLSL ES defines neither + * gl_ClipVertex nor gl_ClipDistance. + */ + find_assignment_visitor clip_vertex("gl_ClipVertex"); + find_assignment_visitor clip_distance("gl_ClipDistance"); + + clip_vertex.run(shader->ir); + clip_distance.run(shader->ir); + if (clip_vertex.variable_found() && clip_distance.variable_found()) { + linker_error(prog, "%s shader writes to both `gl_ClipVertex' " + "and `gl_ClipDistance'\n", + _mesa_shader_stage_to_string(shader->Stage)); + return; + } + /* gl_ClipDistance is written: report the length of its array type. */ + if (clip_distance.variable_found()) { + ir_variable *clip_distance_var = + shader->symbols->get_variable("gl_ClipDistance"); + + assert(clip_distance_var); + *clip_distance_array_size = clip_distance_var->type->length; + } + } +} + + +/** + * Verify that a vertex shader executable meets all semantic requirements. + * + * Also sets prog->Vert.ClipDistanceArraySize as a side effect. + * + * \param prog Program being linked; its GLSL version and ES flag select + * the checks, and it receives any error or warning. + * \param shader Vertex shader executable to be verified. A NULL shader is + * accepted and ignored. + */ +void +validate_vertex_shader_executable(struct gl_shader_program *prog, + struct gl_shader *shader) +{ + if (shader == NULL) + return; + + /* From the GLSL 1.10 spec, page 48: + * + * "The variable gl_Position is available only in the vertex + * language and is intended for writing the homogeneous vertex + * position. All executions of a well-formed vertex shader + * executable must write a value into this variable. [...] The + * variable gl_Position is available only in the vertex + * language and is intended for writing the homogeneous vertex + * position. All executions of a well-formed vertex shader + * executable must write a value into this variable." + * + * while in GLSL 1.40 this text is changed to: + * + * "The variable gl_Position is available only in the vertex + * language and is intended for writing the homogeneous vertex + * position. It can be written at any time during shader + * execution. It may also be read back by a vertex shader + * after being written. 
This value will be used by primitive + * assembly, clipping, culling, and other fixed functionality + * operations, if present, that operate on primitives after + * vertex processing has occurred. Its value is undefined if + * the vertex shader executable does not write gl_Position." + * + * All GLSL ES Versions are similar to GLSL 1.40--failing to write to + * gl_Position is not an error. + */ + if (prog->Version < (prog->IsES ? 300 : 140)) { + find_assignment_visitor find("gl_Position"); + find.run(shader->ir); + if (!find.variable_found()) { + if (prog->IsES) { + linker_warning(prog, + "vertex shader does not write to `gl_Position'." + "It's value is undefined. \n"); + } else { + linker_error(prog, + "vertex shader does not write to `gl_Position'. \n"); + } + return; + } + } + + analyze_clip_usage(prog, shader, &prog->Vert.ClipDistanceArraySize); +} + +void +validate_tess_eval_shader_executable(struct gl_shader_program *prog, + struct gl_shader *shader) +{ + if (shader == NULL) + return; + + analyze_clip_usage(prog, shader, &prog->TessEval.ClipDistanceArraySize); +} + + +/** + * Verify that a fragment shader executable meets all semantic requirements + * + * \param shader Fragment shader executable to be verified + */ +void +validate_fragment_shader_executable(struct gl_shader_program *prog, + struct gl_shader *shader) +{ + if (shader == NULL) + return; + + find_assignment_visitor frag_color("gl_FragColor"); + find_assignment_visitor frag_data("gl_FragData"); + + frag_color.run(shader->ir); + frag_data.run(shader->ir); + + if (frag_color.variable_found() && frag_data.variable_found()) { + linker_error(prog, "fragment shader writes to both " + "`gl_FragColor' and `gl_FragData'\n"); + } +} + +/** + * Verify that a geometry shader executable meets all semantic requirements + * + * Also sets prog->Geom.VerticesIn, and prog->Geom.ClipDistanceArraySize as + * a side effect. 
+ * + * \param shader Geometry shader executable to be verified + */ +void +validate_geometry_shader_executable(struct gl_shader_program *prog, + struct gl_shader *shader) +{ + if (shader == NULL) + return; + + unsigned num_vertices = vertices_per_prim(prog->Geom.InputType); + prog->Geom.VerticesIn = num_vertices; + + analyze_clip_usage(prog, shader, &prog->Geom.ClipDistanceArraySize); +} + +/** + * Check if geometry shaders emit to non-zero streams and do corresponding + * validations. + * + * Nothing is done when the program has no linked geometry shader. Otherwise + * prog->Geom.UsesStreams and prog->Geom.UsesEndPrimitive are set as a side + * effect. + * + * \param ctx Context supplying Const.MaxVertexStreams; the accepted stream + * ids are [0, MaxVertexStreams - 1]. + * \param prog Program being linked; receives any error. + */ +static void +validate_geometry_shader_emissions(struct gl_context *ctx, + struct gl_shader_program *prog) +{ + if (prog->_LinkedShaders[MESA_SHADER_GEOMETRY] != NULL) { + find_emit_vertex_visitor emit_vertex(ctx->Const.MaxVertexStreams - 1); + emit_vertex.run(prog->_LinkedShaders[MESA_SHADER_GEOMETRY]->ir); + if (emit_vertex.error()) { + linker_error(prog, "Invalid call %s(%d). Accepted values for the " + "stream parameter are in the range [0, %d].\n", + emit_vertex.error_func(), + emit_vertex.error_stream(), + ctx->Const.MaxVertexStreams - 1); + } + prog->Geom.UsesStreams = emit_vertex.uses_streams(); + prog->Geom.UsesEndPrimitive = emit_vertex.uses_end_primitive(); + + /* From the ARB_gpu_shader5 spec: + * + * "Multiple vertex streams are supported only if the output primitive + * type is declared to be "points". A program will fail to link if it + * contains a geometry shader calling EmitStreamVertex() or + * EndStreamPrimitive() if its output primitive type is not "points"." + * + * However, in the same spec: + * + * "The function EmitVertex() is equivalent to calling EmitStreamVertex() + * with <stream> set to zero." + * + * And: + * + * "The function EndPrimitive() is equivalent to calling + * EndStreamPrimitive() with <stream> set to zero." + * + * Since we can call EmitVertex() and EndPrimitive() when we output + * primitives other than points, calling EmitStreamVertex(0) or + * EndStreamPrimitive(0) should not produce errors. This is also what Nvidia + * does. 
Currently we only set prog->Geom.UsesStreams to TRUE when + * EmitStreamVertex() or EndStreamPrimitive() are called with a non-zero + * stream. + */ + if (prog->Geom.UsesStreams && prog->Geom.OutputType != GL_POINTS) { + linker_error(prog, "EmitStreamVertex(n) and EndStreamPrimitive(n) " + "with n>0 requires point output\n"); + } + } +} +/** + * Decide whether two declarations of the same global have array types that + * may be treated as matching. + * + * \param prog Program being linked; receives any error. + * \param var Declaration currently being examined. + * \param existing Declaration seen earlier; its type is replaced by the type + * of \c var when only \c var is explicitly sized. + * + * \return true when the types are considered to match, false otherwise. A + * link error may be reported even when true is returned, namely when the + * explicit size does not exceed the other declaration's max_array_access. + */ +bool +validate_intrastage_arrays(struct gl_shader_program *prog, + ir_variable *const var, + ir_variable *const existing) +{ + /* Consider the types to be "the same" if both types are arrays + * of the same type and one of the arrays is implicitly sized. + * In addition, set the type of the linked variable to the + * explicitly sized array. + */ + if (var->type->is_array() && existing->type->is_array()) { + if ((var->type->fields.array == existing->type->fields.array) && + ((var->type->length == 0)|| (existing->type->length == 0))) { + if (var->type->length != 0) { + if (var->type->length <= existing->data.max_array_access) { + linker_error(prog, "%s `%s' declared as type " + "`%s' but outermost dimension has an index" + " of `%i'\n", + mode_string(var), + var->name, var->type->name, + existing->data.max_array_access); + } + existing->type = var->type; + return true; + } else if (existing->type->length != 0) { + if(existing->type->length <= var->data.max_array_access && + !existing->data.from_ssbo_unsized_array) { + linker_error(prog, "%s `%s' declared as type " + "`%s' but outermost dimension has an index" + " of `%i'\n", + mode_string(var), + var->name, existing->type->name, + var->data.max_array_access); + } + return true; + } + } else { + /* The arrays of structs could have different glsl_type pointers but + * they are actually the same type. Use record_compare() to check that. 
+ */ + if (existing->type->fields.array->is_record() && + var->type->fields.array->is_record() && + existing->type->fields.array->record_compare(var->type->fields.array)) + return true; + } + } + return false; +} + + +/** + * Perform validation of global variables used across multiple shaders + * + * The first declaration seen for each name is recorded in a local symbol + * table and every later declaration with that name is compared against it. + * Mismatches are reported with linker_error(); most of them also end the + * validation immediately. + * + * \param prog Program that receives the link errors. + * \param shader_list Shaders to examine; NULL entries are skipped. + * \param num_shaders Number of entries in \c shader_list. + * \param uniforms_only When true, only variables whose mode is + * \c ir_var_uniform or \c ir_var_shader_storage are + * validated. + */ +void +cross_validate_globals(struct gl_shader_program *prog, + struct gl_shader **shader_list, + unsigned num_shaders, + bool uniforms_only) +{ + /* Examine all of the global variables (or only the uniforms and shader + * storage variables, see uniforms_only) in all of the shaders and cross + * validate them. + */ + glsl_symbol_table variables; + for (unsigned i = 0; i < num_shaders; i++) { + if (shader_list[i] == NULL) + continue; + + foreach_in_list(ir_instruction, node, shader_list[i]->ir) { + ir_variable *const var = node->as_variable(); + + if (var == NULL) + continue; + + if (uniforms_only && (var->data.mode != ir_var_uniform && var->data.mode != ir_var_shader_storage)) + continue; + + /* don't cross validate subroutine uniforms */ + if (var->type->contains_subroutine()) + continue; + + /* Don't cross validate temporaries that are at global scope. These + * will eventually get pulled into the shaders 'main'. + */ + if (var->data.mode == ir_var_temporary) + continue; + + /* If a global with this name has already been seen, verify that the + * new instance has the same type. In addition, if the globals have + * initializers, the values of the initializers must be the same. + */ + ir_variable *const existing = variables.get_variable(var->name); + if (existing != NULL) { + /* Check if types match. Interface blocks have some special + * rules so we handle those elsewhere. 
+ */ + if (var->type != existing->type && + !var->is_interface_instance()) { + if (!validate_intrastage_arrays(prog, var, existing)) { + if (var->type->is_record() && existing->type->is_record() + && existing->type->record_compare(var->type)) { + existing->type = var->type; + } else { + /* If it is an unsized array in a Shader Storage Block, + * two different shaders can access different elements. + * Because of that, they might be converted to different + * sized arrays, so check that they are compatible but + * ignore the array size. + */ + if (!(var->data.mode == ir_var_shader_storage && + var->data.from_ssbo_unsized_array && + existing->data.mode == ir_var_shader_storage && + existing->data.from_ssbo_unsized_array && + var->type->gl_type == existing->type->gl_type)) { + linker_error(prog, "%s `%s' declared as type " + "`%s' and type `%s'\n", + mode_string(var), + var->name, var->type->name, + existing->type->name); + return; + } + } + } + } + + if (var->data.explicit_location) { + if (existing->data.explicit_location + && (var->data.location != existing->data.location)) { + linker_error(prog, "explicit locations for %s " + "`%s' have differing values\n", + mode_string(var), var->name); + return; + } + + existing->data.location = var->data.location; + existing->data.explicit_location = true; + } else { + /* Check if uniform with implicit location was marked explicit + * by earlier shader stage. If so, mark it explicit in this stage + * too to make sure later processing does not treat it as + * an implicit one. + */ + if (existing->data.explicit_location) { + var->data.location = existing->data.location; + var->data.explicit_location = true; + } + } + + /* From the GLSL 4.20 specification: + * "A link error will result if two compilation units in a program + * specify different integer-constant bindings for the same + * opaque-uniform name. 
However, it is not an error to specify a + * binding on some but not all declarations for the same name" + */ + if (var->data.explicit_binding) { + if (existing->data.explicit_binding && + var->data.binding != existing->data.binding) { + linker_error(prog, "explicit bindings for %s " + "`%s' have differing values\n", + mode_string(var), var->name); + return; + } + + existing->data.binding = var->data.binding; + existing->data.explicit_binding = true; + } + /* Declarations whose type contains an atomic must agree on their offset. */ + if (var->type->contains_atomic() && + var->data.offset != existing->data.offset) { + linker_error(prog, "offset specifications for %s " + "`%s' have differing values\n", + mode_string(var), var->name); + return; + } + + /* Validate layout qualifiers for gl_FragDepth. + * + * From the AMD/ARB_conservative_depth specs: + * + * "If gl_FragDepth is redeclared in any fragment shader in a + * program, it must be redeclared in all fragment shaders in + * that program that have static assignments to + * gl_FragDepth. All redeclarations of gl_FragDepth in all + * fragment shaders in a single program must have the same set + * of qualifiers." 
+ */ + if (strcmp(var->name, "gl_FragDepth") == 0) { + bool layout_declared = var->data.depth_layout != ir_depth_layout_none; + bool layout_differs = + var->data.depth_layout != existing->data.depth_layout; + + if (layout_declared && layout_differs) { + linker_error(prog, + "All redeclarations of gl_FragDepth in all " + "fragment shaders in a single program must have " + "the same set of qualifiers.\n"); + } + + if (var->data.used && layout_differs) { + linker_error(prog, + "If gl_FragDepth is redeclared with a layout " + "qualifier in any fragment shader, it must be " + "redeclared with the same layout qualifier in " + "all fragment shaders that have assignments to " + "gl_FragDepth\n"); + } + } + + /* Page 35 (page 41 of the PDF) of the GLSL 4.20 spec says: + * + * "If a shared global has multiple initializers, the + * initializers must all be constant expressions, and they + * must all have the same value. Otherwise, a link error will + * result. (A shared global having only one initializer does + * not require that initializer to be a constant expression.)" + * + * Previous to 4.20 the GLSL spec simply said that initializers + * must have the same value. In the case of non-constant + * initializers, this was impossible to determine. As a result, + * no vendor actually implemented that behavior. The 4.20 + * behavior matches the implemented behavior of at least one other + * vendor, so we'll implement that for all GLSL versions. + */ + if (var->constant_initializer != NULL) { + if (existing->constant_initializer != NULL) { + if (!var->constant_initializer->has_value(existing->constant_initializer)) { + linker_error(prog, "initializers for %s " + "`%s' have differing values\n", + mode_string(var), var->name); + return; + } + } else { + /* If the first-seen instance of a particular uniform did not + * have an initializer but a later instance does, copy the + * initializer to the version stored in the symbol table. + */ + /* FINISHME: This is wrong. 
The constant_initializer field should + * FINISHME: not be modified! Imagine a case where a shader + * FINISHME: without an initializer is linked in two different + * FINISHME: programs with shaders that have differing + * FINISHME: initializers. Linking with the first will + * FINISHME: modify the shader, and linking with the second + * FINISHME: will fail. + */ + existing->constant_initializer = + var->constant_initializer->clone(ralloc_parent(existing), + NULL); + } + } + + if (var->data.has_initializer) { + if (existing->data.has_initializer + && (var->constant_initializer == NULL + || existing->constant_initializer == NULL)) { + linker_error(prog, + "shared global variable `%s' has multiple " + "non-constant initializers.\n", + var->name); + return; + } + + /* Some instance had an initializer, so keep track of that. In + * this location, all sorts of initializers (constant or + * otherwise) will propagate the existence to the variable + * stored in the symbol table. + */ + existing->data.has_initializer = true; + } + + if (existing->data.invariant != var->data.invariant) { + linker_error(prog, "declarations for %s `%s' have " + "mismatching invariant qualifiers\n", + mode_string(var), var->name); + return; + } + if (existing->data.centroid != var->data.centroid) { + linker_error(prog, "declarations for %s `%s' have " + "mismatching centroid qualifiers\n", + mode_string(var), var->name); + return; + } + if (existing->data.sample != var->data.sample) { + linker_error(prog, "declarations for %s `%s` have " + "mismatching sample qualifiers\n", + mode_string(var), var->name); + return; + } + if (existing->data.image_format != var->data.image_format) { + linker_error(prog, "declarations for %s `%s` have " + "mismatching image format qualifiers\n", + mode_string(var), var->name); + return; + } + } else + variables.add_variable(var); + } + } +} + + +/** + * Perform validation of uniforms used across multiple shader stages + * + * This runs cross_validate_globals() over all of the program's linked + * shader stages with \c uniforms_only set. + */ +void +cross_validate_uniforms(struct 
gl_shader_program *prog) +{ + cross_validate_globals(prog, prog->_LinkedShaders, + MESA_SHADER_STAGES, true); +} + +/** + * Accumulates the array of prog->BufferInterfaceBlocks and checks that all + * definitions of blocks agree on their contents. + * + * For every stage, prog->InterfaceBlockStageIndex[stage] is allocated and + * filled with -1, then the entry for each block used by that stage is set to + * the block's index within the stage. + * + * \return false, after reporting a link error, as soon as + * link_cross_validate_uniform_block() returns -1 for a block; true otherwise. + */ +static bool +interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog) +{ + unsigned max_num_uniform_blocks = 0; + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + if (prog->_LinkedShaders[i]) + max_num_uniform_blocks += prog->_LinkedShaders[i]->NumBufferInterfaceBlocks; + } + + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + struct gl_shader *sh = prog->_LinkedShaders[i]; + + prog->InterfaceBlockStageIndex[i] = ralloc_array(prog, int, + max_num_uniform_blocks); + for (unsigned int j = 0; j < max_num_uniform_blocks; j++) + prog->InterfaceBlockStageIndex[i][j] = -1; + + if (sh == NULL) + continue; + + for (unsigned int j = 0; j < sh->NumBufferInterfaceBlocks; j++) { + int index = link_cross_validate_uniform_block(prog, + &prog->BufferInterfaceBlocks, + &prog->NumBufferInterfaceBlocks, + &sh->BufferInterfaceBlocks[j]); + + if (index == -1) { + linker_error(prog, "uniform block `%s' has mismatching definitions\n", + sh->BufferInterfaceBlocks[j].Name); + return false; + } + + prog->InterfaceBlockStageIndex[i][index] = j; + } + } + + return true; +} + + +/** + * Populates a shader's symbol table with all global declarations + * + * A fresh symbol table is created for \c sh; every function and every + * variable that is not a temporary in the shader's instruction list is + * added to it. + */ +static void +populate_symbol_table(gl_shader *sh) +{ + sh->symbols = new(sh) glsl_symbol_table; + + foreach_in_list(ir_instruction, inst, sh->ir) { + ir_variable *var; + ir_function *func; + + if ((func = inst->as_function()) != NULL) { + sh->symbols->add_function(func); + } else if ((var = inst->as_variable()) != NULL) { + if (var->data.mode != ir_var_temporary) + sh->symbols->add_variable(var); + } + } +} + + +/** + * Remap variables referenced in an instruction tree + * + * This is used when instruction trees are cloned from one shader and placed in + * another. 
These trees will contain references to \c ir_variable nodes that + * do not exist in the target shader. This function finds these \c ir_variable + * references and replaces the references with matching variables in the target + * shader. + * + * If there is no matching variable in the target shader, a clone of the + * \c ir_variable is made and added to the target shader. The new variable is + * added to \b both the instruction stream and the symbol table. + * + * References to temporaries are not looked up by name; they are replaced by + * the variable that \c temps associates with the original temporary, which + * must exist. + * + * \param inst IR tree that is to be processed. + * \param target Linked shader whose symbol table is searched for matching + * variables and whose symbol table and instruction stream + * receive newly cloned variables. + * \param temps Hash table that is searched with the original temporary + * variable to find its replacement. + */ +void +remap_variables(ir_instruction *inst, struct gl_shader *target, + hash_table *temps) +{ + class remap_visitor : public ir_hierarchical_visitor { + public: + remap_visitor(struct gl_shader *target, + hash_table *temps) + { + this->target = target; + this->symbols = target->symbols; + this->instructions = target->ir; + this->temps = temps; + } + + virtual ir_visitor_status visit(ir_dereference_variable *ir) + { + if (ir->var->data.mode == ir_var_temporary) { + ir_variable *var = (ir_variable *) hash_table_find(temps, ir->var); + + assert(var != NULL); + ir->var = var; + return visit_continue; + } + + ir_variable *const existing = + this->symbols->get_variable(ir->var->name); + if (existing != NULL) + ir->var = existing; + else { + ir_variable *copy = ir->var->clone(this->target, NULL); + + this->symbols->add_variable(copy); + this->instructions->push_head(copy); + ir->var = copy; + } + + return visit_continue; + } + + private: + struct gl_shader *target; + glsl_symbol_table *symbols; + exec_list *instructions; + hash_table *temps; + }; + + remap_visitor v(target, temps); + + inst->accept(&v); +} + + +/** + * Move non-declarations from one instruction stream to another + * + * The intended usage pattern of this function is to pass the pointer to the + * head sentinel of 
a list (i.e., a pointer to the list cast to an \c exec_node + * pointer) for \c last and \c false for \c make_copies on the first + * call. Successive calls pass the return value of the previous call for + * \c last and \c true for \c make_copies. + * + * Functions and non-temporary variable declarations are left in place; + * everything else is moved or copied. + * + * \param instructions Source instruction stream + * \param last Instruction after which new instructions should be + * inserted in the target instruction stream + * \param make_copies Flag selecting whether instructions in \c instructions + * should be copied (via \c ir_instruction::clone) into the + * target list or moved. + * \param target Shader handed to \c ir_instruction::clone and to + * remap_variables() for the copies; only used when + * \c make_copies is true. + * + * \return + * The new "last" instruction in the target instruction stream. This pointer + * is suitable for use as the \c last parameter of a later call to this + * function. + */ +exec_node * +move_non_declarations(exec_list *instructions, exec_node *last, + bool make_copies, gl_shader *target) +{ + hash_table *temps = NULL; + + if (make_copies) + temps = hash_table_ctor(0, hash_table_pointer_hash, + hash_table_pointer_compare); + + foreach_in_list_safe(ir_instruction, inst, instructions) { + if (inst->as_function()) + continue; + + ir_variable *var = inst->as_variable(); + if ((var != NULL) && (var->data.mode != ir_var_temporary)) + continue; + + assert(inst->as_assignment() + || inst->as_call() + || inst->as_if() /* for initializers with the ?: operator */ + || ((var != NULL) && (var->data.mode == ir_var_temporary))); + + if (make_copies) { + inst = inst->clone(target, NULL); + + if (var != NULL) + hash_table_insert(temps, inst, var); + else + remap_variables(inst, target, temps); + } else { + inst->remove(); + } + + last->insert_after(inst); + last = inst; + } + + if (make_copies) + hash_table_dtor(temps); + + return last; +} + + +/** + * This class is only used in link_intrastage_shaders() below but declaring + * it inside that function leads to compiler warnings with some versions of + * gcc. 
+ */ +class array_sizing_visitor : public ir_hierarchical_visitor { +public: + array_sizing_visitor() + : mem_ctx(ralloc_context(NULL)), + unnamed_interfaces(hash_table_ctor(0, hash_table_pointer_hash, + hash_table_pointer_compare)) + { + } + + ~array_sizing_visitor() + { + hash_table_dtor(this->unnamed_interfaces); + ralloc_free(this->mem_ctx); + } + + virtual ir_visitor_status visit(ir_variable *var) + { + const glsl_type *type_without_array; + fixup_type(&var->type, var->data.max_array_access, + var->data.from_ssbo_unsized_array); + type_without_array = var->type->without_array(); + if (var->type->is_interface()) { + if (interface_contains_unsized_arrays(var->type)) { + const glsl_type *new_type = + resize_interface_members(var->type, + var->get_max_ifc_array_access(), + var->is_in_shader_storage_block()); + var->type = new_type; + var->change_interface_type(new_type); + } + } else if (type_without_array->is_interface()) { + if (interface_contains_unsized_arrays(type_without_array)) { + const glsl_type *new_type = + resize_interface_members(type_without_array, + var->get_max_ifc_array_access(), + var->is_in_shader_storage_block()); + var->change_interface_type(new_type); + var->type = update_interface_members_array(var->type, new_type); + } + } else if (const glsl_type *ifc_type = var->get_interface_type()) { + /* Store a pointer to the variable in the unnamed_interfaces + * hashtable. 
+ */ + ir_variable **interface_vars = (ir_variable **) + hash_table_find(this->unnamed_interfaces, ifc_type); + if (interface_vars == NULL) { + interface_vars = rzalloc_array(mem_ctx, ir_variable *, + ifc_type->length); + hash_table_insert(this->unnamed_interfaces, interface_vars, + ifc_type); + } + unsigned index = ifc_type->field_index(var->name); + assert(index < ifc_type->length); + assert(interface_vars[index] == NULL); + interface_vars[index] = var; + } + return visit_continue; + } + + /** + * For each unnamed interface block that was discovered while running the + * visitor, adjust the interface type to reflect the newly assigned array + * sizes, and fix up the ir_variable nodes to point to the new interface + * type. + */ + void fixup_unnamed_interface_types() + { + hash_table_call_foreach(this->unnamed_interfaces, + fixup_unnamed_interface_type, NULL); + } + +private: + /** + * If the type pointed to by \c type represents an unsized array, replace + * it with a sized array whose size is determined by max_array_access. + */ + static void fixup_type(const glsl_type **type, unsigned max_array_access, + bool from_ssbo_unsized_array) + { + if (!from_ssbo_unsized_array && (*type)->is_unsized_array()) { + *type = glsl_type::get_array_instance((*type)->fields.array, + max_array_access + 1); + assert(*type != NULL); + } + } + + static const glsl_type * + update_interface_members_array(const glsl_type *type, + const glsl_type *new_interface_type) + { + const glsl_type *element_type = type->fields.array; + if (element_type->is_array()) { + const glsl_type *new_array_type = + update_interface_members_array(element_type, new_interface_type); + return glsl_type::get_array_instance(new_array_type, type->length); + } else { + return glsl_type::get_array_instance(new_interface_type, + type->length); + } + } + + /** + * Determine whether the given interface type contains unsized arrays (if + * it doesn't, array_sizing_visitor doesn't need to process it). 
+ */ + static bool interface_contains_unsized_arrays(const glsl_type *type) + { + for (unsigned i = 0; i < type->length; i++) { + const glsl_type *elem_type = type->fields.structure[i].type; + if (elem_type->is_unsized_array()) + return true; + } + return false; + } + + /** + * Create a new interface type based on the given type, with unsized arrays + * replaced by sized arrays whose size is determined by + * max_ifc_array_access. + */ + static const glsl_type * + resize_interface_members(const glsl_type *type, + const unsigned *max_ifc_array_access, + bool is_ssbo) + { + unsigned num_fields = type->length; + glsl_struct_field *fields = new glsl_struct_field[num_fields]; + memcpy(fields, type->fields.structure, + num_fields * sizeof(*fields)); + for (unsigned i = 0; i < num_fields; i++) { + /* If SSBO last member is unsized array, we don't replace it by a sized + * array. + */ + if (is_ssbo && i == (num_fields - 1)) + fixup_type(&fields[i].type, max_ifc_array_access[i], + true); + else + fixup_type(&fields[i].type, max_ifc_array_access[i], + false); + } + glsl_interface_packing packing = + (glsl_interface_packing) type->interface_packing; + const glsl_type *new_ifc_type = + glsl_type::get_interface_instance(fields, num_fields, + packing, type->name); + delete [] fields; + return new_ifc_type; + } + + static void fixup_unnamed_interface_type(const void *key, void *data, + void *) + { + const glsl_type *ifc_type = (const glsl_type *) key; + ir_variable **interface_vars = (ir_variable **) data; + unsigned num_fields = ifc_type->length; + glsl_struct_field *fields = new glsl_struct_field[num_fields]; + memcpy(fields, ifc_type->fields.structure, + num_fields * sizeof(*fields)); + bool interface_type_changed = false; + for (unsigned i = 0; i < num_fields; i++) { + if (interface_vars[i] != NULL && + fields[i].type != interface_vars[i]->type) { + fields[i].type = interface_vars[i]->type; + interface_type_changed = true; + } + } + if (!interface_type_changed) { + delete [] 
fields; + return; + } + glsl_interface_packing packing = + (glsl_interface_packing) ifc_type->interface_packing; + const glsl_type *new_ifc_type = + glsl_type::get_interface_instance(fields, num_fields, packing, + ifc_type->name); + delete [] fields; + for (unsigned i = 0; i < num_fields; i++) { + if (interface_vars[i] != NULL) + interface_vars[i]->change_interface_type(new_ifc_type); + } + } + + /** + * Memory context used to allocate the data in \c unnamed_interfaces. + */ + void *mem_ctx; + + /** + * Hash table from const glsl_type * to an array of ir_variable *'s + * pointing to the ir_variables constituting each unnamed interface block. + */ + hash_table *unnamed_interfaces; +}; + + +/** + * Performs the cross-validation of tessellation control shader vertices and + * layout qualifiers for the attached tessellation control shaders, + * and propagates them to the linked TCS and linked shader program. + */ +static void +link_tcs_out_layout_qualifiers(struct gl_shader_program *prog, + struct gl_shader *linked_shader, + struct gl_shader **shader_list, + unsigned num_shaders) +{ + linked_shader->TessCtrl.VerticesOut = 0; + + if (linked_shader->Stage != MESA_SHADER_TESS_CTRL) + return; + + /* From the GLSL 4.0 spec (chapter 4.3.8.2): + * + * "All tessellation control shader layout declarations in a program + * must specify the same output patch vertex count. There must be at + * least one layout qualifier specifying an output patch vertex count + * in any program containing tessellation control shaders; however, + * such a declaration is not required in all tessellation control + * shaders." 
+ */ + + for (unsigned i = 0; i < num_shaders; i++) { + struct gl_shader *shader = shader_list[i]; + + if (shader->TessCtrl.VerticesOut != 0) { + if (linked_shader->TessCtrl.VerticesOut != 0 && + linked_shader->TessCtrl.VerticesOut != shader->TessCtrl.VerticesOut) { + linker_error(prog, "tessellation control shader defined with " + "conflicting output vertex count (%d and %d)\n", + linked_shader->TessCtrl.VerticesOut, + shader->TessCtrl.VerticesOut); + return; + } + linked_shader->TessCtrl.VerticesOut = shader->TessCtrl.VerticesOut; + } + } + + /* Just do the intrastage -> interstage propagation right now, + * since we already know we're in the right type of shader program + * for doing it. + */ + if (linked_shader->TessCtrl.VerticesOut == 0) { + linker_error(prog, "tessellation control shader didn't declare " + "vertices out layout qualifier\n"); + return; + } + prog->TessCtrl.VerticesOut = linked_shader->TessCtrl.VerticesOut; +} + + +/** + * Performs the cross-validation of tessellation evaluation shader + * primitive type, vertex spacing, ordering and point_mode layout qualifiers + * for the attached tessellation evaluation shaders, and propagates them + * to the linked TES and linked shader program. + */ +static void +link_tes_in_layout_qualifiers(struct gl_shader_program *prog, + struct gl_shader *linked_shader, + struct gl_shader **shader_list, + unsigned num_shaders) +{ + linked_shader->TessEval.PrimitiveMode = PRIM_UNKNOWN; + linked_shader->TessEval.Spacing = 0; + linked_shader->TessEval.VertexOrder = 0; + linked_shader->TessEval.PointMode = -1; + + if (linked_shader->Stage != MESA_SHADER_TESS_EVAL) + return; + + /* From the GLSL 4.0 spec (chapter 4.3.8.1): + * + * "At least one tessellation evaluation shader (compilation unit) in + * a program must declare a primitive mode in its input layout. + * Declaration vertex spacing, ordering, and point mode identifiers is + * optional. 
It is not required that all tessellation evaluation + * shaders in a program declare a primitive mode. If spacing or + * vertex ordering declarations are omitted, the tessellation + * primitive generator will use equal spacing or counter-clockwise + * vertex ordering, respectively. If a point mode declaration is + * omitted, the tessellation primitive generator will produce lines or + * triangles according to the primitive mode." + */ + + for (unsigned i = 0; i < num_shaders; i++) { + struct gl_shader *shader = shader_list[i]; + + if (shader->TessEval.PrimitiveMode != PRIM_UNKNOWN) { + if (linked_shader->TessEval.PrimitiveMode != PRIM_UNKNOWN && + linked_shader->TessEval.PrimitiveMode != shader->TessEval.PrimitiveMode) { + linker_error(prog, "tessellation evaluation shader defined with " + "conflicting input primitive modes.\n"); + return; + } + linked_shader->TessEval.PrimitiveMode = shader->TessEval.PrimitiveMode; + } + + if (shader->TessEval.Spacing != 0) { + if (linked_shader->TessEval.Spacing != 0 && + linked_shader->TessEval.Spacing != shader->TessEval.Spacing) { + linker_error(prog, "tessellation evaluation shader defined with " + "conflicting vertex spacing.\n"); + return; + } + linked_shader->TessEval.Spacing = shader->TessEval.Spacing; + } + + if (shader->TessEval.VertexOrder != 0) { + if (linked_shader->TessEval.VertexOrder != 0 && + linked_shader->TessEval.VertexOrder != shader->TessEval.VertexOrder) { + linker_error(prog, "tessellation evaluation shader defined with " + "conflicting ordering.\n"); + return; + } + linked_shader->TessEval.VertexOrder = shader->TessEval.VertexOrder; + } + + if (shader->TessEval.PointMode != -1) { + if (linked_shader->TessEval.PointMode != -1 && + linked_shader->TessEval.PointMode != shader->TessEval.PointMode) { + linker_error(prog, "tessellation evaluation shader defined with " + "conflicting point modes.\n"); + return; + } + linked_shader->TessEval.PointMode = shader->TessEval.PointMode; + } + + } + + /* Just do the 
intrastage -> interstage propagation right now, + * since we already know we're in the right type of shader program + * for doing it. + */ + if (linked_shader->TessEval.PrimitiveMode == PRIM_UNKNOWN) { + linker_error(prog, + "tessellation evaluation shader didn't declare input " + "primitive modes.\n"); + return; + } + prog->TessEval.PrimitiveMode = linked_shader->TessEval.PrimitiveMode; + + if (linked_shader->TessEval.Spacing == 0) + linked_shader->TessEval.Spacing = GL_EQUAL; + prog->TessEval.Spacing = linked_shader->TessEval.Spacing; + + if (linked_shader->TessEval.VertexOrder == 0) + linked_shader->TessEval.VertexOrder = GL_CCW; + prog->TessEval.VertexOrder = linked_shader->TessEval.VertexOrder; + + if (linked_shader->TessEval.PointMode == -1) + linked_shader->TessEval.PointMode = GL_FALSE; + prog->TessEval.PointMode = linked_shader->TessEval.PointMode; +} + + +/** + * Performs the cross-validation of layout qualifiers specified in + * redeclaration of gl_FragCoord for the attached fragment shaders, + * and propagates them to the linked FS and linked shader program. + */ +static void +link_fs_input_layout_qualifiers(struct gl_shader_program *prog, + struct gl_shader *linked_shader, + struct gl_shader **shader_list, + unsigned num_shaders) +{ + linked_shader->redeclares_gl_fragcoord = false; + linked_shader->uses_gl_fragcoord = false; + linked_shader->origin_upper_left = false; + linked_shader->pixel_center_integer = false; + + if (linked_shader->Stage != MESA_SHADER_FRAGMENT || + (prog->Version < 150 && !prog->ARB_fragment_coord_conventions_enable)) + return; + + for (unsigned i = 0; i < num_shaders; i++) { + struct gl_shader *shader = shader_list[i]; + /* From the GLSL 1.50 spec, page 39: + * + * "If gl_FragCoord is redeclared in any fragment shader in a program, + * it must be redeclared in all the fragment shaders in that program + * that have a static use gl_FragCoord." 
+ */ + if ((linked_shader->redeclares_gl_fragcoord + && !shader->redeclares_gl_fragcoord + && shader->uses_gl_fragcoord) + || (shader->redeclares_gl_fragcoord + && !linked_shader->redeclares_gl_fragcoord + && linked_shader->uses_gl_fragcoord)) { + linker_error(prog, "fragment shader defined with conflicting " + "layout qualifiers for gl_FragCoord\n"); + } + + /* From the GLSL 1.50 spec, page 39: + * + * "All redeclarations of gl_FragCoord in all fragment shaders in a + * single program must have the same set of qualifiers." + */ + if (linked_shader->redeclares_gl_fragcoord && shader->redeclares_gl_fragcoord + && (shader->origin_upper_left != linked_shader->origin_upper_left + || shader->pixel_center_integer != linked_shader->pixel_center_integer)) { + linker_error(prog, "fragment shader defined with conflicting " + "layout qualifiers for gl_FragCoord\n"); + } + + /* Update the linked shader state. Note that uses_gl_fragcoord should + * accumulate the results. The other values should replace. If there + * are multiple redeclarations, all the fields except uses_gl_fragcoord + * are already known to be the same. + */ + if (shader->redeclares_gl_fragcoord || shader->uses_gl_fragcoord) { + linked_shader->redeclares_gl_fragcoord = + shader->redeclares_gl_fragcoord; + linked_shader->uses_gl_fragcoord = linked_shader->uses_gl_fragcoord + || shader->uses_gl_fragcoord; + linked_shader->origin_upper_left = shader->origin_upper_left; + linked_shader->pixel_center_integer = shader->pixel_center_integer; + } + + linked_shader->EarlyFragmentTests |= shader->EarlyFragmentTests; + } +} + +/** + * Performs the cross-validation of geometry shader max_vertices and + * primitive type layout qualifiers for the attached geometry shaders, + * and propagates them to the linked GS and linked shader program. 
+ */ +static void +link_gs_inout_layout_qualifiers(struct gl_shader_program *prog, + struct gl_shader *linked_shader, + struct gl_shader **shader_list, + unsigned num_shaders) +{ + linked_shader->Geom.VerticesOut = 0; + linked_shader->Geom.Invocations = 0; + linked_shader->Geom.InputType = PRIM_UNKNOWN; + linked_shader->Geom.OutputType = PRIM_UNKNOWN; + + /* No in/out qualifiers defined for anything but GLSL 1.50+ + * geometry shaders so far. + */ + if (linked_shader->Stage != MESA_SHADER_GEOMETRY || prog->Version < 150) + return; + + /* From the GLSL 1.50 spec, page 46: + * + * "All geometry shader output layout declarations in a program + * must declare the same layout and same value for + * max_vertices. There must be at least one geometry output + * layout declaration somewhere in a program, but not all + * geometry shaders (compilation units) are required to + * declare it." + */ + + for (unsigned i = 0; i < num_shaders; i++) { + struct gl_shader *shader = shader_list[i]; + + if (shader->Geom.InputType != PRIM_UNKNOWN) { + if (linked_shader->Geom.InputType != PRIM_UNKNOWN && + linked_shader->Geom.InputType != shader->Geom.InputType) { + linker_error(prog, "geometry shader defined with conflicting " + "input types\n"); + return; + } + linked_shader->Geom.InputType = shader->Geom.InputType; + } + + if (shader->Geom.OutputType != PRIM_UNKNOWN) { + if (linked_shader->Geom.OutputType != PRIM_UNKNOWN && + linked_shader->Geom.OutputType != shader->Geom.OutputType) { + linker_error(prog, "geometry shader defined with conflicting " + "output types\n"); + return; + } + linked_shader->Geom.OutputType = shader->Geom.OutputType; + } + + if (shader->Geom.VerticesOut != 0) { + if (linked_shader->Geom.VerticesOut != 0 && + linked_shader->Geom.VerticesOut != shader->Geom.VerticesOut) { + linker_error(prog, "geometry shader defined with conflicting " + "output vertex count (%d and %d)\n", + linked_shader->Geom.VerticesOut, + shader->Geom.VerticesOut); + return; + } + 
linked_shader->Geom.VerticesOut = shader->Geom.VerticesOut; + } + + if (shader->Geom.Invocations != 0) { + if (linked_shader->Geom.Invocations != 0 && + linked_shader->Geom.Invocations != shader->Geom.Invocations) { + linker_error(prog, "geometry shader defined with conflicting " + "invocation count (%d and %d)\n", + linked_shader->Geom.Invocations, + shader->Geom.Invocations); + return; + } + linked_shader->Geom.Invocations = shader->Geom.Invocations; + } + } + + /* Just do the intrastage -> interstage propagation right now, + * since we already know we're in the right type of shader program + * for doing it. + */ + if (linked_shader->Geom.InputType == PRIM_UNKNOWN) { + linker_error(prog, + "geometry shader didn't declare primitive input type\n"); + return; + } + prog->Geom.InputType = linked_shader->Geom.InputType; + + if (linked_shader->Geom.OutputType == PRIM_UNKNOWN) { + linker_error(prog, + "geometry shader didn't declare primitive output type\n"); + return; + } + prog->Geom.OutputType = linked_shader->Geom.OutputType; + + if (linked_shader->Geom.VerticesOut == 0) { + linker_error(prog, + "geometry shader didn't declare max_vertices\n"); + return; + } + prog->Geom.VerticesOut = linked_shader->Geom.VerticesOut; + + if (linked_shader->Geom.Invocations == 0) + linked_shader->Geom.Invocations = 1; + + prog->Geom.Invocations = linked_shader->Geom.Invocations; +} + + +/** + * Perform cross-validation of compute shader local_size_{x,y,z} layout + * qualifiers for the attached compute shaders, and propagate them to the + * linked CS and linked shader program. + */ +static void +link_cs_input_layout_qualifiers(struct gl_shader_program *prog, + struct gl_shader *linked_shader, + struct gl_shader **shader_list, + unsigned num_shaders) +{ + for (int i = 0; i < 3; i++) + linked_shader->Comp.LocalSize[i] = 0; + + /* This function is called for all shader stages, but it only has an effect + * for compute shaders. 
+ */ + if (linked_shader->Stage != MESA_SHADER_COMPUTE) + return; + + /* From the ARB_compute_shader spec, in the section describing local size + * declarations: + * + * If multiple compute shaders attached to a single program object + * declare local work-group size, the declarations must be identical; + * otherwise a link-time error results. Furthermore, if a program + * object contains any compute shaders, at least one must contain an + * input layout qualifier specifying the local work sizes of the + * program, or a link-time error will occur. + */ + for (unsigned sh = 0; sh < num_shaders; sh++) { + struct gl_shader *shader = shader_list[sh]; + + if (shader->Comp.LocalSize[0] != 0) { + if (linked_shader->Comp.LocalSize[0] != 0) { + for (int i = 0; i < 3; i++) { + if (linked_shader->Comp.LocalSize[i] != + shader->Comp.LocalSize[i]) { + linker_error(prog, "compute shader defined with conflicting " + "local sizes\n"); + return; + } + } + } + for (int i = 0; i < 3; i++) + linked_shader->Comp.LocalSize[i] = shader->Comp.LocalSize[i]; + } + } + + /* Just do the intrastage -> interstage propagation right now, + * since we already know we're in the right type of shader program + * for doing it. + */ + if (linked_shader->Comp.LocalSize[0] == 0) { + linker_error(prog, "compute shader didn't declare local size\n"); + return; + } + for (int i = 0; i < 3; i++) + prog->Comp.LocalSize[i] = linked_shader->Comp.LocalSize[i]; +} + + +/** + * Combine a group of shaders for a single stage to generate a linked shader + * + * \note + * If this function is supplied a single shader, it is cloned, and the new + * shader is returned. + */ +static struct gl_shader * +link_intrastage_shaders(void *mem_ctx, + struct gl_context *ctx, + struct gl_shader_program *prog, + struct gl_shader **shader_list, + unsigned num_shaders) +{ + struct gl_uniform_block *uniform_blocks = NULL; + + /* Check that global variables defined in multiple shaders are consistent. 
+ */ + cross_validate_globals(prog, shader_list, num_shaders, false); + if (!prog->LinkStatus) + return NULL; + + /* Check that interface blocks defined in multiple shaders are consistent. + */ + validate_intrastage_interface_blocks(prog, (const gl_shader **)shader_list, + num_shaders); + if (!prog->LinkStatus) + return NULL; + + /* Link up uniform blocks defined within this stage. */ + const unsigned num_uniform_blocks = + link_uniform_blocks(mem_ctx, ctx, prog, shader_list, num_shaders, + &uniform_blocks); + if (!prog->LinkStatus) + return NULL; + + /* Check that there is only a single definition of each function signature + * across all shaders. + */ + for (unsigned i = 0; i < (num_shaders - 1); i++) { + foreach_in_list(ir_instruction, node, shader_list[i]->ir) { + ir_function *const f = node->as_function(); + + if (f == NULL) + continue; + + for (unsigned j = i + 1; j < num_shaders; j++) { + ir_function *const other = + shader_list[j]->symbols->get_function(f->name); + + /* If the other shader has no function (and therefore no function + * signatures) with the same name, skip to the next shader. + */ + if (other == NULL) + continue; + + foreach_in_list(ir_function_signature, sig, &f->signatures) { + if (!sig->is_defined || sig->is_builtin()) + continue; + + ir_function_signature *other_sig = + other->exact_matching_signature(NULL, &sig->parameters); + + if ((other_sig != NULL) && other_sig->is_defined + && !other_sig->is_builtin()) { + linker_error(prog, "function `%s' is multiply defined\n", + f->name); + return NULL; + } + } + } + } + } + + /* Find the shader that defines main, and make a clone of it. + * + * Starting with the clone, search for undefined references. If one is + * found, find the shader that defines it. Clone the reference and add + * it to the shader. Repeat until there are no undefined references or + * until a reference cannot be resolved. 
+ */ + gl_shader *main = NULL; + for (unsigned i = 0; i < num_shaders; i++) { + if (_mesa_get_main_function_signature(shader_list[i]) != NULL) { + main = shader_list[i]; + break; + } + } + + if (main == NULL) { + linker_error(prog, "%s shader lacks `main'\n", + _mesa_shader_stage_to_string(shader_list[0]->Stage)); + return NULL; + } + + gl_shader *linked = ctx->Driver.NewShader(NULL, 0, main->Type); + linked->ir = new(linked) exec_list; + clone_ir_list(mem_ctx, linked->ir, main->ir); + + linked->BufferInterfaceBlocks = uniform_blocks; + linked->NumBufferInterfaceBlocks = num_uniform_blocks; + ralloc_steal(linked, linked->BufferInterfaceBlocks); + + link_fs_input_layout_qualifiers(prog, linked, shader_list, num_shaders); + link_tcs_out_layout_qualifiers(prog, linked, shader_list, num_shaders); + link_tes_in_layout_qualifiers(prog, linked, shader_list, num_shaders); + link_gs_inout_layout_qualifiers(prog, linked, shader_list, num_shaders); + link_cs_input_layout_qualifiers(prog, linked, shader_list, num_shaders); + + populate_symbol_table(linked); + + /* The pointer to the main function in the final linked shader (i.e., the + * copy of the original shader that contained the main function). + */ + ir_function_signature *const main_sig = + _mesa_get_main_function_signature(linked); + + /* Move any instructions other than variable declarations or function + * declarations into main. + */ + exec_node *insertion_point = + move_non_declarations(linked->ir, (exec_node *) &main_sig->body, false, + linked); + + for (unsigned i = 0; i < num_shaders; i++) { + if (shader_list[i] == main) + continue; + + insertion_point = move_non_declarations(shader_list[i]->ir, + insertion_point, true, linked); + } + + /* Check if any shader needs built-in functions. 
*/ + bool need_builtins = false; + for (unsigned i = 0; i < num_shaders; i++) { + if (shader_list[i]->uses_builtin_functions) { + need_builtins = true; + break; + } + } + + bool ok; + if (need_builtins) { + /* Make a temporary array one larger than shader_list, which will hold + * the built-in function shader as well. + */ + gl_shader **linking_shaders = (gl_shader **) + calloc(num_shaders + 1, sizeof(gl_shader *)); + + ok = linking_shaders != NULL; + + if (ok) { + memcpy(linking_shaders, shader_list, num_shaders * sizeof(gl_shader *)); + linking_shaders[num_shaders] = _mesa_glsl_get_builtin_function_shader(); + + ok = link_function_calls(prog, linked, linking_shaders, num_shaders + 1); + + free(linking_shaders); + } else { + _mesa_error_no_memory(__func__); + } + } else { + ok = link_function_calls(prog, linked, shader_list, num_shaders); + } + + + if (!ok) { + _mesa_delete_shader(ctx, linked); + return NULL; + } + + /* At this point linked should contain all of the linked IR, so + * validate it to make sure nothing went wrong. + */ + validate_ir_tree(linked->ir); + + /* Set the size of geometry shader input arrays */ + if (linked->Stage == MESA_SHADER_GEOMETRY) { + unsigned num_vertices = vertices_per_prim(prog->Geom.InputType); + geom_array_resize_visitor input_resize_visitor(num_vertices, prog); + foreach_in_list(ir_instruction, ir, linked->ir) { + ir->accept(&input_resize_visitor); + } + } + + if (ctx->Const.VertexID_is_zero_based) + lower_vertex_id(linked); + + /* Validate correct usage of barrier() in the tess control shader */ + if (linked->Stage == MESA_SHADER_TESS_CTRL) { + barrier_use_visitor visitor(prog); + foreach_in_list(ir_instruction, ir, linked->ir) { + ir->accept(&visitor); + } + } + + /* Make a pass over all variable declarations to ensure that arrays with + * unspecified sizes have a size specified. The size is inferred from the + * max_array_access field. 
+ */ + array_sizing_visitor v; + v.run(linked->ir); + v.fixup_unnamed_interface_types(); + + return linked; +} + +/** + * Update the sizes of linked shader uniform arrays to the maximum + * array index used. + * + * From page 81 (page 95 of the PDF) of the OpenGL 2.1 spec: + * + * If one or more elements of an array are active, + * GetActiveUniform will return the name of the array in name, + * subject to the restrictions listed above. The type of the array + * is returned in type. The size parameter contains the highest + * array element index used, plus one. The compiler or linker + * determines the highest index used. There will be only one + * active uniform reported by the GL per uniform array. + + */ +static void +update_array_sizes(struct gl_shader_program *prog) +{ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + if (prog->_LinkedShaders[i] == NULL) + continue; + + foreach_in_list(ir_instruction, node, prog->_LinkedShaders[i]->ir) { + ir_variable *const var = node->as_variable(); + + if ((var == NULL) || (var->data.mode != ir_var_uniform) || + !var->type->is_array()) + continue; + + /* GL_ARB_uniform_buffer_object says that std140 uniforms + * will not be eliminated. Since we always do std140, just + * don't resize arrays in UBOs. + * + * Atomic counters are supposed to get deterministic + * locations assigned based on the declaration ordering and + * sizes, array compaction would mess that up. + * + * Subroutine uniforms are not removed. 
+ */ + if (var->is_in_buffer_block() || var->type->contains_atomic() || + var->type->contains_subroutine()) + continue; + + unsigned int size = var->data.max_array_access; + for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) { + if (prog->_LinkedShaders[j] == NULL) + continue; + + foreach_in_list(ir_instruction, node2, prog->_LinkedShaders[j]->ir) { + ir_variable *other_var = node2->as_variable(); + if (!other_var) + continue; + + if (strcmp(var->name, other_var->name) == 0 && + other_var->data.max_array_access > size) { + size = other_var->data.max_array_access; + } + } + } + + if (size + 1 != var->type->length) { + /* If this is a built-in uniform (i.e., it's backed by some + * fixed-function state), adjust the number of state slots to + * match the new array size. The number of slots per array entry + * is not known. It seems safe to assume that the total number of + * slots is an integer multiple of the number of array elements. + * Determine the number of slots per array element by dividing by + * the old (total) size. + */ + const unsigned num_slots = var->get_num_state_slots(); + if (num_slots > 0) { + var->set_num_state_slots((size + 1) + * (num_slots / var->type->length)); + } + + var->type = glsl_type::get_array_instance(var->type->fields.array, + size + 1); + /* FINISHME: We should update the types of array + * dereferences of this variable now. + */ + } + } + } +} + +/** + * Resize tessellation evaluation per-vertex inputs to the size of + * tessellation control per-vertex outputs. + */ +static void +resize_tes_inputs(struct gl_context *ctx, + struct gl_shader_program *prog) +{ + if (prog->_LinkedShaders[MESA_SHADER_TESS_EVAL] == NULL) + return; + + gl_shader *const tcs = prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]; + gl_shader *const tes = prog->_LinkedShaders[MESA_SHADER_TESS_EVAL]; + + /* If no control shader is present, then the TES inputs are statically + * sized to MaxPatchVertices; the actual size of the arrays won't be + * known until draw time. 
+ */ + const int num_vertices = tcs + ? tcs->TessCtrl.VerticesOut + : ctx->Const.MaxPatchVertices; + + tess_eval_array_resize_visitor input_resize_visitor(num_vertices, prog); + foreach_in_list(ir_instruction, ir, tes->ir) { + ir->accept(&input_resize_visitor); + } + + if (tcs) { + /* Convert the gl_PatchVerticesIn system value into a constant, since + * the value is known at this point. + */ + foreach_in_list(ir_instruction, ir, tes->ir) { + ir_variable *var = ir->as_variable(); + if (var && var->data.mode == ir_var_system_value && + var->data.location == SYSTEM_VALUE_VERTICES_IN) { + void *mem_ctx = ralloc_parent(var); + var->data.mode = ir_var_auto; + var->data.location = 0; + var->constant_value = new(mem_ctx) ir_constant(num_vertices); + } + } + } +} + +/** + * Find a contiguous set of available bits in a bitmask. + * + * \param used_mask Bits representing used (1) and unused (0) locations + * \param needed_count Number of contiguous bits needed. + * + * \return + * Base location of the available bits on success or -1 on failure. + */ +int +find_available_slots(unsigned used_mask, unsigned needed_count) +{ + unsigned needed_mask = (1 << needed_count) - 1; + const int max_bit_to_test = (8 * sizeof(used_mask)) - needed_count; + + /* The comparison to 32 is redundant, but without it GCC emits "warning: + * cannot optimize possibly infinite loops" for the loop below. + */ + if ((needed_count == 0) || (max_bit_to_test < 0) || (max_bit_to_test > 32)) + return -1; + + for (int i = 0; i <= max_bit_to_test; i++) { + if ((needed_mask & ~used_mask) == needed_mask) + return i; + + needed_mask <<= 1; + } + + return -1; +} + + +/** + * Assign locations for either VS inputs or FS outputs + * + * \param prog Shader program whose variables need locations assigned + * \param constants Driver specific constant values for the program. + * \param target_index Selector for the program target to receive location + * assignmnets. 
Must be either \c MESA_SHADER_VERTEX or + * \c MESA_SHADER_FRAGMENT. + * + * \return + * If locations are successfully assigned, true is returned. Otherwise an + * error is emitted to the shader link log and false is returned. + */ +bool +assign_attribute_or_color_locations(gl_shader_program *prog, + struct gl_constants *constants, + unsigned target_index) +{ + /* Maximum number of generic locations. This corresponds to either the + * maximum number of draw buffers or the maximum number of generic + * attributes. + */ + unsigned max_index = (target_index == MESA_SHADER_VERTEX) ? + constants->Program[target_index].MaxAttribs : + MAX2(constants->MaxDrawBuffers, constants->MaxDualSourceDrawBuffers); + + /* Mark invalid locations as being used. + */ + unsigned used_locations = (max_index >= 32) + ? ~0 : ~((1 << max_index) - 1); + unsigned double_storage_locations = 0; + + assert((target_index == MESA_SHADER_VERTEX) + || (target_index == MESA_SHADER_FRAGMENT)); + + gl_shader *const sh = prog->_LinkedShaders[target_index]; + if (sh == NULL) + return true; + + /* Operate in a total of four passes. + * + * 1. Invalidate the location assignments for all vertex shader inputs. + * + * 2. Assign locations for inputs that have user-defined (via + * glBindVertexAttribLocation) locations and outputs that have + * user-defined locations (via glBindFragDataLocation). + * + * 3. Sort the attributes without assigned locations by number of slots + * required in decreasing order. Fragmentation caused by attribute + * locations assigned by the application may prevent large attributes + * from having enough contiguous space. + * + * 4. Assign locations to any inputs without assigned locations. + */ + + const int generic_base = (target_index == MESA_SHADER_VERTEX) + ? (int) VERT_ATTRIB_GENERIC0 : (int) FRAG_RESULT_DATA0; + + const enum ir_variable_mode direction = + (target_index == MESA_SHADER_VERTEX) + ? 
ir_var_shader_in : ir_var_shader_out; + + + /* Temporary storage for the set of attributes that need locations assigned. + */ + struct temp_attr { + unsigned slots; + ir_variable *var; + + /* Used below in the call to qsort. */ + static int compare(const void *a, const void *b) + { + const temp_attr *const l = (const temp_attr *) a; + const temp_attr *const r = (const temp_attr *) b; + + /* Reversed because we want a descending order sort below. */ + return r->slots - l->slots; + } + } to_assign[16]; + + unsigned num_attr = 0; + + foreach_in_list(ir_instruction, node, sh->ir) { + ir_variable *const var = node->as_variable(); + + if ((var == NULL) || (var->data.mode != (unsigned) direction)) + continue; + + if (var->data.explicit_location) { + var->data.is_unmatched_generic_inout = 0; + if ((var->data.location >= (int)(max_index + generic_base)) + || (var->data.location < 0)) { + linker_error(prog, + "invalid explicit location %d specified for `%s'\n", + (var->data.location < 0) + ? var->data.location + : var->data.location - generic_base, + var->name); + return false; + } + } else if (target_index == MESA_SHADER_VERTEX) { + unsigned binding; + + if (prog->AttributeBindings->get(binding, var->name)) { + assert(binding >= VERT_ATTRIB_GENERIC0); + var->data.location = binding; + var->data.is_unmatched_generic_inout = 0; + } + } else if (target_index == MESA_SHADER_FRAGMENT) { + unsigned binding; + unsigned index; + + if (prog->FragDataBindings->get(binding, var->name)) { + assert(binding >= FRAG_RESULT_DATA0); + var->data.location = binding; + var->data.is_unmatched_generic_inout = 0; + + if (prog->FragDataIndexBindings->get(index, var->name)) { + var->data.index = index; + } + } + } + + /* From GL4.5 core spec, section 15.2 (Shader Execution): + * + * "Output binding assignments will cause LinkProgram to fail: + * ... 
+ * If the program has an active output assigned to a location greater + * than or equal to the value of MAX_DUAL_SOURCE_DRAW_BUFFERS and has + * an active output assigned an index greater than or equal to one;" + */ + if (target_index == MESA_SHADER_FRAGMENT && var->data.index >= 1 && + var->data.location - generic_base >= + (int) constants->MaxDualSourceDrawBuffers) { + linker_error(prog, + "output location %d >= GL_MAX_DUAL_SOURCE_DRAW_BUFFERS " + "with index %u for %s\n", + var->data.location - generic_base, var->data.index, + var->name); + return false; + } + + const unsigned slots = var->type->count_attribute_slots(target_index == MESA_SHADER_VERTEX ? true : false); + + /* If the variable is not a built-in and has a location statically + * assigned in the shader (presumably via a layout qualifier), make sure + * that it doesn't collide with other assigned locations. Otherwise, + * add it to the list of variables that need linker-assigned locations. + */ + if (var->data.location != -1) { + if (var->data.location >= generic_base && var->data.index < 1) { + /* From page 61 of the OpenGL 4.0 spec: + * + * "LinkProgram will fail if the attribute bindings assigned + * by BindAttribLocation do not leave not enough space to + * assign a location for an active matrix attribute or an + * active attribute array, both of which require multiple + * contiguous generic attributes." + * + * I think above text prohibits the aliasing of explicit and + * automatic assignments. But, aliasing is allowed in manual + * assignments of attribute locations. See below comments for + * the details. + * + * From OpenGL 4.0 spec, page 61: + * + * "It is possible for an application to bind more than one + * attribute name to the same location. This is referred to as + * aliasing. 
This will only work if only one of the aliased + * attributes is active in the executable program, or if no + * path through the shader consumes more than one attribute of + * a set of attributes aliased to the same location. A link + * error can occur if the linker determines that every path + * through the shader consumes multiple aliased attributes, + * but implementations are not required to generate an error + * in this case." + * + * From GLSL 4.30 spec, page 54: + * + * "A program will fail to link if any two non-vertex shader + * input variables are assigned to the same location. For + * vertex shaders, multiple input variables may be assigned + * to the same location using either layout qualifiers or via + * the OpenGL API. However, such aliasing is intended only to + * support vertex shaders where each execution path accesses + * at most one input per each location. Implementations are + * permitted, but not required, to generate link-time errors + * if they detect that every path through the vertex shader + * executable accesses multiple inputs assigned to any single + * location. For all shader types, a program will fail to link + * if explicit location assignments leave the linker unable + * to find space for other variables without explicit + * assignments." + * + * From OpenGL ES 3.0 spec, page 56: + * + * "Binding more than one attribute name to the same location + * is referred to as aliasing, and is not permitted in OpenGL + * ES Shading Language 3.00 vertex shaders. LinkProgram will + * fail when this condition exists. However, aliasing is + * possible in OpenGL ES Shading Language 1.00 vertex shaders. + * This will only work if only one of the aliased attributes + * is active in the executable program, or if no path through + * the shader consumes more than one attribute of a set of + * attributes aliased to the same location. 
A link error can + * occur if the linker determines that every path through the + * shader consumes multiple aliased attributes, but implemen- + * tations are not required to generate an error in this case." + * + * After looking at above references from OpenGL, OpenGL ES and + * GLSL specifications, we allow aliasing of vertex input variables + * in: OpenGL 2.0 (and above) and OpenGL ES 2.0. + * + * NOTE: This is not required by the spec but its worth mentioning + * here that we're not doing anything to make sure that no path + * through the vertex shader executable accesses multiple inputs + * assigned to any single location. + */ + + /* Mask representing the contiguous slots that will be used by + * this attribute. + */ + const unsigned attr = var->data.location - generic_base; + const unsigned use_mask = (1 << slots) - 1; + const char *const string = (target_index == MESA_SHADER_VERTEX) + ? "vertex shader input" : "fragment shader output"; + + /* Generate a link error if the requested locations for this + * attribute exceed the maximum allowed attribute location. + */ + if (attr + slots > max_index) { + linker_error(prog, + "insufficient contiguous locations " + "available for %s `%s' %d %d %d\n", string, + var->name, used_locations, use_mask, attr); + return false; + } + + /* Generate a link error if the set of bits requested for this + * attribute overlaps any previously allocated bits. 
+ */ + if ((~(use_mask << attr) & used_locations) != used_locations) { + if (target_index == MESA_SHADER_FRAGMENT || + (prog->IsES && prog->Version >= 300)) { + linker_error(prog, + "overlapping location is assigned " + "to %s `%s' %d %d %d\n", string, + var->name, used_locations, use_mask, attr); + return false; + } else { + linker_warning(prog, + "overlapping location is assigned " + "to %s `%s' %d %d %d\n", string, + var->name, used_locations, use_mask, attr); + } + } + + used_locations |= (use_mask << attr); + + /* From the GL 4.5 core spec, section 11.1.1 (Vertex Attributes): + * + * "A program with more than the value of MAX_VERTEX_ATTRIBS + * active attribute variables may fail to link, unless + * device-dependent optimizations are able to make the program + * fit within available hardware resources. For the purposes + * of this test, attribute variables of the type dvec3, dvec4, + * dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3, and dmat4 may + * count as consuming twice as many attributes as equivalent + * single-precision types. While these types use the same number + * of generic attributes as their single-precision equivalents, + * implementations are permitted to consume two single-precision + * vectors of internal storage for each three- or four-component + * double-precision vector." + * + * Mark this attribute slot as taking up twice as much space + * so we can count it properly against limits. According to + * issue (3) of the GL_ARB_vertex_attrib_64bit behavior, this + * is optional behavior, but it seems preferable. 
+ */ + if (var->type->without_array()->is_dual_slot_double()) + double_storage_locations |= (use_mask << attr); + } + + continue; + } + + to_assign[num_attr].slots = slots; + to_assign[num_attr].var = var; + num_attr++; + } + + if (target_index == MESA_SHADER_VERTEX) { + unsigned total_attribs_size = + _mesa_bitcount(used_locations & ((1 << max_index) - 1)) + + _mesa_bitcount(double_storage_locations); + if (total_attribs_size > max_index) { + linker_error(prog, + "attempt to use %d vertex attribute slots only %d available ", + total_attribs_size, max_index); + return false; + } + } + + /* If all of the attributes were assigned locations by the application (or + * are built-in attributes with fixed locations), return early. This should + * be the common case. + */ + if (num_attr == 0) + return true; + + qsort(to_assign, num_attr, sizeof(to_assign[0]), temp_attr::compare); + + if (target_index == MESA_SHADER_VERTEX) { + /* VERT_ATTRIB_GENERIC0 is a pseudo-alias for VERT_ATTRIB_POS. It can + * only be explicitly assigned by via glBindAttribLocation. Mark it as + * reserved to prevent it from being automatically allocated below. + */ + find_deref_visitor find("gl_Vertex"); + find.run(sh->ir); + if (find.variable_found()) + used_locations |= (1 << 0); + } + + for (unsigned i = 0; i < num_attr; i++) { + /* Mask representing the contiguous slots that will be used by this + * attribute. + */ + const unsigned use_mask = (1 << to_assign[i].slots) - 1; + + int location = find_available_slots(used_locations, to_assign[i].slots); + + if (location < 0) { + const char *const string = (target_index == MESA_SHADER_VERTEX) + ? 
"vertex shader input" : "fragment shader output"; + + linker_error(prog, + "insufficient contiguous locations " + "available for %s `%s'\n", + string, to_assign[i].var->name); + return false; + } + + to_assign[i].var->data.location = generic_base + location; + to_assign[i].var->data.is_unmatched_generic_inout = 0; + used_locations |= (use_mask << location); + } + + return true; +} + +/** + * Match explicit locations of outputs to inputs and deactivate the + * unmatch flag if found so we don't optimise them away. + */ +static void +match_explicit_outputs_to_inputs(struct gl_shader_program *prog, + gl_shader *producer, + gl_shader *consumer) +{ + glsl_symbol_table parameters; + ir_variable *explicit_locations[MAX_VARYING] = { NULL }; + + /* Find all shader outputs in the "producer" stage. + */ + foreach_in_list(ir_instruction, node, producer->ir) { + ir_variable *const var = node->as_variable(); + + if ((var == NULL) || (var->data.mode != ir_var_shader_out)) + continue; + + if (var->data.explicit_location && + var->data.location >= VARYING_SLOT_VAR0) { + const unsigned idx = var->data.location - VARYING_SLOT_VAR0; + if (explicit_locations[idx] == NULL) + explicit_locations[idx] = var; + } + } + + /* Match inputs to outputs */ + foreach_in_list(ir_instruction, node, consumer->ir) { + ir_variable *const input = node->as_variable(); + + if ((input == NULL) || (input->data.mode != ir_var_shader_in)) + continue; + + ir_variable *output = NULL; + if (input->data.explicit_location + && input->data.location >= VARYING_SLOT_VAR0) { + output = explicit_locations[input->data.location - VARYING_SLOT_VAR0]; + + if (output != NULL){ + input->data.is_unmatched_generic_inout = 0; + output->data.is_unmatched_generic_inout = 0; + } + } + } +} + +/** + * Store the gl_FragDepth layout in the gl_shader_program struct. 
+ */ +static void +store_fragdepth_layout(struct gl_shader_program *prog) +{ + if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) { + return; + } + + struct exec_list *ir = prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->ir; + + /* We don't look up the gl_FragDepth symbol directly because if + * gl_FragDepth is not used in the shader, it's removed from the IR. + * However, the symbol won't be removed from the symbol table. + * + * We're only interested in the cases where the variable is NOT removed + * from the IR. + */ + foreach_in_list(ir_instruction, node, ir) { + ir_variable *const var = node->as_variable(); + + if (var == NULL || var->data.mode != ir_var_shader_out) { + continue; + } + + if (strcmp(var->name, "gl_FragDepth") == 0) { + switch (var->data.depth_layout) { + case ir_depth_layout_none: + prog->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE; + return; + case ir_depth_layout_any: + prog->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY; + return; + case ir_depth_layout_greater: + prog->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER; + return; + case ir_depth_layout_less: + prog->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS; + return; + case ir_depth_layout_unchanged: + prog->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED; + return; + default: + assert(0); + return; + } + } + } +} + +/** + * Validate the resources used by a program versus the implementation limits + */ +static void +check_resources(struct gl_context *ctx, struct gl_shader_program *prog) +{ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + struct gl_shader *sh = prog->_LinkedShaders[i]; + + if (sh == NULL) + continue; + + if (sh->num_samplers > ctx->Const.Program[i].MaxTextureImageUnits) { + linker_error(prog, "Too many %s shader texture samplers\n", + _mesa_shader_stage_to_string(i)); + } + + if (sh->num_uniform_components > + ctx->Const.Program[i].MaxUniformComponents) { + if (ctx->Const.GLSLSkipStrictMaxUniformLimitCheck) { + linker_warning(prog, "Too many %s shader default uniform block " + 
"components, but the driver will try to optimize " + "them out; this is non-portable out-of-spec " + "behavior\n", + _mesa_shader_stage_to_string(i)); + } else { + linker_error(prog, "Too many %s shader default uniform block " + "components\n", + _mesa_shader_stage_to_string(i)); + } + } + + if (sh->num_combined_uniform_components > + ctx->Const.Program[i].MaxCombinedUniformComponents) { + if (ctx->Const.GLSLSkipStrictMaxUniformLimitCheck) { + linker_warning(prog, "Too many %s shader uniform components, " + "but the driver will try to optimize them out; " + "this is non-portable out-of-spec behavior\n", + _mesa_shader_stage_to_string(i)); + } else { + linker_error(prog, "Too many %s shader uniform components\n", + _mesa_shader_stage_to_string(i)); + } + } + } + + unsigned blocks[MESA_SHADER_STAGES] = {0}; + unsigned total_uniform_blocks = 0; + unsigned shader_blocks[MESA_SHADER_STAGES] = {0}; + unsigned total_shader_storage_blocks = 0; + + for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) { + /* Don't check SSBOs for Uniform Block Size */ + if (!prog->BufferInterfaceBlocks[i].IsShaderStorage && + prog->BufferInterfaceBlocks[i].UniformBufferSize > ctx->Const.MaxUniformBlockSize) { + linker_error(prog, "Uniform block %s too big (%d/%d)\n", + prog->BufferInterfaceBlocks[i].Name, + prog->BufferInterfaceBlocks[i].UniformBufferSize, + ctx->Const.MaxUniformBlockSize); + } + + if (prog->BufferInterfaceBlocks[i].IsShaderStorage && + prog->BufferInterfaceBlocks[i].UniformBufferSize > ctx->Const.MaxShaderStorageBlockSize) { + linker_error(prog, "Shader storage block %s too big (%d/%d)\n", + prog->BufferInterfaceBlocks[i].Name, + prog->BufferInterfaceBlocks[i].UniformBufferSize, + ctx->Const.MaxShaderStorageBlockSize); + } + + for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) { + if (prog->InterfaceBlockStageIndex[j][i] != -1) { + struct gl_shader *sh = prog->_LinkedShaders[j]; + int stage_index = prog->InterfaceBlockStageIndex[j][i]; + if (sh && 
sh->BufferInterfaceBlocks[stage_index].IsShaderStorage) { + shader_blocks[j]++; + total_shader_storage_blocks++; + } else { + blocks[j]++; + total_uniform_blocks++; + } + } + } + + if (total_uniform_blocks > ctx->Const.MaxCombinedUniformBlocks) { + linker_error(prog, "Too many combined uniform blocks (%d/%d)\n", + total_uniform_blocks, + ctx->Const.MaxCombinedUniformBlocks); + } else { + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + const unsigned max_uniform_blocks = + ctx->Const.Program[i].MaxUniformBlocks; + if (blocks[i] > max_uniform_blocks) { + linker_error(prog, "Too many %s uniform blocks (%d/%d)\n", + _mesa_shader_stage_to_string(i), + blocks[i], + max_uniform_blocks); + break; + } + } + } + + if (total_shader_storage_blocks > ctx->Const.MaxCombinedShaderStorageBlocks) { + linker_error(prog, "Too many combined shader storage blocks (%d/%d)\n", + total_shader_storage_blocks, + ctx->Const.MaxCombinedShaderStorageBlocks); + } else { + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + const unsigned max_shader_storage_blocks = + ctx->Const.Program[i].MaxShaderStorageBlocks; + if (shader_blocks[i] > max_shader_storage_blocks) { + linker_error(prog, "Too many %s shader storage blocks (%d/%d)\n", + _mesa_shader_stage_to_string(i), + shader_blocks[i], + max_shader_storage_blocks); + break; + } + } + } + } +} + +static void +link_calculate_subroutine_compat(struct gl_shader_program *prog) +{ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + struct gl_shader *sh = prog->_LinkedShaders[i]; + int count; + if (!sh) + continue; + + for (unsigned j = 0; j < sh->NumSubroutineUniformRemapTable; j++) { + struct gl_uniform_storage *uni = sh->SubroutineUniformRemapTable[j]; + + if (!uni) + continue; + + count = 0; + for (unsigned f = 0; f < sh->NumSubroutineFunctions; f++) { + struct gl_subroutine_function *fn = &sh->SubroutineFunctions[f]; + for (int k = 0; k < fn->num_compat_types; k++) { + if (fn->types[k] == uni->type) { + count++; + break; + } + } + } + 
uni->num_compatible_subroutines = count; + } + } +} + +static void +check_subroutine_resources(struct gl_shader_program *prog) +{ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + struct gl_shader *sh = prog->_LinkedShaders[i]; + + if (sh) { + if (sh->NumSubroutineUniformRemapTable > MAX_SUBROUTINE_UNIFORM_LOCATIONS) + linker_error(prog, "Too many %s shader subroutine uniforms\n", + _mesa_shader_stage_to_string(i)); + } + } +} +/** + * Validate shader image resources. + */ +static void +check_image_resources(struct gl_context *ctx, struct gl_shader_program *prog) +{ + unsigned total_image_units = 0; + unsigned fragment_outputs = 0; + unsigned total_shader_storage_blocks = 0; + + if (!ctx->Extensions.ARB_shader_image_load_store) + return; + + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + struct gl_shader *sh = prog->_LinkedShaders[i]; + + if (sh) { + if (sh->NumImages > ctx->Const.Program[i].MaxImageUniforms) + linker_error(prog, "Too many %s shader image uniforms (%u > %u)\n", + _mesa_shader_stage_to_string(i), sh->NumImages, + ctx->Const.Program[i].MaxImageUniforms); + + total_image_units += sh->NumImages; + + for (unsigned j = 0; j < prog->NumBufferInterfaceBlocks; j++) { + int stage_index = prog->InterfaceBlockStageIndex[i][j]; + if (stage_index != -1 && sh->BufferInterfaceBlocks[stage_index].IsShaderStorage) + total_shader_storage_blocks++; + } + + if (i == MESA_SHADER_FRAGMENT) { + foreach_in_list(ir_instruction, node, sh->ir) { + ir_variable *var = node->as_variable(); + if (var && var->data.mode == ir_var_shader_out) + /* since there are no double fs outputs - pass false */ + fragment_outputs += var->type->count_attribute_slots(false); + } + } + } + } + + if (total_image_units > ctx->Const.MaxCombinedImageUniforms) + linker_error(prog, "Too many combined image uniforms\n"); + + if (total_image_units + fragment_outputs + total_shader_storage_blocks > + ctx->Const.MaxCombinedShaderOutputResources) + linker_error(prog, "Too many combined image 
uniforms, shader storage " + " buffers and fragment outputs\n"); +} + + +/** + * Initializes explicit location slots to INACTIVE_UNIFORM_EXPLICIT_LOCATION + * for a variable, checks for overlaps between other uniforms using explicit + * locations. + */ +static bool +reserve_explicit_locations(struct gl_shader_program *prog, + string_to_uint_map *map, ir_variable *var) +{ + unsigned slots = var->type->uniform_locations(); + unsigned max_loc = var->data.location + slots - 1; + + /* Resize remap table if locations do not fit in the current one. */ + if (max_loc + 1 > prog->NumUniformRemapTable) { + prog->UniformRemapTable = + reralloc(prog, prog->UniformRemapTable, + gl_uniform_storage *, + max_loc + 1); + + if (!prog->UniformRemapTable) { + linker_error(prog, "Out of memory during linking.\n"); + return false; + } + + /* Initialize allocated space. */ + for (unsigned i = prog->NumUniformRemapTable; i < max_loc + 1; i++) + prog->UniformRemapTable[i] = NULL; + + prog->NumUniformRemapTable = max_loc + 1; + } + + for (unsigned i = 0; i < slots; i++) { + unsigned loc = var->data.location + i; + + /* Check if location is already used. */ + if (prog->UniformRemapTable[loc] == INACTIVE_UNIFORM_EXPLICIT_LOCATION) { + + /* Possibly same uniform from a different stage, this is ok. */ + unsigned hash_loc; + if (map->get(hash_loc, var->name) && hash_loc == loc - i) + continue; + + /* ARB_explicit_uniform_location specification states: + * + * "No two default-block uniform variables in the program can have + * the same location, even if they are unused, otherwise a compiler + * or linker error will be generated." + */ + linker_error(prog, + "location qualifier for uniform %s overlaps " + "previously used location\n", + var->name); + return false; + } + + /* Initialize location as inactive before optimization + * rounds and location assignment. + */ + prog->UniformRemapTable[loc] = INACTIVE_UNIFORM_EXPLICIT_LOCATION; + } + + /* Note, base location used for arrays. 
*/ + map->put(var->data.location, var->name); + + return true; +} + +static bool +reserve_subroutine_explicit_locations(struct gl_shader_program *prog, + struct gl_shader *sh, + ir_variable *var) +{ + unsigned slots = var->type->uniform_locations(); + unsigned max_loc = var->data.location + slots - 1; + + /* Resize remap table if locations do not fit in the current one. */ + if (max_loc + 1 > sh->NumSubroutineUniformRemapTable) { + sh->SubroutineUniformRemapTable = + reralloc(sh, sh->SubroutineUniformRemapTable, + gl_uniform_storage *, + max_loc + 1); + + if (!sh->SubroutineUniformRemapTable) { + linker_error(prog, "Out of memory during linking.\n"); + return false; + } + + /* Initialize allocated space. */ + for (unsigned i = sh->NumSubroutineUniformRemapTable; i < max_loc + 1; i++) + sh->SubroutineUniformRemapTable[i] = NULL; + + sh->NumSubroutineUniformRemapTable = max_loc + 1; + } + + for (unsigned i = 0; i < slots; i++) { + unsigned loc = var->data.location + i; + + /* Check if location is already used. */ + if (sh->SubroutineUniformRemapTable[loc] == INACTIVE_UNIFORM_EXPLICIT_LOCATION) { + + /* ARB_explicit_uniform_location specification states: + * "No two subroutine uniform variables can have the same location + * in the same shader stage, otherwise a compiler or linker error + * will be generated." + */ + linker_error(prog, + "location qualifier for uniform %s overlaps " + "previously used location\n", + var->name); + return false; + } + + /* Initialize location as inactive before optimization + * rounds and location assignment. + */ + sh->SubroutineUniformRemapTable[loc] = INACTIVE_UNIFORM_EXPLICIT_LOCATION; + } + + return true; +} +/** + * Check and reserve all explicit uniform locations, called before + * any optimizations happen to handle also inactive uniforms and + * inactive array elements that may get trimmed away. 
+ */ +static void +check_explicit_uniform_locations(struct gl_context *ctx, + struct gl_shader_program *prog) +{ + if (!ctx->Extensions.ARB_explicit_uniform_location) + return; + + /* This map is used to detect if overlapping explicit locations + * occur with the same uniform (from different stage) or a different one. + */ + string_to_uint_map *uniform_map = new string_to_uint_map; + + if (!uniform_map) { + linker_error(prog, "Out of memory during linking.\n"); + return; + } + + unsigned entries_total = 0; + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + struct gl_shader *sh = prog->_LinkedShaders[i]; + + if (!sh) + continue; + + foreach_in_list(ir_instruction, node, sh->ir) { + ir_variable *var = node->as_variable(); + if (!var || var->data.mode != ir_var_uniform) + continue; + + entries_total += var->type->uniform_locations(); + + if (var->data.explicit_location) { + bool ret; + if (var->type->without_array()->is_subroutine()) + ret = reserve_subroutine_explicit_locations(prog, sh, var); + else + ret = reserve_explicit_locations(prog, uniform_map, var); + if (!ret) { + delete uniform_map; + return; + } + } + } + } + + /* Verify that total amount of entries for explicit and implicit locations + * is less than MAX_UNIFORM_LOCATIONS. + */ + if (entries_total >= ctx->Const.MaxUserAssignableUniformLocations) { + linker_error(prog, "count of uniform locations >= MAX_UNIFORM_LOCATIONS" + "(%u >= %u)", entries_total, + ctx->Const.MaxUserAssignableUniformLocations); + } + delete uniform_map; +} + +static bool +should_add_buffer_variable(struct gl_shader_program *shProg, + GLenum type, const char *name) +{ + bool found_interface = false; + unsigned block_name_len = 0; + const char *block_name_dot = strchr(name, '.'); + + /* These rules only apply to buffer variables. So we return + * true for the rest of types. 
+ */ + if (type != GL_BUFFER_VARIABLE) + return true; + + for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) { + const char *block_name = shProg->BufferInterfaceBlocks[i].Name; + block_name_len = strlen(block_name); + + const char *block_square_bracket = strchr(block_name, '['); + if (block_square_bracket) { + /* The block is part of an array of named interfaces, + * for the name comparison we ignore the "[x]" part. + */ + block_name_len -= strlen(block_square_bracket); + } + + if (block_name_dot) { + /* Check if the variable name starts with the interface + * name. The interface name (if present) should have the + * length than the interface block name we are comparing to. + */ + unsigned len = strlen(name) - strlen(block_name_dot); + if (len != block_name_len) + continue; + } + + if (strncmp(block_name, name, block_name_len) == 0) { + found_interface = true; + break; + } + } + + /* We remove the interface name from the buffer variable name, + * including the dot that follows it. + */ + if (found_interface) + name = name + block_name_len + 1; + + /* From: ARB_program_interface_query extension: + * + * "For an active shader storage block member declared as an array, an + * entry will be generated only for the first array element, regardless + * of its type. For arrays of aggregate types, the enumeration rules are + * applied recursively for the single enumerated array element. + */ + const char *struct_first_dot = strchr(name, '.'); + const char *first_square_bracket = strchr(name, '['); + + /* The buffer variable is on top level and it is not an array */ + if (!first_square_bracket) { + return true; + /* The shader storage block member is a struct, then generate the entry */ + } else if (struct_first_dot && struct_first_dot < first_square_bracket) { + return true; + } else { + /* Shader storage block member is an array, only generate an entry for the + * first array element. 
+ */ + if (strncmp(first_square_bracket, "[0]", 3) == 0) + return true; + } + + return false; +} + +static bool +add_program_resource(struct gl_shader_program *prog, GLenum type, + const void *data, uint8_t stages) +{ + assert(data); + + /* If resource already exists, do not add it again. */ + for (unsigned i = 0; i < prog->NumProgramResourceList; i++) + if (prog->ProgramResourceList[i].Data == data) + return true; + + prog->ProgramResourceList = + reralloc(prog, + prog->ProgramResourceList, + gl_program_resource, + prog->NumProgramResourceList + 1); + + if (!prog->ProgramResourceList) { + linker_error(prog, "Out of memory during linking.\n"); + return false; + } + + struct gl_program_resource *res = + &prog->ProgramResourceList[prog->NumProgramResourceList]; + + res->Type = type; + res->Data = data; + res->StageReferences = stages; + + prog->NumProgramResourceList++; + + return true; +} + +/* Function checks if a variable var is a packed varying and + * if given name is part of packed varying's list. + * + * If a variable is a packed varying, it has a name like + * 'packed:a,b,c' where a, b and c are separate variables. + */ +static bool +included_in_packed_varying(ir_variable *var, const char *name) +{ + if (strncmp(var->name, "packed:", 7) != 0) + return false; + + char *list = strdup(var->name + 7); + assert(list); + + bool found = false; + char *saveptr; + char *token = strtok_r(list, ",", &saveptr); + while (token) { + if (strcmp(token, name) == 0) { + found = true; + break; + } + token = strtok_r(NULL, ",", &saveptr); + } + free(list); + return found; +} + +/** + * Function builds a stage reference bitmask from variable name. + */ +static uint8_t +build_stageref(struct gl_shader_program *shProg, const char *name, + unsigned mode) +{ + uint8_t stages = 0; + + /* Note, that we assume MAX 8 stages, if there will be more stages, type + * used for reference mask in gl_program_resource will need to be changed. 
+ */ + assert(MESA_SHADER_STAGES < 8); + + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + struct gl_shader *sh = shProg->_LinkedShaders[i]; + if (!sh) + continue; + + /* Shader symbol table may contain variables that have + * been optimized away. Search IR for the variable instead. + */ + foreach_in_list(ir_instruction, node, sh->ir) { + ir_variable *var = node->as_variable(); + if (var) { + unsigned baselen = strlen(var->name); + + if (included_in_packed_varying(var, name)) { + stages |= (1 << i); + break; + } + + /* Type needs to match if specified, otherwise we might + * pick a variable with same name but different interface. + */ + if (var->data.mode != mode) + continue; + + if (strncmp(var->name, name, baselen) == 0) { + /* Check for exact name matches but also check for arrays and + * structs. + */ + if (name[baselen] == '\0' || + name[baselen] == '[' || + name[baselen] == '.') { + stages |= (1 << i); + break; + } + } + } + } + } + return stages; +} + +/** + * Create gl_shader_variable from ir_variable class. 
+ */ +static gl_shader_variable * +create_shader_variable(struct gl_shader_program *shProg, const ir_variable *in) +{ + gl_shader_variable *out = ralloc(shProg, struct gl_shader_variable); + if (!out) + return NULL; + + out->type = in->type; + out->name = ralloc_strdup(shProg, in->name); + + if (!out->name) + return NULL; + + out->location = in->data.location; + out->index = in->data.index; + out->patch = in->data.patch; + out->mode = in->data.mode; + + return out; +} + +static bool +add_interface_variables(struct gl_shader_program *shProg, + exec_list *ir, GLenum programInterface) +{ + foreach_in_list(ir_instruction, node, ir) { + ir_variable *var = node->as_variable(); + uint8_t mask = 0; + + if (!var) + continue; + + switch (var->data.mode) { + /* From GL 4.3 core spec, section 11.1.1 (Vertex Attributes): + * "For GetActiveAttrib, all active vertex shader input variables + * are enumerated, including the special built-in inputs gl_VertexID + * and gl_InstanceID." + */ + case ir_var_system_value: + if (var->data.location != SYSTEM_VALUE_VERTEX_ID && + var->data.location != SYSTEM_VALUE_VERTEX_ID_ZERO_BASE && + var->data.location != SYSTEM_VALUE_INSTANCE_ID) + continue; + /* Mark special built-in inputs referenced by the vertex stage so + * that they are considered active by the shader queries. + */ + mask = (1 << (MESA_SHADER_VERTEX)); + /* FALLTHROUGH */ + case ir_var_shader_in: + if (programInterface != GL_PROGRAM_INPUT) + continue; + break; + case ir_var_shader_out: + if (programInterface != GL_PROGRAM_OUTPUT) + continue; + break; + default: + continue; + }; + + /* Skip packed varyings, packed varyings are handled separately + * by add_packed_varyings. + */ + if (strncmp(var->name, "packed:", 7) == 0) + continue; + + /* Skip fragdata arrays, these are handled separately + * by add_fragdata_arrays. 
+ */ + if (strncmp(var->name, "gl_out_FragData", 15) == 0) + continue; + + gl_shader_variable *sha_v = create_shader_variable(shProg, var); + if (!sha_v) + return false; + + if (!add_program_resource(shProg, programInterface, sha_v, + build_stageref(shProg, sha_v->name, + sha_v->mode) | mask)) + return false; + } + return true; +} + +static bool +add_packed_varyings(struct gl_shader_program *shProg, int stage, GLenum type) +{ + struct gl_shader *sh = shProg->_LinkedShaders[stage]; + GLenum iface; + + if (!sh || !sh->packed_varyings) + return true; + + foreach_in_list(ir_instruction, node, sh->packed_varyings) { + ir_variable *var = node->as_variable(); + if (var) { + switch (var->data.mode) { + case ir_var_shader_in: + iface = GL_PROGRAM_INPUT; + break; + case ir_var_shader_out: + iface = GL_PROGRAM_OUTPUT; + break; + default: + unreachable("unexpected type"); + } + + if (type == iface) { + gl_shader_variable *sha_v = create_shader_variable(shProg, var); + if (!sha_v) + return false; + if (!add_program_resource(shProg, iface, sha_v, + build_stageref(shProg, sha_v->name, + sha_v->mode))) + return false; + } + } + } + return true; +} + +static bool +add_fragdata_arrays(struct gl_shader_program *shProg) +{ + struct gl_shader *sh = shProg->_LinkedShaders[MESA_SHADER_FRAGMENT]; + + if (!sh || !sh->fragdata_arrays) + return true; + + foreach_in_list(ir_instruction, node, sh->fragdata_arrays) { + ir_variable *var = node->as_variable(); + if (var) { + assert(var->data.mode == ir_var_shader_out); + gl_shader_variable *sha_v = create_shader_variable(shProg, var); + if (!sha_v) + return false; + if (!add_program_resource(shProg, GL_PROGRAM_OUTPUT, sha_v, + 1 << MESA_SHADER_FRAGMENT)) + return false; + } + } + return true; +} + +static char* +get_top_level_name(const char *name) +{ + const char *first_dot = strchr(name, '.'); + const char *first_square_bracket = strchr(name, '['); + int name_size = 0; + /* From ARB_program_interface_query spec: + * + * "For the property 
TOP_LEVEL_ARRAY_SIZE, a single integer identifying the + * number of active array elements of the top-level shader storage block + * member containing to the active variable is written to <params>. If the + * top-level block member is not declared as an array, the value one is + * written to <params>. If the top-level block member is an array with no + * declared size, the value zero is written to <params>. + */ + + /* The buffer variable is on top level.*/ + if (!first_square_bracket && !first_dot) + name_size = strlen(name); + else if ((!first_square_bracket || + (first_dot && first_dot < first_square_bracket))) + name_size = first_dot - name; + else + name_size = first_square_bracket - name; + + return strndup(name, name_size); +} + +static char* +get_var_name(const char *name) +{ + const char *first_dot = strchr(name, '.'); + + if (!first_dot) + return strdup(name); + + return strndup(first_dot+1, strlen(first_dot) - 1); +} + +static bool +is_top_level_shader_storage_block_member(const char* name, + const char* interface_name, + const char* field_name) +{ + bool result = false; + + /* If the given variable is already a top-level shader storage + * block member, then return array_size = 1. + * We could have two possibilities: if we have an instanced + * shader storage block or not instanced. + * + * For the first, we check create a name as it was in top level and + * compare it with the real name. If they are the same, then + * the variable is already at top-level. + * + * Full instanced name is: interface name + '.' 
+ var name + + * NULL character + */ + int name_length = strlen(interface_name) + 1 + strlen(field_name) + 1; + char *full_instanced_name = (char *) calloc(name_length, sizeof(char)); + if (!full_instanced_name) { + fprintf(stderr, "%s: Cannot allocate space for name\n", __func__); + return false; + } + + snprintf(full_instanced_name, name_length, "%s.%s", + interface_name, field_name); + + /* Check if its top-level shader storage block member of an + * instanced interface block, or of a unnamed interface block. + */ + if (strcmp(name, full_instanced_name) == 0 || + strcmp(name, field_name) == 0) + result = true; + + free(full_instanced_name); + return result; +} + +static int +get_array_size(struct gl_uniform_storage *uni, const glsl_struct_field *field, + char *interface_name, char *var_name) +{ + /* From GL_ARB_program_interface_query spec: + * + * "For the property TOP_LEVEL_ARRAY_SIZE, a single integer + * identifying the number of active array elements of the top-level + * shader storage block member containing to the active variable is + * written to <params>. If the top-level block member is not + * declared as an array, the value one is written to <params>. If + * the top-level block member is an array with no declared size, + * the value zero is written to <params>. + */ + if (is_top_level_shader_storage_block_member(uni->name, + interface_name, + var_name)) + return 1; + else if (field->type->is_unsized_array()) + return 0; + else if (field->type->is_array()) + return field->type->length; + + return 1; +} + +static int +get_array_stride(struct gl_uniform_storage *uni, const glsl_type *interface, + const glsl_struct_field *field, char *interface_name, + char *var_name) +{ + /* From GL_ARB_program_interface_query: + * + * "For the property TOP_LEVEL_ARRAY_STRIDE, a single integer + * identifying the stride between array elements of the top-level + * shader storage block member containing the active variable is + * written to <params>. 
For top-level block members declared as + * arrays, the value written is the difference, in basic machine + * units, between the offsets of the active variable for + * consecutive elements in the top-level array. For top-level + * block members not declared as an array, zero is written to + * <params>." + */ + if (field->type->is_array()) { + const enum glsl_matrix_layout matrix_layout = + glsl_matrix_layout(field->matrix_layout); + bool row_major = matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR; + const glsl_type *array_type = field->type->fields.array; + + if (is_top_level_shader_storage_block_member(uni->name, + interface_name, + var_name)) + return 0; + + if (interface->interface_packing != GLSL_INTERFACE_PACKING_STD430) { + if (array_type->is_record() || array_type->is_array()) + return glsl_align(array_type->std140_size(row_major), 16); + else + return MAX2(array_type->std140_base_alignment(row_major), 16); + } else { + return array_type->std430_array_stride(row_major); + } + } + return 0; +} + +static void +calculate_array_size_and_stride(struct gl_shader_program *shProg, + struct gl_uniform_storage *uni) +{ + int block_index = uni->block_index; + int array_size = -1; + int array_stride = -1; + char *var_name = get_top_level_name(uni->name); + char *interface_name = + get_top_level_name(shProg->BufferInterfaceBlocks[block_index].Name); + + if (strcmp(var_name, interface_name) == 0) { + /* Deal with instanced array of SSBOs */ + char *temp_name = get_var_name(uni->name); + if (!temp_name) { + linker_error(shProg, "Out of memory during linking.\n"); + goto write_top_level_array_size_and_stride; + } + free(var_name); + var_name = get_top_level_name(temp_name); + free(temp_name); + if (!var_name) { + linker_error(shProg, "Out of memory during linking.\n"); + goto write_top_level_array_size_and_stride; + } + } + + for (unsigned i = 0; i < shProg->NumShaders; i++) { + if (shProg->Shaders[i] == NULL) + continue; + + const gl_shader *stage = shProg->Shaders[i]; + 
foreach_in_list(ir_instruction, node, stage->ir) { + ir_variable *var = node->as_variable(); + if (!var || !var->get_interface_type() || + var->data.mode != ir_var_shader_storage) + continue; + + const glsl_type *interface = var->get_interface_type(); + + if (strcmp(interface_name, interface->name) != 0) + continue; + + for (unsigned i = 0; i < interface->length; i++) { + const glsl_struct_field *field = &interface->fields.structure[i]; + if (strcmp(field->name, var_name) != 0) + continue; + + array_stride = get_array_stride(uni, interface, field, + interface_name, var_name); + array_size = get_array_size(uni, field, interface_name, var_name); + goto write_top_level_array_size_and_stride; + } + } + } +write_top_level_array_size_and_stride: + free(interface_name); + free(var_name); + uni->top_level_array_stride = array_stride; + uni->top_level_array_size = array_size; +} + +/** + * Builds up a list of program resources that point to existing + * resource data. + */ +void +build_program_resource_list(struct gl_shader_program *shProg) +{ + /* Rebuild resource list. */ + if (shProg->ProgramResourceList) { + ralloc_free(shProg->ProgramResourceList); + shProg->ProgramResourceList = NULL; + shProg->NumProgramResourceList = 0; + } + + int input_stage = MESA_SHADER_STAGES, output_stage = 0; + + /* Determine first input and final output stage. These are used to + * detect which variables should be enumerated in the resource list + * for GL_PROGRAM_INPUT and GL_PROGRAM_OUTPUT. + */ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + if (!shProg->_LinkedShaders[i]) + continue; + if (input_stage == MESA_SHADER_STAGES) + input_stage = i; + output_stage = i; + } + + /* Empty shader, no resources. */ + if (input_stage == MESA_SHADER_STAGES && output_stage == 0) + return; + + /* Program interface needs to expose varyings in case of SSO. 
*/ + if (shProg->SeparateShader) { + if (!add_packed_varyings(shProg, input_stage, GL_PROGRAM_INPUT)) + return; + + if (!add_packed_varyings(shProg, output_stage, GL_PROGRAM_OUTPUT)) + return; + } + + if (!add_fragdata_arrays(shProg)) + return; + + /* Add inputs and outputs to the resource list. */ + if (!add_interface_variables(shProg, shProg->_LinkedShaders[input_stage]->ir, + GL_PROGRAM_INPUT)) + return; + + if (!add_interface_variables(shProg, shProg->_LinkedShaders[output_stage]->ir, + GL_PROGRAM_OUTPUT)) + return; + + /* Add transform feedback varyings. */ + if (shProg->LinkedTransformFeedback.NumVarying > 0) { + for (int i = 0; i < shProg->LinkedTransformFeedback.NumVarying; i++) { + if (!add_program_resource(shProg, GL_TRANSFORM_FEEDBACK_VARYING, + &shProg->LinkedTransformFeedback.Varyings[i], + 0)) + return; + } + } + + /* Add uniforms from uniform storage. */ + for (unsigned i = 0; i < shProg->NumUniformStorage; i++) { + /* Do not add uniforms internally used by Mesa. */ + if (shProg->UniformStorage[i].hidden) + continue; + + uint8_t stageref = + build_stageref(shProg, shProg->UniformStorage[i].name, + ir_var_uniform); + + /* Add stagereferences for uniforms in a uniform block. */ + int block_index = shProg->UniformStorage[i].block_index; + if (block_index != -1) { + for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) { + if (shProg->InterfaceBlockStageIndex[j][block_index] != -1) + stageref |= (1 << j); + } + } + + bool is_shader_storage = shProg->UniformStorage[i].is_shader_storage; + GLenum type = is_shader_storage ? GL_BUFFER_VARIABLE : GL_UNIFORM; + if (!should_add_buffer_variable(shProg, type, + shProg->UniformStorage[i].name)) + continue; + + if (is_shader_storage) { + calculate_array_size_and_stride(shProg, &shProg->UniformStorage[i]); + } + + if (!add_program_resource(shProg, type, + &shProg->UniformStorage[i], stageref)) + return; + } + + /* Add program uniform blocks and shader storage blocks. 
*/ + for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) { + bool is_shader_storage = shProg->BufferInterfaceBlocks[i].IsShaderStorage; + GLenum type = is_shader_storage ? GL_SHADER_STORAGE_BLOCK : GL_UNIFORM_BLOCK; + if (!add_program_resource(shProg, type, + &shProg->BufferInterfaceBlocks[i], 0)) + return; + } + + /* Add atomic counter buffers. */ + for (unsigned i = 0; i < shProg->NumAtomicBuffers; i++) { + if (!add_program_resource(shProg, GL_ATOMIC_COUNTER_BUFFER, + &shProg->AtomicBuffers[i], 0)) + return; + } + + for (unsigned i = 0; i < shProg->NumUniformStorage; i++) { + GLenum type; + if (!shProg->UniformStorage[i].hidden) + continue; + + for (int j = MESA_SHADER_VERTEX; j < MESA_SHADER_STAGES; j++) { + if (!shProg->UniformStorage[i].opaque[j].active || + !shProg->UniformStorage[i].type->is_subroutine()) + continue; + + type = _mesa_shader_stage_to_subroutine_uniform((gl_shader_stage)j); + /* add shader subroutines */ + if (!add_program_resource(shProg, type, &shProg->UniformStorage[i], 0)) + return; + } + } + + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + struct gl_shader *sh = shProg->_LinkedShaders[i]; + GLuint type; + + if (!sh) + continue; + + type = _mesa_shader_stage_to_subroutine((gl_shader_stage)i); + for (unsigned j = 0; j < sh->NumSubroutineFunctions; j++) { + if (!add_program_resource(shProg, type, &sh->SubroutineFunctions[j], 0)) + return; + } + } +} + +/** + * This check is done to make sure we allow only constant expression + * indexing and "constant-index-expression" (indexing with an expression + * that includes loop induction variable). 
+ */ +static bool +validate_sampler_array_indexing(struct gl_context *ctx, + struct gl_shader_program *prog) +{ + dynamic_sampler_array_indexing_visitor v; + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + if (prog->_LinkedShaders[i] == NULL) + continue; + + bool no_dynamic_indexing = + ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectSampler; + + /* Search for array derefs in shader. */ + v.run(prog->_LinkedShaders[i]->ir); + if (v.uses_dynamic_sampler_array_indexing()) { + const char *msg = "sampler arrays indexed with non-constant " + "expressions is forbidden in GLSL %s %u"; + /* Backend has indicated that it has no dynamic indexing support. */ + if (no_dynamic_indexing) { + linker_error(prog, msg, prog->IsES ? "ES" : "", prog->Version); + return false; + } else { + linker_warning(prog, msg, prog->IsES ? "ES" : "", prog->Version); + } + } + } + return true; +} + +static void +link_assign_subroutine_types(struct gl_shader_program *prog) +{ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + gl_shader *sh = prog->_LinkedShaders[i]; + + if (sh == NULL) + continue; + + foreach_in_list(ir_instruction, node, sh->ir) { + ir_function *fn = node->as_function(); + if (!fn) + continue; + + if (fn->is_subroutine) + sh->NumSubroutineUniformTypes++; + + if (!fn->num_subroutine_types) + continue; + + sh->SubroutineFunctions = reralloc(sh, sh->SubroutineFunctions, + struct gl_subroutine_function, + sh->NumSubroutineFunctions + 1); + sh->SubroutineFunctions[sh->NumSubroutineFunctions].name = ralloc_strdup(sh, fn->name); + sh->SubroutineFunctions[sh->NumSubroutineFunctions].num_compat_types = fn->num_subroutine_types; + sh->SubroutineFunctions[sh->NumSubroutineFunctions].types = + ralloc_array(sh, const struct glsl_type *, + fn->num_subroutine_types); + + /* From Section 4.4.4(Subroutine Function Layout Qualifiers) of the + * GLSL 4.5 spec: + * + * "Each subroutine with an index qualifier in the shader must be + * given a unique index, otherwise a compile or link 
error will be + * generated." + */ + for (unsigned j = 0; j < sh->NumSubroutineFunctions; j++) { + if (sh->SubroutineFunctions[j].index != -1 && + sh->SubroutineFunctions[j].index == fn->subroutine_index) { + linker_error(prog, "each subroutine index qualifier in the " + "shader must be unique\n"); + return; + } + } + sh->SubroutineFunctions[sh->NumSubroutineFunctions].index = + fn->subroutine_index; + + for (int j = 0; j < fn->num_subroutine_types; j++) + sh->SubroutineFunctions[sh->NumSubroutineFunctions].types[j] = fn->subroutine_types[j]; + sh->NumSubroutineFunctions++; + } + + /* Assign index for subroutines without an explicit index*/ + int index = 0; + for (unsigned j = 0; j < sh->NumSubroutineFunctions; j++) { + while (sh->SubroutineFunctions[j].index == -1) { + for (unsigned k = 0; k < sh->NumSubroutineFunctions; k++) { + if (sh->SubroutineFunctions[k].index == index) + break; + else if (k == sh->NumSubroutineFunctions - 1) + sh->SubroutineFunctions[j].index = index; + } + index++; + } + } + } +} + +static void +split_ubos_and_ssbos(void *mem_ctx, + struct gl_uniform_block *blocks, + unsigned num_blocks, + struct gl_uniform_block ***ubos, + unsigned *num_ubos, + unsigned **ubo_interface_block_indices, + struct gl_uniform_block ***ssbos, + unsigned *num_ssbos, + unsigned **ssbo_interface_block_indices) +{ + unsigned num_ubo_blocks = 0; + unsigned num_ssbo_blocks = 0; + + for (unsigned i = 0; i < num_blocks; i++) { + if (blocks[i].IsShaderStorage) + num_ssbo_blocks++; + else + num_ubo_blocks++; + } + + *ubos = ralloc_array(mem_ctx, gl_uniform_block *, num_ubo_blocks); + *num_ubos = 0; + + *ssbos = ralloc_array(mem_ctx, gl_uniform_block *, num_ssbo_blocks); + *num_ssbos = 0; + + if (ubo_interface_block_indices) + *ubo_interface_block_indices = + ralloc_array(mem_ctx, unsigned, num_ubo_blocks); + + if (ssbo_interface_block_indices) + *ssbo_interface_block_indices = + ralloc_array(mem_ctx, unsigned, num_ssbo_blocks); + + for (unsigned i = 0; i < num_blocks; 
i++) { + if (blocks[i].IsShaderStorage) { + (*ssbos)[*num_ssbos] = &blocks[i]; + if (ssbo_interface_block_indices) + (*ssbo_interface_block_indices)[*num_ssbos] = i; + (*num_ssbos)++; + } else { + (*ubos)[*num_ubos] = &blocks[i]; + if (ubo_interface_block_indices) + (*ubo_interface_block_indices)[*num_ubos] = i; + (*num_ubos)++; + } + } + + assert(*num_ubos + *num_ssbos == num_blocks); +} + +static void +set_always_active_io(exec_list *ir, ir_variable_mode io_mode) +{ + assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out); + + foreach_in_list(ir_instruction, node, ir) { + ir_variable *const var = node->as_variable(); + + if (var == NULL || var->data.mode != io_mode) + continue; + + /* Don't set always active on builtins that haven't been redeclared */ + if (var->data.how_declared == ir_var_declared_implicitly) + continue; + + var->data.always_active_io = true; + } +} + +/** + * When separate shader programs are enabled, only input/outputs between + * the stages of a multi-stage separate program can be safely removed + * from the shader interface. Other inputs/outputs must remain active. + */ +static void +disable_varying_optimizations_for_sso(struct gl_shader_program *prog) +{ + unsigned first, last; + assert(prog->SeparateShader); + + first = MESA_SHADER_STAGES; + last = 0; + + /* Determine first and last stage. Excluding the compute stage */ + for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) { + if (!prog->_LinkedShaders[i]) + continue; + if (first == MESA_SHADER_STAGES) + first = i; + last = i; + } + + if (first == MESA_SHADER_STAGES) + return; + + for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) { + gl_shader *sh = prog->_LinkedShaders[stage]; + if (!sh) + continue; + + if (first == last) { + /* For a single shader program only allow inputs to the vertex shader + * and outputs from the fragment shader to be removed. 
+ */ + if (stage != MESA_SHADER_VERTEX) + set_always_active_io(sh->ir, ir_var_shader_in); + if (stage != MESA_SHADER_FRAGMENT) + set_always_active_io(sh->ir, ir_var_shader_out); + } else { + /* For multi-stage separate shader programs only allow inputs and + * outputs between the shader stages to be removed as well as inputs + * to the vertex shader and outputs from the fragment shader. + */ + if (stage == first && stage != MESA_SHADER_VERTEX) + set_always_active_io(sh->ir, ir_var_shader_in); + else if (stage == last && stage != MESA_SHADER_FRAGMENT) + set_always_active_io(sh->ir, ir_var_shader_out); + } + } +} + +void +link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) +{ + tfeedback_decl *tfeedback_decls = NULL; + unsigned num_tfeedback_decls = prog->TransformFeedback.NumVarying; + + void *mem_ctx = ralloc_context(NULL); // temporary linker context + + prog->LinkStatus = true; /* All error paths will set this to false */ + prog->Validated = false; + prog->_Used = false; + + prog->ARB_fragment_coord_conventions_enable = false; + + /* Separate the shaders into groups based on their type. + */ + struct gl_shader **shader_list[MESA_SHADER_STAGES]; + unsigned num_shaders[MESA_SHADER_STAGES]; + + for (int i = 0; i < MESA_SHADER_STAGES; i++) { + shader_list[i] = (struct gl_shader **) + calloc(prog->NumShaders, sizeof(struct gl_shader *)); + num_shaders[i] = 0; + } + + unsigned min_version = UINT_MAX; + unsigned max_version = 0; + const bool is_es_prog = + (prog->NumShaders > 0 && prog->Shaders[0]->IsES) ? 
true : false; + for (unsigned i = 0; i < prog->NumShaders; i++) { + min_version = MIN2(min_version, prog->Shaders[i]->Version); + max_version = MAX2(max_version, prog->Shaders[i]->Version); + + if (prog->Shaders[i]->IsES != is_es_prog) { + linker_error(prog, "all shaders must use same shading " + "language version\n"); + goto done; + } + + if (prog->Shaders[i]->ARB_fragment_coord_conventions_enable) { + prog->ARB_fragment_coord_conventions_enable = true; + } + + gl_shader_stage shader_type = prog->Shaders[i]->Stage; + shader_list[shader_type][num_shaders[shader_type]] = prog->Shaders[i]; + num_shaders[shader_type]++; + } + + /* In desktop GLSL, different shader versions may be linked together. In + * GLSL ES, all shader versions must be the same. + */ + if (is_es_prog && min_version != max_version) { + linker_error(prog, "all shaders must use same shading " + "language version\n"); + goto done; + } + + prog->Version = max_version; + prog->IsES = is_es_prog; + + /* From OpenGL 4.5 Core specification (7.3 Program Objects): + * "Linking can fail for a variety of reasons as specified in the OpenGL + * Shading Language Specification, as well as any of the following + * reasons: + * + * * No shader objects are attached to program. + * + * ..." + * + * Same rule applies for OpenGL ES >= 3.1. + */ + + if (prog->NumShaders == 0 && + ((ctx->API == API_OPENGL_CORE && ctx->Version >= 45) || + (ctx->API == API_OPENGLES2 && ctx->Version >= 31))) { + linker_error(prog, "No shader objects are attached to program.\n"); + goto done; + } + + /* Some shaders have to be linked with some other shaders present. 
+ */ + if (num_shaders[MESA_SHADER_GEOMETRY] > 0 && + num_shaders[MESA_SHADER_VERTEX] == 0 && + !prog->SeparateShader) { + linker_error(prog, "Geometry shader must be linked with " + "vertex shader\n"); + goto done; + } + if (num_shaders[MESA_SHADER_TESS_EVAL] > 0 && + num_shaders[MESA_SHADER_VERTEX] == 0 && + !prog->SeparateShader) { + linker_error(prog, "Tessellation evaluation shader must be linked with " + "vertex shader\n"); + goto done; + } + if (num_shaders[MESA_SHADER_TESS_CTRL] > 0 && + num_shaders[MESA_SHADER_VERTEX] == 0 && + !prog->SeparateShader) { + linker_error(prog, "Tessellation control shader must be linked with " + "vertex shader\n"); + goto done; + } + + /* The spec is self-contradictory here. It allows linking without a tess + * eval shader, but that can only be used with transform feedback and + * rasterization disabled. However, transform feedback isn't allowed + * with GL_PATCHES, so it can't be used. + * + * More investigation showed that the idea of transform feedback after + * a tess control shader was dropped, because some hw vendors couldn't + * support tessellation without a tess eval shader, but the linker section + * wasn't updated to reflect that. + * + * All specifications (ARB_tessellation_shader, GL 4.0-4.5) have this + * spec bug. + * + * Do what's reasonable and always require a tess eval shader if a tess + * control shader is present. + */ + if (num_shaders[MESA_SHADER_TESS_CTRL] > 0 && + num_shaders[MESA_SHADER_TESS_EVAL] == 0 && + !prog->SeparateShader) { + linker_error(prog, "Tessellation control shader must be linked with " + "tessellation evaluation shader\n"); + goto done; + } + + /* Compute shaders have additional restrictions. 
*/ + if (num_shaders[MESA_SHADER_COMPUTE] > 0 && + num_shaders[MESA_SHADER_COMPUTE] != prog->NumShaders) { + linker_error(prog, "Compute shaders may not be linked with any other " + "type of shader\n"); + } + + for (unsigned int i = 0; i < MESA_SHADER_STAGES; i++) { + if (prog->_LinkedShaders[i] != NULL) + _mesa_delete_shader(ctx, prog->_LinkedShaders[i]); + + prog->_LinkedShaders[i] = NULL; + } + + /* Link all shaders for a particular stage and validate the result. + */ + for (int stage = 0; stage < MESA_SHADER_STAGES; stage++) { + if (num_shaders[stage] > 0) { + gl_shader *const sh = + link_intrastage_shaders(mem_ctx, ctx, prog, shader_list[stage], + num_shaders[stage]); + + if (!prog->LinkStatus) { + if (sh) + _mesa_delete_shader(ctx, sh); + goto done; + } + + switch (stage) { + case MESA_SHADER_VERTEX: + validate_vertex_shader_executable(prog, sh); + break; + case MESA_SHADER_TESS_CTRL: + /* nothing to be done */ + break; + case MESA_SHADER_TESS_EVAL: + validate_tess_eval_shader_executable(prog, sh); + break; + case MESA_SHADER_GEOMETRY: + validate_geometry_shader_executable(prog, sh); + break; + case MESA_SHADER_FRAGMENT: + validate_fragment_shader_executable(prog, sh); + break; + } + if (!prog->LinkStatus) { + if (sh) + _mesa_delete_shader(ctx, sh); + goto done; + } + + _mesa_reference_shader(ctx, &prog->_LinkedShaders[stage], sh); + } + } + + if (num_shaders[MESA_SHADER_GEOMETRY] > 0) + prog->LastClipDistanceArraySize = prog->Geom.ClipDistanceArraySize; + else if (num_shaders[MESA_SHADER_TESS_EVAL] > 0) + prog->LastClipDistanceArraySize = prog->TessEval.ClipDistanceArraySize; + else if (num_shaders[MESA_SHADER_VERTEX] > 0) + prog->LastClipDistanceArraySize = prog->Vert.ClipDistanceArraySize; + else + prog->LastClipDistanceArraySize = 0; /* Not used */ + + /* Here begins the inter-stage linking phase. Some initial validation is + * performed, then locations are assigned for uniforms, attributes, and + * varyings. 
+ */ + cross_validate_uniforms(prog); + if (!prog->LinkStatus) + goto done; + + unsigned first, last, prev; + + first = MESA_SHADER_STAGES; + last = 0; + + /* Determine first and last stage. */ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + if (!prog->_LinkedShaders[i]) + continue; + if (first == MESA_SHADER_STAGES) + first = i; + last = i; + } + + check_explicit_uniform_locations(ctx, prog); + link_assign_subroutine_types(prog); + + if (!prog->LinkStatus) + goto done; + + resize_tes_inputs(ctx, prog); + + /* Validate the inputs of each stage with the output of the preceding + * stage. + */ + prev = first; + for (unsigned i = prev + 1; i <= MESA_SHADER_FRAGMENT; i++) { + if (prog->_LinkedShaders[i] == NULL) + continue; + + validate_interstage_inout_blocks(prog, prog->_LinkedShaders[prev], + prog->_LinkedShaders[i]); + if (!prog->LinkStatus) + goto done; + + cross_validate_outputs_to_inputs(prog, + prog->_LinkedShaders[prev], + prog->_LinkedShaders[i]); + if (!prog->LinkStatus) + goto done; + + prev = i; + } + + /* Cross-validate uniform blocks between shader stages */ + validate_interstage_uniform_blocks(prog, prog->_LinkedShaders, + MESA_SHADER_STAGES); + if (!prog->LinkStatus) + goto done; + + for (unsigned int i = 0; i < MESA_SHADER_STAGES; i++) { + if (prog->_LinkedShaders[i] != NULL) + lower_named_interface_blocks(mem_ctx, prog->_LinkedShaders[i]); + } + + /* Implement the GLSL 1.30+ rule for discard vs infinite loops Do + * it before optimization because we want most of the checks to get + * dropped thanks to constant propagation. + * + * This rule also applies to GLSL ES 3.00. + */ + if (max_version >= (is_es_prog ? 
300 : 130)) { + struct gl_shader *sh = prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; + if (sh) { + lower_discard_flow(sh->ir); + } + } + + if (prog->SeparateShader) + disable_varying_optimizations_for_sso(prog); + + if (!interstage_cross_validate_uniform_blocks(prog)) + goto done; + + /* Do common optimization before assigning storage for attributes, + * uniforms, and varyings. Later optimization could possibly make + * some of that unused. + */ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + if (prog->_LinkedShaders[i] == NULL) + continue; + + detect_recursion_linked(prog, prog->_LinkedShaders[i]->ir); + if (!prog->LinkStatus) + goto done; + + if (ctx->Const.ShaderCompilerOptions[i].LowerClipDistance) { + lower_clip_distance(prog->_LinkedShaders[i]); + } + + if (ctx->Const.LowerTessLevel) { + lower_tess_level(prog->_LinkedShaders[i]); + } + + while (do_common_optimization(prog->_LinkedShaders[i]->ir, true, false, + &ctx->Const.ShaderCompilerOptions[i], + ctx->Const.NativeIntegers)) + ; + + lower_const_arrays_to_uniforms(prog->_LinkedShaders[i]->ir); + } + + /* Validation for special cases where we allow sampler array indexing + * with loop induction variable. This check emits a warning or error + * depending if backend can handle dynamic indexing. + */ + if ((!prog->IsES && prog->Version < 130) || + (prog->IsES && prog->Version < 300)) { + if (!validate_sampler_array_indexing(ctx, prog)) + goto done; + } + + /* Check and validate stream emissions in geometry shaders */ + validate_geometry_shader_emissions(ctx, prog); + + /* Mark all generic shader inputs and outputs as unpaired. 
*/ + for (unsigned i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) { + if (prog->_LinkedShaders[i] != NULL) { + link_invalidate_variable_locations(prog->_LinkedShaders[i]->ir); + } + } + + prev = first; + for (unsigned i = prev + 1; i <= MESA_SHADER_FRAGMENT; i++) { + if (prog->_LinkedShaders[i] == NULL) + continue; + + match_explicit_outputs_to_inputs(prog, prog->_LinkedShaders[prev], + prog->_LinkedShaders[i]); + prev = i; + } + + if (!assign_attribute_or_color_locations(prog, &ctx->Const, + MESA_SHADER_VERTEX)) { + goto done; + } + + if (!assign_attribute_or_color_locations(prog, &ctx->Const, + MESA_SHADER_FRAGMENT)) { + goto done; + } + + if (num_tfeedback_decls != 0) { + /* From GL_EXT_transform_feedback: + * A program will fail to link if: + * + * * the <count> specified by TransformFeedbackVaryingsEXT is + * non-zero, but the program object has no vertex or geometry + * shader; + */ + if (first == MESA_SHADER_FRAGMENT) { + linker_error(prog, "Transform feedback varyings specified, but " + "no vertex or geometry shader is present.\n"); + goto done; + } + + tfeedback_decls = ralloc_array(mem_ctx, tfeedback_decl, + prog->TransformFeedback.NumVarying); + if (!parse_tfeedback_decls(ctx, prog, mem_ctx, num_tfeedback_decls, + prog->TransformFeedback.VaryingNames, + tfeedback_decls)) + goto done; + } + + /* Linking the stages in the opposite order (from fragment to vertex) + * ensures that inter-shader outputs written to in an earlier stage are + * eliminated if they are (transitively) not used in a later stage. + */ + int next; + + if (first < MESA_SHADER_FRAGMENT) { + gl_shader *const sh = prog->_LinkedShaders[last]; + + if (first != MESA_SHADER_VERTEX) { + /* There was no vertex shader, but we still have to assign varying + * locations for use by tessellation/geometry shader inputs in SSO. + * + * If the shader is not separable (i.e., prog->SeparateShader is + * false), linking will have already failed when first is not + * MESA_SHADER_VERTEX. 
+ */ + if (!assign_varying_locations(ctx, mem_ctx, prog, + NULL, prog->_LinkedShaders[first], + num_tfeedback_decls, tfeedback_decls)) + goto done; + } + + if (last != MESA_SHADER_FRAGMENT && + (num_tfeedback_decls != 0 || prog->SeparateShader)) { + /* There was no fragment shader, but we still have to assign varying + * locations for use by transform feedback. + */ + if (!assign_varying_locations(ctx, mem_ctx, prog, + sh, NULL, + num_tfeedback_decls, tfeedback_decls)) + goto done; + } + + do_dead_builtin_varyings(ctx, sh, NULL, + num_tfeedback_decls, tfeedback_decls); + + remove_unused_shader_inputs_and_outputs(prog->SeparateShader, sh, + ir_var_shader_out); + } + else if (first == MESA_SHADER_FRAGMENT) { + /* If the program only contains a fragment shader... + */ + gl_shader *const sh = prog->_LinkedShaders[first]; + + do_dead_builtin_varyings(ctx, NULL, sh, + num_tfeedback_decls, tfeedback_decls); + + if (prog->SeparateShader) { + if (!assign_varying_locations(ctx, mem_ctx, prog, + NULL /* producer */, + sh /* consumer */, + 0 /* num_tfeedback_decls */, + NULL /* tfeedback_decls */)) + goto done; + } else { + remove_unused_shader_inputs_and_outputs(false, sh, + ir_var_shader_in); + } + } + + next = last; + for (int i = next - 1; i >= 0; i--) { + if (prog->_LinkedShaders[i] == NULL) + continue; + + gl_shader *const sh_i = prog->_LinkedShaders[i]; + gl_shader *const sh_next = prog->_LinkedShaders[next]; + + if (!assign_varying_locations(ctx, mem_ctx, prog, sh_i, sh_next, + next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0, + tfeedback_decls)) + goto done; + + do_dead_builtin_varyings(ctx, sh_i, sh_next, + next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0, + tfeedback_decls); + + /* This must be done after all dead varyings are eliminated. 
*/ + if (!check_against_output_limit(ctx, prog, sh_i)) + goto done; + if (!check_against_input_limit(ctx, prog, sh_next)) + goto done; + + next = i; + } + + if (!store_tfeedback_info(ctx, prog, num_tfeedback_decls, tfeedback_decls)) + goto done; + + update_array_sizes(prog); + link_assign_uniform_locations(prog, ctx->Const.UniformBooleanTrue); + link_assign_atomic_counter_resources(ctx, prog); + store_fragdepth_layout(prog); + + link_calculate_subroutine_compat(prog); + check_resources(ctx, prog); + check_subroutine_resources(prog); + check_image_resources(ctx, prog); + link_check_atomic_counter_resources(ctx, prog); + + if (!prog->LinkStatus) + goto done; + + /* OpenGL ES requires that a vertex shader and a fragment shader both be + * present in a linked program. GL_ARB_ES2_compatibility doesn't say + * anything about shader linking when one of the shaders (vertex or + * fragment shader) is absent. So, the extension shouldn't change the + * behavior specified in GLSL specification. + */ + if (!prog->SeparateShader && ctx->API == API_OPENGLES2) { + /* With ES < 3.1 one needs to have always vertex + fragment shader. */ + if (ctx->Version < 31) { + if (prog->_LinkedShaders[MESA_SHADER_VERTEX] == NULL) { + linker_error(prog, "program lacks a vertex shader\n"); + } else if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) { + linker_error(prog, "program lacks a fragment shader\n"); + } + } else { + /* From OpenGL ES 3.1 specification (7.3 Program Objects): + * "Linking can fail for a variety of reasons as specified in the + * OpenGL ES Shading Language Specification, as well as any of the + * following reasons: + * + * ... + * + * * program contains objects to form either a vertex shader or + * fragment shader, and program is not separable, and does not + * contain objects to form both a vertex shader and fragment + * shader." 
+ */ + if (!!prog->_LinkedShaders[MESA_SHADER_VERTEX] ^ + !!prog->_LinkedShaders[MESA_SHADER_FRAGMENT]) { + linker_error(prog, "Program needs to contain both vertex and " + "fragment shaders.\n"); + } + } + } + + /* Split BufferInterfaceBlocks into UniformBlocks and ShaderStorageBlocks + * for gl_shader_program and gl_shader, so that drivers that need separate + * index spaces for each set can have that. + */ + for (unsigned i = MESA_SHADER_VERTEX; i < MESA_SHADER_STAGES; i++) { + if (prog->_LinkedShaders[i] != NULL) { + gl_shader *sh = prog->_LinkedShaders[i]; + split_ubos_and_ssbos(sh, + sh->BufferInterfaceBlocks, + sh->NumBufferInterfaceBlocks, + &sh->UniformBlocks, + &sh->NumUniformBlocks, + NULL, + &sh->ShaderStorageBlocks, + &sh->NumShaderStorageBlocks, + NULL); + } + } + + split_ubos_and_ssbos(prog, + prog->BufferInterfaceBlocks, + prog->NumBufferInterfaceBlocks, + &prog->UniformBlocks, + &prog->NumUniformBlocks, + &prog->UboInterfaceBlockIndex, + &prog->ShaderStorageBlocks, + &prog->NumShaderStorageBlocks, + &prog->SsboInterfaceBlockIndex); + + /* FINISHME: Assign fragment shader output locations. */ + + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + if (prog->_LinkedShaders[i] == NULL) + continue; + + if (ctx->Const.ShaderCompilerOptions[i].LowerBufferInterfaceBlocks) + lower_ubo_reference(prog->_LinkedShaders[i]); + + if (ctx->Const.ShaderCompilerOptions[i].LowerShaderSharedVariables) + lower_shared_reference(prog->_LinkedShaders[i], + &prog->Comp.SharedSize); + + lower_vector_derefs(prog->_LinkedShaders[i]); + } + +done: + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + free(shader_list[i]); + if (prog->_LinkedShaders[i] == NULL) + continue; + + /* Do a final validation step to make sure that the IR wasn't + * invalidated by any modifications performed after intrastage linking. + */ + validate_ir_tree(prog->_LinkedShaders[i]->ir); + + /* Retain any live IR, but trash the rest. 
*/ + reparent_ir(prog->_LinkedShaders[i]->ir, prog->_LinkedShaders[i]->ir); + + /* The symbol table in the linked shaders may contain references to + * variables that were removed (e.g., unused uniforms). Since it may + * contain junk, there is no possible valid use. Delete it and set the + * pointer to NULL. + */ + delete prog->_LinkedShaders[i]->symbols; + prog->_LinkedShaders[i]->symbols = NULL; + } + + ralloc_free(mem_ctx); +} diff --git a/src/compiler/glsl/linker.h b/src/compiler/glsl/linker.h new file mode 100644 index 0000000..c80be1c --- /dev/null +++ b/src/compiler/glsl/linker.h @@ -0,0 +1,205 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#pragma once +#ifndef GLSL_LINKER_H +#define GLSL_LINKER_H + +extern bool +link_function_calls(gl_shader_program *prog, gl_shader *main, + gl_shader **shader_list, unsigned num_shaders); + +extern void +link_invalidate_variable_locations(exec_list *ir); + +extern void +link_assign_uniform_locations(struct gl_shader_program *prog, + unsigned int boolean_true); + +extern void +link_set_uniform_initializers(struct gl_shader_program *prog, + unsigned int boolean_true); + +extern int +link_cross_validate_uniform_block(void *mem_ctx, + struct gl_uniform_block **linked_blocks, + unsigned int *num_linked_blocks, + struct gl_uniform_block *new_block); + +extern bool +link_uniform_blocks_are_compatible(const gl_uniform_block *a, + const gl_uniform_block *b); + +extern unsigned +link_uniform_blocks(void *mem_ctx, + struct gl_context *ctx, + struct gl_shader_program *prog, + struct gl_shader **shader_list, + unsigned num_shaders, + struct gl_uniform_block **blocks_ret); + +bool +validate_intrastage_arrays(struct gl_shader_program *prog, + ir_variable *const var, + ir_variable *const existing); + +void +validate_intrastage_interface_blocks(struct gl_shader_program *prog, + const gl_shader **shader_list, + unsigned num_shaders); + +void +validate_interstage_inout_blocks(struct gl_shader_program *prog, + const gl_shader *producer, + const gl_shader *consumer); + +void +validate_interstage_uniform_blocks(struct gl_shader_program *prog, + gl_shader **stages, int num_stages); + +extern void +link_assign_atomic_counter_resources(struct gl_context *ctx, + struct gl_shader_program *prog); + +extern void +link_check_atomic_counter_resources(struct gl_context *ctx, + struct gl_shader_program *prog); + +/** + * Class for processing all of the leaf fields of a variable that corresponds + * to a program resource. 
+ * + * The leaf fields are all the parts of the variable that the application + * could query using \c glGetProgramResourceIndex (or that could be returned + * by \c glGetProgramResourceName). + * + * Classes may derive from this class to implement specific functionality. + * This class only provides the mechanism to iterate over the leaves. Derived + * classes must implement \c ::visit_field and may override \c ::process. + */ +class program_resource_visitor { +public: + /** + * Begin processing a variable + * + * Classes that overload this function should call \c ::process from the + * base class to start the recursive processing of the variable. + * + * \param var The variable that is to be processed + * + * Calls \c ::visit_field for each leaf of the variable. + * + * \warning + * When processing a uniform block, this entry should only be used in cases + * where the row / column ordering of matrices in the block does not + * matter. For example, enumerating the names of members of the block, but + * not for determining the offsets of members. + */ + void process(ir_variable *var); + + /** + * Begin processing a variable of a structured type. + * + * This flavor of \c process should be used to handle structured types + * (i.e., structures, interfaces, or arrays thereof) that need special + * name handling. A common usage is to handle cases where the block name + * (instead of the instance name) is used for an interface block. + * + * \param type Type that is to be processed, associated with \c name + * \param name Base name of the structured variable being processed + * + * \note + * \c type must be \c GLSL_TYPE_RECORD, \c GLSL_TYPE_INTERFACE, or an array + * thereof. + */ + void process(const glsl_type *type, const char *name); + +protected: + /** + * Method invoked for each leaf of the variable + * + * \param type Type of the field. + * \param name Fully qualified name of the field. + * \param row_major For a matrix type, is it stored row-major.
+ * \param record_type Type of the record containing the field. + * \param last_field Set if \c name is the last field of the structure + * containing it. This will always be false for items + * not contained in a structure or interface block. + * + * The default implementation just calls the other \c visit_field method. + */ + virtual void visit_field(const glsl_type *type, const char *name, + bool row_major, const glsl_type *record_type, + const unsigned packing, + bool last_field); + + /** + * Method invoked for each leaf of the variable + * + * \param type Type of the field. + * \param name Fully qualified name of the field. + * \param row_major For a matrix type, is it stored row-major. + */ + virtual void visit_field(const glsl_type *type, const char *name, + bool row_major) = 0; + + /** + * Visit a record before visiting its fields + * + * For structures-of-structures or interfaces-of-structures, this visits + * the inner structure before visiting its fields. + * + * The default implementation does nothing. + */ + virtual void visit_field(const glsl_struct_field *field); + + virtual void enter_record(const glsl_type *type, const char *name, + bool row_major, const unsigned packing); + + virtual void leave_record(const glsl_type *type, const char *name, + bool row_major, const unsigned packing); + + virtual void set_record_array_count(unsigned record_array_count); + +private: + /** + * \param name_length Length of the current name \b not including the + * terminating \c NUL character. + * \param last_field Set if \c name is the last field of the structure + * containing it. This will always be false for items + * not contained in a structure or interface block. 
+ */ + void recursion(const glsl_type *t, char **name, size_t name_length, + bool row_major, const glsl_type *record_type, + const unsigned packing, + bool last_field, unsigned record_array_count); +}; + +void +linker_error(gl_shader_program *prog, const char *fmt, ...); + +void +linker_warning(gl_shader_program *prog, const char *fmt, ...); + +#endif /* GLSL_LINKER_H */ diff --git a/src/compiler/glsl/list.h b/src/compiler/glsl/list.h new file mode 100644 index 0000000..a1c4d82 --- /dev/null +++ b/src/compiler/glsl/list.h @@ -0,0 +1,700 @@ +/* + * Copyright © 2008, 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file list.h + * \brief Doubly-linked list abstract container type. + * + * Each doubly-linked list has a sentinel head and tail node. These nodes + * contain no data. The head sentinel can be identified by its \c prev + * pointer being \c NULL. 
The tail sentinel can be identified by its + * \c next pointer being \c NULL. + * + * A list is empty if either the head sentinel's \c next pointer points to the + * tail sentinel or the tail sentinel's \c prev pointer points to the head + * sentinel. + * + * Instead of tracking two separate \c node structures and a \c list structure + * that points to them, the sentinel nodes are in a single structure. Noting + * that each sentinel node always has one \c NULL pointer, the \c NULL + * pointers occupy the same memory location. The \c list structure + * contains the following: + * + * - A \c head pointer that represents the \c next pointer of the + * head sentinel node. + * - A \c tail pointer that represents the \c prev pointer of the head + * sentinel node and the \c next pointer of the tail sentinel node. This + * pointer is \b always \c NULL. + * - A \c tail_pred pointer that represents the \c prev pointer of the + * tail sentinel node. + * + * Therefore, if \c head->next is \c NULL or \c tail_pred->prev is \c NULL, + * the list is empty. + * + * Do note that this means that the list nodes will contain pointers into the + * list structure itself and as a result you may not \c realloc() an \c + * exec_list or any structure in which an \c exec_list is embedded. + * + * To anyone familiar with "exec lists" on the Amiga, this structure should + * be immediately recognizable. See the following link for the original Amiga + * operating system documentation on the subject.
+ * + * http://www.natami.net/dev/Libraries_Manual_guide/node02D7.html + * + * \author Ian Romanick <ian.d.romanick@intel.com> + */ + +#pragma once +#ifndef LIST_CONTAINER_H +#define LIST_CONTAINER_H + +#ifndef __cplusplus +#include <stddef.h> +#endif +#include <assert.h> + +#include "util/ralloc.h" + +struct exec_node { + struct exec_node *next; + struct exec_node *prev; + +#ifdef __cplusplus + DECLARE_RALLOC_CXX_OPERATORS(exec_node) + + exec_node() : next(NULL), prev(NULL) + { + /* empty */ + } + + const exec_node *get_next() const; + exec_node *get_next(); + + const exec_node *get_prev() const; + exec_node *get_prev(); + + void remove(); + + /** + * Link a node with itself + * + * This creates a sort of degenerate list that is occasionally useful. + */ + void self_link(); + + /** + * Insert a node in the list after the current node + */ + void insert_after(exec_node *after); + /** + * Insert a node in the list before the current node + */ + void insert_before(exec_node *before); + + /** + * Insert another list in the list before the current node + */ + void insert_before(struct exec_list *before); + + /** + * Replace the current node with the given node. + */ + void replace_with(exec_node *replacement); + + /** + * Is this the sentinel at the tail of the list? + */ + bool is_tail_sentinel() const; + + /** + * Is this the sentinel at the head of the list? 
+ */ + bool is_head_sentinel() const; +#endif +}; + +static inline void +exec_node_init(struct exec_node *n) +{ + n->next = NULL; + n->prev = NULL; +} + +static inline const struct exec_node * +exec_node_get_next_const(const struct exec_node *n) +{ + return n->next; +} + +static inline struct exec_node * +exec_node_get_next(struct exec_node *n) +{ + return n->next; +} + +static inline const struct exec_node * +exec_node_get_prev_const(const struct exec_node *n) +{ + return n->prev; +} + +static inline struct exec_node * +exec_node_get_prev(struct exec_node *n) +{ + return n->prev; +} + +static inline void +exec_node_remove(struct exec_node *n) +{ + n->next->prev = n->prev; + n->prev->next = n->next; + n->next = NULL; + n->prev = NULL; +} + +static inline void +exec_node_self_link(struct exec_node *n) +{ + n->next = n; + n->prev = n; +} + +static inline void +exec_node_insert_after(struct exec_node *n, struct exec_node *after) +{ + after->next = n->next; + after->prev = n; + + n->next->prev = after; + n->next = after; +} + +static inline void +exec_node_insert_node_before(struct exec_node *n, struct exec_node *before) +{ + before->next = n; + before->prev = n->prev; + + n->prev->next = before; + n->prev = before; +} + +static inline void +exec_node_replace_with(struct exec_node *n, struct exec_node *replacement) +{ + replacement->prev = n->prev; + replacement->next = n->next; + + n->prev->next = replacement; + n->next->prev = replacement; +} + +static inline bool +exec_node_is_tail_sentinel(const struct exec_node *n) +{ + return n->next == NULL; +} + +static inline bool +exec_node_is_head_sentinel(const struct exec_node *n) +{ + return n->prev == NULL; +} + +#ifdef __cplusplus +inline const exec_node *exec_node::get_next() const +{ + return exec_node_get_next_const(this); +} + +inline exec_node *exec_node::get_next() +{ + return exec_node_get_next(this); +} + +inline const exec_node *exec_node::get_prev() const +{ + return exec_node_get_prev_const(this); +} + 
+inline exec_node *exec_node::get_prev() +{ + return exec_node_get_prev(this); +} + +inline void exec_node::remove() +{ + exec_node_remove(this); +} + +inline void exec_node::self_link() +{ + exec_node_self_link(this); +} + +inline void exec_node::insert_after(exec_node *after) +{ + exec_node_insert_after(this, after); +} + +inline void exec_node::insert_before(exec_node *before) +{ + exec_node_insert_node_before(this, before); +} + +inline void exec_node::replace_with(exec_node *replacement) +{ + exec_node_replace_with(this, replacement); +} + +inline bool exec_node::is_tail_sentinel() const +{ + return exec_node_is_tail_sentinel(this); +} + +inline bool exec_node::is_head_sentinel() const +{ + return exec_node_is_head_sentinel(this); +} +#endif + +#ifdef __cplusplus +/* This macro will not work correctly if `t' uses virtual inheritance. If you + * are using virtual inheritance, you deserve a slow and painful death. Enjoy! + */ +#define exec_list_offsetof(t, f, p) \ + (((char *) &((t *) p)->f) - ((char *) p)) +#else +#define exec_list_offsetof(t, f, p) offsetof(t, f) +#endif + +/** + * Get a pointer to the structure containing an exec_node + * + * Given a pointer to an \c exec_node embedded in a structure, get a pointer to + * the containing structure. 
+ * + * \param type Base type of the structure containing the node + * \param node Pointer to the \c exec_node + * \param field Name of the field in \c type that is the embedded \c exec_node + */ +#define exec_node_data(type, node, field) \ + ((type *) (((char *) node) - exec_list_offsetof(type, field, node))) + +#ifdef __cplusplus +struct exec_node; +#endif + +struct exec_list { + struct exec_node *head; + struct exec_node *tail; + struct exec_node *tail_pred; + +#ifdef __cplusplus + DECLARE_RALLOC_CXX_OPERATORS(exec_list) + + exec_list() + { + make_empty(); + } + + void make_empty(); + + bool is_empty() const; + + const exec_node *get_head() const; + exec_node *get_head(); + + const exec_node *get_tail() const; + exec_node *get_tail(); + + unsigned length() const; + + void push_head(exec_node *n); + void push_tail(exec_node *n); + void push_degenerate_list_at_head(exec_node *n); + + /** + * Remove the first node from a list and return it + * + * \return + * The first node in the list or \c NULL if the list is empty. + * + * \sa exec_list::get_head + */ + exec_node *pop_head(); + + /** + * Move all of the nodes from this list to the target list + */ + void move_nodes_to(exec_list *target); + + /** + * Append all nodes from the source list to the end of the target list + */ + void append_list(exec_list *source); + + /** + * Prepend all nodes from the source list to the beginning of the target + * list + */ + void prepend_list(exec_list *source); +#endif +}; + +static inline void +exec_list_make_empty(struct exec_list *list) +{ + list->head = (struct exec_node *) & list->tail; + list->tail = NULL; + list->tail_pred = (struct exec_node *) & list->head; +} + +static inline bool +exec_list_is_empty(const struct exec_list *list) +{ + /* There are three ways to test whether a list is empty or not. + * + * - Check to see if the \c head points to the \c tail. + * - Check to see if the \c tail_pred points to the \c head. 
+ * - Check to see if the \c head is the sentinel node by test whether its + * \c next pointer is \c NULL. + * + * The first two methods tend to generate better code on modern systems + * because they save a pointer dereference. + */ + return list->head == (struct exec_node *) &list->tail; +} + +static inline const struct exec_node * +exec_list_get_head_const(const struct exec_list *list) +{ + return !exec_list_is_empty(list) ? list->head : NULL; +} + +static inline struct exec_node * +exec_list_get_head(struct exec_list *list) +{ + return !exec_list_is_empty(list) ? list->head : NULL; +} + +static inline const struct exec_node * +exec_list_get_tail_const(const struct exec_list *list) +{ + return !exec_list_is_empty(list) ? list->tail_pred : NULL; +} + +static inline struct exec_node * +exec_list_get_tail(struct exec_list *list) +{ + return !exec_list_is_empty(list) ? list->tail_pred : NULL; +} + +static inline unsigned +exec_list_length(const struct exec_list *list) +{ + unsigned size = 0; + struct exec_node *node; + + for (node = list->head; node->next != NULL; node = node->next) { + size++; + } + + return size; +} + +static inline void +exec_list_push_head(struct exec_list *list, struct exec_node *n) +{ + n->next = list->head; + n->prev = (struct exec_node *) &list->head; + + n->next->prev = n; + list->head = n; +} + +static inline void +exec_list_push_tail(struct exec_list *list, struct exec_node *n) +{ + n->next = (struct exec_node *) &list->tail; + n->prev = list->tail_pred; + + n->prev->next = n; + list->tail_pred = n; +} + +static inline void +exec_list_push_degenerate_list_at_head(struct exec_list *list, struct exec_node *n) +{ + assert(n->prev->next == n); + + n->prev->next = list->head; + list->head->prev = n->prev; + n->prev = (struct exec_node *) &list->head; + list->head = n; +} + +static inline struct exec_node * +exec_list_pop_head(struct exec_list *list) +{ + struct exec_node *const n = exec_list_get_head(list); + if (n != NULL) + 
exec_node_remove(n); + + return n; +} + +static inline void +exec_list_move_nodes_to(struct exec_list *list, struct exec_list *target) +{ + if (exec_list_is_empty(list)) { + exec_list_make_empty(target); + } else { + target->head = list->head; + target->tail = NULL; + target->tail_pred = list->tail_pred; + + target->head->prev = (struct exec_node *) &target->head; + target->tail_pred->next = (struct exec_node *) &target->tail; + + exec_list_make_empty(list); + } +} + +static inline void +exec_list_append(struct exec_list *list, struct exec_list *source) +{ + if (exec_list_is_empty(source)) + return; + + /* Link the first node of the source with the last node of the target list. + */ + list->tail_pred->next = source->head; + source->head->prev = list->tail_pred; + + /* Make the tail of the source list be the tail of the target list. + */ + list->tail_pred = source->tail_pred; + list->tail_pred->next = (struct exec_node *) &list->tail; + + /* Make the source list empty for good measure. + */ + exec_list_make_empty(source); +} + +static inline void +exec_list_prepend(struct exec_list *list, struct exec_list *source) +{ + exec_list_append(source, list); + exec_list_move_nodes_to(source, list); +} + +static inline void +exec_node_insert_list_before(struct exec_node *n, struct exec_list *before) +{ + if (exec_list_is_empty(before)) + return; + + before->tail_pred->next = n; + before->head->prev = n->prev; + + n->prev->next = before->head; + n->prev = before->tail_pred; + + exec_list_make_empty(before); +} + +static inline void +exec_list_validate(const struct exec_list *list) +{ + const struct exec_node *node; + + assert(list->head->prev == (const struct exec_node *) &list->head); + assert(list->tail == NULL); + assert(list->tail_pred->next == (const struct exec_node *) &list->tail); + + /* We could try to use one of the interators below for this but they all + * either require C++ or assume the exec_node is embedded in a structure + * which is not the case for this 
function. + */ + for (node = list->head; node->next != NULL; node = node->next) { + assert(node->next->prev == node); + assert(node->prev->next == node); + } +} + +#ifdef __cplusplus +inline void exec_list::make_empty() +{ + exec_list_make_empty(this); +} + +inline bool exec_list::is_empty() const +{ + return exec_list_is_empty(this); +} + +inline const exec_node *exec_list::get_head() const +{ + return exec_list_get_head_const(this); +} + +inline exec_node *exec_list::get_head() +{ + return exec_list_get_head(this); +} + +inline const exec_node *exec_list::get_tail() const +{ + return exec_list_get_tail_const(this); +} + +inline exec_node *exec_list::get_tail() +{ + return exec_list_get_tail(this); +} + +inline unsigned exec_list::length() const +{ + return exec_list_length(this); +} + +inline void exec_list::push_head(exec_node *n) +{ + exec_list_push_head(this, n); +} + +inline void exec_list::push_tail(exec_node *n) +{ + exec_list_push_tail(this, n); +} + +inline void exec_list::push_degenerate_list_at_head(exec_node *n) +{ + exec_list_push_degenerate_list_at_head(this, n); +} + +inline exec_node *exec_list::pop_head() +{ + return exec_list_pop_head(this); +} + +inline void exec_list::move_nodes_to(exec_list *target) +{ + exec_list_move_nodes_to(this, target); +} + +inline void exec_list::append_list(exec_list *source) +{ + exec_list_append(this, source); +} + +inline void exec_list::prepend_list(exec_list *source) +{ + exec_list_prepend(this, source); +} + +inline void exec_node::insert_before(exec_list *before) +{ + exec_node_insert_list_before(this, before); +} +#endif + +#define foreach_in_list(__type, __inst, __list) \ + for (__type *(__inst) = (__type *)(__list)->head; \ + !(__inst)->is_tail_sentinel(); \ + (__inst) = (__type *)(__inst)->next) + +#define foreach_in_list_reverse(__type, __inst, __list) \ + for (__type *(__inst) = (__type *)(__list)->tail_pred; \ + !(__inst)->is_head_sentinel(); \ + (__inst) = (__type *)(__inst)->prev) + +/** + * This 
version is safe even if the current node is removed. + */ +#define foreach_in_list_safe(__type, __node, __list) \ + for (__type *__node = (__type *)(__list)->head, \ + *__next = (__type *)__node->next; \ + __next != NULL; \ + __node = __next, __next = (__type *)__next->next) + +#define foreach_in_list_reverse_safe(__type, __node, __list) \ + for (__type *__node = (__type *)(__list)->tail_pred, \ + *__prev = (__type *)__node->prev; \ + __prev != NULL; \ + __node = __prev, __prev = (__type *)__prev->prev) + +#define foreach_in_list_use_after(__type, __inst, __list) \ + __type *(__inst); \ + for ((__inst) = (__type *)(__list)->head; \ + !(__inst)->is_tail_sentinel(); \ + (__inst) = (__type *)(__inst)->next) +/** + * Iterate through two lists at once. Stops at the end of the shorter list. + * + * This is safe against either current node being removed or replaced. + */ +#define foreach_two_lists(__node1, __list1, __node2, __list2) \ + for (struct exec_node * __node1 = (__list1)->head, \ + * __node2 = (__list2)->head, \ + * __next1 = __node1->next, \ + * __next2 = __node2->next \ + ; __next1 != NULL && __next2 != NULL \ + ; __node1 = __next1, \ + __node2 = __next2, \ + __next1 = __next1->next, \ + __next2 = __next2->next) + +#define foreach_list_typed(__type, __node, __field, __list) \ + for (__type * __node = \ + exec_node_data(__type, (__list)->head, __field); \ + (__node)->__field.next != NULL; \ + (__node) = exec_node_data(__type, (__node)->__field.next, __field)) + +#define foreach_list_typed_reverse(__type, __node, __field, __list) \ + for (__type * __node = \ + exec_node_data(__type, (__list)->tail_pred, __field); \ + (__node)->__field.prev != NULL; \ + (__node) = exec_node_data(__type, (__node)->__field.prev, __field)) + +#define foreach_list_typed_safe(__type, __node, __field, __list) \ + for (__type * __node = \ + exec_node_data(__type, (__list)->head, __field), \ + * __next = \ + exec_node_data(__type, (__node)->__field.next, __field); \ + 
(__node)->__field.next != NULL; \ + __node = __next, __next = \ + exec_node_data(__type, (__next)->__field.next, __field)) + +#define foreach_list_typed_reverse_safe(__type, __node, __field, __list) \ + for (__type * __node = \ + exec_node_data(__type, (__list)->tail_pred, __field), \ + * __prev = \ + exec_node_data(__type, (__node)->__field.prev, __field); \ + (__node)->__field.prev != NULL; \ + __node = __prev, __prev = \ + exec_node_data(__type, (__prev)->__field.prev, __field)) + +#endif /* LIST_CONTAINER_H */ diff --git a/src/compiler/glsl/loop_analysis.cpp b/src/compiler/glsl/loop_analysis.cpp new file mode 100644 index 0000000..096a80a --- /dev/null +++ b/src/compiler/glsl/loop_analysis.cpp @@ -0,0 +1,640 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "compiler/glsl_types.h" +#include "loop_analysis.h" +#include "ir_hierarchical_visitor.h" + +static bool is_loop_terminator(ir_if *ir); + +static bool all_expression_operands_are_loop_constant(ir_rvalue *, + hash_table *); + +static ir_rvalue *get_basic_induction_increment(ir_assignment *, hash_table *); + + +/** + * Record the fact that the given loop variable was referenced inside the loop. + * + * \arg in_assignee is true if the reference was on the LHS of an assignment. + * + * \arg in_conditional_code_or_nested_loop is true if the reference occurred + * inside an if statement or a nested loop. + * + * \arg current_assignment is the ir_assignment node that the loop variable is + * on the LHS of, if any (ignored if \c in_assignee is false). + */ +void +loop_variable::record_reference(bool in_assignee, + bool in_conditional_code_or_nested_loop, + ir_assignment *current_assignment) +{ + if (in_assignee) { + assert(current_assignment != NULL); + + if (in_conditional_code_or_nested_loop || + current_assignment->condition != NULL) { + this->conditional_or_nested_assignment = true; + } + + if (this->first_assignment == NULL) { + assert(this->num_assignments == 0); + + this->first_assignment = current_assignment; + } + + this->num_assignments++; + } else if (this->first_assignment == current_assignment) { + /* This catches the case where the variable is used in the RHS of an + * assignment where it is also in the LHS. 
+ */ + this->read_before_write = true; + } +} + + +loop_state::loop_state() +{ + this->ht = hash_table_ctor(0, hash_table_pointer_hash, + hash_table_pointer_compare); + this->mem_ctx = ralloc_context(NULL); + this->loop_found = false; +} + + +loop_state::~loop_state() +{ + hash_table_dtor(this->ht); + ralloc_free(this->mem_ctx); +} + + +loop_variable_state * +loop_state::insert(ir_loop *ir) +{ + loop_variable_state *ls = new(this->mem_ctx) loop_variable_state; + + hash_table_insert(this->ht, ls, ir); + this->loop_found = true; + + return ls; +} + + +loop_variable_state * +loop_state::get(const ir_loop *ir) +{ + return (loop_variable_state *) hash_table_find(this->ht, ir); +} + + +loop_variable * +loop_variable_state::get(const ir_variable *ir) +{ + return (loop_variable *) hash_table_find(this->var_hash, ir); +} + + +loop_variable * +loop_variable_state::insert(ir_variable *var) +{ + void *mem_ctx = ralloc_parent(this); + loop_variable *lv = rzalloc(mem_ctx, loop_variable); + + lv->var = var; + + hash_table_insert(this->var_hash, lv, lv->var); + this->variables.push_tail(lv); + + return lv; +} + + +loop_terminator * +loop_variable_state::insert(ir_if *if_stmt) +{ + void *mem_ctx = ralloc_parent(this); + loop_terminator *t = new(mem_ctx) loop_terminator(); + + t->ir = if_stmt; + this->terminators.push_tail(t); + + return t; +} + + +/** + * If the given variable already is recorded in the state for this loop, + * return the corresponding loop_variable object that records information + * about it. + * + * Otherwise, create a new loop_variable object to record information about + * the variable, and set its \c read_before_write field appropriately based on + * \c in_assignee. + * + * \arg in_assignee is true if this variable was encountered on the LHS of an + * assignment. 
+ */ +loop_variable * +loop_variable_state::get_or_insert(ir_variable *var, bool in_assignee) +{ + loop_variable *lv = this->get(var); + + if (lv == NULL) { + lv = this->insert(var); + lv->read_before_write = !in_assignee; + } + + return lv; +} + + +namespace { + +class loop_analysis : public ir_hierarchical_visitor { +public: + loop_analysis(loop_state *loops); + + virtual ir_visitor_status visit(ir_loop_jump *); + virtual ir_visitor_status visit(ir_dereference_variable *); + + virtual ir_visitor_status visit_enter(ir_call *); + + virtual ir_visitor_status visit_enter(ir_loop *); + virtual ir_visitor_status visit_leave(ir_loop *); + virtual ir_visitor_status visit_enter(ir_assignment *); + virtual ir_visitor_status visit_leave(ir_assignment *); + virtual ir_visitor_status visit_enter(ir_if *); + virtual ir_visitor_status visit_leave(ir_if *); + + loop_state *loops; + + int if_statement_depth; + + ir_assignment *current_assignment; + + exec_list state; +}; + +} /* anonymous namespace */ + +loop_analysis::loop_analysis(loop_state *loops) + : loops(loops), if_statement_depth(0), current_assignment(NULL) +{ + /* empty */ +} + + +ir_visitor_status +loop_analysis::visit(ir_loop_jump *ir) +{ + (void) ir; + + assert(!this->state.is_empty()); + + loop_variable_state *const ls = + (loop_variable_state *) this->state.get_head(); + + ls->num_loop_jumps++; + + return visit_continue; +} + + +ir_visitor_status +loop_analysis::visit_enter(ir_call *) +{ + /* Mark every loop that we're currently analyzing as containing an ir_call + * (even those at outer nesting levels). + */ + foreach_in_list(loop_variable_state, ls, &this->state) { + ls->contains_calls = true; + } + + return visit_continue_with_parent; +} + + +ir_visitor_status +loop_analysis::visit(ir_dereference_variable *ir) +{ + /* If we're not somewhere inside a loop, there's nothing to do. 
+ */ + if (this->state.is_empty()) + return visit_continue; + + bool nested = false; + + foreach_in_list(loop_variable_state, ls, &this->state) { + ir_variable *var = ir->variable_referenced(); + loop_variable *lv = ls->get_or_insert(var, this->in_assignee); + + lv->record_reference(this->in_assignee, + nested || this->if_statement_depth > 0, + this->current_assignment); + nested = true; + } + + return visit_continue; +} + +ir_visitor_status +loop_analysis::visit_enter(ir_loop *ir) +{ + loop_variable_state *ls = this->loops->insert(ir); + this->state.push_head(ls); + + return visit_continue; +} + +ir_visitor_status +loop_analysis::visit_leave(ir_loop *ir) +{ + loop_variable_state *const ls = + (loop_variable_state *) this->state.pop_head(); + + /* Function calls may contain side effects. These could alter any of our + * variables in ways that cannot be known, and may even terminate shader + * execution (say, calling discard in the fragment shader). So we can't + * rely on any of our analysis about assignments to variables. + * + * We could perform some conservative analysis (prove there's no statically + * possible assignment, etc.) but it isn't worth it for now; function + * inlining will allow us to unroll loops anyway. + */ + if (ls->contains_calls) + return visit_continue; + + foreach_in_list(ir_instruction, node, &ir->body_instructions) { + /* Skip over declarations at the start of a loop. + */ + if (node->as_variable()) + continue; + + ir_if *if_stmt = ((ir_instruction *) node)->as_if(); + + if ((if_stmt != NULL) && is_loop_terminator(if_stmt)) + ls->insert(if_stmt); + else + break; + } + + + foreach_in_list_safe(loop_variable, lv, &ls->variables) { + /* Move variables that are already marked as being loop constant to + * a separate list. These trivially don't need to be tested. 
+ */ + if (lv->is_loop_constant()) { + lv->remove(); + ls->constants.push_tail(lv); + } + } + + /* Each variable assigned in the loop that isn't already marked as being loop + * constant might still be loop constant. The requirements at this point + * are: + * + * - Variable is written before it is read. + * + * - Only one assignment to the variable. + * + * - All operands on the RHS of the assignment are also loop constants. + * + * The last requirement is the reason for the progress loop. A variable + * marked as a loop constant on one pass may allow other variables to be + * marked as loop constant on following passes. + */ + bool progress; + do { + progress = false; + + foreach_in_list_safe(loop_variable, lv, &ls->variables) { + if (lv->conditional_or_nested_assignment || (lv->num_assignments > 1)) + continue; + + /* Process the RHS of the assignment. If all of the variables + * accessed there are loop constants, then add this + */ + ir_rvalue *const rhs = lv->first_assignment->rhs; + if (all_expression_operands_are_loop_constant(rhs, ls->var_hash)) { + lv->rhs_clean = true; + + if (lv->is_loop_constant()) { + progress = true; + + lv->remove(); + ls->constants.push_tail(lv); + } + } + } + } while (progress); + + /* The remaining variables that are not loop invariant might be loop + * induction variables. + */ + foreach_in_list_safe(loop_variable, lv, &ls->variables) { + /* If there is more than one assignment to a variable, it cannot be a + * loop induction variable. This isn't strictly true, but this is a + * very simple induction variable detector, and it can't handle more + * complex cases. + */ + if (lv->num_assignments > 1) + continue; + + /* All of the variables with zero assignments in the loop are loop + * invariant, and they should have already been filtered out. + */ + assert(lv->num_assignments == 1); + assert(lv->first_assignment != NULL); + + /* The assignment to the variable in the loop must be unconditional and + * not inside a nested loop. 
+ */ + if (lv->conditional_or_nested_assignment) + continue; + + /* Basic loop induction variables have a single assignment in the loop + * that has the form 'VAR = VAR + i' or 'VAR = VAR - i' where i is a + * loop invariant. + */ + ir_rvalue *const inc = + get_basic_induction_increment(lv->first_assignment, ls->var_hash); + if (inc != NULL) { + lv->increment = inc; + + lv->remove(); + ls->induction_variables.push_tail(lv); + } + } + + /* Search the loop terminating conditions for those of the form 'i < c' + * where i is a loop induction variable, c is a constant, and < is any + * relative operator. From each of these we can infer an iteration count. + * Also figure out which terminator (if any) produces the smallest + * iteration count--this is the limiting terminator. + */ + foreach_in_list(loop_terminator, t, &ls->terminators) { + ir_if *if_stmt = t->ir; + + /* If-statements can be either 'if (expr)' or 'if (deref)'. We only care + * about the former here. + */ + ir_expression *cond = if_stmt->condition->as_expression(); + if (cond == NULL) + continue; + + switch (cond->operation) { + case ir_binop_less: + case ir_binop_greater: + case ir_binop_lequal: + case ir_binop_gequal: { + /* The expressions that we care about will either be of the form + * 'counter < limit' or 'limit < counter'. Figure out which is + * which. 
+ */ + ir_rvalue *counter = cond->operands[0]->as_dereference_variable(); + ir_constant *limit = cond->operands[1]->as_constant(); + enum ir_expression_operation cmp = cond->operation; + + if (limit == NULL) { + counter = cond->operands[1]->as_dereference_variable(); + limit = cond->operands[0]->as_constant(); + + switch (cmp) { + case ir_binop_less: cmp = ir_binop_greater; break; + case ir_binop_greater: cmp = ir_binop_less; break; + case ir_binop_lequal: cmp = ir_binop_gequal; break; + case ir_binop_gequal: cmp = ir_binop_lequal; break; + default: assert(!"Should not get here."); + } + } + + if ((counter == NULL) || (limit == NULL)) + break; + + ir_variable *var = counter->variable_referenced(); + + ir_rvalue *init = find_initial_value(ir, var); + + loop_variable *lv = ls->get(var); + if (lv != NULL && lv->is_induction_var()) { + t->iterations = calculate_iterations(init, limit, lv->increment, + cmp); + + if (t->iterations >= 0 && + (ls->limiting_terminator == NULL || + t->iterations < ls->limiting_terminator->iterations)) { + ls->limiting_terminator = t; + } + } + break; + } + + default: + break; + } + } + + return visit_continue; +} + +ir_visitor_status +loop_analysis::visit_enter(ir_if *ir) +{ + (void) ir; + + if (!this->state.is_empty()) + this->if_statement_depth++; + + return visit_continue; +} + +ir_visitor_status +loop_analysis::visit_leave(ir_if *ir) +{ + (void) ir; + + if (!this->state.is_empty()) + this->if_statement_depth--; + + return visit_continue; +} + +ir_visitor_status +loop_analysis::visit_enter(ir_assignment *ir) +{ + /* If we're not somewhere inside a loop, there's nothing to do. + */ + if (this->state.is_empty()) + return visit_continue_with_parent; + + this->current_assignment = ir; + + return visit_continue; +} + +ir_visitor_status +loop_analysis::visit_leave(ir_assignment *ir) +{ + /* Since the visit_enter exits with visit_continue_with_parent for this + * case, the loop state stack should never be empty here. 
+ */ + assert(!this->state.is_empty()); + + assert(this->current_assignment == ir); + this->current_assignment = NULL; + + return visit_continue; +} + + +class examine_rhs : public ir_hierarchical_visitor { +public: + examine_rhs(hash_table *loop_variables) + { + this->only_uses_loop_constants = true; + this->loop_variables = loop_variables; + } + + virtual ir_visitor_status visit(ir_dereference_variable *ir) + { + loop_variable *lv = + (loop_variable *) hash_table_find(this->loop_variables, ir->var); + + assert(lv != NULL); + + if (lv->is_loop_constant()) { + return visit_continue; + } else { + this->only_uses_loop_constants = false; + return visit_stop; + } + } + + hash_table *loop_variables; + bool only_uses_loop_constants; +}; + + +bool +all_expression_operands_are_loop_constant(ir_rvalue *ir, hash_table *variables) +{ + examine_rhs v(variables); + + ir->accept(&v); + + return v.only_uses_loop_constants; +} + + +ir_rvalue * +get_basic_induction_increment(ir_assignment *ir, hash_table *var_hash) +{ + /* The RHS must be a binary expression. + */ + ir_expression *const rhs = ir->rhs->as_expression(); + if ((rhs == NULL) + || ((rhs->operation != ir_binop_add) + && (rhs->operation != ir_binop_sub))) + return NULL; + + /* One of the of operands of the expression must be the variable assigned. + * If the operation is subtraction, the variable in question must be the + * "left" operand. + */ + ir_variable *const var = ir->lhs->variable_referenced(); + + ir_variable *const op0 = rhs->operands[0]->variable_referenced(); + ir_variable *const op1 = rhs->operands[1]->variable_referenced(); + + if (((op0 != var) && (op1 != var)) + || ((op1 == var) && (rhs->operation == ir_binop_sub))) + return NULL; + + ir_rvalue *inc = (op0 == var) ? 
rhs->operands[1] : rhs->operands[0]; + + if (inc->as_constant() == NULL) { + ir_variable *const inc_var = inc->variable_referenced(); + if (inc_var != NULL) { + loop_variable *lv = + (loop_variable *) hash_table_find(var_hash, inc_var); + + if (lv == NULL || !lv->is_loop_constant()) { + assert(lv != NULL); + inc = NULL; + } + } else + inc = NULL; + } + + if ((inc != NULL) && (rhs->operation == ir_binop_sub)) { + void *mem_ctx = ralloc_parent(ir); + + inc = new(mem_ctx) ir_expression(ir_unop_neg, + inc->type, + inc->clone(mem_ctx, NULL), + NULL); + } + + return inc; +} + + +/** + * Detect whether an if-statement is a loop terminating condition + * + * Detects if-statements of the form + * + * (if (expression bool ...) (break)) + */ +bool +is_loop_terminator(ir_if *ir) +{ + if (!ir->else_instructions.is_empty()) + return false; + + ir_instruction *const inst = + (ir_instruction *) ir->then_instructions.get_head(); + if (inst == NULL) + return false; + + if (inst->ir_type != ir_type_loop_jump) + return false; + + ir_loop_jump *const jump = (ir_loop_jump *) inst; + if (jump->mode != ir_loop_jump::jump_break) + return false; + + return true; +} + + +loop_state * +analyze_loop_variables(exec_list *instructions) +{ + loop_state *loops = new loop_state; + loop_analysis v(loops); + + v.run(instructions); + return v.loops; +} diff --git a/src/compiler/glsl/loop_analysis.h b/src/compiler/glsl/loop_analysis.h new file mode 100644 index 0000000..3b1971d --- /dev/null +++ b/src/compiler/glsl/loop_analysis.h @@ -0,0 +1,259 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software 
is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#pragma once +#ifndef LOOP_ANALYSIS_H +#define LOOP_ANALYSIS_H + +#include "ir.h" +#include "program/hash_table.h" + +/** + * Analyze and classify all variables used in all loops in the instruction list + */ +extern class loop_state * +analyze_loop_variables(exec_list *instructions); + + +/** + * Fill in loop control fields + * + * Based on analysis of loop variables, this function tries to remove + * redundant sequences in the loop of the form + * + * (if (expression bool ...) (break)) + * + * For example, if it is provable that one loop exit condition will + * always be satisfied before another, the unnecessary exit condition will be + * removed. 
+ */ +extern bool +set_loop_controls(exec_list *instructions, loop_state *ls); + + +extern bool +unroll_loops(exec_list *instructions, loop_state *ls, + const struct gl_shader_compiler_options *options); + +ir_rvalue * +find_initial_value(ir_loop *loop, ir_variable *var); + +int +calculate_iterations(ir_rvalue *from, ir_rvalue *to, ir_rvalue *increment, + enum ir_expression_operation op); + + +/** + * Tracking for all variables used in a loop + */ +class loop_variable_state : public exec_node { +public: + class loop_variable *get(const ir_variable *); + class loop_variable *insert(ir_variable *); + class loop_variable *get_or_insert(ir_variable *, bool in_assignee); + class loop_terminator *insert(ir_if *); + + + /** + * Variables that have not yet been classified + */ + exec_list variables; + + /** + * Variables whose values are constant within the body of the loop + * + * This list contains \c loop_variable objects. + */ + exec_list constants; + + /** + * Induction variables for this loop + * + * This list contains \c loop_variable objects. + */ + exec_list induction_variables; + + /** + * Simple if-statements that lead to the termination of the loop + * + * This list contains \c loop_terminator objects. + * + * \sa is_loop_terminator + */ + exec_list terminators; + + /** + * If any of the terminators in \c terminators leads to termination of the + * loop after a constant number of iterations, this is the terminator that + * leads to termination after the smallest number of iterations. Otherwise + * NULL. + */ + loop_terminator *limiting_terminator; + + /** + * Hash table containing all variables accessed in this loop + */ + hash_table *var_hash; + + /** + * Number of ir_loop_jump instructions that operate on this loop + */ + unsigned num_loop_jumps; + + /** + * Whether this loop contains any function calls. 
+ */ + bool contains_calls; + + loop_variable_state() + { + this->num_loop_jumps = 0; + this->contains_calls = false; + this->var_hash = hash_table_ctor(0, hash_table_pointer_hash, + hash_table_pointer_compare); + this->limiting_terminator = NULL; + } + + ~loop_variable_state() + { + hash_table_dtor(this->var_hash); + } + + DECLARE_RALLOC_CXX_OPERATORS(loop_variable_state) +}; + + +class loop_variable : public exec_node { +public: + /** The variable in question. */ + ir_variable *var; + + /** Is the variable read in the loop before it is written? */ + bool read_before_write; + + /** Are all variables in the RHS of the assignment loop constants? */ + bool rhs_clean; + + /** + * Is there an assignment to the variable that is conditional, or inside a + * nested loop? + */ + bool conditional_or_nested_assignment; + + /** Reference to the first assignment to the variable in the loop body. */ + ir_assignment *first_assignment; + + /** Number of assignments to the variable in the loop body. */ + unsigned num_assignments; + + /** + * Increment value for a loop induction variable + * + * If this is a loop induction variable, the amount by which the variable + * is incremented on each iteration through the loop. + * + * If this is not a loop induction variable, NULL. + */ + ir_rvalue *increment; + + + inline bool is_induction_var() const + { + /* Induction variables always have a non-null increment, and vice + * versa. + */ + return this->increment != NULL; + } + + + inline bool is_loop_constant() const + { + const bool is_const = (this->num_assignments == 0) + || (((this->num_assignments == 1) + && !this->conditional_or_nested_assignment + && !this->read_before_write + && this->rhs_clean) || this->var->data.read_only); + + /* If the RHS of *the* assignment is clean, then there must be exactly + * one assignment of the variable. 
+ */ + assert((this->rhs_clean && (this->num_assignments == 1)) + || !this->rhs_clean); + + return is_const; + } + + void record_reference(bool in_assignee, + bool in_conditional_code_or_nested_loop, + ir_assignment *current_assignment); +}; + + +class loop_terminator : public exec_node { +public: + loop_terminator() + : ir(NULL), iterations(-1) + { + } + + /** + * Statement which terminates the loop. + */ + ir_if *ir; + + /** + * The number of iterations after which the terminator is known to + * terminate the loop (if that is a fixed value). Otherwise -1. + */ + int iterations; +}; + + +class loop_state { +public: + ~loop_state(); + + /** + * Get the loop variable state data for a particular loop + */ + loop_variable_state *get(const ir_loop *); + + loop_variable_state *insert(ir_loop *ir); + + bool loop_found; + +private: + loop_state(); + + /** + * Hash table containing all loops that have been analyzed. + */ + hash_table *ht; + + void *mem_ctx; + + friend loop_state *analyze_loop_variables(exec_list *instructions); +}; + +#endif /* LOOP_ANALYSIS_H */ diff --git a/src/compiler/glsl/loop_controls.cpp b/src/compiler/glsl/loop_controls.cpp new file mode 100644 index 0000000..c717605 --- /dev/null +++ b/src/compiler/glsl/loop_controls.cpp @@ -0,0 +1,246 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <limits.h> +#include "main/compiler.h" +#include "compiler/glsl_types.h" +#include "loop_analysis.h" +#include "ir_hierarchical_visitor.h" + +/** + * Find an initializer of a variable outside a loop + * + * Works backwards from the loop to find the pre-loop value of the variable. + * This is used, for example, to find the initial value of loop induction + * variables. + * + * \param loop Loop where \c var is an induction variable + * \param var Variable whose initializer is to be found + * + * \return + * The \c ir_rvalue assigned to the variable outside the loop. May return + * \c NULL if no initializer can be found. + */ +ir_rvalue * +find_initial_value(ir_loop *loop, ir_variable *var) +{ + for (exec_node *node = loop->prev; + !node->is_head_sentinel(); + node = node->prev) { + ir_instruction *ir = (ir_instruction *) node; + + switch (ir->ir_type) { + case ir_type_call: + case ir_type_loop: + case ir_type_loop_jump: + case ir_type_return: + case ir_type_if: + return NULL; + + case ir_type_function: + case ir_type_function_signature: + assert(!"Should not get here."); + return NULL; + + case ir_type_assignment: { + ir_assignment *assign = ir->as_assignment(); + ir_variable *assignee = assign->lhs->whole_variable_referenced(); + + if (assignee == var) + return (assign->condition != NULL) ? 
NULL : assign->rhs; + + break; + } + + default: + break; + } + } + + return NULL; +} + + +int +calculate_iterations(ir_rvalue *from, ir_rvalue *to, ir_rvalue *increment, + enum ir_expression_operation op) +{ + if (from == NULL || to == NULL || increment == NULL) + return -1; + + void *mem_ctx = ralloc_context(NULL); + + ir_expression *const sub = + new(mem_ctx) ir_expression(ir_binop_sub, from->type, to, from); + + ir_expression *const div = + new(mem_ctx) ir_expression(ir_binop_div, sub->type, sub, increment); + + ir_constant *iter = div->constant_expression_value(); + + if (iter == NULL) + return -1; + + if (!iter->type->is_integer()) { + const ir_expression_operation op = iter->type->is_double() + ? ir_unop_d2i : ir_unop_f2i; + ir_rvalue *cast = + new(mem_ctx) ir_expression(op, glsl_type::int_type, iter, NULL); + + iter = cast->constant_expression_value(); + } + + int iter_value = iter->get_int_component(0); + + /* Make sure that the calculated number of iterations satisfies the exit + * condition. This is needed to catch off-by-one errors and some types of + * ill-formed loops. For example, we need to detect that the following + * loop does not have a maximum iteration count. + * + * for (float x = 0.0; x != 0.9; x += 0.2) + * ; + */ + const int bias[] = { -1, 0, 1 }; + bool valid_loop = false; + + for (unsigned i = 0; i < ARRAY_SIZE(bias); i++) { + /* Increment may be of type int, uint or float. 
*/ + switch (increment->type->base_type) { + case GLSL_TYPE_INT: + iter = new(mem_ctx) ir_constant(iter_value + bias[i]); + break; + case GLSL_TYPE_UINT: + iter = new(mem_ctx) ir_constant(unsigned(iter_value + bias[i])); + break; + case GLSL_TYPE_FLOAT: + iter = new(mem_ctx) ir_constant(float(iter_value + bias[i])); + break; + case GLSL_TYPE_DOUBLE: + iter = new(mem_ctx) ir_constant(double(iter_value + bias[i])); + break; + default: + unreachable("Unsupported type for loop iterator."); + } + + ir_expression *const mul = + new(mem_ctx) ir_expression(ir_binop_mul, increment->type, iter, + increment); + + ir_expression *const add = + new(mem_ctx) ir_expression(ir_binop_add, mul->type, mul, from); + + ir_expression *const cmp = + new(mem_ctx) ir_expression(op, glsl_type::bool_type, add, to); + + ir_constant *const cmp_result = cmp->constant_expression_value(); + + assert(cmp_result != NULL); + if (cmp_result->get_bool_component(0)) { + iter_value += bias[i]; + valid_loop = true; + break; + } + } + + ralloc_free(mem_ctx); + return (valid_loop) ? iter_value : -1; +} + +namespace { + +class loop_control_visitor : public ir_hierarchical_visitor { +public: + loop_control_visitor(loop_state *state) + { + this->state = state; + this->progress = false; + } + + virtual ir_visitor_status visit_leave(ir_loop *ir); + + loop_state *state; + + bool progress; +}; + +} /* anonymous namespace */ + +ir_visitor_status +loop_control_visitor::visit_leave(ir_loop *ir) +{ + loop_variable_state *const ls = this->state->get(ir); + + /* If we've entered a loop that hasn't been analyzed, something really, + * really bad has happened. + */ + if (ls == NULL) { + assert(ls != NULL); + return visit_continue; + } + + if (ls->limiting_terminator != NULL) { + /* If the limiting terminator has an iteration count of zero, then we've + * proven that the loop cannot run, so delete it. 
+ */ + int iterations = ls->limiting_terminator->iterations; + if (iterations == 0) { + ir->remove(); + this->progress = true; + return visit_continue; + } + } + + /* Remove the conditional break statements associated with all terminators + * that are associated with a fixed iteration count, except for the one + * associated with the limiting terminator--that one needs to stay, since + * it terminates the loop. Exception: if the loop still has a normative + * bound, then that terminates the loop, so we don't even need the limiting + * terminator. + */ + foreach_in_list(loop_terminator, t, &ls->terminators) { + if (t->iterations < 0) + continue; + + if (t != ls->limiting_terminator) { + t->ir->remove(); + + assert(ls->num_loop_jumps > 0); + ls->num_loop_jumps--; + + this->progress = true; + } + } + + return visit_continue; +} + + +bool +set_loop_controls(exec_list *instructions, loop_state *ls) +{ + loop_control_visitor v(ls); + + v.run(instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/loop_unroll.cpp b/src/compiler/glsl/loop_unroll.cpp new file mode 100644 index 0000000..aea2743c --- /dev/null +++ b/src/compiler/glsl/loop_unroll.cpp @@ -0,0 +1,432 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "compiler/glsl_types.h" +#include "loop_analysis.h" +#include "ir_hierarchical_visitor.h" + +#include "main/mtypes.h" + +namespace { + +class loop_unroll_visitor : public ir_hierarchical_visitor { +public: + loop_unroll_visitor(loop_state *state, + const struct gl_shader_compiler_options *options) + { + this->state = state; + this->progress = false; + this->options = options; + } + + virtual ir_visitor_status visit_leave(ir_loop *ir); + void simple_unroll(ir_loop *ir, int iterations); + void complex_unroll(ir_loop *ir, int iterations, + bool continue_from_then_branch); + void splice_post_if_instructions(ir_if *ir_if, exec_list *splice_dest); + + loop_state *state; + + bool progress; + const struct gl_shader_compiler_options *options; +}; + +} /* anonymous namespace */ + +static bool +is_break(ir_instruction *ir) +{ + return ir != NULL && ir->ir_type == ir_type_loop_jump + && ((ir_loop_jump *) ir)->is_break(); +} + +class loop_unroll_count : public ir_hierarchical_visitor { +public: + int nodes; + bool unsupported_variable_indexing; + bool array_indexed_by_induction_var_with_exact_iterations; + /* If there are nested loops, the node count will be inaccurate. 
*/ + bool nested_loop; + + loop_unroll_count(exec_list *list, loop_variable_state *ls, + const struct gl_shader_compiler_options *options) + : ls(ls), options(options) + { + nodes = 0; + nested_loop = false; + unsupported_variable_indexing = false; + array_indexed_by_induction_var_with_exact_iterations = false; + + run(list); + } + + virtual ir_visitor_status visit_enter(ir_assignment *) + { + nodes++; + return visit_continue; + } + + virtual ir_visitor_status visit_enter(ir_expression *) + { + nodes++; + return visit_continue; + } + + virtual ir_visitor_status visit_enter(ir_loop *) + { + nested_loop = true; + return visit_continue; + } + + virtual ir_visitor_status visit_enter(ir_dereference_array *ir) + { + /* Force unroll in case of dynamic indexing with sampler arrays + * when EmitNoIndirectSampler is set. + */ + if (options->EmitNoIndirectSampler) { + if ((ir->array->type->is_array() && + ir->array->type->contains_sampler()) && + !ir->array_index->constant_expression_value()) { + unsupported_variable_indexing = true; + return visit_continue; + } + } + + /* Check for arrays variably-indexed by a loop induction variable. + * Unrolling the loop may convert that access into constant-indexing. + * + * Many drivers don't support particular kinds of variable indexing, + * and have to resort to using lower_variable_index_to_cond_assign to + * handle it. This results in huge amounts of horrible code, so we'd + * like to avoid that if possible. Here, we just note that it will + * happen. 
+ */ + if ((ir->array->type->is_array() || ir->array->type->is_matrix()) && + !ir->array_index->as_constant()) { + ir_variable *array = ir->array->variable_referenced(); + loop_variable *lv = ls->get(ir->array_index->variable_referenced()); + if (array && lv && lv->is_induction_var()) { + /* If an array is indexed by a loop induction variable, and the + * array size is exactly the number of loop iterations, this is + * probably a simple for-loop trying to access each element in + * turn; the application may expect it to be unrolled. + */ + if (int(array->type->length) == ls->limiting_terminator->iterations) + array_indexed_by_induction_var_with_exact_iterations = true; + + switch (array->data.mode) { + case ir_var_auto: + case ir_var_temporary: + case ir_var_const_in: + case ir_var_function_in: + case ir_var_function_out: + case ir_var_function_inout: + if (options->EmitNoIndirectTemp) + unsupported_variable_indexing = true; + break; + case ir_var_uniform: + case ir_var_shader_storage: + if (options->EmitNoIndirectUniform) + unsupported_variable_indexing = true; + break; + case ir_var_shader_in: + if (options->EmitNoIndirectInput) + unsupported_variable_indexing = true; + break; + case ir_var_shader_out: + if (options->EmitNoIndirectOutput) + unsupported_variable_indexing = true; + break; + } + } + } + return visit_continue; + } + +private: + loop_variable_state *ls; + const struct gl_shader_compiler_options *options; +}; + + +/** + * Unroll a loop which does not contain any jumps. For example, if the input + * is: + * + * (loop (...) ...instrs...) + * + * And the iteration count is 3, the output will be: + * + * ...instrs... ...instrs... ...instrs... 
+ */ +void +loop_unroll_visitor::simple_unroll(ir_loop *ir, int iterations) +{ + void *const mem_ctx = ralloc_parent(ir); + + for (int i = 0; i < iterations; i++) { + exec_list copy_list; + + copy_list.make_empty(); + clone_ir_list(mem_ctx, ©_list, &ir->body_instructions); + + ir->insert_before(©_list); + } + + /* The loop has been replaced by the unrolled copies. Remove the original + * loop from the IR sequence. + */ + ir->remove(); + + this->progress = true; +} + + +/** + * Unroll a loop whose last statement is an ir_if. If \c + * continue_from_then_branch is true, the loop is repeated only when the + * "then" branch of the if is taken; otherwise it is repeated only when the + * "else" branch of the if is taken. + * + * For example, if the input is: + * + * (loop (...) + * ...body... + * (if (cond) + * (...then_instrs...) + * (...else_instrs...))) + * + * And the iteration count is 3, and \c continue_from_then_branch is true, + * then the output will be: + * + * ...body... + * (if (cond) + * (...then_instrs... + * ...body... + * (if (cond) + * (...then_instrs... + * ...body... + * (if (cond) + * (...then_instrs...) + * (...else_instrs...))) + * (...else_instrs...))) + * (...else_instrs)) + */ +void +loop_unroll_visitor::complex_unroll(ir_loop *ir, int iterations, + bool continue_from_then_branch) +{ + void *const mem_ctx = ralloc_parent(ir); + ir_instruction *ir_to_replace = ir; + + for (int i = 0; i < iterations; i++) { + exec_list copy_list; + + copy_list.make_empty(); + clone_ir_list(mem_ctx, ©_list, &ir->body_instructions); + + ir_if *ir_if = ((ir_instruction *) copy_list.get_tail())->as_if(); + assert(ir_if != NULL); + + ir_to_replace->insert_before(©_list); + ir_to_replace->remove(); + + /* placeholder that will be removed in the next iteration */ + ir_to_replace = + new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_continue); + + exec_list *const list = (continue_from_then_branch) + ? 
&ir_if->then_instructions : &ir_if->else_instructions; + + list->push_tail(ir_to_replace); + } + + ir_to_replace->remove(); + + this->progress = true; +} + + +/** + * Move all of the instructions which follow \c ir_if to the end of + * \c splice_dest. + * + * For example, in the code snippet: + * + * (if (cond) + * (...then_instructions... + * break) + * (...else_instructions...)) + * ...post_if_instructions... + * + * If \c ir_if points to the "if" instruction, and \c splice_dest points to + * (...else_instructions...), the code snippet is transformed into: + * + * (if (cond) + * (...then_instructions... + * break) + * (...else_instructions... + * ...post_if_instructions...)) + */ +void +loop_unroll_visitor::splice_post_if_instructions(ir_if *ir_if, + exec_list *splice_dest) +{ + while (!ir_if->get_next()->is_tail_sentinel()) { + ir_instruction *move_ir = (ir_instruction *) ir_if->get_next(); + + move_ir->remove(); + splice_dest->push_tail(move_ir); + } +} + + +ir_visitor_status +loop_unroll_visitor::visit_leave(ir_loop *ir) +{ + loop_variable_state *const ls = this->state->get(ir); + int iterations; + + /* If we've entered a loop that hasn't been analyzed, something really, + * really bad has happened. + */ + if (ls == NULL) { + assert(ls != NULL); + return visit_continue; + } + + /* Don't try to unroll loops where the number of iterations is not known + * at compile-time. + */ + if (ls->limiting_terminator == NULL) + return visit_continue; + + iterations = ls->limiting_terminator->iterations; + + const int max_iterations = options->MaxUnrollIterations; + + /* Don't try to unroll loops that have zillions of iterations either. + */ + if (iterations > max_iterations) + return visit_continue; + + /* Don't try to unroll nested loops and loops with a huge body. 
+ */ + loop_unroll_count count(&ir->body_instructions, ls, options); + + bool loop_too_large = + count.nested_loop || count.nodes * iterations > max_iterations * 5; + + if (loop_too_large && !count.unsupported_variable_indexing && + !count.array_indexed_by_induction_var_with_exact_iterations) + return visit_continue; + + /* Note: the limiting terminator contributes 1 to ls->num_loop_jumps. + * We'll be removing the limiting terminator before we unroll. + */ + assert(ls->num_loop_jumps > 0); + unsigned predicted_num_loop_jumps = ls->num_loop_jumps - 1; + + if (predicted_num_loop_jumps > 1) + return visit_continue; + + if (predicted_num_loop_jumps == 0) { + ls->limiting_terminator->ir->remove(); + simple_unroll(ir, iterations); + return visit_continue; + } + + ir_instruction *last_ir = (ir_instruction *) ir->body_instructions.get_tail(); + assert(last_ir != NULL); + + if (is_break(last_ir)) { + /* If the only loop-jump is a break at the end of the loop, the loop + * will execute exactly once. Remove the break and use the simple + * unroller with an iteration count of 1. + */ + last_ir->remove(); + + ls->limiting_terminator->ir->remove(); + simple_unroll(ir, 1); + return visit_continue; + } + + /* recognize loops in the form produced by ir_lower_jumps */ + foreach_in_list(ir_instruction, cur_ir, &ir->body_instructions) { + /* Skip the limiting terminator, since it will go away when we + * unroll. + */ + if (cur_ir == ls->limiting_terminator->ir) + continue; + + ir_if *ir_if = cur_ir->as_if(); + if (ir_if != NULL) { + /* Determine which if-statement branch, if any, ends with a + * break. The branch that did *not* have the break will get a + * temporary continue inserted in each iteration of the loop + * unroll. + * + * Note that since ls->num_loop_jumps is <= 1, it is impossible + * for both branches to end with a break. 
+ */ + ir_instruction *ir_if_last = + (ir_instruction *) ir_if->then_instructions.get_tail(); + + if (is_break(ir_if_last)) { + ls->limiting_terminator->ir->remove(); + splice_post_if_instructions(ir_if, &ir_if->else_instructions); + ir_if_last->remove(); + complex_unroll(ir, iterations, false); + return visit_continue; + } else { + ir_if_last = + (ir_instruction *) ir_if->else_instructions.get_tail(); + + if (is_break(ir_if_last)) { + ls->limiting_terminator->ir->remove(); + splice_post_if_instructions(ir_if, &ir_if->then_instructions); + ir_if_last->remove(); + complex_unroll(ir, iterations, true); + return visit_continue; + } + } + } + } + + /* Did not find the break statement. It must be in a complex if-nesting, + * so don't try to unroll. + */ + return visit_continue; +} + + +bool +unroll_loops(exec_list *instructions, loop_state *ls, + const struct gl_shader_compiler_options *options) +{ + loop_unroll_visitor v(ls, options); + + v.run(instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/lower_buffer_access.cpp b/src/compiler/glsl/lower_buffer_access.cpp new file mode 100644 index 0000000..f8c8d14 --- /dev/null +++ b/src/compiler/glsl/lower_buffer_access.cpp @@ -0,0 +1,490 @@ +/* + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_buffer_access.cpp + * + * Helper for IR lowering pass to replace dereferences of buffer object based + * shader variables with intrinsic function calls. + * + * This helper is used by lowering passes for UBOs, SSBOs and compute shader + * shared variables. + */ + +#include "lower_buffer_access.h" +#include "ir_builder.h" +#include "main/macros.h" +#include "util/list.h" +#include "glsl_parser_extras.h" + +using namespace ir_builder; + +namespace lower_buffer_access { + +static inline int +writemask_for_size(unsigned n) +{ + return ((1 << n) - 1); +} + +/** + * Takes a deref and recursively calls itself to break the deref down to the + * point that the reads or writes generated are contiguous scalars or vectors. 
+ */ +void +lower_buffer_access::emit_access(void *mem_ctx, + bool is_write, + ir_dereference *deref, + ir_variable *base_offset, + unsigned int deref_offset, + bool row_major, + int matrix_columns, + unsigned int packing, + unsigned int write_mask) +{ + if (deref->type->is_record()) { + unsigned int field_offset = 0; + + for (unsigned i = 0; i < deref->type->length; i++) { + const struct glsl_struct_field *field = + &deref->type->fields.structure[i]; + ir_dereference *field_deref = + new(mem_ctx) ir_dereference_record(deref->clone(mem_ctx, NULL), + field->name); + + field_offset = + glsl_align(field_offset, + field->type->std140_base_alignment(row_major)); + + emit_access(mem_ctx, is_write, field_deref, base_offset, + deref_offset + field_offset, + row_major, 1, packing, + writemask_for_size(field_deref->type->vector_elements)); + + field_offset += field->type->std140_size(row_major); + } + return; + } + + if (deref->type->is_array()) { + unsigned array_stride = packing == GLSL_INTERFACE_PACKING_STD430 ? + deref->type->fields.array->std430_array_stride(row_major) : + glsl_align(deref->type->fields.array->std140_size(row_major), 16); + + for (unsigned i = 0; i < deref->type->length; i++) { + ir_constant *element = new(mem_ctx) ir_constant(i); + ir_dereference *element_deref = + new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), + element); + emit_access(mem_ctx, is_write, element_deref, base_offset, + deref_offset + i * array_stride, + row_major, 1, packing, + writemask_for_size(element_deref->type->vector_elements)); + } + return; + } + + if (deref->type->is_matrix()) { + for (unsigned i = 0; i < deref->type->matrix_columns; i++) { + ir_constant *col = new(mem_ctx) ir_constant(i); + ir_dereference *col_deref = + new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), col); + + if (row_major) { + /* For a row-major matrix, the next column starts at the next + * element. + */ + int size_mul = deref->type->is_double() ? 
8 : 4; + emit_access(mem_ctx, is_write, col_deref, base_offset, + deref_offset + i * size_mul, + row_major, deref->type->matrix_columns, packing, + writemask_for_size(col_deref->type->vector_elements)); + } else { + int size_mul; + + /* std430 doesn't round up vec2 size to a vec4 size */ + if (packing == GLSL_INTERFACE_PACKING_STD430 && + deref->type->vector_elements == 2 && + !deref->type->is_double()) { + size_mul = 8; + } else { + /* std140 always rounds the stride of arrays (and matrices) to a + * vec4, so matrices are always 16 between columns/rows. With + * doubles, they will be 32 apart when there are more than 2 rows. + * + * For both std140 and std430, if the member is a + * three-'component vector with components consuming N basic + * machine units, the base alignment is 4N. For vec4, base + * alignment is 4N. + */ + size_mul = (deref->type->is_double() && + deref->type->vector_elements > 2) ? 32 : 16; + } + + emit_access(mem_ctx, is_write, col_deref, base_offset, + deref_offset + i * size_mul, + row_major, deref->type->matrix_columns, packing, + writemask_for_size(col_deref->type->vector_elements)); + } + } + return; + } + + assert(deref->type->is_scalar() || deref->type->is_vector()); + + if (!row_major) { + ir_rvalue *offset = + add(base_offset, new(mem_ctx) ir_constant(deref_offset)); + unsigned mask = + is_write ? write_mask : (1 << deref->type->vector_elements) - 1; + insert_buffer_access(mem_ctx, deref, deref->type, offset, mask, -1); + } else { + unsigned N = deref->type->is_double() ? 8 : 4; + + /* We're dereffing a column out of a row-major matrix, so we + * gather the vector from each stored row. + */ + assert(deref->type->base_type == GLSL_TYPE_FLOAT || + deref->type->base_type == GLSL_TYPE_DOUBLE); + /* Matrices, row_major or not, are stored as if they were + * arrays of vectors of the appropriate size in std140. + * Arrays have their strides rounded up to a vec4, so the + * matrix stride is always 16. 
However a double matrix may either be 16 + * or 32 depending on the number of columns. + */ + assert(matrix_columns <= 4); + unsigned matrix_stride = 0; + /* Matrix stride for std430 mat2xY matrices are not rounded up to + * vec4 size. From OpenGL 4.3 spec, section 7.6.2.2 "Standard Uniform + * Block Layout": + * + * "2. If the member is a two- or four-component vector with components + * consuming N basic machine units, the base alignment is 2N or 4N, + * respectively." [...] + * "4. If the member is an array of scalars or vectors, the base alignment + * and array stride are set to match the base alignment of a single array + * element, according to rules (1), (2), and (3), and rounded up to the + * base alignment of a vec4." [...] + * "7. If the member is a row-major matrix with C columns and R rows, the + * matrix is stored identically to an array of R row vectors with C + * components each, according to rule (4)." [...] + * "When using the std430 storage layout, shader storage blocks will be + * laid out in buffer storage identically to uniform and shader storage + * blocks using the std140 layout, except that the base alignment and + * stride of arrays of scalars and vectors in rule 4 and of structures in + * rule 9 are not rounded up a multiple of the base alignment of a vec4." + */ + if (packing == GLSL_INTERFACE_PACKING_STD430 && matrix_columns == 2) + matrix_stride = 2 * N; + else + matrix_stride = glsl_align(matrix_columns * N, 16); + + const glsl_type *deref_type = deref->type->base_type == GLSL_TYPE_FLOAT ? 
+ glsl_type::float_type : glsl_type::double_type; + + for (unsigned i = 0; i < deref->type->vector_elements; i++) { + ir_rvalue *chan_offset = + add(base_offset, + new(mem_ctx) ir_constant(deref_offset + i * matrix_stride)); + if (!is_write || ((1U << i) & write_mask)) + insert_buffer_access(mem_ctx, deref, deref_type, chan_offset, + (1U << i), i); + } + } +} + +/** + * Determine if a thing being dereferenced is row-major + * + * There is some trickery here. + * + * If the thing being dereferenced is a member of uniform block \b without an + * instance name, then the name of the \c ir_variable is the field name of an + * interface type. If this field is row-major, then the thing referenced is + * row-major. + * + * If the thing being dereferenced is a member of uniform block \b with an + * instance name, then the last dereference in the tree will be an + * \c ir_dereference_record. If that record field is row-major, then the + * thing referenced is row-major. + */ +bool +lower_buffer_access::is_dereferenced_thing_row_major(const ir_rvalue *deref) +{ + bool matrix = false; + const ir_rvalue *ir = deref; + + while (true) { + matrix = matrix || ir->type->without_array()->is_matrix(); + + switch (ir->ir_type) { + case ir_type_dereference_array: { + const ir_dereference_array *const array_deref = + (const ir_dereference_array *) ir; + + ir = array_deref->array; + break; + } + + case ir_type_dereference_record: { + const ir_dereference_record *const record_deref = + (const ir_dereference_record *) ir; + + ir = record_deref->record; + + const int idx = ir->type->field_index(record_deref->field); + assert(idx >= 0); + + const enum glsl_matrix_layout matrix_layout = + glsl_matrix_layout(ir->type->fields.structure[idx].matrix_layout); + + switch (matrix_layout) { + case GLSL_MATRIX_LAYOUT_INHERITED: + break; + case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR: + return false; + case GLSL_MATRIX_LAYOUT_ROW_MAJOR: + return matrix || deref->type->without_array()->is_record(); + } + + break; 
+ } + + case ir_type_dereference_variable: { + const ir_dereference_variable *const var_deref = + (const ir_dereference_variable *) ir; + + const enum glsl_matrix_layout matrix_layout = + glsl_matrix_layout(var_deref->var->data.matrix_layout); + + switch (matrix_layout) { + case GLSL_MATRIX_LAYOUT_INHERITED: { + /* For interface block matrix variables we handle inherited + * layouts at HIR generation time, but we don't do that for shared + * variables, which are always column-major + */ + ir_variable *var = deref->variable_referenced(); + assert((var->is_in_buffer_block() && !matrix) || + var->data.mode == ir_var_shader_shared); + return false; + } + case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR: + return false; + case GLSL_MATRIX_LAYOUT_ROW_MAJOR: + return matrix || deref->type->without_array()->is_record(); + } + + unreachable("invalid matrix layout"); + break; + } + + default: + return false; + } + } + + /* The tree must have ended with a dereference that wasn't an + * ir_dereference_variable. That is invalid, and it should be impossible. + */ + unreachable("invalid dereference tree"); + return false; +} + +/** + * This function initializes various values that will be used later by + * emit_access when actually emitting loads or stores. + * + * Note: const_offset is an input as well as an output, clients must + * initialize it to the offset of the variable in the underlying block, and + * this function will adjust it by adding the constant offset of the member + * being accessed into that variable. + */ +void +lower_buffer_access::setup_buffer_access(void *mem_ctx, + ir_variable *var, + ir_rvalue *deref, + ir_rvalue **offset, + unsigned *const_offset, + bool *row_major, + int *matrix_columns, + unsigned packing) +{ + *offset = new(mem_ctx) ir_constant(0u); + *row_major = is_dereferenced_thing_row_major(deref); + *matrix_columns = 1; + + /* Calculate the offset to the start of the region of the UBO + * dereferenced by *rvalue. 
This may be a variable offset if an + * array dereference has a variable index. + */ + while (deref) { + switch (deref->ir_type) { + case ir_type_dereference_variable: { + deref = NULL; + break; + } + + case ir_type_dereference_array: { + ir_dereference_array *deref_array = (ir_dereference_array *) deref; + unsigned array_stride; + if (deref_array->array->type->is_vector()) { + /* We get this when storing or loading a component out of a vector + * with a non-constant index. This happens for v[i] = f where v is + * a vector (or m[i][j] = f where m is a matrix). If we don't + * lower that here, it gets turned into v = vector_insert(v, i, + * f), which loads the entire vector, modifies one component and + * then write the entire thing back. That breaks if another + * thread or SIMD channel is modifying the same vector. + */ + array_stride = 4; + if (deref_array->array->type->is_double()) + array_stride *= 2; + } else if (deref_array->array->type->is_matrix() && *row_major) { + /* When loading a vector out of a row major matrix, the + * step between the columns (vectors) is the size of a + * float, while the step between the rows (elements of a + * vector) is handled below in emit_ubo_loads. + */ + array_stride = 4; + if (deref_array->array->type->is_double()) + array_stride *= 2; + *matrix_columns = deref_array->array->type->matrix_columns; + } else if (deref_array->type->without_array()->is_interface()) { + /* We're processing an array dereference of an interface instance + * array. The thing being dereferenced *must* be a variable + * dereference because interfaces cannot be embedded in other + * types. In terms of calculating the offsets for the lowering + * pass, we don't care about the array index. All elements of an + * interface instance array will have the same offsets relative to + * the base of the block that backs them. 
+ */ + deref = deref_array->array->as_dereference(); + break; + } else { + /* Whether or not the field is row-major (because it might be a + * bvec2 or something) does not affect the array itself. We need + * to know whether an array element in its entirety is row-major. + */ + const bool array_row_major = + is_dereferenced_thing_row_major(deref_array); + + /* The array type will give the correct interface packing + * information + */ + if (packing == GLSL_INTERFACE_PACKING_STD430) { + array_stride = deref_array->type->std430_array_stride(array_row_major); + } else { + array_stride = deref_array->type->std140_size(array_row_major); + array_stride = glsl_align(array_stride, 16); + } + } + + ir_rvalue *array_index = deref_array->array_index; + if (array_index->type->base_type == GLSL_TYPE_INT) + array_index = i2u(array_index); + + ir_constant *const_index = + array_index->constant_expression_value(NULL); + if (const_index) { + *const_offset += array_stride * const_index->value.u[0]; + } else { + *offset = add(*offset, + mul(array_index, + new(mem_ctx) ir_constant(array_stride))); + } + deref = deref_array->array->as_dereference(); + break; + } + + case ir_type_dereference_record: { + ir_dereference_record *deref_record = (ir_dereference_record *) deref; + const glsl_type *struct_type = deref_record->record->type; + unsigned intra_struct_offset = 0; + + for (unsigned int i = 0; i < struct_type->length; i++) { + const glsl_type *type = struct_type->fields.structure[i].type; + + ir_dereference_record *field_deref = new(mem_ctx) + ir_dereference_record(deref_record->record, + struct_type->fields.structure[i].name); + const bool field_row_major = + is_dereferenced_thing_row_major(field_deref); + + ralloc_free(field_deref); + + unsigned field_align = 0; + + if (packing == GLSL_INTERFACE_PACKING_STD430) + field_align = type->std430_base_alignment(field_row_major); + else + field_align = type->std140_base_alignment(field_row_major); + + intra_struct_offset = 
glsl_align(intra_struct_offset, field_align); + + if (strcmp(struct_type->fields.structure[i].name, + deref_record->field) == 0) + break; + + if (packing == GLSL_INTERFACE_PACKING_STD430) + intra_struct_offset += type->std430_size(field_row_major); + else + intra_struct_offset += type->std140_size(field_row_major); + + /* If the field just examined was itself a structure, apply rule + * #9: + * + * "The structure may have padding at the end; the base offset + * of the member following the sub-structure is rounded up to + * the next multiple of the base alignment of the structure." + */ + if (type->without_array()->is_record()) { + intra_struct_offset = glsl_align(intra_struct_offset, + field_align); + + } + } + + *const_offset += intra_struct_offset; + deref = deref_record->record->as_dereference(); + break; + } + + case ir_type_swizzle: { + ir_swizzle *deref_swizzle = (ir_swizzle *) deref; + + assert(deref_swizzle->mask.num_components == 1); + + *const_offset += deref_swizzle->mask.x * sizeof(int); + deref = deref_swizzle->val->as_dereference(); + break; + } + + default: + assert(!"not reached"); + deref = NULL; + break; + } + } +} + +} /* namespace lower_buffer_access */ diff --git a/src/compiler/glsl/lower_buffer_access.h b/src/compiler/glsl/lower_buffer_access.h new file mode 100644 index 0000000..cc4614e --- /dev/null +++ b/src/compiler/glsl/lower_buffer_access.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * 
paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_buffer_access.h + * + * Helper for IR lowering pass to replace dereferences of buffer object based + * shader variables with intrinsic function calls. + * + * This helper is used by lowering passes for UBOs, SSBOs and compute shader + * shared variables. + */ + +#pragma once +#ifndef LOWER_BUFFER_ACCESS_H +#define LOWER_BUFFER_ACCESS_H + +#include "ir.h" +#include "ir_rvalue_visitor.h" + +namespace lower_buffer_access { + +class lower_buffer_access : public ir_rvalue_enter_visitor { +public: + virtual void + insert_buffer_access(void *mem_ctx, ir_dereference *deref, + const glsl_type *type, ir_rvalue *offset, + unsigned mask, int channel) = 0; + + void emit_access(void *mem_ctx, bool is_write, ir_dereference *deref, + ir_variable *base_offset, unsigned int deref_offset, + bool row_major, int matrix_columns, + unsigned int packing, unsigned int write_mask); + + bool is_dereferenced_thing_row_major(const ir_rvalue *deref); + + void setup_buffer_access(void *mem_ctx, ir_variable *var, ir_rvalue *deref, + ir_rvalue **offset, unsigned *const_offset, + bool *row_major, int *matrix_columns, + unsigned packing); +}; + +} /* namespace lower_buffer_access */ + +#endif /* LOWER_BUFFER_ACCESS_H */ diff --git a/src/compiler/glsl/lower_clip_distance.cpp b/src/compiler/glsl/lower_clip_distance.cpp new file mode 100644 index 0000000..1ada215 --- /dev/null +++ 
b/src/compiler/glsl/lower_clip_distance.cpp @@ -0,0 +1,574 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_clip_distance.cpp + * + * This pass accounts for the difference between the way + * gl_ClipDistance is declared in standard GLSL (as an array of + * floats), and the way it is frequently implemented in hardware (as + * a pair of vec4s, with four clip distances packed into each). + * + * The declaration of gl_ClipDistance is replaced with a declaration + * of gl_ClipDistanceMESA, and any references to gl_ClipDistance are + * translated to refer to gl_ClipDistanceMESA with the appropriate + * swizzling of array indices. 
For instance: + * + * gl_ClipDistance[i] + * + * is translated into: + * + * gl_ClipDistanceMESA[i>>2][i&3] + * + * Since some hardware may not internally represent gl_ClipDistance as a pair + * of vec4's, this lowering pass is optional. To enable it, set the + * LowerClipDistance flag in gl_shader_compiler_options to true. + */ + +#include "glsl_symbol_table.h" +#include "ir_rvalue_visitor.h" +#include "ir.h" +#include "program/prog_instruction.h" /* For WRITEMASK_* */ + +namespace { + +class lower_clip_distance_visitor : public ir_rvalue_visitor { +public: + explicit lower_clip_distance_visitor(gl_shader_stage shader_stage) + : progress(false), old_clip_distance_out_var(NULL), + old_clip_distance_in_var(NULL), new_clip_distance_out_var(NULL), + new_clip_distance_in_var(NULL), shader_stage(shader_stage) + { + } + + virtual ir_visitor_status visit(ir_variable *); + void create_indices(ir_rvalue*, ir_rvalue *&, ir_rvalue *&); + bool is_clip_distance_vec8(ir_rvalue *ir); + ir_rvalue *lower_clip_distance_vec8(ir_rvalue *ir); + virtual ir_visitor_status visit_leave(ir_assignment *); + void visit_new_assignment(ir_assignment *ir); + virtual ir_visitor_status visit_leave(ir_call *); + + virtual void handle_rvalue(ir_rvalue **rvalue); + + void fix_lhs(ir_assignment *); + + bool progress; + + /** + * Pointer to the declaration of gl_ClipDistance, if found. + * + * Note: + * + * - the in_var is for geometry and both tessellation shader inputs only. + * + * - since gl_ClipDistance is available in tessellation control, + * tessellation evaluation and geometry shaders as both an input + * and an output, it's possible for both old_clip_distance_out_var + * and old_clip_distance_in_var to be non-null. + */ + ir_variable *old_clip_distance_out_var; + ir_variable *old_clip_distance_in_var; + + /** + * Pointer to the newly-created gl_ClipDistanceMESA variable. 
+ */ + ir_variable *new_clip_distance_out_var; + ir_variable *new_clip_distance_in_var; + + /** + * Type of shader we are compiling (e.g. MESA_SHADER_VERTEX) + */ + const gl_shader_stage shader_stage; +}; + +} /* anonymous namespace */ + +/** + * Replace any declaration of gl_ClipDistance as an array of floats with a + * declaration of gl_ClipDistanceMESA as an array of vec4's. + */ +ir_visitor_status +lower_clip_distance_visitor::visit(ir_variable *ir) +{ + ir_variable **old_var; + ir_variable **new_var; + + if (!ir->name || strcmp(ir->name, "gl_ClipDistance") != 0) + return visit_continue; + assert (ir->type->is_array()); + + if (ir->data.mode == ir_var_shader_out) { + if (this->old_clip_distance_out_var) + return visit_continue; + old_var = &old_clip_distance_out_var; + new_var = &new_clip_distance_out_var; + } else if (ir->data.mode == ir_var_shader_in) { + if (this->old_clip_distance_in_var) + return visit_continue; + old_var = &old_clip_distance_in_var; + new_var = &new_clip_distance_in_var; + } else { + unreachable("not reached"); + } + + this->progress = true; + + if (!ir->type->fields.array->is_array()) { + /* gl_ClipDistance (used for vertex, tessellation evaluation and + * geometry output, and fragment input). 
+ */ + assert((ir->data.mode == ir_var_shader_in && + this->shader_stage == MESA_SHADER_FRAGMENT) || + (ir->data.mode == ir_var_shader_out && + (this->shader_stage == MESA_SHADER_VERTEX || + this->shader_stage == MESA_SHADER_TESS_EVAL || + this->shader_stage == MESA_SHADER_GEOMETRY))); + + *old_var = ir; + assert (ir->type->fields.array == glsl_type::float_type); + unsigned new_size = (ir->type->array_size() + 3) / 4; + + /* Clone the old var so that we inherit all of its properties */ + *new_var = ir->clone(ralloc_parent(ir), NULL); + + /* And change the properties that we need to change */ + (*new_var)->name = ralloc_strdup(*new_var, "gl_ClipDistanceMESA"); + (*new_var)->type = glsl_type::get_array_instance(glsl_type::vec4_type, + new_size); + (*new_var)->data.max_array_access = ir->data.max_array_access / 4; + + ir->replace_with(*new_var); + } else { + /* 2D gl_ClipDistance (used for tessellation control, tessellation + * evaluation and geometry input, and tessellation control output). 
+ */ + assert((ir->data.mode == ir_var_shader_in && + (this->shader_stage == MESA_SHADER_GEOMETRY || + this->shader_stage == MESA_SHADER_TESS_EVAL)) || + this->shader_stage == MESA_SHADER_TESS_CTRL); + + *old_var = ir; + assert (ir->type->fields.array->fields.array == glsl_type::float_type); + unsigned new_size = (ir->type->fields.array->array_size() + 3) / 4; + + /* Clone the old var so that we inherit all of its properties */ + *new_var = ir->clone(ralloc_parent(ir), NULL); + + /* And change the properties that we need to change */ + (*new_var)->name = ralloc_strdup(*new_var, "gl_ClipDistanceMESA"); + (*new_var)->type = glsl_type::get_array_instance( + glsl_type::get_array_instance(glsl_type::vec4_type, + new_size), + ir->type->array_size()); + (*new_var)->data.max_array_access = ir->data.max_array_access / 4; + + ir->replace_with(*new_var); + } + + return visit_continue; +} + + +/** + * Create the necessary GLSL rvalues to index into gl_ClipDistanceMESA based + * on the rvalue previously used to index into gl_ClipDistance. + * + * \param array_index Selects one of the vec4's in gl_ClipDistanceMESA + * \param swizzle_index Selects a component within the vec4 selected by + * array_index. + */ +void +lower_clip_distance_visitor::create_indices(ir_rvalue *old_index, + ir_rvalue *&array_index, + ir_rvalue *&swizzle_index) +{ + void *ctx = ralloc_parent(old_index); + + /* Make sure old_index is a signed int so that the bitwise "shift" and + * "and" operations below type check properly. + */ + if (old_index->type != glsl_type::int_type) { + assert (old_index->type == glsl_type::uint_type); + old_index = new(ctx) ir_expression(ir_unop_u2i, old_index); + } + + ir_constant *old_index_constant = old_index->constant_expression_value(); + if (old_index_constant) { + /* gl_ClipDistance is being accessed via a constant index. Don't bother + * creating expressions to calculate the lowered indices. Just create + * constants. 
+ */ + int const_val = old_index_constant->get_int_component(0); + array_index = new(ctx) ir_constant(const_val / 4); + swizzle_index = new(ctx) ir_constant(const_val % 4); + } else { + /* Create a variable to hold the value of old_index (so that we + * don't compute it twice). + */ + ir_variable *old_index_var = new(ctx) ir_variable( + glsl_type::int_type, "clip_distance_index", ir_var_temporary); + this->base_ir->insert_before(old_index_var); + this->base_ir->insert_before(new(ctx) ir_assignment( + new(ctx) ir_dereference_variable(old_index_var), old_index)); + + /* Create the expression clip_distance_index / 4. Do this as a bit + * shift because that's likely to be more efficient. + */ + array_index = new(ctx) ir_expression( + ir_binop_rshift, new(ctx) ir_dereference_variable(old_index_var), + new(ctx) ir_constant(2)); + + /* Create the expression clip_distance_index % 4. Do this as a bitwise + * AND because that's likely to be more efficient. + */ + swizzle_index = new(ctx) ir_expression( + ir_binop_bit_and, new(ctx) ir_dereference_variable(old_index_var), + new(ctx) ir_constant(3)); + } +} + + +/** + * Determine whether the given rvalue describes an array of 8 floats that + * needs to be lowered to an array of 2 vec4's; that is, determine whether it + * matches one of the following patterns: + * + * - gl_ClipDistance (if gl_ClipDistance is 1D) + * - gl_ClipDistance[i] (if gl_ClipDistance is 2D) + */ +bool +lower_clip_distance_visitor::is_clip_distance_vec8(ir_rvalue *ir) +{ + /* Note that geometry shaders contain gl_ClipDistance both as an input + * (which is a 2D array) and an output (which is a 1D array), so it's + * possible for both this->old_clip_distance_out_var and + * this->old_clip_distance_in_var to be non-NULL in the same shader. 
+ */ + + if (!ir->type->is_array()) + return false; + if (ir->type->fields.array != glsl_type::float_type) + return false; + + if (this->old_clip_distance_out_var) { + if (ir->variable_referenced() == this->old_clip_distance_out_var) + return true; + } + if (this->old_clip_distance_in_var) { + assert(this->shader_stage == MESA_SHADER_TESS_CTRL || + this->shader_stage == MESA_SHADER_TESS_EVAL || + this->shader_stage == MESA_SHADER_GEOMETRY || + this->shader_stage == MESA_SHADER_FRAGMENT); + + if (ir->variable_referenced() == this->old_clip_distance_in_var) + return true; + } + return false; +} + + +/** + * If the given ir satisfies is_clip_distance_vec8(), return new ir + * representing its lowered equivalent. That is, map: + * + * - gl_ClipDistance => gl_ClipDistanceMESA (if gl_ClipDistance is 1D) + * - gl_ClipDistance[i] => gl_ClipDistanceMESA[i] (if gl_ClipDistance is 2D) + * + * Otherwise return NULL. + */ +ir_rvalue * +lower_clip_distance_visitor::lower_clip_distance_vec8(ir_rvalue *ir) +{ + if (!ir->type->is_array()) + return NULL; + if (ir->type->fields.array != glsl_type::float_type) + return NULL; + + ir_variable **new_var = NULL; + if (this->old_clip_distance_out_var) { + if (ir->variable_referenced() == this->old_clip_distance_out_var) + new_var = &this->new_clip_distance_out_var; + } + if (this->old_clip_distance_in_var) { + if (ir->variable_referenced() == this->old_clip_distance_in_var) + new_var = &this->new_clip_distance_in_var; + } + if (new_var == NULL) + return NULL; + + if (ir->as_dereference_variable()) { + return new(ralloc_parent(ir)) ir_dereference_variable(*new_var); + } else { + ir_dereference_array *array_ref = ir->as_dereference_array(); + assert(array_ref); + assert(array_ref->array->as_dereference_variable()); + + return new(ralloc_parent(ir)) + ir_dereference_array(*new_var, array_ref->array_index); + } +} + + +void +lower_clip_distance_visitor::handle_rvalue(ir_rvalue **rv) +{ + if (*rv == NULL) + return; + + ir_dereference_array 
*const array_deref = (*rv)->as_dereference_array(); + if (array_deref == NULL) + return; + + /* Replace any expression that indexes one of the floats in gl_ClipDistance + * with an expression that indexes into one of the vec4's in + * gl_ClipDistanceMESA and accesses the appropriate component. + */ + ir_rvalue *lowered_vec8 = + this->lower_clip_distance_vec8(array_deref->array); + if (lowered_vec8 != NULL) { + this->progress = true; + ir_rvalue *array_index; + ir_rvalue *swizzle_index; + this->create_indices(array_deref->array_index, array_index, swizzle_index); + void *mem_ctx = ralloc_parent(array_deref); + + ir_dereference_array *const new_array_deref = + new(mem_ctx) ir_dereference_array(lowered_vec8, array_index); + + ir_expression *const expr = + new(mem_ctx) ir_expression(ir_binop_vector_extract, + new_array_deref, + swizzle_index); + + *rv = expr; + } +} + +void +lower_clip_distance_visitor::fix_lhs(ir_assignment *ir) +{ + if (ir->lhs->ir_type == ir_type_expression) { + void *mem_ctx = ralloc_parent(ir); + ir_expression *const expr = (ir_expression *) ir->lhs; + + /* The expression must be of the form: + * + * (vector_extract gl_ClipDistanceMESA[i], j). + */ + assert(expr->operation == ir_binop_vector_extract); + assert(expr->operands[0]->ir_type == ir_type_dereference_array); + assert(expr->operands[0]->type == glsl_type::vec4_type); + + ir_dereference *const new_lhs = (ir_dereference *) expr->operands[0]; + ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert, + glsl_type::vec4_type, + new_lhs->clone(mem_ctx, NULL), + ir->rhs, + expr->operands[1]); + ir->set_lhs(new_lhs); + ir->write_mask = WRITEMASK_XYZW; + } +} + +/** + * Replace any assignment having the 1D gl_ClipDistance (undereferenced) as + * its LHS or RHS with a sequence of assignments, one for each component of + * the array. Each of these assignments is lowered to refer to + * gl_ClipDistanceMESA as appropriate. 
+ * + * We need to do a similar replacement for 2D gl_ClipDistance, however since + * it's an input, the only case we need to address is where a 1D slice of it + * is the entire RHS of an assignment, e.g.: + * + * foo = gl_in[i].gl_ClipDistance + */ +ir_visitor_status +lower_clip_distance_visitor::visit_leave(ir_assignment *ir) +{ + /* First invoke the base class visitor. This causes handle_rvalue() to be + * called on ir->rhs and ir->condition. + */ + ir_rvalue_visitor::visit_leave(ir); + + if (this->is_clip_distance_vec8(ir->lhs) || + this->is_clip_distance_vec8(ir->rhs)) { + /* LHS or RHS of the assignment is the entire 1D gl_ClipDistance array + * (or a 1D slice of a 2D gl_ClipDistance input array). Since we are + * reshaping gl_ClipDistance from an array of floats to an array of + * vec4's, this isn't going to work as a bulk assignment anymore, so + * unroll it to element-by-element assignments and lower each of them. + * + * Note: to unroll into element-by-element assignments, we need to make + * clones of the LHS and RHS. This is safe because expressions and + * l-values are side-effect free. + */ + void *ctx = ralloc_parent(ir); + int array_size = ir->lhs->type->array_size(); + for (int i = 0; i < array_size; ++i) { + ir_dereference_array *new_lhs = new(ctx) ir_dereference_array( + ir->lhs->clone(ctx, NULL), new(ctx) ir_constant(i)); + ir_dereference_array *new_rhs = new(ctx) ir_dereference_array( + ir->rhs->clone(ctx, NULL), new(ctx) ir_constant(i)); + this->handle_rvalue((ir_rvalue **) &new_rhs); + + /* Handle the LHS after creating the new assignment. This must + * happen in this order because handle_rvalue may replace the old LHS + * with an ir_expression of ir_binop_vector_extract. Since this is + * not a valid l-value, this will cause an assertion in the + * ir_assignment constructor to fail. + * + * If this occurs, replace the mangled LHS with a dereference of the + * vector, and replace the RHS with an ir_triop_vector_insert. 
+ */ + ir_assignment *const assign = new(ctx) ir_assignment(new_lhs, new_rhs); + this->handle_rvalue((ir_rvalue **) &assign->lhs); + this->fix_lhs(assign); + + this->base_ir->insert_before(assign); + } + ir->remove(); + + return visit_continue; + } + + /* Handle the LHS as if it were an r-value. Normally + * rvalue_visit(ir_assignment *) only visits the RHS, but we need to lower + * expressions in the LHS as well. + * + * This may cause the LHS to get replaced with an ir_expression of + * ir_binop_vector_extract. If this occurs, replace it with a dereference + * of the vector, and replace the RHS with an ir_triop_vector_insert. + */ + handle_rvalue((ir_rvalue **)&ir->lhs); + this->fix_lhs(ir); + + return rvalue_visit(ir); +} + + +/** + * Set up base_ir properly and call visit_leave() on a newly created + * ir_assignment node. This is used in cases where we have to insert an + * ir_assignment in a place where we know the hierarchical visitor won't see + * it. + */ +void +lower_clip_distance_visitor::visit_new_assignment(ir_assignment *ir) +{ + ir_instruction *old_base_ir = this->base_ir; + this->base_ir = ir; + ir->accept(this); + this->base_ir = old_base_ir; +} + + +/** + * If a 1D gl_ClipDistance variable appears as an argument in an ir_call + * expression, replace it with a temporary variable, and make sure the ir_call + * is preceded and/or followed by assignments that copy the contents of the + * temporary variable to and/or from gl_ClipDistance. Each of these + * assignments is then lowered to refer to gl_ClipDistanceMESA. 
+ * + * We need to do a similar replacement for 2D gl_ClipDistance, however since + * it's an input, the only case we need to address is where a 1D slice of it + * is passed as an "in" parameter to an ir_call, e.g.: + * + * foo(gl_in[i].gl_ClipDistance) + */ +ir_visitor_status +lower_clip_distance_visitor::visit_leave(ir_call *ir) +{ + void *ctx = ralloc_parent(ir); + + const exec_node *formal_param_node = ir->callee->parameters.head; + const exec_node *actual_param_node = ir->actual_parameters.head; + while (!actual_param_node->is_tail_sentinel()) { + ir_variable *formal_param = (ir_variable *) formal_param_node; + ir_rvalue *actual_param = (ir_rvalue *) actual_param_node; + + /* Advance formal_param_node and actual_param_node now so that we can + * safely replace actual_param with another node, if necessary, below. + */ + formal_param_node = formal_param_node->next; + actual_param_node = actual_param_node->next; + + if (this->is_clip_distance_vec8(actual_param)) { + /* User is trying to pass the whole 1D gl_ClipDistance array (or a 1D + * slice of a 2D gl_ClipDistance array) to a function call. Since we + * are reshaping gl_ClipDistance from an array of floats to an array + * of vec4's, this isn't going to work anymore, so use a temporary + * array instead. + */ + ir_variable *temp_clip_distance = new(ctx) ir_variable( + actual_param->type, "temp_clip_distance", ir_var_temporary); + this->base_ir->insert_before(temp_clip_distance); + actual_param->replace_with( + new(ctx) ir_dereference_variable(temp_clip_distance)); + if (formal_param->data.mode == ir_var_function_in + || formal_param->data.mode == ir_var_function_inout) { + /* Copy from gl_ClipDistance to the temporary before the call. + * Since we are going to insert this copy before the current + * instruction, we need to visit it afterwards to make sure it + * gets lowered. 
+ */ + ir_assignment *new_assignment = new(ctx) ir_assignment( + new(ctx) ir_dereference_variable(temp_clip_distance), + actual_param->clone(ctx, NULL)); + this->base_ir->insert_before(new_assignment); + this->visit_new_assignment(new_assignment); + } + if (formal_param->data.mode == ir_var_function_out + || formal_param->data.mode == ir_var_function_inout) { + /* Copy from the temporary to gl_ClipDistance after the call. + * Since visit_list_elements() has already decided which + * instruction it's going to visit next, we need to visit + * afterwards to make sure it gets lowered. + */ + ir_assignment *new_assignment = new(ctx) ir_assignment( + actual_param->clone(ctx, NULL), + new(ctx) ir_dereference_variable(temp_clip_distance)); + this->base_ir->insert_after(new_assignment); + this->visit_new_assignment(new_assignment); + } + } + } + + return rvalue_visit(ir); +} + + +bool +lower_clip_distance(gl_shader *shader) +{ + lower_clip_distance_visitor v(shader->Stage); + + visit_list_elements(&v, shader->ir); + + if (v.new_clip_distance_out_var) + shader->symbols->add_variable(v.new_clip_distance_out_var); + if (v.new_clip_distance_in_var) + shader->symbols->add_variable(v.new_clip_distance_in_var); + + return v.progress; +} diff --git a/src/compiler/glsl/lower_const_arrays_to_uniforms.cpp b/src/compiler/glsl/lower_const_arrays_to_uniforms.cpp new file mode 100644 index 0000000..2d024d4 --- /dev/null +++ b/src/compiler/glsl/lower_const_arrays_to_uniforms.cpp @@ -0,0 +1,109 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following 
conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_const_arrays_to_uniforms.cpp + * + * Lower constant arrays to uniform arrays. + * + * Some driver backends (such as i965 and nouveau) don't handle constant arrays + * gracefully, instead treating them as ordinary writable temporary arrays. + * Since arrays can be large, this often means spilling them to scratch memory, + * which usually involves a large number of instructions. + * + * This must be called prior to link_set_uniform_initializers(); we need the + * linker to process our new uniform's constant initializer. + * + * This should be called after optimizations, since those can result in + * splitting and removing arrays that are indexed by constant expressions. 
+ */ +#include "ir.h" +#include "ir_visitor.h" +#include "ir_rvalue_visitor.h" +#include "compiler/glsl_types.h" + +namespace { +class lower_const_array_visitor : public ir_rvalue_visitor { +public: + lower_const_array_visitor(exec_list *insts) + { + instructions = insts; + progress = false; + } + + bool run() + { + visit_list_elements(this, instructions); + return progress; + } + + void handle_rvalue(ir_rvalue **rvalue); + +private: + exec_list *instructions; + bool progress; +}; + +void +lower_const_array_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_dereference_array *dra = (*rvalue)->as_dereference_array(); + if (!dra) + return; + + ir_constant *con = dra->array->as_constant(); + if (!con || !con->type->is_array()) + return; + + void *mem_ctx = ralloc_parent(con); + + char *uniform_name = ralloc_asprintf(mem_ctx, "constarray__%p", dra); + + ir_variable *uni = + new(mem_ctx) ir_variable(con->type, uniform_name, ir_var_uniform); + uni->constant_initializer = con; + uni->constant_value = con; + uni->data.has_initializer = true; + uni->data.how_declared = ir_var_hidden; + uni->data.read_only = true; + /* Assume the whole thing is accessed. 
*/ + uni->data.max_array_access = uni->type->length - 1; + instructions->push_head(uni); + + ir_dereference_variable *varref = new(mem_ctx) ir_dereference_variable(uni); + *rvalue = new(mem_ctx) ir_dereference_array(varref, dra->array_index); + + progress = true; +} + +} /* anonymous namespace */ + +bool +lower_const_arrays_to_uniforms(exec_list *instructions) +{ + lower_const_array_visitor v(instructions); + return v.run(); +} diff --git a/src/compiler/glsl/lower_discard.cpp b/src/compiler/glsl/lower_discard.cpp new file mode 100644 index 0000000..b62eb20 --- /dev/null +++ b/src/compiler/glsl/lower_discard.cpp @@ -0,0 +1,201 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_discard.cpp + * + * This pass moves discards out of if-statements. 
+ * + * Case 1: The "then" branch contains a conditional discard: + * --------------------------------------------------------- + * + * if (cond1) { + * s1; + * discard cond2; + * s2; + * } else { + * s3; + * } + * + * becomes: + * + * temp = false; + * if (cond1) { + * s1; + * temp = cond2; + * s2; + * } else { + * s3; + * } + * discard temp; + * + * Case 2: The "else" branch contains a conditional discard: + * --------------------------------------------------------- + * + * if (cond1) { + * s1; + * } else { + * s2; + * discard cond2; + * s3; + * } + * + * becomes: + * + * temp = false; + * if (cond1) { + * s1; + * } else { + * s2; + * temp = cond2; + * s3; + * } + * discard temp; + * + * Case 3: Both branches contain a conditional discard: + * ---------------------------------------------------- + * + * if (cond1) { + * s1; + * discard cond2; + * s2; + * } else { + * s3; + * discard cond3; + * s4; + * } + * + * becomes: + * + * temp = false; + * if (cond1) { + * s1; + * temp = cond2; + * s2; + * } else { + * s3; + * temp = cond3; + * s4; + * } + * discard temp; + * + * If there are multiple conditional discards, we need only deal with one of + * them. Repeatedly applying this pass will take care of the others. + * + * Unconditional discards are treated as having a condition of "true". 
+ */ + +#include "compiler/glsl_types.h" +#include "ir.h" + +namespace { + +class lower_discard_visitor : public ir_hierarchical_visitor { +public: + lower_discard_visitor() + { + this->progress = false; + } + + ir_visitor_status visit_leave(ir_if *); + + bool progress; +}; + +} /* anonymous namespace */ + +bool +lower_discard(exec_list *instructions) +{ + lower_discard_visitor v; + + visit_list_elements(&v, instructions); + + return v.progress; +} + + +static ir_discard * +find_discard(exec_list &instructions) +{ + foreach_in_list(ir_instruction, node, &instructions) { + ir_discard *ir = node->as_discard(); + if (ir != NULL) + return ir; + } + return NULL; +} + + +static void +replace_discard(void *mem_ctx, ir_variable *var, ir_discard *ir) +{ + ir_rvalue *condition = ir->condition; + + /* For unconditional discards, use "true" as the condition. */ + if (condition == NULL) + condition = new(mem_ctx) ir_constant(true); + + ir_assignment *assignment = + new(mem_ctx) ir_assignment(new(mem_ctx) ir_dereference_variable(var), + condition, NULL); + + ir->replace_with(assignment); +} + + +ir_visitor_status +lower_discard_visitor::visit_leave(ir_if *ir) +{ + ir_discard *then_discard = find_discard(ir->then_instructions); + ir_discard *else_discard = find_discard(ir->else_instructions); + + if (then_discard == NULL && else_discard == NULL) + return visit_continue; + + void *mem_ctx = ralloc_parent(ir); + + ir_variable *temp = new(mem_ctx) ir_variable(glsl_type::bool_type, + "discard_cond_temp", + ir_var_temporary); + ir_assignment *temp_initializer = + new(mem_ctx) ir_assignment(new(mem_ctx) ir_dereference_variable(temp), + new(mem_ctx) ir_constant(false), NULL); + + ir->insert_before(temp); + ir->insert_before(temp_initializer); + + if (then_discard != NULL) + replace_discard(mem_ctx, temp, then_discard); + + if (else_discard != NULL) + replace_discard(mem_ctx, temp, else_discard); + + ir_discard *discard = then_discard != NULL ? 
then_discard : else_discard; + discard->condition = new(mem_ctx) ir_dereference_variable(temp); + ir->insert_after(discard); + + this->progress = true; + + return visit_continue; +} diff --git a/src/compiler/glsl/lower_discard_flow.cpp b/src/compiler/glsl/lower_discard_flow.cpp new file mode 100644 index 0000000..9d0a56b --- /dev/null +++ b/src/compiler/glsl/lower_discard_flow.cpp @@ -0,0 +1,155 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** @file lower_discard_flow.cpp + * + * Implements the GLSL 1.30 revision 9 rule for fragment shader + * discard handling: + * + * "Control flow exits the shader, and subsequent implicit or + * explicit derivatives are undefined when this control flow is + * non-uniform (meaning different fragments within the primitive + * take different control paths)." + * + * There seem to be two conflicting things here. 
"Control flow exits + * the shader" sounds like the discarded fragments should effectively + * jump to the end of the shader, but that breaks derivatives in the + * case of uniform control flow and causes rendering failure in the + * bushes in Unigine Tropics. + * + * The question, then, is whether the intent was "loops stop at the + * point that the only active channels left are discarded pixels" or + * "discarded pixels become inactive at the point that control flow + * returns to the top of a loop". This implements the second + * interpretation. + */ + +#include "compiler/glsl_types.h" +#include "ir.h" +#include "program/hash_table.h" + +namespace { + +class lower_discard_flow_visitor : public ir_hierarchical_visitor { +public: + lower_discard_flow_visitor(ir_variable *discarded) + : discarded(discarded) + { + mem_ctx = ralloc_parent(discarded); + } + + ~lower_discard_flow_visitor() + { + } + + ir_visitor_status visit_enter(ir_discard *ir); + ir_visitor_status visit_enter(ir_loop_jump *ir); + ir_visitor_status visit_enter(ir_loop *ir); + ir_visitor_status visit_enter(ir_function_signature *ir); + + ir_if *generate_discard_break(); + + ir_variable *discarded; + void *mem_ctx; +}; + +} /* anonymous namespace */ + +ir_visitor_status +lower_discard_flow_visitor::visit_enter(ir_loop_jump *ir) +{ + if (ir->mode != ir_loop_jump::jump_continue) + return visit_continue; + + ir->insert_before(generate_discard_break()); + + return visit_continue; +} + +ir_visitor_status +lower_discard_flow_visitor::visit_enter(ir_discard *ir) +{ + ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(discarded); + ir_rvalue *rhs; + if (ir->condition) { + /* discarded <- condition, use (var_ref discarded) as the condition */ + rhs = ir->condition; + ir->condition = new(mem_ctx) ir_dereference_variable(discarded); + } else { + rhs = new(mem_ctx) ir_constant(true); + } + ir_assignment *assign = new(mem_ctx) ir_assignment(lhs, rhs); + ir->insert_before(assign); + + return visit_continue; 
+} + +ir_visitor_status +lower_discard_flow_visitor::visit_enter(ir_loop *ir) +{ + ir->body_instructions.push_tail(generate_discard_break()); + + return visit_continue; +} + +ir_visitor_status +lower_discard_flow_visitor::visit_enter(ir_function_signature *ir) +{ + if (strcmp(ir->function_name(), "main") != 0) + return visit_continue; + + ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(discarded); + ir_rvalue *rhs = new(mem_ctx) ir_constant(false); + ir_assignment *assign = new(mem_ctx) ir_assignment(lhs, rhs); + ir->body.push_head(assign); + + return visit_continue; +} + +ir_if * +lower_discard_flow_visitor::generate_discard_break() +{ + ir_rvalue *if_condition = new(mem_ctx) ir_dereference_variable(discarded); + ir_if *if_inst = new(mem_ctx) ir_if(if_condition); + + ir_instruction *br = new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_break); + if_inst->then_instructions.push_tail(br); + + return if_inst; +} + +void +lower_discard_flow(exec_list *ir) +{ + void *mem_ctx = ir; + + ir_variable *var = new(mem_ctx) ir_variable(glsl_type::bool_type, + "discarded", + ir_var_temporary); + + ir->push_head(var); + + lower_discard_flow_visitor v(var); + + visit_list_elements(&v, ir); +} diff --git a/src/compiler/glsl/lower_if_to_cond_assign.cpp b/src/compiler/glsl/lower_if_to_cond_assign.cpp new file mode 100644 index 0000000..6a70347 --- /dev/null +++ b/src/compiler/glsl/lower_if_to_cond_assign.cpp @@ -0,0 +1,252 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission 
notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_if_to_cond_assign.cpp + * + * This attempts to flatten if-statements to conditional assignments for + * GPUs with limited or no flow control support. + * + * It can't handle other control flow being inside of its block, such + * as calls or loops. Hopefully loop unrolling and inlining will take + * care of those. + * + * Drivers for GPUs with no control flow support should simply call + * + * lower_if_to_cond_assign(instructions) + * + * to attempt to flatten all if-statements. + * + * Some GPUs (such as i965 prior to gen6) do support control flow, but have a + * maximum nesting depth N. Drivers for such hardware can call + * + * lower_if_to_cond_assign(instructions, N) + * + * to attempt to flatten any if-statements appearing at depth > N. 
+ */ + +#include "compiler/glsl_types.h" +#include "ir.h" +#include "program/hash_table.h" + +namespace { + +class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor { +public: + ir_if_to_cond_assign_visitor(unsigned max_depth) + { + this->progress = false; + this->max_depth = max_depth; + this->depth = 0; + + this->condition_variables = hash_table_ctor(0, hash_table_pointer_hash, + hash_table_pointer_compare); + } + + ~ir_if_to_cond_assign_visitor() + { + hash_table_dtor(this->condition_variables); + } + + ir_visitor_status visit_enter(ir_if *); + ir_visitor_status visit_leave(ir_if *); + + bool progress; + unsigned max_depth; + unsigned depth; + + struct hash_table *condition_variables; +}; + +} /* anonymous namespace */ + +bool +lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth) +{ + if (max_depth == UINT_MAX) + return false; + + ir_if_to_cond_assign_visitor v(max_depth); + + visit_list_elements(&v, instructions); + + return v.progress; +} + +void +check_control_flow(ir_instruction *ir, void *data) +{ + bool *found_control_flow = (bool *)data; + switch (ir->ir_type) { + case ir_type_call: + case ir_type_discard: + case ir_type_loop: + case ir_type_loop_jump: + case ir_type_return: + *found_control_flow = true; + break; + default: + break; + } +} + +void +move_block_to_cond_assign(void *mem_ctx, + ir_if *if_ir, ir_rvalue *cond_expr, + exec_list *instructions, + struct hash_table *ht) +{ + foreach_in_list_safe(ir_instruction, ir, instructions) { + if (ir->ir_type == ir_type_assignment) { + ir_assignment *assign = (ir_assignment *)ir; + + if (hash_table_find(ht, assign) == NULL) { + hash_table_insert(ht, assign, assign); + + /* If the LHS of the assignment is a condition variable that was + * previously added, insert an additional assignment of false to + * the variable. 
+ */ + const bool assign_to_cv = + hash_table_find(ht, assign->lhs->variable_referenced()) != NULL; + + if (!assign->condition) { + if (assign_to_cv) { + assign->rhs = + new(mem_ctx) ir_expression(ir_binop_logic_and, + glsl_type::bool_type, + cond_expr->clone(mem_ctx, NULL), + assign->rhs); + } else { + assign->condition = cond_expr->clone(mem_ctx, NULL); + } + } else { + assign->condition = + new(mem_ctx) ir_expression(ir_binop_logic_and, + glsl_type::bool_type, + cond_expr->clone(mem_ctx, NULL), + assign->condition); + } + } + } + + /* Now, move from the if block to the block surrounding it. */ + ir->remove(); + if_ir->insert_before(ir); + } +} + +ir_visitor_status +ir_if_to_cond_assign_visitor::visit_enter(ir_if *ir) +{ + (void) ir; + this->depth++; + + return visit_continue; +} + +ir_visitor_status +ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir) +{ + /* Only flatten when beyond the GPU's maximum supported nesting depth. */ + if (this->depth-- <= this->max_depth) + return visit_continue; + + bool found_control_flow = false; + ir_assignment *assign; + + /* Check that both blocks don't contain anything we can't support. */ + foreach_in_list(ir_instruction, then_ir, &ir->then_instructions) { + visit_tree(then_ir, check_control_flow, &found_control_flow); + } + foreach_in_list(ir_instruction, else_ir, &ir->else_instructions) { + visit_tree(else_ir, check_control_flow, &found_control_flow); + } + if (found_control_flow) + return visit_continue; + + void *mem_ctx = ralloc_parent(ir); + + /* Store the condition to a variable. Move all of the instructions from + * the then-clause of the if-statement. Use the condition variable as a + * condition for all assignments. 
+ */ + ir_variable *const then_var = + new(mem_ctx) ir_variable(glsl_type::bool_type, + "if_to_cond_assign_then", + ir_var_temporary); + ir->insert_before(then_var); + + ir_dereference_variable *then_cond = + new(mem_ctx) ir_dereference_variable(then_var); + + assign = new(mem_ctx) ir_assignment(then_cond, ir->condition); + ir->insert_before(assign); + + move_block_to_cond_assign(mem_ctx, ir, then_cond, + &ir->then_instructions, + this->condition_variables); + + /* Add the new condition variable to the hash table. This allows us to + * find this variable when lowering other (enclosing) if-statements. + */ + hash_table_insert(this->condition_variables, then_var, then_var); + + /* If there are instructions in the else-clause, store the inverse of the + * condition to a variable. Move all of the instructions from the + * else-clause if the if-statement. Use the (inverse) condition variable + * as a condition for all assignments. + */ + if (!ir->else_instructions.is_empty()) { + ir_variable *const else_var = + new(mem_ctx) ir_variable(glsl_type::bool_type, + "if_to_cond_assign_else", + ir_var_temporary); + ir->insert_before(else_var); + + ir_dereference_variable *else_cond = + new(mem_ctx) ir_dereference_variable(else_var); + + ir_rvalue *inverse = + new(mem_ctx) ir_expression(ir_unop_logic_not, + then_cond->clone(mem_ctx, NULL)); + + assign = new(mem_ctx) ir_assignment(else_cond, inverse); + ir->insert_before(assign); + + move_block_to_cond_assign(mem_ctx, ir, else_cond, + &ir->else_instructions, + this->condition_variables); + + /* Add the new condition variable to the hash table. This allows us to + * find this variable when lowering other (enclosing) if-statements. 
+ */ + hash_table_insert(this->condition_variables, else_var, else_var); + } + + ir->remove(); + + this->progress = true; + + return visit_continue; +} diff --git a/src/compiler/glsl/lower_instructions.cpp b/src/compiler/glsl/lower_instructions.cpp new file mode 100644 index 0000000..1875149 --- /dev/null +++ b/src/compiler/glsl/lower_instructions.cpp @@ -0,0 +1,1061 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_instructions.cpp + * + * Many GPUs lack native instructions for certain expression operations, and + * must replace them with some other expression tree. This pass lowers some + * of the most common cases, allowing the lowering code to be implemented once + * rather than in each driver backend. 
 *
 * Currently supported transformations:
 * - SUB_TO_ADD_NEG
 * - DIV_TO_MUL_RCP
 * - INT_DIV_TO_MUL_RCP
 * - EXP_TO_EXP2
 * - POW_TO_EXP2
 * - LOG_TO_LOG2
 * - MOD_TO_FLOOR
 * - LDEXP_TO_ARITH
 * - DFREXP_DLDEXP_TO_ARITH
 * - CARRY_TO_ARITH
 * - BORROW_TO_ARITH
 * - SAT_TO_CLAMP
 * - DOPS_TO_DFRAC
 *
 * SUB_TO_ADD_NEG:
 * ---------------
 * Breaks an ir_binop_sub expression down to add(op0, neg(op1))
 *
 * This simplifies expression reassociation, and for many backends
 * there is no subtract operation separate from adding the negation.
 * For backends with native subtract operations, they will probably
 * want to recognize add(op0, neg(op1)) or the other way around to
 * produce a subtract anyway.
 *
 * DIV_TO_MUL_RCP and INT_DIV_TO_MUL_RCP:
 * --------------------------------------
 * Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
 *
 * Many GPUs don't have a divide instruction (945 and 965 included),
 * but they do have an RCP instruction to compute an approximate
 * reciprocal.  By breaking the operation down, constant reciprocals
 * can get constant folded.
 *
 * DIV_TO_MUL_RCP only lowers floating point division; INT_DIV_TO_MUL_RCP
 * handles the integer case, converting to and from floating point so that
 * RCP is possible.
 *
 * EXP_TO_EXP2 and LOG_TO_LOG2:
 * ----------------------------
 * Many GPUs don't have a base e log or exponent instruction, but they
 * do have base 2 versions, so this pass converts exp and log to exp2
 * and log2 operations.
 *
 * POW_TO_EXP2:
 * ------------
 * Many older GPUs don't have an x**y instruction.  For these GPUs, convert
 * x**y to 2**(y * log2(x)).
+ * + * MOD_TO_FLOOR: + * ------------- + * Breaks an ir_binop_mod expression down to (op0 - op1 * floor(op0 / op1)) + * + * Many GPUs don't have a MOD instruction (945 and 965 included), and + * if we have to break it down like this anyway, it gives an + * opportunity to do things like constant fold the (1.0 / op1) easily. + * + * Note: before we used to implement this as op1 * fract(op / op1) but this + * implementation had significant precision errors. + * + * LDEXP_TO_ARITH: + * ------------- + * Converts ir_binop_ldexp to arithmetic and bit operations for float sources. + * + * DFREXP_DLDEXP_TO_ARITH: + * --------------- + * Converts ir_binop_ldexp, ir_unop_frexp_sig, and ir_unop_frexp_exp to + * arithmetic and bit ops for double arguments. + * + * CARRY_TO_ARITH: + * --------------- + * Converts ir_carry into (x + y) < x. + * + * BORROW_TO_ARITH: + * ---------------- + * Converts ir_borrow into (x < y). + * + * SAT_TO_CLAMP: + * ------------- + * Converts ir_unop_saturate into min(max(x, 0.0), 1.0) + * + * DOPS_TO_DFRAC: + * -------------- + * Converts double trunc, ceil, floor, round to fract + */ + +#include "c99_math.h" +#include "program/prog_instruction.h" /* for swizzle */ +#include "compiler/glsl_types.h" +#include "ir.h" +#include "ir_builder.h" +#include "ir_optimization.h" + +using namespace ir_builder; + +namespace { + +class lower_instructions_visitor : public ir_hierarchical_visitor { +public: + lower_instructions_visitor(unsigned lower) + : progress(false), lower(lower) { } + + ir_visitor_status visit_leave(ir_expression *); + + bool progress; + +private: + unsigned lower; /** Bitfield of which operations to lower */ + + void sub_to_add_neg(ir_expression *); + void div_to_mul_rcp(ir_expression *); + void int_div_to_mul_rcp(ir_expression *); + void mod_to_floor(ir_expression *); + void exp_to_exp2(ir_expression *); + void pow_to_exp2(ir_expression *); + void log_to_log2(ir_expression *); + void ldexp_to_arith(ir_expression *); + void 
dldexp_to_arith(ir_expression *); + void dfrexp_sig_to_arith(ir_expression *); + void dfrexp_exp_to_arith(ir_expression *); + void carry_to_arith(ir_expression *); + void borrow_to_arith(ir_expression *); + void sat_to_clamp(ir_expression *); + void double_dot_to_fma(ir_expression *); + void double_lrp(ir_expression *); + void dceil_to_dfrac(ir_expression *); + void dfloor_to_dfrac(ir_expression *); + void dround_even_to_dfrac(ir_expression *); + void dtrunc_to_dfrac(ir_expression *); + void dsign_to_csel(ir_expression *); +}; + +} /* anonymous namespace */ + +/** + * Determine if a particular type of lowering should occur + */ +#define lowering(x) (this->lower & x) + +bool +lower_instructions(exec_list *instructions, unsigned what_to_lower) +{ + lower_instructions_visitor v(what_to_lower); + + visit_list_elements(&v, instructions); + return v.progress; +} + +void +lower_instructions_visitor::sub_to_add_neg(ir_expression *ir) +{ + ir->operation = ir_binop_add; + ir->operands[1] = new(ir) ir_expression(ir_unop_neg, ir->operands[1]->type, + ir->operands[1], NULL); + this->progress = true; +} + +void +lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir) +{ + assert(ir->operands[1]->type->is_float() || ir->operands[1]->type->is_double()); + + /* New expression for the 1.0 / op1 */ + ir_rvalue *expr; + expr = new(ir) ir_expression(ir_unop_rcp, + ir->operands[1]->type, + ir->operands[1]); + + /* op0 / op1 -> op0 * (1.0 / op1) */ + ir->operation = ir_binop_mul; + ir->operands[1] = expr; + + this->progress = true; +} + +void +lower_instructions_visitor::int_div_to_mul_rcp(ir_expression *ir) +{ + assert(ir->operands[1]->type->is_integer()); + + /* Be careful with integer division -- we need to do it as a + * float and re-truncate, since rcp(n > 1) of an integer would + * just be 0. 
+ */ + ir_rvalue *op0, *op1; + const struct glsl_type *vec_type; + + vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, + ir->operands[1]->type->vector_elements, + ir->operands[1]->type->matrix_columns); + + if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) + op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL); + else + op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL); + + op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL); + + vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, + ir->operands[0]->type->vector_elements, + ir->operands[0]->type->matrix_columns); + + if (ir->operands[0]->type->base_type == GLSL_TYPE_INT) + op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL); + else + op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL); + + vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, + ir->type->vector_elements, + ir->type->matrix_columns); + + op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1); + + if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) { + ir->operation = ir_unop_f2i; + ir->operands[0] = op0; + } else { + ir->operation = ir_unop_i2u; + ir->operands[0] = new(ir) ir_expression(ir_unop_f2i, op0); + } + ir->operands[1] = NULL; + + this->progress = true; +} + +void +lower_instructions_visitor::exp_to_exp2(ir_expression *ir) +{ + ir_constant *log2_e = new(ir) ir_constant(float(M_LOG2E)); + + ir->operation = ir_unop_exp2; + ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[0]->type, + ir->operands[0], log2_e); + this->progress = true; +} + +void +lower_instructions_visitor::pow_to_exp2(ir_expression *ir) +{ + ir_expression *const log2_x = + new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type, + ir->operands[0]); + + ir->operation = ir_unop_exp2; + ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[1]->type, + ir->operands[1], log2_x); + ir->operands[1] = NULL; + this->progress = true; +} + 
/** Rewrite log(x) as log2(x) * (1 / log2(e)). */
void
lower_instructions_visitor::log_to_log2(ir_expression *ir)
{
   ir->operation = ir_binop_mul;
   ir->operands[0] = new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
                                           ir->operands[0], NULL);
   ir->operands[1] = new(ir) ir_constant(float(1.0 / M_LOG2E));
   this->progress = true;
}

/**
 * Rewrite mod(x, y) as x - y * floor(x / y).
 *
 * Both operands are first copied into temporaries that are inserted before
 * the current base instruction, so each of them can be referenced more than
 * once in the replacement expression.
 */
void
lower_instructions_visitor::mod_to_floor(ir_expression *ir)
{
   ir_variable *x = new(ir) ir_variable(ir->operands[0]->type, "mod_x",
                                         ir_var_temporary);
   ir_variable *y = new(ir) ir_variable(ir->operands[1]->type, "mod_y",
                                         ir_var_temporary);
   this->base_ir->insert_before(x);
   this->base_ir->insert_before(y);

   ir_assignment *const assign_x =
      new(ir) ir_assignment(new(ir) ir_dereference_variable(x),
                            ir->operands[0], NULL);
   ir_assignment *const assign_y =
      new(ir) ir_assignment(new(ir) ir_dereference_variable(y),
                            ir->operands[1], NULL);

   this->base_ir->insert_before(assign_x);
   this->base_ir->insert_before(assign_y);

   ir_expression *const div_expr =
      new(ir) ir_expression(ir_binop_div, x->type,
                            new(ir) ir_dereference_variable(x),
                            new(ir) ir_dereference_variable(y));

   /* Don't generate new IR that would need to be lowered in an additional
    * pass.
    */
   if (lowering(DIV_TO_MUL_RCP) && (ir->type->is_float() || ir->type->is_double()))
      div_to_mul_rcp(div_expr);

   ir_expression *const floor_expr =
      new(ir) ir_expression(ir_unop_floor, x->type, div_expr);

   /* Same reasoning as above for the floor of a double. */
   if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
      dfloor_to_dfrac(floor_expr);

   ir_expression *const mul_expr =
      new(ir) ir_expression(ir_binop_mul,
                            new(ir) ir_dereference_variable(y),
                            floor_expr);

   ir->operation = ir_binop_sub;
   ir->operands[0] = new(ir) ir_dereference_variable(x);
   ir->operands[1] = mul_expr;
   this->progress = true;
}

/**
 * Lower ir_binop_ldexp to integer arithmetic on the float bit pattern.
 */
void
lower_instructions_visitor::ldexp_to_arith(ir_expression *ir)
{
   /* Translates
    *    ir_binop_ldexp x exp
    * into
    *
    *    extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
    *    resulting_biased_exp = extracted_biased_exp + exp;
    *
    *    if (resulting_biased_exp < 1) {
    *       return copysign(0.0, x);
    *    }
    *
    *    return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
    *                       lshift(i2u(resulting_biased_exp), exp_shift));
    *
    * which we can't actually implement as such, since the GLSL IR doesn't
    * have vectorized if-statements.  We actually implement it without branches
    * using conditional-select:
    *
    *    extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
    *    resulting_biased_exp = extracted_biased_exp + exp;
    *
    *    is_not_zero_or_underflow = gequal(resulting_biased_exp, 1);
    *    x = csel(is_not_zero_or_underflow, x, copysign(0.0f, x));
    *    resulting_biased_exp = csel(is_not_zero_or_underflow,
    *                                resulting_biased_exp, 0);
    *
    *    return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
    *                       lshift(i2u(resulting_biased_exp), exp_shift));
    *
    * Note that the final recombination at the end of this function is
    * expressed as a bitfield_insert of the new exponent into the integer bit
    * pattern of x, followed by bitcast_i2f, rather than the mask/shift/or
    * form shown above.
    */

   const unsigned vec_elem = ir->type->vector_elements;

   /* Types */
   const glsl_type *ivec = glsl_type::get_instance(GLSL_TYPE_INT, vec_elem, 1);
   const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);

   /* Constants */
   ir_constant *zeroi = ir_constant::zero(ir, ivec);

   ir_constant *sign_mask = new(ir) ir_constant(0x80000000u, vec_elem);

   /* Position and width of the exponent field in a 32-bit float. */
   ir_constant *exp_shift = new(ir) ir_constant(23, vec_elem);
   ir_constant *exp_width = new(ir) ir_constant(8, vec_elem);

   /* Temporary variables */
   ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary);
   ir_variable *exp = new(ir) ir_variable(ivec, "exp", ir_var_temporary);

   ir_variable *zero_sign_x = new(ir) ir_variable(ir->type, "zero_sign_x",
                                                  ir_var_temporary);

   ir_variable *extracted_biased_exp =
      new(ir) ir_variable(ivec, "extracted_biased_exp", ir_var_temporary);
   ir_variable *resulting_biased_exp =
      new(ir) ir_variable(ivec, "resulting_biased_exp", ir_var_temporary);

   ir_variable *is_not_zero_or_underflow =
      new(ir) ir_variable(bvec, "is_not_zero_or_underflow", ir_var_temporary);

   ir_instruction &i = *base_ir;

   /* Copy <x> and <exp> arguments. */
   i.insert_before(x);
   i.insert_before(assign(x, ir->operands[0]));
   i.insert_before(exp);
   i.insert_before(assign(exp, ir->operands[1]));

   /* Extract the biased exponent from <x>. */
   i.insert_before(extracted_biased_exp);
   i.insert_before(assign(extracted_biased_exp,
                          rshift(bitcast_f2i(abs(x)), exp_shift)));

   i.insert_before(resulting_biased_exp);
   i.insert_before(assign(resulting_biased_exp,
                          add(extracted_biased_exp, exp)));

   /* Test if result is ±0.0, subnormal, or underflow by checking if the
    * resulting biased exponent would be less than 0x1. If so, the result is
    * 0.0 with the sign of x. (Actually, invert the conditions so that
    * immediate values are the second arguments, which is better for i965)
    */
   i.insert_before(zero_sign_x);
   i.insert_before(assign(zero_sign_x,
                          bitcast_u2f(bit_and(bitcast_f2u(x), sign_mask))));

   i.insert_before(is_not_zero_or_underflow);
   i.insert_before(assign(is_not_zero_or_underflow,
                          gequal(resulting_biased_exp,
                                  new(ir) ir_constant(0x1, vec_elem))));
   i.insert_before(assign(x, csel(is_not_zero_or_underflow,
                                  x, zero_sign_x)));
   i.insert_before(assign(resulting_biased_exp,
                          csel(is_not_zero_or_underflow,
                               resulting_biased_exp, zeroi)));

   /* We could test for overflows by checking if the resulting biased exponent
    * would be greater than 0xFE. Turns out we don't need to because the GLSL
    * spec says:
    *
    *    "If this product is too large to be represented in the
    *     floating-point type, the result is undefined."
    */

   /* exp_shift is already linked into the rshift above, so the second use
    * needs its own copy.
    */
   ir_constant *exp_shift_clone = exp_shift->clone(ir, NULL);
   ir->operation = ir_unop_bitcast_i2f;
   ir->operands[0] = bitfield_insert(bitcast_f2i(x), resulting_biased_exp,
                                     exp_shift_clone, exp_width);
   ir->operands[1] = NULL;

   this->progress = true;
}

/**
 * Lower ir_binop_ldexp on doubles.  Each component is unpacked into two
 * 32-bit words, and the exponent is replaced in the word taken from the .y
 * component of the unpacked value.
 */
void
lower_instructions_visitor::dldexp_to_arith(ir_expression *ir)
{
   /* See ldexp_to_arith for structure. Uses frexp_exp to extract the exponent
    * from the significand.
    */

   const unsigned vec_elem = ir->type->vector_elements;

   /* Types */
   const glsl_type *ivec = glsl_type::get_instance(GLSL_TYPE_INT, vec_elem, 1);
   const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);

   /* Constants */
   ir_constant *zeroi = ir_constant::zero(ir, ivec);

   ir_constant *sign_mask = new(ir) ir_constant(0x80000000u);

   /* Position and width of the exponent field within the .y word. */
   ir_constant *exp_shift = new(ir) ir_constant(20u);
   ir_constant *exp_width = new(ir) ir_constant(11u);
   /* Added to the frexp exponent to obtain the biased exponent. */
   ir_constant *exp_bias = new(ir) ir_constant(1022, vec_elem);

   /* Temporary variables */
   ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary);
   ir_variable *exp = new(ir) ir_variable(ivec, "exp", ir_var_temporary);

   ir_variable *zero_sign_x = new(ir) ir_variable(ir->type, "zero_sign_x",
                                                  ir_var_temporary);

   ir_variable *extracted_biased_exp =
      new(ir) ir_variable(ivec, "extracted_biased_exp", ir_var_temporary);
   ir_variable *resulting_biased_exp =
      new(ir) ir_variable(ivec, "resulting_biased_exp", ir_var_temporary);

   ir_variable *is_not_zero_or_underflow =
      new(ir) ir_variable(bvec, "is_not_zero_or_underflow", ir_var_temporary);

   ir_instruction &i = *base_ir;

   /* Copy <x> and <exp> arguments. */
   i.insert_before(x);
   i.insert_before(assign(x, ir->operands[0]));
   i.insert_before(exp);
   i.insert_before(assign(exp, ir->operands[1]));

   /* Lower the frexp_exp created here right away, so it does not need
    * another pass.
    */
   ir_expression *frexp_exp = expr(ir_unop_frexp_exp, x);
   if (lowering(DFREXP_DLDEXP_TO_ARITH))
      dfrexp_exp_to_arith(frexp_exp);

   /* Extract the biased exponent from <x>. */
   i.insert_before(extracted_biased_exp);
   i.insert_before(assign(extracted_biased_exp, add(frexp_exp, exp_bias)));

   i.insert_before(resulting_biased_exp);
   i.insert_before(assign(resulting_biased_exp,
                          add(extracted_biased_exp, exp)));

   /* Test if result is ±0.0, subnormal, or underflow by checking if the
    * resulting biased exponent would be less than 0x1. If so, the result is
    * 0.0 with the sign of x. (Actually, invert the conditions so that
    * immediate values are the second arguments, which is better for i965)
    * TODO: Implement in a vector fashion.
    */
   i.insert_before(zero_sign_x);
   for (unsigned elem = 0; elem < vec_elem; elem++) {
      /* Build a signed zero: keep only the sign bit of the .y word and
       * clear the .x word.
       */
      ir_variable *unpacked =
         new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary);
      i.insert_before(unpacked);
      i.insert_before(
            assign(unpacked,
                   expr(ir_unop_unpack_double_2x32, swizzle(x, elem, 1))));
      i.insert_before(assign(unpacked, bit_and(swizzle_y(unpacked), sign_mask->clone(ir, NULL)),
                             WRITEMASK_Y));
      i.insert_before(assign(unpacked, ir_constant::zero(ir, glsl_type::uint_type), WRITEMASK_X));
      i.insert_before(assign(zero_sign_x,
                             expr(ir_unop_pack_double_2x32, unpacked),
                             1 << elem));
   }
   i.insert_before(is_not_zero_or_underflow);
   i.insert_before(assign(is_not_zero_or_underflow,
                          gequal(resulting_biased_exp,
                                  new(ir) ir_constant(0x1, vec_elem))));
   i.insert_before(assign(x, csel(is_not_zero_or_underflow,
                                  x, zero_sign_x)));
   i.insert_before(assign(resulting_biased_exp,
                          csel(is_not_zero_or_underflow,
                               resulting_biased_exp, zeroi)));

   /* We could test for overflows by checking if the resulting biased exponent
    * would be greater than 0x7FE (the exponent field is 11 bits wide here,
    * not 8 as in the float version). Turns out we don't need to because the
    * GLSL spec says:
    *
    *    "If this product is too large to be represented in the
    *     floating-point type, the result is undefined."
    */

   /* One result per component; entries beyond vec_elem stay NULL. */
   ir_rvalue *results[4] = {NULL};
   for (unsigned elem = 0; elem < vec_elem; elem++) {
      ir_variable *unpacked =
         new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary);
      i.insert_before(unpacked);
      i.insert_before(
            assign(unpacked,
                   expr(ir_unop_unpack_double_2x32, swizzle(x, elem, 1))));

      /* Insert the new biased exponent into the .y word. */
      ir_expression *bfi = bitfield_insert(
            swizzle_y(unpacked),
            i2u(swizzle(resulting_biased_exp, elem, 1)),
            exp_shift->clone(ir, NULL),
            exp_width->clone(ir, NULL));

      i.insert_before(assign(unpacked, bfi, WRITEMASK_Y));

      results[elem] = expr(ir_unop_pack_double_2x32, unpacked);
   }

   /* Put the dvec back together. */
   ir->operation = ir_quadop_vector;
   ir->operands[0] = results[0];
   ir->operands[1] = results[1];
   ir->operands[2] = results[2];
   ir->operands[3] = results[3];

   /* Don't generate new IR that would need to be lowered in an additional
    * pass.
    *
    * NOTE(review): no code follows this remark; it presumably refers to the
    * frexp_exp lowering performed near the top of this function -- confirm.
    */

   this->progress = true;
}

/**
 * Lower ir_unop_frexp_sig on doubles: keep sign and mantissa, and force the
 * exponent field to that of a value in [0.5, 1.0), or to zero when the input
 * compares equal to zero.
 */
void
lower_instructions_visitor::dfrexp_sig_to_arith(ir_expression *ir)
{
   const unsigned vec_elem = ir->type->vector_elements;
   const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);

   /* Double-precision floating-point values are stored as
    *   1 sign bit;
    *   11 exponent bits;
    *   52 mantissa bits.
    *
    * We're just extracting the significand here, so we only need to modify
    * the upper 32-bit uint. Unfortunately we must extract each double
    * independently as there is no vector version of unpackDouble.
    */

   ir_instruction &i = *base_ir;

   ir_variable *is_not_zero =
      new(ir) ir_variable(bvec, "is_not_zero", ir_var_temporary);
   ir_rvalue *results[4] = {NULL};

   ir_constant *dzero = new(ir) ir_constant(0.0, vec_elem);
   i.insert_before(is_not_zero);
   i.insert_before(
         assign(is_not_zero,
                nequal(abs(ir->operands[0]->clone(ir, NULL)), dzero)));

   /* TODO: Remake this as more vector-friendly when int64 support is
    * available.
    */
   for (unsigned elem = 0; elem < vec_elem; elem++) {
      ir_constant *zero = new(ir) ir_constant(0u, 1);
      /* Keeps the sign bit and the mantissa bits of the high word. */
      ir_constant *sign_mantissa_mask = new(ir) ir_constant(0x800fffffu, 1);

      /* Exponent of double floating-point values in the range [0.5, 1.0). */
      ir_constant *exponent_value = new(ir) ir_constant(0x3fe00000u, 1);

      ir_variable *bits =
         new(ir) ir_variable(glsl_type::uint_type, "bits", ir_var_temporary);
      ir_variable *unpacked =
         new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary);

      ir_rvalue *x = swizzle(ir->operands[0]->clone(ir, NULL), elem, 1);

      i.insert_before(bits);
      i.insert_before(unpacked);
      i.insert_before(assign(unpacked, expr(ir_unop_unpack_double_2x32, x)));

      /* Manipulate the high uint to remove the exponent and replace it with
       * either the default exponent or zero.
       */
      i.insert_before(assign(bits, swizzle_y(unpacked)));
      i.insert_before(assign(bits, bit_and(bits, sign_mantissa_mask)));
      i.insert_before(assign(bits, bit_or(bits,
                                          csel(swizzle(is_not_zero, elem, 1),
                                               exponent_value,
                                               zero))));
      i.insert_before(assign(unpacked, bits, WRITEMASK_Y));
      results[elem] = expr(ir_unop_pack_double_2x32, unpacked);
   }

   /* Put the dvec back together */
   ir->operation = ir_quadop_vector;
   ir->operands[0] = results[0];
   ir->operands[1] = results[1];
   ir->operands[2] = results[2];
   ir->operands[3] = results[3];

   this->progress = true;
}

/**
 * Lower ir_unop_frexp_exp on doubles: for inputs that do not compare equal
 * to zero the result is (high word of |x| >> 20) - 1022, otherwise 0.
 */
void
lower_instructions_visitor::dfrexp_exp_to_arith(ir_expression *ir)
{
   const unsigned vec_elem = ir->type->vector_elements;
   const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
   const glsl_type *uvec = glsl_type::get_instance(GLSL_TYPE_UINT, vec_elem, 1);

   /* Double-precision floating-point values are stored as
    *   1 sign bit;
    *   11 exponent bits;
    *   52 mantissa bits.
    *
    * We're just extracting the exponent here, so we only care about the upper
    * 32-bit uint.
    */

   ir_instruction &i = *base_ir;

   ir_variable *is_not_zero =
      new(ir) ir_variable(bvec, "is_not_zero", ir_var_temporary);
   ir_variable *high_words =
      new(ir) ir_variable(uvec, "high_words", ir_var_temporary);
   ir_constant *dzero = new(ir) ir_constant(0.0, vec_elem);
   ir_constant *izero = new(ir) ir_constant(0, vec_elem);

   /* Only clones of absval are linked into the IR below. */
   ir_rvalue *absval = abs(ir->operands[0]);

   i.insert_before(is_not_zero);
   i.insert_before(high_words);
   i.insert_before(assign(is_not_zero, nequal(absval->clone(ir, NULL), dzero)));

   /* Extract all of the upper uints. */
   for (unsigned elem = 0; elem < vec_elem; elem++) {
      ir_rvalue *x = swizzle(absval->clone(ir, NULL), elem, 1);

      i.insert_before(assign(high_words,
                             swizzle_y(expr(ir_unop_unpack_double_2x32, x)),
                             1 << elem));

   }
   ir_constant *exponent_shift = new(ir) ir_constant(20, vec_elem);
   ir_constant *exponent_bias = new(ir) ir_constant(-1022, vec_elem);

   /* For non-zero inputs, shift the exponent down and apply bias. */
   ir->operation = ir_triop_csel;
   ir->operands[0] = new(ir) ir_dereference_variable(is_not_zero);
   ir->operands[1] = add(exponent_bias, u2i(rshift(high_words, exponent_shift)));
   ir->operands[2] = izero;

   this->progress = true;
}

/** Lower ir_binop_carry to i2u(b2i((x + y) < x)). */
void
lower_instructions_visitor::carry_to_arith(ir_expression *ir)
{
   /* Translates
    *   ir_binop_carry x y
    * into
    *   sum = ir_binop_add x y
    *   bcarry = ir_binop_less sum x
    *   carry = ir_unop_b2i bcarry
    */

   /* x is used twice, so the comparison gets its own copy. */
   ir_rvalue *x_clone = ir->operands[0]->clone(ir, NULL);
   ir->operation = ir_unop_i2u;
   ir->operands[0] = b2i(less(add(ir->operands[0], ir->operands[1]), x_clone));
   ir->operands[1] = NULL;

   this->progress = true;
}

/** Lower ir_binop_borrow to i2u(b2i(x < y)). */
void
lower_instructions_visitor::borrow_to_arith(ir_expression *ir)
{
   /* Translates
    *   ir_binop_borrow x y
    * into
    *   bcarry = ir_binop_less x y
    *   carry = ir_unop_b2i bcarry
    */

   ir->operation = ir_unop_i2u;
   ir->operands[0] = b2i(less(ir->operands[0], ir->operands[1]));
   ir->operands[1] = NULL;

   this->progress = true;
}

/** Lower ir_unop_saturate to min(max(x, 0.0), 1.0). */
void
lower_instructions_visitor::sat_to_clamp(ir_expression *ir)
{
   /* Translates
    *   ir_unop_saturate x
    * into
    *   ir_binop_min (ir_binop_max(x, 0.0), 1.0)
    */

   ir->operation = ir_binop_min;
   ir->operands[0] = new(ir) ir_expression(ir_binop_max, ir->operands[0]->type,
                                           ir->operands[0],
                                           new(ir) ir_constant(0.0f));
   ir->operands[1] = new(ir) ir_constant(1.0f);

   this->progress = true;
}

/**
 * Lower a dot product to a chain of fused multiply-adds.  The partial sum is
 * accumulated in a temporary from the last component down to component 1;
 * the expression itself becomes the final fma for component 0.
 */
void
lower_instructions_visitor::double_dot_to_fma(ir_expression *ir)
{
   ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type->get_base_type(), "dot_res",
					   ir_var_temporary);
   this->base_ir->insert_before(temp);

   int nc = ir->operands[0]->type->components();
   for (int i = nc - 1; i >= 1; i--) {
      ir_assignment *assig;
      if (i == (nc - 1)) {
         /* First step: nothing accumulated yet, so a plain multiply. */
         assig = assign(temp, mul(swizzle(ir->operands[0]->clone(ir, NULL), i, 1),
                                  swizzle(ir->operands[1]->clone(ir, NULL), i, 1)));
      } else {
         assig = assign(temp, fma(swizzle(ir->operands[0]->clone(ir, NULL), i, 1),
                                  swizzle(ir->operands[1]->clone(ir, NULL), i, 1),
                                  temp));
      }
      this->base_ir->insert_before(assig);
   }

   ir->operation = ir_triop_fma;
   ir->operands[0] = swizzle(ir->operands[0], 0, 1);
   ir->operands[1] = swizzle(ir->operands[1], 0, 1);
   ir->operands[2] = new(ir) ir_dereference_variable(temp);

   this->progress = true;

}

/**
 * Lower ir_triop_lrp(x, y, a) to fma(a, y, (1 - a) * x).  A scalar a is
 * replicated to the width of x through a swizzle.
 */
void
lower_instructions_visitor::double_lrp(ir_expression *ir)
{
   int swizval;
   ir_rvalue *op0 = ir->operands[0], *op2 = ir->operands[2];
   ir_constant *one = new(ir) ir_constant(1.0, op2->type->vector_elements);

   switch (op2->type->vector_elements) {
   case 1:
      swizval = SWIZZLE_XXXX;
      break;
   default:
      assert(op0->type->vector_elements == op2->type->vector_elements);
      swizval = SWIZZLE_XYZW;
      break;
   }

   /* operands[1] (y) is left where it is. */
   ir->operation = ir_triop_fma;
   ir->operands[0] = swizzle(op2, swizval, op0->type->vector_elements);
   ir->operands[2] = mul(sub(one, op2->clone(ir, NULL)), op0);

   this->progress = true;
}

/** Lower ceil(x) to (x - fract(x)) + (fract(x) != 0 ? 1 : 0). */
void
lower_instructions_visitor::dceil_to_dfrac(ir_expression *ir)
{
   /*
    * frtemp = frac(x);
    * temp = sub(x, frtemp);
    * result = temp + ((frtemp != 0.0) ? 1.0 : 0.0);
    */
   ir_instruction &i = *base_ir;
   ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements);
   ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements);
   ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
                                             ir_var_temporary);

   i.insert_before(frtemp);
   i.insert_before(assign(frtemp, fract(ir->operands[0])));

   ir->operation = ir_binop_add;
   ir->operands[0] = sub(ir->operands[0]->clone(ir, NULL), frtemp);
   ir->operands[1] = csel(nequal(frtemp, zero), one, zero->clone(ir, NULL));

   this->progress = true;
}

/** Lower floor(x) to x - fract(x). */
void
lower_instructions_visitor::dfloor_to_dfrac(ir_expression *ir)
{
   /*
    * frtemp = frac(x);
    * result = sub(x, frtemp);
    */
   ir->operation = ir_binop_sub;
   ir->operands[1] = fract(ir->operands[0]->clone(ir, NULL));

   this->progress = true;
}

/** Lower the expression to the fract-based csel sequence described below. */
void
lower_instructions_visitor::dround_even_to_dfrac(ir_expression *ir)
{
   /*
    * insane but works
    * temp = x + 0.5;
    * frtemp = frac(temp);
    * t2 = sub(temp, frtemp);
    * if (frac(x) == 0.5)
    *     result = frac(t2 * 0.5) == 0 ? t2 : t2 - 1;
    *  else
    *     result = t2;

    */
   ir_instruction &i = *base_ir;
   ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
                                             ir_var_temporary);
   ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp",
                                           ir_var_temporary);
   ir_variable *t2 = new(ir) ir_variable(ir->operands[0]->type, "t2",
                                           ir_var_temporary);
   ir_constant *p5 = new(ir) ir_constant(0.5, ir->operands[0]->type->vector_elements);
   ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements);
   ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements);

   i.insert_before(temp);
   i.insert_before(assign(temp, add(ir->operands[0], p5)));

   i.insert_before(frtemp);
   i.insert_before(assign(frtemp, fract(temp)));

   i.insert_before(t2);
   i.insert_before(assign(t2, sub(temp, frtemp)));

   /* p5 is already linked into the add above; later uses are clones. */
   ir->operation = ir_triop_csel;
   ir->operands[0] = equal(fract(ir->operands[0]->clone(ir, NULL)),
                           p5->clone(ir, NULL));
   ir->operands[1] = csel(equal(fract(mul(t2, p5->clone(ir, NULL))),
                                zero),
                          t2,
                          sub(t2, one));
   ir->operands[2] = new(ir) ir_dereference_variable(t2);

   this->progress = true;
}

/** Lower trunc(x) using fract; see the pseudo-code below. */
void
lower_instructions_visitor::dtrunc_to_dfrac(ir_expression *ir)
{
   /*
    * frtemp = frac(x);
    * temp = sub(x, frtemp);
    * result = x >= 0 ? temp : temp + ((frtemp == 0.0) ? 0 : 1);
    */
   ir_rvalue *arg = ir->operands[0];
   ir_instruction &i = *base_ir;

   ir_constant *zero = new(ir) ir_constant(0.0, arg->type->vector_elements);
   ir_constant *one = new(ir) ir_constant(1.0, arg->type->vector_elements);
   ir_variable *frtemp = new(ir) ir_variable(arg->type, "frtemp",
                                             ir_var_temporary);
   ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp",
                                           ir_var_temporary);

   i.insert_before(frtemp);
   i.insert_before(assign(frtemp, fract(arg)));
   i.insert_before(temp);
   i.insert_before(assign(temp, sub(arg->clone(ir, NULL), frtemp)));

   ir->operation = ir_triop_csel;
   ir->operands[0] = gequal(arg->clone(ir, NULL), zero);
   ir->operands[1] = new (ir) ir_dereference_variable(temp);
   ir->operands[2] = add(temp,
                         csel(equal(frtemp, zero->clone(ir, NULL)),
                              zero->clone(ir, NULL),
                              one));

   this->progress = true;
}

/** Lower sign(x) to nested selects: x < 0 ? -1 : (x > 0 ? 1 : 0). */
void
lower_instructions_visitor::dsign_to_csel(ir_expression *ir)
{
   /*
    * temp = x > 0.0 ? 1.0 : 0.0;
    * result = x < 0.0 ? -1.0 : temp;
    */
   ir_rvalue *arg = ir->operands[0];
   ir_constant *zero = new(ir) ir_constant(0.0, arg->type->vector_elements);
   ir_constant *one = new(ir) ir_constant(1.0, arg->type->vector_elements);
   ir_constant *neg_one = new(ir) ir_constant(-1.0, arg->type->vector_elements);

   ir->operation = ir_triop_csel;
   ir->operands[0] = less(arg->clone(ir, NULL),
                          zero->clone(ir, NULL));
   ir->operands[1] = neg_one;
   ir->operands[2] = csel(greater(arg, zero),
                          one,
                          zero->clone(ir, NULL));

   this->progress = true;
}

ir_visitor_status
lower_instructions_visitor::visit_leave(ir_expression *ir)
{
   switch (ir->operation) {
   case ir_binop_dot:
      if (ir->operands[0]->type->is_double())
         double_dot_to_fma(ir);
      break;
   case ir_triop_lrp:
      if (ir->operands[0]->type->is_double())
         double_lrp(ir);
      break;
   case ir_binop_sub:
      if (lowering(SUB_TO_ADD_NEG))
         sub_to_add_neg(ir);
      break;

   case ir_binop_div:
      if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP))
         int_div_to_mul_rcp(ir);
      else if ((ir->operands[1]->type->is_float() ||
                ir->operands[1]->type->is_double()) && lowering(DIV_TO_MUL_RCP))
         div_to_mul_rcp(ir);
      break;

   case ir_unop_exp:
      if (lowering(EXP_TO_EXP2))
         exp_to_exp2(ir);
      break;

   case ir_unop_log:
      if (lowering(LOG_TO_LOG2))
         log_to_log2(ir);
      break;

   case ir_binop_mod:
      if (lowering(MOD_TO_FLOOR) && (ir->type->is_float() || ir->type->is_double()))
         mod_to_floor(ir);
      break;

   case ir_binop_pow:
      if (lowering(POW_TO_EXP2))
         pow_to_exp2(ir);
      break;

   case ir_binop_ldexp:
      if (lowering(LDEXP_TO_ARITH) && ir->type->is_float())
         ldexp_to_arith(ir);
      if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->type->is_double())
         dldexp_to_arith(ir);
      break;

   case ir_unop_frexp_exp:
      if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->operands[0]->type->is_double())
         dfrexp_exp_to_arith(ir);
      break;

   case ir_unop_frexp_sig:
      if (lowering(DFREXP_DLDEXP_TO_ARITH) &&
ir->operands[0]->type->is_double()) + dfrexp_sig_to_arith(ir); + break; + + case ir_binop_carry: + if (lowering(CARRY_TO_ARITH)) + carry_to_arith(ir); + break; + + case ir_binop_borrow: + if (lowering(BORROW_TO_ARITH)) + borrow_to_arith(ir); + break; + + case ir_unop_saturate: + if (lowering(SAT_TO_CLAMP)) + sat_to_clamp(ir); + break; + + case ir_unop_trunc: + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) + dtrunc_to_dfrac(ir); + break; + + case ir_unop_ceil: + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) + dceil_to_dfrac(ir); + break; + + case ir_unop_floor: + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) + dfloor_to_dfrac(ir); + break; + + case ir_unop_round_even: + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) + dround_even_to_dfrac(ir); + break; + + case ir_unop_sign: + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) + dsign_to_csel(ir); + break; + default: + return visit_continue; + } + + return visit_continue; +} diff --git a/src/compiler/glsl/lower_jumps.cpp b/src/compiler/glsl/lower_jumps.cpp new file mode 100644 index 0000000..3cfa2e0 --- /dev/null +++ b/src/compiler/glsl/lower_jumps.cpp @@ -0,0 +1,1022 @@ +/* + * Copyright © 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_jumps.cpp + * + * This pass lowers jumps (break, continue, and return) to if/else structures. + * + * It can be asked to: + * 1. Pull jumps out of ifs where possible + * 2. Remove all "continue"s, replacing them with an "execute flag" + * 3. Replace all "break"s with a single conditional one at the end of the loop + * 4. Replace all "return"s with a single return at the end of the function, + * for the main function and/or other functions + * + * Applying this pass gives several benefits: + * 1. All functions can be inlined. + * 2. nv40 and other pre-DX10 chips without "continue" can be supported + * 3. nv30 and other pre-DX10 chips with no control flow at all are better + * supported + * + * Continues are lowered by adding a per-loop "execute flag", initialized to + * true, that when cleared inhibits all execution until the end of the loop. + * + * Breaks are lowered to continues, plus setting a "break flag" that is checked + * at the end of the loop, and triggers the unique "break". + * + * Returns are lowered to breaks/continues, plus adding a "return flag" that + * causes loops to break again out of their enclosing loops until all the + * loops are exited: then the "execute flag" logic will ignore everything + * until the end of the function. + * + * Note that "continue" and "return" can also be implemented by adding + * a dummy loop and using break.
+ * However, this is bad for hardware with limited nesting depth, and + * prevents further optimization, and thus is not currently performed. + */ + +#include "compiler/glsl_types.h" +#include <string.h> +#include "ir.h" + +/** + * Enum recording the result of analyzing how control flow might exit + * an IR node. + * + * Each possible value of jump_strength indicates a strictly stronger + * guarantee on control flow than the previous value. + * + * The ordering of strengths roughly reflects the way jumps are + * lowered: jumps with higher strength tend to be lowered to jumps of + * lower strength. Accordingly, strength is used as a heuristic to + * determine which lowering to perform first. + * + * This enum is also used by get_jump_strength() to categorize + * instructions as either break, continue, return, or other. When + * used in this fashion, strength_always_clears_execute_flag is not + * used. + * + * The control flow analysis made by this optimization pass makes two + * simplifying assumptions: + * + * - It ignores discard instructions, since they are lowered by a + * separate pass (lower_discard.cpp). + * + * - It assumes it is always possible for control to flow from a loop + * to the instruction immediately following it. Technically, this + * is not true (since all execution paths through the loop might + * jump back to the top, or return from the function). + * + * Both of these simplifying assumptions are safe, since they can never + * cause reachable code to be incorrectly classified as unreachable; + * they can only do the opposite. + */ +enum jump_strength +{ + /** + * Analysis has produced no guarantee on how control flow might + * exit this IR node. It might fall out the bottom (with or + * without clearing the execute flag, if present), or it might + * continue to the top of the innermost enclosing loop, break out + * of it, or return from the function.
+ */ + strength_none, + + /** + * The only way control can fall out the bottom of this node is + * through a code path that clears the execute flag. It might also + * continue to the top of the innermost enclosing loop, break out + * of it, or return from the function. + */ + strength_always_clears_execute_flag, + + /** + * Control cannot fall out the bottom of this node. It might + * continue to the top of the innermost enclosing loop, break out + * of it, or return from the function. + */ + strength_continue, + + /** + * Control cannot fall out the bottom of this node, or continue to the + * top of the innermost enclosing loop. It can only break out of + * it or return from the function. + */ + strength_break, + + /** + * Control cannot fall out the bottom of this node, continue to the + * top of the innermost enclosing loop, or break out of it. It can + * only return from the function. + */ + strength_return +}; + +namespace { + +struct block_record +{ + /* minimum jump strength (of lowered IR, not pre-lowering IR) + * + * If the block ends with a jump, must be the strength of the jump. + * (Otherwise, the jump would be dead and would have been deleted before.) + * + * If the block doesn't end with a jump, it can be different than strength_none if all paths before it lead to some jump + * (e.g. an if with a return in one branch, and a break in the other, while not lowering them) + * Note that identical jumps are usually unified though. + */ + jump_strength min_strength; + + /* can anything clear the execute flag?
*/ + bool may_clear_execute_flag; + + block_record() + { + this->min_strength = strength_none; + this->may_clear_execute_flag = false; + } +}; + +struct loop_record +{ + ir_function_signature* signature; + ir_loop* loop; + + /* used to avoid lowering the break used to represent lowered breaks */ + unsigned nesting_depth; + bool in_if_at_the_end_of_the_loop; + + bool may_set_return_flag; + + ir_variable* break_flag; + ir_variable* execute_flag; /* cleared to emulate continue */ + + loop_record(ir_function_signature* p_signature = 0, ir_loop* p_loop = 0) + { + this->signature = p_signature; + this->loop = p_loop; + this->nesting_depth = 0; + this->in_if_at_the_end_of_the_loop = false; + this->may_set_return_flag = false; + this->break_flag = 0; + this->execute_flag = 0; + } + + ir_variable* get_execute_flag() + { + /* also supported for the "function loop" */ + if(!this->execute_flag) { + exec_list& list = this->loop ? this->loop->body_instructions : signature->body; + this->execute_flag = new(this->signature) ir_variable(glsl_type::bool_type, "execute_flag", ir_var_temporary); + list.push_head(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(execute_flag), new(this->signature) ir_constant(true), 0)); + list.push_head(this->execute_flag); + } + return this->execute_flag; + } + + ir_variable* get_break_flag() + { + assert(this->loop); + if(!this->break_flag) { + this->break_flag = new(this->signature) ir_variable(glsl_type::bool_type, "break_flag", ir_var_temporary); + this->loop->insert_before(this->break_flag); + this->loop->insert_before(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(break_flag), new(this->signature) ir_constant(false), 0)); + } + return this->break_flag; + } +}; + +struct function_record +{ + ir_function_signature* signature; + ir_variable* return_flag; /* used to break out of all loops and then jump to the return instruction */ + ir_variable* return_value; + bool lower_return; + 
unsigned nesting_depth; + + function_record(ir_function_signature* p_signature = 0, + bool lower_return = false) + { + this->signature = p_signature; + this->return_flag = 0; + this->return_value = 0; + this->nesting_depth = 0; + this->lower_return = lower_return; + } + + ir_variable* get_return_flag() + { + if(!this->return_flag) { + this->return_flag = new(this->signature) ir_variable(glsl_type::bool_type, "return_flag", ir_var_temporary); + this->signature->body.push_head(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(return_flag), new(this->signature) ir_constant(false), 0)); + this->signature->body.push_head(this->return_flag); + } + return this->return_flag; + } + + ir_variable* get_return_value() + { + if(!this->return_value) { + assert(!this->signature->return_type->is_void()); + return_value = new(this->signature) ir_variable(this->signature->return_type, "return_value", ir_var_temporary); + this->signature->body.push_head(this->return_value); + } + return this->return_value; + } +}; + +struct ir_lower_jumps_visitor : public ir_control_flow_visitor { + /* Postconditions: on exit of any visit() function: + * + * ANALYSIS: this->block.min_strength, + * this->block.may_clear_execute_flag, and + * this->loop.may_set_return_flag are updated to reflect the + * characteristics of the visited statement. + * + * DEAD_CODE_ELIMINATION: If this->block.min_strength is not + * strength_none, the visited node is at the end of its exec_list. + * In other words, any unreachable statements that follow the + * visited statement in its exec_list have been removed. + * + * CONTAINED_JUMPS_LOWERED: If the visited statement contains other + * statements, then should_lower_jump() is false for all of the + * return, break, or continue statements it contains. + * + * Note that visiting a jump does not lower it. That is the + * responsibility of the statement (or function signature) that + * contains the jump. 
+ */ + + bool progress; + + struct function_record function; + struct loop_record loop; + struct block_record block; + + bool pull_out_jumps; + bool lower_continue; + bool lower_break; + bool lower_sub_return; + bool lower_main_return; + + ir_lower_jumps_visitor() + : progress(false), + pull_out_jumps(false), + lower_continue(false), + lower_break(false), + lower_sub_return(false), + lower_main_return(false) + { + } + + void truncate_after_instruction(exec_node *ir) + { + if (!ir) + return; + + while (!ir->get_next()->is_tail_sentinel()) { + ((ir_instruction *)ir->get_next())->remove(); + this->progress = true; + } + } + + void move_outer_block_inside(ir_instruction *ir, exec_list *inner_block) + { + while (!ir->get_next()->is_tail_sentinel()) { + ir_instruction *move_ir = (ir_instruction *)ir->get_next(); + + move_ir->remove(); + inner_block->push_tail(move_ir); + } + } + + /** + * Insert the instructions necessary to lower a return statement, + * before the given return instruction. + */ + void insert_lowered_return(ir_return *ir) + { + ir_variable* return_flag = this->function.get_return_flag(); + if(!this->function.signature->return_type->is_void()) { + ir_variable* return_value = this->function.get_return_value(); + ir->insert_before( + new(ir) ir_assignment( + new (ir) ir_dereference_variable(return_value), + ir->value)); + } + ir->insert_before( + new(ir) ir_assignment( + new (ir) ir_dereference_variable(return_flag), + new (ir) ir_constant(true))); + this->loop.may_set_return_flag = true; + } + + /** + * If the given instruction is a return, lower it to instructions + * that store the return value (if there is one), set the return + * flag, and then break. + * + * It is safe to pass NULL to this function. 
+ */ + void lower_return_unconditionally(ir_instruction *ir) + { + if (get_jump_strength(ir) != strength_return) { + return; + } + insert_lowered_return((ir_return*)ir); + ir->replace_with(new(ir) ir_loop_jump(ir_loop_jump::jump_break)); + } + + /** + * Create the necessary instruction to replace a break instruction. + */ + ir_instruction *create_lowered_break() + { + void *ctx = this->function.signature; + return new(ctx) ir_assignment( + new(ctx) ir_dereference_variable(this->loop.get_break_flag()), + new(ctx) ir_constant(true), + 0); + } + + /** + * If the given instruction is a break, lower it to an instruction + * that sets the break flag, without consulting + * should_lower_jump(). + * + * It is safe to pass NULL to this function. + */ + void lower_break_unconditionally(ir_instruction *ir) + { + if (get_jump_strength(ir) != strength_break) { + return; + } + ir->replace_with(create_lowered_break()); + } + + /** + * If the block ends in a conditional or unconditional break, lower + * it, even though should_lower_jump() says it needn't be lowered. + */ + void lower_final_breaks(exec_list *block) + { + ir_instruction *ir = (ir_instruction *) block->get_tail(); + lower_break_unconditionally(ir); + ir_if *ir_if = ir->as_if(); + if (ir_if) { + lower_break_unconditionally( + (ir_instruction *) ir_if->then_instructions.get_tail()); + lower_break_unconditionally( + (ir_instruction *) ir_if->else_instructions.get_tail()); + } + } + + virtual void visit(class ir_loop_jump * ir) + { + /* Eliminate all instructions after each one, since they are + * unreachable. This satisfies the DEAD_CODE_ELIMINATION + * postcondition. + */ + truncate_after_instruction(ir); + + /* Set this->block.min_strength based on this instruction. This + * satisfies the ANALYSIS postcondition. It is not necessary to + * update this->block.may_clear_execute_flag or + * this->loop.may_set_return_flag, because an unlowered jump + * instruction can't change any flags. 
+ */ + this->block.min_strength = ir->is_break() ? strength_break : strength_continue; + + /* The CONTAINED_JUMPS_LOWERED postcondition is already + * satisfied, because jump statements can't contain other + * statements. + */ + } + + virtual void visit(class ir_return * ir) + { + /* Eliminate all instructions after each one, since they are + * unreachable. This satisfies the DEAD_CODE_ELIMINATION + * postcondition. + */ + truncate_after_instruction(ir); + + /* Set this->block.min_strength based on this instruction. This + * satisfies the ANALYSIS postcondition. It is not necessary to + * update this->block.may_clear_execute_flag or + * this->loop.may_set_return_flag, because an unlowered return + * instruction can't change any flags. + */ + this->block.min_strength = strength_return; + + /* The CONTAINED_JUMPS_LOWERED postcondition is already + * satisfied, because jump statements can't contain other + * statements. + */ + } + + virtual void visit(class ir_discard * ir) + { + /* Nothing needs to be done. The ANALYSIS and + * DEAD_CODE_ELIMINATION postconditions are already satisfied, + * because discard statements are ignored by this optimization + * pass. The CONTAINED_JUMPS_LOWERED postcondition is already + * satisfied, because discard statements can't contain other + * statements. 
+ */ + (void) ir; + } + + enum jump_strength get_jump_strength(ir_instruction* ir) + { + if(!ir) + return strength_none; + else if(ir->ir_type == ir_type_loop_jump) { + if(((ir_loop_jump*)ir)->is_break()) + return strength_break; + else + return strength_continue; + } else if(ir->ir_type == ir_type_return) + return strength_return; + else + return strength_none; + } + + bool should_lower_jump(ir_jump* ir) + { + unsigned strength = get_jump_strength(ir); + bool lower; + switch(strength) + { + case strength_none: + lower = false; /* don't change this, code relies on it */ + break; + case strength_continue: + lower = lower_continue; + break; + case strength_break: + assert(this->loop.loop); + /* never lower "canonical break" */ + if(ir->get_next()->is_tail_sentinel() && (this->loop.nesting_depth == 0 + || (this->loop.nesting_depth == 1 && this->loop.in_if_at_the_end_of_the_loop))) + lower = false; + else + lower = lower_break; + break; + case strength_return: + /* never lower return at the end of a this->function */ + if(this->function.nesting_depth == 0 && ir->get_next()->is_tail_sentinel()) + lower = false; + else + lower = this->function.lower_return; + break; + } + return lower; + } + + block_record visit_block(exec_list* list) + { + /* Note: since visiting a node may change that node's next + * pointer, we can't use visit_exec_list(), because + * visit_exec_list() caches the node's next pointer before + * visiting it. So we use foreach_in_list() instead. + * + * foreach_in_list() isn't safe if the node being visited gets + * removed, but fortunately this visitor doesn't do that. 
+ */ + + block_record saved_block = this->block; + this->block = block_record(); + foreach_in_list(ir_instruction, node, list) { + node->accept(this); + } + block_record ret = this->block; + this->block = saved_block; + return ret; + } + + virtual void visit(ir_if *ir) + { + if(this->loop.nesting_depth == 0 && ir->get_next()->is_tail_sentinel()) + this->loop.in_if_at_the_end_of_the_loop = true; + + ++this->function.nesting_depth; + ++this->loop.nesting_depth; + + block_record block_records[2]; + ir_jump* jumps[2]; + + /* Recursively lower nested jumps. This satisfies the + * CONTAINED_JUMPS_LOWERED postcondition, except in the case of + * unconditional jumps at the end of ir->then_instructions and + * ir->else_instructions, which are handled below. + */ + block_records[0] = visit_block(&ir->then_instructions); + block_records[1] = visit_block(&ir->else_instructions); + +retry: /* we get here if we put code after the if inside a branch */ + + /* Determine which of ir->then_instructions and + * ir->else_instructions end with an unconditional jump. + */ + for(unsigned i = 0; i < 2; ++i) { + exec_list& list = i ? ir->else_instructions : ir->then_instructions; + jumps[i] = 0; + if(!list.is_empty() && get_jump_strength((ir_instruction*)list.get_tail())) + jumps[i] = (ir_jump*)list.get_tail(); + } + + /* Loop until we have satisfied the CONTAINED_JUMPS_LOWERED + * postcondition by lowering jumps in both then_instructions and + * else_instructions. + */ + for(;;) { + /* Determine the types of the jumps that terminate + * ir->then_instructions and ir->else_instructions. 
+ */ + jump_strength jump_strengths[2]; + + for(unsigned i = 0; i < 2; ++i) { + if(jumps[i]) { + jump_strengths[i] = block_records[i].min_strength; + assert(jump_strengths[i] == get_jump_strength(jumps[i])); + } else + jump_strengths[i] = strength_none; + } + + /* If both code paths end in a jump, and the jumps are the + * same, and we are pulling out jumps, replace them with a + * single jump that comes after the if instruction. The new + * jump will be visited next, and it will be lowered if + * necessary by the loop or conditional that encloses it. + */ + if(pull_out_jumps && jump_strengths[0] == jump_strengths[1]) { + bool unify = true; + if(jump_strengths[0] == strength_continue) + ir->insert_after(new(ir) ir_loop_jump(ir_loop_jump::jump_continue)); + else if(jump_strengths[0] == strength_break) + ir->insert_after(new(ir) ir_loop_jump(ir_loop_jump::jump_break)); + /* FINISHME: unify returns with identical expressions */ + else if(jump_strengths[0] == strength_return && this->function.signature->return_type->is_void()) + ir->insert_after(new(ir) ir_return(NULL)); + else + unify = false; + + if(unify) { + jumps[0]->remove(); + jumps[1]->remove(); + this->progress = true; + + /* Update jumps[] to reflect the fact that the jumps + * are gone, and update block_records[] to reflect the + * fact that control can now flow to the next + * instruction. + */ + jumps[0] = 0; + jumps[1] = 0; + block_records[0].min_strength = strength_none; + block_records[1].min_strength = strength_none; + + /* The CONTAINED_JUMPS_LOWERED postcondition is now + * satisfied, so we can break out of the loop. 
+ */ + break; + } + } + + /* lower a jump: if both need to lowered, start with the strongest one, so that + * we might later unify the lowered version with the other one + */ + bool should_lower[2]; + for(unsigned i = 0; i < 2; ++i) + should_lower[i] = should_lower_jump(jumps[i]); + + int lower; + if(should_lower[1] && should_lower[0]) + lower = jump_strengths[1] > jump_strengths[0]; + else if(should_lower[0]) + lower = 0; + else if(should_lower[1]) + lower = 1; + else + /* Neither code path ends in a jump that needs to be + * lowered, so the CONTAINED_JUMPS_LOWERED postcondition + * is satisfied and we can break out of the loop. + */ + break; + + if(jump_strengths[lower] == strength_return) { + /* To lower a return, we create a return flag (if the + * function doesn't have one already) and add instructions + * that: 1. store the return value (if this function has a + * non-void return) and 2. set the return flag + */ + insert_lowered_return((ir_return*)jumps[lower]); + if(this->loop.loop) { + /* If we are in a loop, replace the return instruction + * with a break instruction, and then loop so that the + * break instruction can be lowered if necessary. + */ + ir_loop_jump* lowered = 0; + lowered = new(ir) ir_loop_jump(ir_loop_jump::jump_break); + /* Note: we must update block_records and jumps to + * reflect the fact that the control path has been + * altered from a return to a break. + */ + block_records[lower].min_strength = strength_break; + jumps[lower]->replace_with(lowered); + jumps[lower] = lowered; + } else { + /* If we are not in a loop, we then proceed as we would + * for a continue statement (set the execute flag to + * false to prevent the rest of the function from + * executing). + */ + goto lower_continue; + } + this->progress = true; + } else if(jump_strengths[lower] == strength_break) { + /* To lower a break, we create a break flag (if the loop + * doesn't have one already) and add an instruction that + * sets it. 
+ * + * Then we proceed as we would for a continue statement + * (set the execute flag to false to prevent the rest of + * the loop body from executing). + * + * The visit() function for the loop will ensure that the + * break flag is checked after executing the loop body. + */ + jumps[lower]->insert_before(create_lowered_break()); + goto lower_continue; + } else if(jump_strengths[lower] == strength_continue) { +lower_continue: + /* To lower a continue, we create an execute flag (if the + * loop doesn't have one already) and replace the continue + * with an instruction that clears it. + * + * Note that this code path gets exercised when lowering + * return statements that are not inside a loop, so + * this->loop must be initialized even outside of loops. + */ + ir_variable* execute_flag = this->loop.get_execute_flag(); + jumps[lower]->replace_with(new(ir) ir_assignment(new (ir) ir_dereference_variable(execute_flag), new (ir) ir_constant(false), 0)); + /* Note: we must update block_records and jumps to reflect + * the fact that the control path has been altered to an + * instruction that clears the execute flag. + */ + jumps[lower] = 0; + block_records[lower].min_strength = strength_always_clears_execute_flag; + block_records[lower].may_clear_execute_flag = true; + this->progress = true; + + /* Let the loop run again, in case the other branch of the + * if needs to be lowered too. + */ + } + } + + /* move out a jump out if possible */ + if(pull_out_jumps) { + /* If one of the branches ends in a jump, and control cannot + * fall out the bottom of the other branch, then we can move + * the jump after the if. + * + * Set move_out to the branch we are moving a jump out of. 
+ */ + int move_out = -1; + if(jumps[0] && block_records[1].min_strength >= strength_continue) + move_out = 0; + else if(jumps[1] && block_records[0].min_strength >= strength_continue) + move_out = 1; + + if(move_out >= 0) + { + jumps[move_out]->remove(); + ir->insert_after(jumps[move_out]); + /* Note: we must update block_records and jumps to reflect + * the fact that the jump has been moved out of the if. + */ + jumps[move_out] = 0; + block_records[move_out].min_strength = strength_none; + this->progress = true; + } + } + + /* Now satisfy the ANALYSIS postcondition by setting + * this->block.min_strength and + * this->block.may_clear_execute_flag based on the + * characteristics of the two branches. + */ + if(block_records[0].min_strength < block_records[1].min_strength) + this->block.min_strength = block_records[0].min_strength; + else + this->block.min_strength = block_records[1].min_strength; + this->block.may_clear_execute_flag = this->block.may_clear_execute_flag || block_records[0].may_clear_execute_flag || block_records[1].may_clear_execute_flag; + + /* Now we need to clean up the instructions that follow the + * if. + * + * If those instructions are unreachable, then satisfy the + * DEAD_CODE_ELIMINATION postcondition by eliminating them. + * Otherwise that postcondition is already satisfied. + */ + if(this->block.min_strength) + truncate_after_instruction(ir); + else if(this->block.may_clear_execute_flag) + { + /* If the "if" instruction might clear the execute flag, then + * we need to guard any instructions that follow so that they + * are only executed if the execute flag is set. + * + * If one of the branches of the "if" always clears the + * execute flag, and the other branch never clears it, then + * this is easy: just move all the instructions following the + * "if" into the branch that never clears it. 
+          */
+         int move_into = -1;
+         if(block_records[0].min_strength && !block_records[1].may_clear_execute_flag)
+            move_into = 1;
+         else if(block_records[1].min_strength && !block_records[0].may_clear_execute_flag)
+            move_into = 0;
+
+         if(move_into >= 0) {
+            assert(!block_records[move_into].min_strength && !block_records[move_into].may_clear_execute_flag); /* otherwise, we just truncated */
+
+            exec_list* list = move_into ? &ir->else_instructions : &ir->then_instructions;
+            exec_node* next = ir->get_next();
+            if(!next->is_tail_sentinel()) {
+               move_outer_block_inside(ir, list);
+
+               /* If any instructions moved, then we need to visit
+                * them (since they are now inside the "if"). Since
+                * block_records[move_into] is in its default state
+                * (see assertion above), we can safely replace
+                * block_records[move_into] with the result of this
+                * analysis.
+                *
+                * NOTE(review): the exec_list declared below shadows the
+                * `list` pointer declared above, and only its head is
+                * assigned before it is handed to visit_block() --
+                * confirm that this is all visit_block() relies on.
+                */
+               exec_list list;
+               list.head = next;
+               block_records[move_into] = visit_block(&list);
+
+               /*
+                * Then we need to re-start our jump lowering, since one
+                * of the instructions we moved might be a jump that
+                * needs to be lowered.
+                */
+               this->progress = true;
+               goto retry;
+            }
+         } else {
+            /* If we get here, then the simple case didn't apply; we
+             * need to actually guard the instructions that follow.
+             *
+             * To avoid creating unnecessarily-deep nesting, first
+             * look through the instructions that follow and unwrap
+             * any instructions that are already wrapped in the
+             * appropriate guard.
+             */
+            ir_instruction* ir_after;
+            for(ir_after = (ir_instruction*)ir->get_next(); !ir_after->is_tail_sentinel();)
+            {
+               ir_if* ir_if = ir_after->as_if();
+               if(ir_if && ir_if->else_instructions.is_empty()) {
+                  ir_dereference_variable* ir_if_cond_deref = ir_if->condition->as_dereference_variable();
+                  if(ir_if_cond_deref && ir_if_cond_deref->var == this->loop.execute_flag) {
+                     /* Already guarded by the execute flag: splice the
+                      * guarded instructions in front of the "if" and
+                      * drop the now-empty "if" itself.
+                      */
+                     ir_instruction* ir_next = (ir_instruction*)ir_after->get_next();
+                     ir_after->insert_before(&ir_if->then_instructions);
+                     ir_after->remove();
+                     ir_after = ir_next;
+                     continue;
+                  }
+               }
+               ir_after = (ir_instruction*)ir_after->get_next();
+
+               /* only set this if we find any unprotected instruction */
+               this->progress = true;
+            }
+
+            /* Then, wrap all the instructions that follow in a single
+             * guard.
+             */
+            if(!ir->get_next()->is_tail_sentinel()) {
+               assert(this->loop.execute_flag);
+               ir_if* if_execute = new(ir) ir_if(new(ir) ir_dereference_variable(this->loop.execute_flag));
+               move_outer_block_inside(ir, &if_execute->then_instructions);
+               ir->insert_after(if_execute);
+            }
+         }
+      }
+      --this->loop.nesting_depth;
+      --this->function.nesting_depth;
+   }
+
+   virtual void visit(ir_loop *ir)
+   {
+      /* Visit the body of the loop, with a fresh data structure in
+       * this->loop so that the analysis we do here won't bleed into
+       * enclosing loops.
+       *
+       * We assume that all code after a loop is reachable from the
+       * loop (see comments on enum jump_strength), so the
+       * DEAD_CODE_ELIMINATION postcondition is automatically
+       * satisfied, as is the block.min_strength portion of the
+       * ANALYSIS postcondition.
+       *
+       * The block.may_clear_execute_flag portion of the ANALYSIS
+       * postcondition is automatically satisfied because execute
+       * flags do not propagate outside of loops.
+       *
+       * The loop.may_set_return_flag portion of the ANALYSIS
+       * postcondition is handled below.
+       */
+      ++this->function.nesting_depth;
+      loop_record saved_loop = this->loop;
+      this->loop = loop_record(this->function.signature, ir);
+
+      /* Recursively lower nested jumps. This satisfies the
+       * CONTAINED_JUMPS_LOWERED postcondition, except in the case of
+       * an unconditional continue or return at the bottom of the
+       * loop, which are handled below.
+       */
+      block_record body = visit_block(&ir->body_instructions);
+
+      /* If the loop ends in an unconditional continue, eliminate it
+       * because it is redundant.
+       */
+      ir_instruction *ir_last
+         = (ir_instruction *) ir->body_instructions.get_tail();
+      if (get_jump_strength(ir_last) == strength_continue) {
+         ir_last->remove();
+      }
+
+      /* If the loop ends in an unconditional return, and we are
+       * lowering returns, lower it.
+       */
+      if (this->function.lower_return)
+         lower_return_unconditionally(ir_last);
+
+      if(body.min_strength >= strength_break) {
+         /* FINISHME: If the min_strength of the loop body is
+          * strength_break or strength_return, that means that it
+          * isn't a loop at all, since control flow always leaves the
+          * body of the loop via break or return. In principle the
+          * loop could be eliminated in this case. This optimization
+          * is not implemented yet.
+          */
+      }
+
+      if(this->loop.break_flag) {
+         /* We only get here if we are lowering breaks */
+         assert (lower_break);
+
+         /* If a break flag was generated while visiting the body of
+          * the loop, then at least one break was lowered, so we need
+          * to generate an if statement at the end of the loop that
+          * does a "break" if the break flag is set. The break we
+          * generate won't violate the CONTAINED_JUMPS_LOWERED
+          * postcondition, because should_lower_jump() always returns
+          * false for a break that happens at the end of a loop.
+          *
+          * However, if the loop already ends in a conditional or
+          * unconditional break, then we need to lower that break,
+          * because it won't be at the end of the loop anymore.
+          */
+         lower_final_breaks(&ir->body_instructions);
+
+         ir_if* break_if = new(ir) ir_if(new(ir) ir_dereference_variable(this->loop.break_flag));
+         break_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break));
+         ir->body_instructions.push_tail(break_if);
+      }
+
+      /* If the body of the loop may set the return flag, then at
+       * least one return was lowered to a break, so we need to ensure
+       * that the return flag is checked after the body of the loop is
+       * executed.
+       */
+      if(this->loop.may_set_return_flag) {
+         assert(this->function.return_flag);
+         /* Generate the if statement to check the return flag */
+         ir_if* return_if = new(ir) ir_if(new(ir) ir_dereference_variable(this->function.return_flag));
+         /* Note: we also need to propagate the knowledge that the
+          * return flag may get set to the outer context. This
+          * satisfies the loop.may_set_return_flag part of the
+          * ANALYSIS postcondition.
+          */
+         saved_loop.may_set_return_flag = true;
+         if(saved_loop.loop)
+            /* If this loop is nested inside another one, then the if
+             * statement that we generated should break out of that
+             * loop if the return flag is set. Caller will lower that
+             * break statement if necessary.
+             */
+            return_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break));
+         else
+            /* Otherwise, all we need to do is ensure that the
+             * instructions that follow are only executed if the
+             * return flag is clear. We can do that by moving those
+             * instructions into the else clause of the generated if
+             * statement.
+             */
+            move_outer_block_inside(ir, &return_if->else_instructions);
+         ir->insert_after(return_if);
+      }
+
+      /* Restore the enclosing loop's record (with may_set_return_flag
+       * possibly updated above).
+       */
+      this->loop = saved_loop;
+      --this->function.nesting_depth;
+   }
+
+   virtual void visit(ir_function_signature *ir)
+   {
+      /* these are not strictly necessary */
+      assert(!this->function.signature);
+      assert(!this->loop.loop);
+
+      /* "main" and every other function have separate lowering switches. */
+      bool lower_return;
+      if (strcmp(ir->function_name(), "main") == 0)
+         lower_return = lower_main_return;
+      else
+         lower_return = lower_sub_return;
+
+      function_record saved_function = this->function;
+      loop_record saved_loop = this->loop;
+      this->function = function_record(ir, lower_return);
+      this->loop = loop_record(ir);
+
+      assert(!this->loop.loop);
+
+      /* Visit the body of the function to lower any jumps that occur
+       * in it, except possibly an unconditional return statement at
+       * the end of it.
+       */
+      visit_block(&ir->body);
+
+      /* If the body ended in an unconditional return of non-void,
+       * then we don't need to lower it because it's the one canonical
+       * return.
+       *
+       * If the body ended in a return of void, eliminate it because
+       * it is redundant.
+       *
+       * NOTE(review): the condition accepts any non-zero jump strength;
+       * only the assert below states that the jump is a return.
+       */
+      if (ir->return_type->is_void() &&
+          get_jump_strength((ir_instruction *) ir->body.get_tail())) {
+         ir_jump *jump = (ir_jump *) ir->body.get_tail();
+         assert (jump->ir_type == ir_type_return);
+         jump->remove();
+      }
+
+      /* If a return-value temporary was created while lowering, emit the
+       * single return of that temporary at the end of the body.
+       */
+      if(this->function.return_value)
+         ir->body.push_tail(new(ir) ir_return(new (ir) ir_dereference_variable(this->function.return_value)));
+
+      this->loop = saved_loop;
+      this->function = saved_function;
+   }
+
+   /* A function is lowered by visiting each of its signatures. */
+   virtual void visit(class ir_function * ir)
+   {
+      visit_block(&ir->signatures);
+   }
+};
+
+} /* anonymous namespace */
+
+/**
+ * Run the jump-lowering visitor over \p instructions until a pass stops
+ * reporting progress.
+ *
+ * The five flags are copied unchanged into the visitor's fields of the same
+ * names before the first pass.
+ *
+ * \return true if any pass set the visitor's progress flag.
+ */
+bool
+do_lower_jumps(exec_list *instructions, bool pull_out_jumps, bool lower_sub_return, bool lower_main_return, bool lower_continue, bool lower_break)
+{
+   ir_lower_jumps_visitor v;
+   v.pull_out_jumps = pull_out_jumps;
+   v.lower_continue = lower_continue;
+   v.lower_break = lower_break;
+   v.lower_sub_return = lower_sub_return;
+   v.lower_main_return = lower_main_return;
+
+   bool progress_ever = false;
+   do {
+      v.progress = false;
+      visit_exec_list(instructions, &v);
+      progress_ever = v.progress || progress_ever;
+   } while (v.progress);
+
+   return progress_ever;
+}
diff --git a/src/compiler/glsl/lower_mat_op_to_vec.cpp b/src/compiler/glsl/lower_mat_op_to_vec.cpp
new file mode 100644
index 0000000..266fdc6
--- /dev/null
+++ b/src/compiler/glsl/lower_mat_op_to_vec.cpp
@@ -0,0 +1,438 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial
portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_mat_op_to_vec.cpp + * + * Breaks matrix operation expressions down to a series of vector operations. + * + * Generally this is how we have to codegen matrix operations for a + * GPU, so this gives us the chance to constant fold operations on a + * column or row. + */ + +#include "ir.h" +#include "ir_expression_flattening.h" +#include "compiler/glsl_types.h" + +namespace { + +class ir_mat_op_to_vec_visitor : public ir_hierarchical_visitor { +public: + ir_mat_op_to_vec_visitor() + { + this->made_progress = false; + this->mem_ctx = NULL; + } + + ir_visitor_status visit_leave(ir_assignment *); + + ir_dereference *get_column(ir_dereference *val, int col); + ir_rvalue *get_element(ir_dereference *val, int col, int row); + + void do_mul_mat_mat(ir_dereference *result, + ir_dereference *a, ir_dereference *b); + void do_mul_mat_vec(ir_dereference *result, + ir_dereference *a, ir_dereference *b); + void do_mul_vec_mat(ir_dereference *result, + ir_dereference *a, ir_dereference *b); + void do_mul_mat_scalar(ir_dereference *result, + ir_dereference *a, ir_dereference *b); + void do_equal_mat_mat(ir_dereference *result, ir_dereference *a, + ir_dereference *b, bool test_equal); + + void *mem_ctx; + bool made_progress; +}; + +} /* anonymous namespace */ + +static bool +mat_op_to_vec_predicate(ir_instruction *ir) +{ + ir_expression *expr = ir->as_expression(); + unsigned int i; + + if (!expr) + return false; + + for (i = 0; i < 
expr->get_num_operands(); i++) { + if (expr->operands[i]->type->is_matrix()) + return true; + } + + return false; +} + +bool +do_mat_op_to_vec(exec_list *instructions) +{ + ir_mat_op_to_vec_visitor v; + + /* Pull out any matrix expression to a separate assignment to a + * temp. This will make our handling of the breakdown to + * operations on the matrix's vector components much easier. + */ + do_expression_flattening(instructions, mat_op_to_vec_predicate); + + visit_list_elements(&v, instructions); + + return v.made_progress; +} + +ir_rvalue * +ir_mat_op_to_vec_visitor::get_element(ir_dereference *val, int col, int row) +{ + val = get_column(val, col); + + return new(mem_ctx) ir_swizzle(val, row, 0, 0, 0, 1); +} + +ir_dereference * +ir_mat_op_to_vec_visitor::get_column(ir_dereference *val, int row) +{ + val = val->clone(mem_ctx, NULL); + + if (val->type->is_matrix()) { + val = new(mem_ctx) ir_dereference_array(val, + new(mem_ctx) ir_constant(row)); + } + + return val; +} + +void +ir_mat_op_to_vec_visitor::do_mul_mat_mat(ir_dereference *result, + ir_dereference *a, + ir_dereference *b) +{ + unsigned b_col, i; + ir_assignment *assign; + ir_expression *expr; + + for (b_col = 0; b_col < b->type->matrix_columns; b_col++) { + /* first column */ + expr = new(mem_ctx) ir_expression(ir_binop_mul, + get_column(a, 0), + get_element(b, b_col, 0)); + + /* following columns */ + for (i = 1; i < a->type->matrix_columns; i++) { + ir_expression *mul_expr; + + mul_expr = new(mem_ctx) ir_expression(ir_binop_mul, + get_column(a, i), + get_element(b, b_col, i)); + expr = new(mem_ctx) ir_expression(ir_binop_add, + expr, + mul_expr); + } + + assign = new(mem_ctx) ir_assignment(get_column(result, b_col), expr); + base_ir->insert_before(assign); + } +} + +void +ir_mat_op_to_vec_visitor::do_mul_mat_vec(ir_dereference *result, + ir_dereference *a, + ir_dereference *b) +{ + unsigned i; + ir_assignment *assign; + ir_expression *expr; + + /* first column */ + expr = new(mem_ctx) 
ir_expression(ir_binop_mul, + get_column(a, 0), + get_element(b, 0, 0)); + + /* following columns */ + for (i = 1; i < a->type->matrix_columns; i++) { + ir_expression *mul_expr; + + mul_expr = new(mem_ctx) ir_expression(ir_binop_mul, + get_column(a, i), + get_element(b, 0, i)); + expr = new(mem_ctx) ir_expression(ir_binop_add, expr, mul_expr); + } + + result = result->clone(mem_ctx, NULL); + assign = new(mem_ctx) ir_assignment(result, expr); + base_ir->insert_before(assign); +} + +void +ir_mat_op_to_vec_visitor::do_mul_vec_mat(ir_dereference *result, + ir_dereference *a, + ir_dereference *b) +{ + unsigned i; + + for (i = 0; i < b->type->matrix_columns; i++) { + ir_rvalue *column_result; + ir_expression *column_expr; + ir_assignment *column_assign; + + column_result = result->clone(mem_ctx, NULL); + column_result = new(mem_ctx) ir_swizzle(column_result, i, 0, 0, 0, 1); + + column_expr = new(mem_ctx) ir_expression(ir_binop_dot, + a->clone(mem_ctx, NULL), + get_column(b, i)); + + column_assign = new(mem_ctx) ir_assignment(column_result, + column_expr); + base_ir->insert_before(column_assign); + } +} + +void +ir_mat_op_to_vec_visitor::do_mul_mat_scalar(ir_dereference *result, + ir_dereference *a, + ir_dereference *b) +{ + unsigned i; + + for (i = 0; i < a->type->matrix_columns; i++) { + ir_expression *column_expr; + ir_assignment *column_assign; + + column_expr = new(mem_ctx) ir_expression(ir_binop_mul, + get_column(a, i), + b->clone(mem_ctx, NULL)); + + column_assign = new(mem_ctx) ir_assignment(get_column(result, i), + column_expr); + base_ir->insert_before(column_assign); + } +} + +void +ir_mat_op_to_vec_visitor::do_equal_mat_mat(ir_dereference *result, + ir_dereference *a, + ir_dereference *b, + bool test_equal) +{ + /* This essentially implements the following GLSL: + * + * bool equal(mat4 a, mat4 b) + * { + * return !any(bvec4(a[0] != b[0], + * a[1] != b[1], + * a[2] != b[2], + * a[3] != b[3]); + * } + * + * bool nequal(mat4 a, mat4 b) + * { + * return 
any(bvec4(a[0] != b[0], + * a[1] != b[1], + * a[2] != b[2], + * a[3] != b[3]); + * } + */ + const unsigned columns = a->type->matrix_columns; + const glsl_type *const bvec_type = + glsl_type::get_instance(GLSL_TYPE_BOOL, columns, 1); + + ir_variable *const tmp_bvec = + new(this->mem_ctx) ir_variable(bvec_type, "mat_cmp_bvec", + ir_var_temporary); + this->base_ir->insert_before(tmp_bvec); + + for (unsigned i = 0; i < columns; i++) { + ir_expression *const cmp = + new(this->mem_ctx) ir_expression(ir_binop_any_nequal, + get_column(a, i), + get_column(b, i)); + + ir_dereference *const lhs = + new(this->mem_ctx) ir_dereference_variable(tmp_bvec); + + ir_assignment *const assign = + new(this->mem_ctx) ir_assignment(lhs, cmp, NULL, (1U << i)); + + this->base_ir->insert_before(assign); + } + + ir_rvalue *const val = new(this->mem_ctx) ir_dereference_variable(tmp_bvec); + uint8_t vec_elems = val->type->vector_elements; + ir_expression *any = + new(this->mem_ctx) ir_expression(ir_binop_any_nequal, val, + new(this->mem_ctx) ir_constant(false, + vec_elems)); + + if (test_equal) + any = new(this->mem_ctx) ir_expression(ir_unop_logic_not, any); + + ir_assignment *const assign = + new(mem_ctx) ir_assignment(result->clone(mem_ctx, NULL), any); + base_ir->insert_before(assign); +} + +static bool +has_matrix_operand(const ir_expression *expr, unsigned &columns) +{ + for (unsigned i = 0; i < expr->get_num_operands(); i++) { + if (expr->operands[i]->type->is_matrix()) { + columns = expr->operands[i]->type->matrix_columns; + return true; + } + } + + return false; +} + + +ir_visitor_status +ir_mat_op_to_vec_visitor::visit_leave(ir_assignment *orig_assign) +{ + ir_expression *orig_expr = orig_assign->rhs->as_expression(); + unsigned int i, matrix_columns = 1; + ir_dereference *op[2]; + + if (!orig_expr) + return visit_continue; + + if (!has_matrix_operand(orig_expr, matrix_columns)) + return visit_continue; + + assert(orig_expr->get_num_operands() <= 2); + + mem_ctx = 
ralloc_parent(orig_assign); + + ir_dereference_variable *result = + orig_assign->lhs->as_dereference_variable(); + assert(result); + + /* Store the expression operands in temps so we can use them + * multiple times. + */ + for (i = 0; i < orig_expr->get_num_operands(); i++) { + ir_assignment *assign; + ir_dereference *deref = orig_expr->operands[i]->as_dereference(); + + /* Avoid making a temporary if we don't need to to avoid aliasing. */ + if (deref && + deref->variable_referenced() != result->variable_referenced()) { + op[i] = deref; + continue; + } + + /* Otherwise, store the operand in a temporary generally if it's + * not a dereference. + */ + ir_variable *var = new(mem_ctx) ir_variable(orig_expr->operands[i]->type, + "mat_op_to_vec", + ir_var_temporary); + base_ir->insert_before(var); + + /* Note that we use this dereference for the assignment. That means + * that others that want to use op[i] have to clone the deref. + */ + op[i] = new(mem_ctx) ir_dereference_variable(var); + assign = new(mem_ctx) ir_assignment(op[i], orig_expr->operands[i]); + base_ir->insert_before(assign); + } + + /* OK, time to break down this matrix operation. */ + switch (orig_expr->operation) { + case ir_unop_d2f: + case ir_unop_f2d: + case ir_unop_neg: { + /* Apply the operation to each column.*/ + for (i = 0; i < matrix_columns; i++) { + ir_expression *column_expr; + ir_assignment *column_assign; + + column_expr = new(mem_ctx) ir_expression(orig_expr->operation, + get_column(op[0], i)); + + column_assign = new(mem_ctx) ir_assignment(get_column(result, i), + column_expr); + assert(column_assign->write_mask != 0); + base_ir->insert_before(column_assign); + } + break; + } + case ir_binop_add: + case ir_binop_sub: + case ir_binop_div: + case ir_binop_mod: { + /* For most operations, the matrix version is just going + * column-wise through and applying the operation to each column + * if available. 
+ */ + for (i = 0; i < matrix_columns; i++) { + ir_expression *column_expr; + ir_assignment *column_assign; + + column_expr = new(mem_ctx) ir_expression(orig_expr->operation, + get_column(op[0], i), + get_column(op[1], i)); + + column_assign = new(mem_ctx) ir_assignment(get_column(result, i), + column_expr); + assert(column_assign->write_mask != 0); + base_ir->insert_before(column_assign); + } + break; + } + case ir_binop_mul: + if (op[0]->type->is_matrix()) { + if (op[1]->type->is_matrix()) { + do_mul_mat_mat(result, op[0], op[1]); + } else if (op[1]->type->is_vector()) { + do_mul_mat_vec(result, op[0], op[1]); + } else { + assert(op[1]->type->is_scalar()); + do_mul_mat_scalar(result, op[0], op[1]); + } + } else { + assert(op[1]->type->is_matrix()); + if (op[0]->type->is_vector()) { + do_mul_vec_mat(result, op[0], op[1]); + } else { + assert(op[0]->type->is_scalar()); + do_mul_mat_scalar(result, op[1], op[0]); + } + } + break; + + case ir_binop_all_equal: + case ir_binop_any_nequal: + do_equal_mat_mat(result, op[1], op[0], + (orig_expr->operation == ir_binop_all_equal)); + break; + + default: + printf("FINISHME: Handle matrix operation for %s\n", + orig_expr->operator_string()); + abort(); + } + orig_assign->remove(); + this->made_progress = true; + + return visit_continue; +} diff --git a/src/compiler/glsl/lower_named_interface_blocks.cpp b/src/compiler/glsl/lower_named_interface_blocks.cpp new file mode 100644 index 0000000..f29eba4 --- /dev/null +++ b/src/compiler/glsl/lower_named_interface_blocks.cpp @@ -0,0 +1,280 @@ +/* + * Copyright (c) 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is 
furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_named_interface_blocks.cpp + * + * This lowering pass converts all interface blocks with instance names + * into interface blocks without an instance name. + * + * For example, the following shader: + * + * out block { + * float block_var; + * } inst_name; + * + * main() + * { + * inst_name.block_var = 0.0; + * } + * + * Is rewritten to: + * + * out block { + * float block_var; + * }; + * + * main() + * { + * block_var = 0.0; + * } + * + * This takes place after the shader code has already been verified with + * the interface name in place. + * + * The linking phase will use the interface block name rather than the + * interface's instance name when linking interfaces. + * + * This modification to the ir allows our currently existing dead code + * elimination to work with interface blocks without changes. 
+ */ + +#include "glsl_symbol_table.h" +#include "ir.h" +#include "ir_optimization.h" +#include "ir_rvalue_visitor.h" +#include "program/hash_table.h" + +static const glsl_type * +process_array_type(const glsl_type *type, unsigned idx) +{ + const glsl_type *element_type = type->fields.array; + if (element_type->is_array()) { + const glsl_type *new_array_type = process_array_type(element_type, idx); + return glsl_type::get_array_instance(new_array_type, type->length); + } else { + return glsl_type::get_array_instance( + element_type->fields.structure[idx].type, type->length); + } +} + +static ir_rvalue * +process_array_ir(void * const mem_ctx, + ir_dereference_array *deref_array_prev, + ir_rvalue *deref_var) +{ + ir_dereference_array *deref_array = + deref_array_prev->array->as_dereference_array(); + + if (deref_array == NULL) { + return new(mem_ctx) ir_dereference_array(deref_var, + deref_array_prev->array_index); + } else { + deref_array = (ir_dereference_array *) process_array_ir(mem_ctx, + deref_array, + deref_var); + return new(mem_ctx) ir_dereference_array(deref_array, + deref_array_prev->array_index); + } +} + +namespace { + +class flatten_named_interface_blocks_declarations : public ir_rvalue_visitor +{ +public: + void * const mem_ctx; + hash_table *interface_namespace; + + flatten_named_interface_blocks_declarations(void *mem_ctx) + : mem_ctx(mem_ctx), + interface_namespace(NULL) + { + } + + void run(exec_list *instructions); + + virtual ir_visitor_status visit_leave(ir_assignment *); + virtual void handle_rvalue(ir_rvalue **rvalue); +}; + +} /* anonymous namespace */ + +void +flatten_named_interface_blocks_declarations::run(exec_list *instructions) +{ + interface_namespace = hash_table_ctor(0, hash_table_string_hash, + hash_table_string_compare); + + /* First pass: adjust instance block variables with an instance name + * to not have an instance name. 
+ * + * The interface block variables are stored in the interface_namespace + * hash table so they can be used in the second pass. + */ + foreach_in_list_safe(ir_instruction, node, instructions) { + ir_variable *var = node->as_variable(); + if (!var || !var->is_interface_instance()) + continue; + + /* It should be possible to handle uniforms during this pass, + * but, this will require changes to the other uniform block + * support code. + */ + if (var->data.mode == ir_var_uniform || + var->data.mode == ir_var_shader_storage) + continue; + + const glsl_type * iface_t = var->type->without_array(); + exec_node *insert_pos = var; + + assert (iface_t->is_interface()); + + for (unsigned i = 0; i < iface_t->length; i++) { + const char * field_name = iface_t->fields.structure[i].name; + char *iface_field_name = + ralloc_asprintf(mem_ctx, "%s %s.%s.%s", + var->data.mode == ir_var_shader_in ? "in" : "out", + iface_t->name, var->name, field_name); + + ir_variable *found_var = + (ir_variable *) hash_table_find(interface_namespace, + iface_field_name); + if (!found_var) { + ir_variable *new_var; + char *var_name = + ralloc_strdup(mem_ctx, iface_t->fields.structure[i].name); + if (!var->type->is_array()) { + new_var = + new(mem_ctx) ir_variable(iface_t->fields.structure[i].type, + var_name, + (ir_variable_mode) var->data.mode); + new_var->data.from_named_ifc_block_nonarray = 1; + } else { + const glsl_type *new_array_type = + process_array_type(var->type, i); + new_var = + new(mem_ctx) ir_variable(new_array_type, + var_name, + (ir_variable_mode) var->data.mode); + new_var->data.from_named_ifc_block_array = 1; + } + new_var->data.location = iface_t->fields.structure[i].location; + new_var->data.explicit_location = (new_var->data.location >= 0); + new_var->data.interpolation = + iface_t->fields.structure[i].interpolation; + new_var->data.centroid = iface_t->fields.structure[i].centroid; + new_var->data.sample = iface_t->fields.structure[i].sample; + new_var->data.patch = 
iface_t->fields.structure[i].patch; + new_var->data.stream = var->data.stream; + new_var->data.how_declared = var->data.how_declared; + + new_var->init_interface_type(iface_t); + hash_table_insert(interface_namespace, new_var, + iface_field_name); + insert_pos->insert_after(new_var); + insert_pos = new_var; + } + } + var->remove(); + } + + /* Second pass: visit all ir_dereference_record instances, and if they + * reference an interface block, then flatten the refererence out. + */ + visit_list_elements(this, instructions); + hash_table_dtor(interface_namespace); + interface_namespace = NULL; +} + +ir_visitor_status +flatten_named_interface_blocks_declarations::visit_leave(ir_assignment *ir) +{ + ir_dereference_record *lhs_rec = ir->lhs->as_dereference_record(); + if (lhs_rec) { + ir_rvalue *lhs_rec_tmp = lhs_rec; + handle_rvalue(&lhs_rec_tmp); + if (lhs_rec_tmp != lhs_rec) { + ir->set_lhs(lhs_rec_tmp); + } + } + return rvalue_visit(ir); +} + +void +flatten_named_interface_blocks_declarations::handle_rvalue(ir_rvalue **rvalue) +{ + if (*rvalue == NULL) + return; + + ir_dereference_record *ir = (*rvalue)->as_dereference_record(); + if (ir == NULL) + return; + + ir_variable *var = ir->variable_referenced(); + if (var == NULL) + return; + + if (!var->is_interface_instance()) + return; + + /* It should be possible to handle uniforms during this pass, + * but, this will require changes to the other uniform block + * support code. + */ + if (var->data.mode == ir_var_uniform || var->data.mode == ir_var_shader_storage) + return; + + if (var->get_interface_type() != NULL) { + char *iface_field_name = + ralloc_asprintf(mem_ctx, "%s %s.%s.%s", + var->data.mode == ir_var_shader_in ? 
"in" : "out", + var->get_interface_type()->name, + var->name, ir->field); + /* Find the variable in the set of flattened interface blocks */ + ir_variable *found_var = + (ir_variable *) hash_table_find(interface_namespace, + iface_field_name); + assert(found_var); + + ir_dereference_variable *deref_var = + new(mem_ctx) ir_dereference_variable(found_var); + + ir_dereference_array *deref_array = + ir->record->as_dereference_array(); + if (deref_array != NULL) { + *rvalue = process_array_ir(mem_ctx, deref_array, + (ir_rvalue *)deref_var); + } else { + *rvalue = deref_var; + } + } +} + +void +lower_named_interface_blocks(void *mem_ctx, gl_shader *shader) +{ + flatten_named_interface_blocks_declarations v_decl(mem_ctx); + v_decl.run(shader->ir); +} + diff --git a/src/compiler/glsl/lower_noise.cpp b/src/compiler/glsl/lower_noise.cpp new file mode 100644 index 0000000..85f59b6 --- /dev/null +++ b/src/compiler/glsl/lower_noise.cpp @@ -0,0 +1,71 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_noise.cpp + * IR lower pass to remove noise opcodes. + * + * \author Ian Romanick <ian.d.romanick@intel.com> + */ + +#include "ir.h" +#include "ir_rvalue_visitor.h" + +class lower_noise_visitor : public ir_rvalue_visitor { +public: + lower_noise_visitor() : progress(false) + { + /* empty */ + } + + void handle_rvalue(ir_rvalue **rvalue) + { + if (!*rvalue) + return; + + ir_expression *expr = (*rvalue)->as_expression(); + if (!expr) + return; + + /* In the future, ir_unop_noise may be replaced by a call to a function + * that implements noise. No hardware has a noise instruction. + */ + if (expr->operation == ir_unop_noise) { + *rvalue = ir_constant::zero(ralloc_parent(expr), expr->type); + this->progress = true; + } + } + + bool progress; +}; + + +bool +lower_noise(exec_list *instructions) +{ + lower_noise_visitor v; + + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/lower_offset_array.cpp b/src/compiler/glsl/lower_offset_array.cpp new file mode 100644 index 0000000..96486c3 --- /dev/null +++ b/src/compiler/glsl/lower_offset_array.cpp @@ -0,0 +1,91 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including 
the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_offset_array.cpp + * + * IR lower pass to decompose ir_texture ir_tg4 with an array of offsets + * into four ir_tg4s with a single ivec2 offset, select the .w component of each, + * and return those four values packed into a gvec4. + * + * \author Chris Forbes <chrisf@ijw.co.nz> + */ + +#include "compiler/glsl_types.h" +#include "ir.h" +#include "ir_builder.h" +#include "ir_optimization.h" +#include "ir_rvalue_visitor.h" + +using namespace ir_builder; + +class lower_offset_array_visitor : public ir_rvalue_visitor { +public: + lower_offset_array_visitor() + { + progress = false; + } + + void handle_rvalue(ir_rvalue **rv); + + bool progress; +}; + +void +lower_offset_array_visitor::handle_rvalue(ir_rvalue **rv) +{ + if (*rv == NULL || (*rv)->ir_type != ir_type_texture) + return; + + ir_texture *ir = (ir_texture *) *rv; + if (ir->op != ir_tg4 || !ir->offset || !ir->offset->type->is_array()) + return; + + void *mem_ctx = ralloc_parent(ir); + + ir_variable *var = + new (mem_ctx) ir_variable(ir->type, "result", ir_var_temporary); + base_ir->insert_before(var); + + for (int i = 0; i < 4; i++) { + ir_texture *tex = ir->clone(mem_ctx, NULL); + tex->offset = new (mem_ctx) ir_dereference_array(tex->offset, + new (mem_ctx) ir_constant(i)); + + base_ir->insert_before(assign(var, swizzle_w(tex), 1 << i)); + } + + *rv = new (mem_ctx) ir_dereference_variable(var); + + 
progress = true; +} + +bool +lower_offset_arrays(exec_list *instructions) +{ + lower_offset_array_visitor v; + + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/lower_output_reads.cpp b/src/compiler/glsl/lower_output_reads.cpp new file mode 100644 index 0000000..79488df --- /dev/null +++ b/src/compiler/glsl/lower_output_reads.cpp @@ -0,0 +1,178 @@ +/* + * Copyright © 2012 Vincent Lejeune + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "ir.h" +#include "program/hash_table.h" + +/** + * \file lower_output_reads.cpp + * + * In GLSL, shader output variables (such as varyings) can be both read and + * written. However, on some hardware, reading an output register causes + * trouble. 
+ * + * This pass creates temporary shadow copies of every (used) shader output, + * and replaces all accesses to use those instead. It also adds code to the + * main() function to copy the final values to the actual shader outputs. + */ + +namespace { + +class output_read_remover : public ir_hierarchical_visitor { +protected: + /** + * A hash table mapping from the original ir_variable shader outputs + * (ir_var_shader_out mode) to the new temporaries to be used instead. + */ + hash_table *replacements; + + void *mem_ctx; + + unsigned stage; +public: + output_read_remover(unsigned stage); + ~output_read_remover(); + virtual ir_visitor_status visit(class ir_dereference_variable *); + virtual ir_visitor_status visit_leave(class ir_emit_vertex *); + virtual ir_visitor_status visit_leave(class ir_return *); + virtual ir_visitor_status visit_leave(class ir_function_signature *); +}; + +} /* anonymous namespace */ + +/** + * Hash function for the output variables - computes the hash of the name. + * NOTE: We're using the name string to ensure that the hash doesn't depend + * on any random factors, otherwise the output_read_remover could produce + * the random order of the assignments. + * + * NOTE: If you want to reuse this function please take into account that + * generally the names of the variables are non-unique. 
+ */ +static unsigned +hash_table_var_hash(const void *key) +{ + const ir_variable * var = static_cast<const ir_variable *>(key); + return hash_table_string_hash(var->name); +} + +output_read_remover::output_read_remover(unsigned stage) +{ + this->stage = stage; + mem_ctx = ralloc_context(NULL); + replacements = + hash_table_ctor(0, hash_table_var_hash, hash_table_pointer_compare); +} + +output_read_remover::~output_read_remover() +{ + hash_table_dtor(replacements); + ralloc_free(mem_ctx); +} + +ir_visitor_status +output_read_remover::visit(ir_dereference_variable *ir) +{ + if (ir->var->data.mode != ir_var_shader_out) + return visit_continue; + if (stage == MESA_SHADER_TESS_CTRL) + return visit_continue; + + ir_variable *temp = (ir_variable *) hash_table_find(replacements, ir->var); + + /* If we don't have an existing temporary, create one. */ + if (temp == NULL) { + void *var_ctx = ralloc_parent(ir->var); + temp = new(var_ctx) ir_variable(ir->var->type, ir->var->name, + ir_var_temporary); + hash_table_insert(replacements, temp, ir->var); + ir->var->insert_after(temp); + } + + /* Update the dereference to use the temporary */ + ir->var = temp; + + return visit_continue; +} + +/** + * Create an assignment to copy a temporary value back to the actual output. + */ +static ir_assignment * +copy(void *ctx, ir_variable *output, ir_variable *temp) +{ + ir_dereference_variable *lhs = new(ctx) ir_dereference_variable(output); + ir_dereference_variable *rhs = new(ctx) ir_dereference_variable(temp); + return new(ctx) ir_assignment(lhs, rhs); +} + +/** Insert a copy-back assignment before a "return" statement or a call to + * EmitVertex(). 
+ */ +static void +emit_return_copy(const void *key, void *data, void *closure) +{ + ir_return *ir = (ir_return *) closure; + ir->insert_before(copy(ir, (ir_variable *) key, (ir_variable *) data)); +} + +/** Insert a copy-back assignment at the end of the main() function */ +static void +emit_main_copy(const void *key, void *data, void *closure) +{ + ir_function_signature *sig = (ir_function_signature *) closure; + sig->body.push_tail(copy(sig, (ir_variable *) key, (ir_variable *) data)); +} + +ir_visitor_status +output_read_remover::visit_leave(ir_return *ir) +{ + hash_table_call_foreach(replacements, emit_return_copy, ir); + return visit_continue; +} + +ir_visitor_status +output_read_remover::visit_leave(ir_emit_vertex *ir) +{ + hash_table_call_foreach(replacements, emit_return_copy, ir); + hash_table_clear(replacements); + return visit_continue; +} + +ir_visitor_status +output_read_remover::visit_leave(ir_function_signature *sig) +{ + if (strcmp(sig->function_name(), "main") != 0) + return visit_continue; + + hash_table_call_foreach(replacements, emit_main_copy, sig); + return visit_continue; +} + +void +lower_output_reads(unsigned stage, exec_list *instructions) +{ + output_read_remover v(stage); + visit_list_elements(&v, instructions); +} diff --git a/src/compiler/glsl/lower_packed_varyings.cpp b/src/compiler/glsl/lower_packed_varyings.cpp new file mode 100644 index 0000000..8d1eb17 --- /dev/null +++ b/src/compiler/glsl/lower_packed_varyings.cpp @@ -0,0 +1,749 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + 
* + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_packed_varyings.cpp + * + * This lowering pass generates GLSL code that manually packs varyings into + * vec4 slots, for the benefit of back-ends that don't support packed varyings + * natively. + * + * For example, the following shader: + * + * out mat3x2 foo; // location=4, location_frac=0 + * out vec3 bar[2]; // location=5, location_frac=2 + * + * main() + * { + * ... + * } + * + * Is rewritten to: + * + * mat3x2 foo; + * vec3 bar[2]; + * out vec4 packed4; // location=4, location_frac=0 + * out vec4 packed5; // location=5, location_frac=0 + * out vec4 packed6; // location=6, location_frac=0 + * + * main() + * { + * ... + * packed4.xy = foo[0]; + * packed4.zw = foo[1]; + * packed5.xy = foo[2]; + * packed5.zw = bar[0].xy; + * packed6.x = bar[0].z; + * packed6.yzw = bar[1]; + * } + * + * This lowering pass properly handles "double parking" of a varying vector + * across two varying slots. For example, in the code above, two of the + * components of bar[0] are stored in packed5, and the remaining component is + * stored in packed6. + * + * Note that in theory, the extra instructions may cause some loss of + * performance.
However, hopefully in most cases the performance loss will + * either be absorbed by a later optimization pass, or it will be offset by + * memory bandwidth savings (because fewer varyings are used). + * + * This lowering pass also packs flat floats, ints, and uints together, by + * using ivec4 as the base type of flat "varyings", and using appropriate + * casts to convert floats and uints into ints. + * + * This lowering pass also handles varyings whose type is a struct or an array + * of struct. Structs are packed in order and with no gaps, so there may be a + * performance penalty due to structure elements being double-parked. + * + * Lowering of geometry shader inputs is slightly more complex, since geometry + * inputs are always arrays, so we need to lower arrays to arrays. For + * example, the following input: + * + * in struct Foo { + * float f; + * vec3 v; + * vec2 a[2]; + * } arr[3]; // location=4, location_frac=0 + * + * Would get lowered like this if it occurred in a fragment shader: + * + * struct Foo { + * float f; + * vec3 v; + * vec2 a[2]; + * } arr[3]; + * in vec4 packed4; // location=4, location_frac=0 + * in vec4 packed5; // location=5, location_frac=0 + * in vec4 packed6; // location=6, location_frac=0 + * in vec4 packed7; // location=7, location_frac=0 + * in vec4 packed8; // location=8, location_frac=0 + * in vec4 packed9; // location=9, location_frac=0 + * + * main() + * { + * arr[0].f = packed4.x; + * arr[0].v = packed4.yzw; + * arr[0].a[0] = packed5.xy; + * arr[0].a[1] = packed5.zw; + * arr[1].f = packed6.x; + * arr[1].v = packed6.yzw; + * arr[1].a[0] = packed7.xy; + * arr[1].a[1] = packed7.zw; + * arr[2].f = packed8.x; + * arr[2].v = packed8.yzw; + * arr[2].a[0] = packed9.xy; + * arr[2].a[1] = packed9.zw; + * ... 
+ * } + * + * But it would get lowered like this if it occurred in a geometry shader: + * + * struct Foo { + * float f; + * vec3 v; + * vec2 a[2]; + * } arr[3]; + * in vec4 packed4[3]; // location=4, location_frac=0 + * in vec4 packed5[3]; // location=5, location_frac=0 + * + * main() + * { + * arr[0].f = packed4[0].x; + * arr[0].v = packed4[0].yzw; + * arr[0].a[0] = packed5[0].xy; + * arr[0].a[1] = packed5[0].zw; + * arr[1].f = packed4[1].x; + * arr[1].v = packed4[1].yzw; + * arr[1].a[0] = packed5[1].xy; + * arr[1].a[1] = packed5[1].zw; + * arr[2].f = packed4[2].x; + * arr[2].v = packed4[2].yzw; + * arr[2].a[0] = packed5[2].xy; + * arr[2].a[1] = packed5[2].zw; + * ... + * } + */ + +#include "glsl_symbol_table.h" +#include "ir.h" +#include "ir_builder.h" +#include "ir_optimization.h" +#include "program/prog_instruction.h" + +using namespace ir_builder; + +namespace { + +/** + * Visitor that performs varying packing. For each varying declared in the + * shader, this visitor determines whether it needs to be packed. If so, it + * demotes it to an ordinary global, creates new packed varyings, and + * generates assignments to convert between the original varying and the + * packed varying. 
+ */ +class lower_packed_varyings_visitor +{ +public: + lower_packed_varyings_visitor(void *mem_ctx, unsigned locations_used, + ir_variable_mode mode, + unsigned gs_input_vertices, + exec_list *out_instructions, + exec_list *out_variables); + + void run(struct gl_shader *shader); + +private: + void bitwise_assign_pack(ir_rvalue *lhs, ir_rvalue *rhs); + void bitwise_assign_unpack(ir_rvalue *lhs, ir_rvalue *rhs); + unsigned lower_rvalue(ir_rvalue *rvalue, unsigned fine_location, + ir_variable *unpacked_var, const char *name, + bool gs_input_toplevel, unsigned vertex_index); + unsigned lower_arraylike(ir_rvalue *rvalue, unsigned array_size, + unsigned fine_location, + ir_variable *unpacked_var, const char *name, + bool gs_input_toplevel, unsigned vertex_index); + ir_dereference *get_packed_varying_deref(unsigned location, + ir_variable *unpacked_var, + const char *name, + unsigned vertex_index); + bool needs_lowering(ir_variable *var); + + /** + * Memory context used to allocate new instructions for the shader. + */ + void * const mem_ctx; + + /** + * Number of generic varying slots which are used by this shader. This is + * used to allocate temporary intermediate data structures. If any varying + * used by this shader has a location greater than or equal to + * VARYING_SLOT_VAR0 + locations_used, an assertion will fire. + */ + const unsigned locations_used; + + /** + * Array of pointers to the packed varyings that have been created for each + * generic varying slot. NULL entries in this array indicate varying slots + * for which a packed varying has not been created yet. + */ + ir_variable **packed_varyings; + + /** + * Type of varying which is being lowered in this pass (either + * ir_var_shader_in or ir_var_shader_out). + */ + const ir_variable_mode mode; + + /** + * If we are currently lowering geometry shader inputs, the number of input + * vertices the geometry shader accepts. Otherwise zero. 
+ */ + const unsigned gs_input_vertices; + + /** + * Exec list into which the visitor should insert the packing instructions. + * Caller provides this list; it should insert the instructions into the + * appropriate place in the shader once the visitor has finished running. + */ + exec_list *out_instructions; + + /** + * Exec list into which the visitor should insert any new variables. + */ + exec_list *out_variables; +}; + +} /* anonymous namespace */ + +lower_packed_varyings_visitor::lower_packed_varyings_visitor( + void *mem_ctx, unsigned locations_used, ir_variable_mode mode, + unsigned gs_input_vertices, exec_list *out_instructions, + exec_list *out_variables) + : mem_ctx(mem_ctx), + locations_used(locations_used), + packed_varyings((ir_variable **) + rzalloc_array_size(mem_ctx, sizeof(*packed_varyings), + locations_used)), + mode(mode), + gs_input_vertices(gs_input_vertices), + out_instructions(out_instructions), + out_variables(out_variables) +{ +} + +void +lower_packed_varyings_visitor::run(struct gl_shader *shader) +{ + foreach_in_list(ir_instruction, node, shader->ir) { + ir_variable *var = node->as_variable(); + if (var == NULL) + continue; + + if (var->data.mode != this->mode || + var->data.location < VARYING_SLOT_VAR0 || + !this->needs_lowering(var)) + continue; + + /* This lowering pass is only capable of packing floats and ints + * together when their interpolation mode is "flat". Therefore, to be + * safe, caller should ensure that integral varyings always use flat + * interpolation, even when this is not required by GLSL. + */ + assert(var->data.interpolation == INTERP_QUALIFIER_FLAT || + !var->type->contains_integer()); + + /* Clone the variable for program resource list before + * it gets modified and lost. + */ + if (!shader->packed_varyings) + shader->packed_varyings = new (shader) exec_list; + + shader->packed_varyings->push_tail(var->clone(shader, NULL)); + + /* Change the old varying into an ordinary global. 
*/ + assert(var->data.mode != ir_var_temporary); + var->data.mode = ir_var_auto; + + /* Create a reference to the old varying. */ + ir_dereference_variable *deref + = new(this->mem_ctx) ir_dereference_variable(var); + + /* Recursively pack or unpack it. */ + this->lower_rvalue(deref, var->data.location * 4 + var->data.location_frac, var, + var->name, this->gs_input_vertices != 0, 0); + } +} + +#define SWIZZLE_ZWZW MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W) + +/** + * Make an ir_assignment from \c rhs to \c lhs, performing appropriate + * bitcasts if necessary to match up types. + * + * This function is called when packing varyings. + */ +void +lower_packed_varyings_visitor::bitwise_assign_pack(ir_rvalue *lhs, + ir_rvalue *rhs) +{ + if (lhs->type->base_type != rhs->type->base_type) { + /* Since we only mix types in flat varyings, and we always store flat + * varyings as type ivec4, we need only produce conversions from (uint + * or float) to int. + */ + assert(lhs->type->base_type == GLSL_TYPE_INT); + switch (rhs->type->base_type) { + case GLSL_TYPE_UINT: + rhs = new(this->mem_ctx) + ir_expression(ir_unop_u2i, lhs->type, rhs); + break; + case GLSL_TYPE_FLOAT: + rhs = new(this->mem_ctx) + ir_expression(ir_unop_bitcast_f2i, lhs->type, rhs); + break; + case GLSL_TYPE_DOUBLE: + assert(rhs->type->vector_elements <= 2); + if (rhs->type->vector_elements == 2) { + ir_variable *t = new(mem_ctx) ir_variable(lhs->type, "pack", ir_var_temporary); + + assert(lhs->type->vector_elements == 4); + this->out_variables->push_tail(t); + this->out_instructions->push_tail( + assign(t, u2i(expr(ir_unop_unpack_double_2x32, swizzle_x(rhs->clone(mem_ctx, NULL)))), 0x3)); + this->out_instructions->push_tail( + assign(t, u2i(expr(ir_unop_unpack_double_2x32, swizzle_y(rhs))), 0xc)); + rhs = deref(t).val; + } else { + rhs = u2i(expr(ir_unop_unpack_double_2x32, rhs)); + } + break; + default: + assert(!"Unexpected type conversion while lowering varyings"); + break; + } + } + 
this->out_instructions->push_tail(new (this->mem_ctx) ir_assignment(lhs, rhs)); +} + + +/** + * Make an ir_assignment from \c rhs to \c lhs, performing appropriate + * bitcasts if necessary to match up types. + * + * This function is called when unpacking varyings. + */ +void +lower_packed_varyings_visitor::bitwise_assign_unpack(ir_rvalue *lhs, + ir_rvalue *rhs) +{ + if (lhs->type->base_type != rhs->type->base_type) { + /* Since we only mix types in flat varyings, and we always store flat + * varyings as type ivec4, we need only produce conversions from int to + * (uint or float). + */ + assert(rhs->type->base_type == GLSL_TYPE_INT); + switch (lhs->type->base_type) { + case GLSL_TYPE_UINT: + rhs = new(this->mem_ctx) + ir_expression(ir_unop_i2u, lhs->type, rhs); + break; + case GLSL_TYPE_FLOAT: + rhs = new(this->mem_ctx) + ir_expression(ir_unop_bitcast_i2f, lhs->type, rhs); + break; + case GLSL_TYPE_DOUBLE: + assert(lhs->type->vector_elements <= 2); + if (lhs->type->vector_elements == 2) { + ir_variable *t = new(mem_ctx) ir_variable(lhs->type, "unpack", ir_var_temporary); + assert(rhs->type->vector_elements == 4); + this->out_variables->push_tail(t); + this->out_instructions->push_tail( + assign(t, expr(ir_unop_pack_double_2x32, i2u(swizzle_xy(rhs->clone(mem_ctx, NULL)))), 0x1)); + this->out_instructions->push_tail( + assign(t, expr(ir_unop_pack_double_2x32, i2u(swizzle(rhs->clone(mem_ctx, NULL), SWIZZLE_ZWZW, 2))), 0x2)); + rhs = deref(t).val; + } else { + rhs = expr(ir_unop_pack_double_2x32, i2u(rhs)); + } + break; + default: + assert(!"Unexpected type conversion while lowering varyings"); + break; + } + } + this->out_instructions->push_tail(new(this->mem_ctx) ir_assignment(lhs, rhs)); +} + + +/** + * Recursively pack or unpack the given varying (or portion of a varying) by + * traversing all of its constituent vectors. 
+ * + * \param fine_location is the location where the first constituent vector + * should be packed--the word "fine" indicates that this location is expressed + * in multiples of a float, rather than multiples of a vec4 as is used + * elsewhere in Mesa. + * + * \param gs_input_toplevel should be set to true if we are lowering geometry + * shader inputs, and we are currently lowering the whole input variable + * (i.e. we are lowering the array whose index selects the vertex). + * + * \param vertex_index: if we are lowering geometry shader inputs, and the + * level of the array that we are currently lowering is *not* the top level, + * then this indicates which vertex we are currently lowering. Otherwise it + * is ignored. + * + * \return the location where the next constituent vector (after this one) + * should be packed. + */ +unsigned +lower_packed_varyings_visitor::lower_rvalue(ir_rvalue *rvalue, + unsigned fine_location, + ir_variable *unpacked_var, + const char *name, + bool gs_input_toplevel, + unsigned vertex_index) +{ + unsigned dmul = rvalue->type->is_double() ? 2 : 1; + /* When gs_input_toplevel is set, we should be looking at a geometry shader + * input array. + */ + assert(!gs_input_toplevel || rvalue->type->is_array()); + + if (rvalue->type->is_record()) { + for (unsigned i = 0; i < rvalue->type->length; i++) { + if (i != 0) + rvalue = rvalue->clone(this->mem_ctx, NULL); + const char *field_name = rvalue->type->fields.structure[i].name; + ir_dereference_record *dereference_record = new(this->mem_ctx) + ir_dereference_record(rvalue, field_name); + char *deref_name + = ralloc_asprintf(this->mem_ctx, "%s.%s", name, field_name); + fine_location = this->lower_rvalue(dereference_record, fine_location, + unpacked_var, deref_name, false, + vertex_index); + } + return fine_location; + } else if (rvalue->type->is_array()) { + /* Arrays are packed/unpacked by considering each array element in + * sequence. 
+ */ + return this->lower_arraylike(rvalue, rvalue->type->array_size(), + fine_location, unpacked_var, name, + gs_input_toplevel, vertex_index); + } else if (rvalue->type->is_matrix()) { + /* Matrices are packed/unpacked by considering each column vector in + * sequence. + */ + return this->lower_arraylike(rvalue, rvalue->type->matrix_columns, + fine_location, unpacked_var, name, + false, vertex_index); + } else if (rvalue->type->vector_elements * dmul + + fine_location % 4 > 4) { + /* This vector is going to be "double parked" across two varying slots, + * so handle it as two separate assignments. For doubles, a dvec3/dvec4 + * can end up being spread over 3 slots. However the second splitting + * will happen later, here we just always want to split into 2. + */ + unsigned left_components, right_components; + unsigned left_swizzle_values[4] = { 0, 0, 0, 0 }; + unsigned right_swizzle_values[4] = { 0, 0, 0, 0 }; + char left_swizzle_name[4] = { 0, 0, 0, 0 }; + char right_swizzle_name[4] = { 0, 0, 0, 0 }; + + left_components = 4 - fine_location % 4; + if (rvalue->type->is_double()) { + /* We might actually end up with 0 left components! 
*/ + left_components /= 2; + } + right_components = rvalue->type->vector_elements - left_components; + + for (unsigned i = 0; i < left_components; i++) { + left_swizzle_values[i] = i; + left_swizzle_name[i] = "xyzw"[i]; + } + for (unsigned i = 0; i < right_components; i++) { + right_swizzle_values[i] = i + left_components; + right_swizzle_name[i] = "xyzw"[i + left_components]; + } + ir_swizzle *left_swizzle = new(this->mem_ctx) + ir_swizzle(rvalue, left_swizzle_values, left_components); + ir_swizzle *right_swizzle = new(this->mem_ctx) + ir_swizzle(rvalue->clone(this->mem_ctx, NULL), right_swizzle_values, + right_components); + char *left_name + = ralloc_asprintf(this->mem_ctx, "%s.%s", name, left_swizzle_name); + char *right_name + = ralloc_asprintf(this->mem_ctx, "%s.%s", name, right_swizzle_name); + if (left_components) + fine_location = this->lower_rvalue(left_swizzle, fine_location, + unpacked_var, left_name, false, + vertex_index); + else + /* Top up the fine location to the next slot */ + fine_location++; + return this->lower_rvalue(right_swizzle, fine_location, unpacked_var, + right_name, false, vertex_index); + } else { + /* No special handling is necessary; pack the rvalue into the + * varying. 
+ */ + unsigned swizzle_values[4] = { 0, 0, 0, 0 }; + unsigned components = rvalue->type->vector_elements * dmul; + unsigned location = fine_location / 4; + unsigned location_frac = fine_location % 4; + for (unsigned i = 0; i < components; ++i) + swizzle_values[i] = i + location_frac; + ir_dereference *packed_deref = + this->get_packed_varying_deref(location, unpacked_var, name, + vertex_index); + ir_swizzle *swizzle = new(this->mem_ctx) + ir_swizzle(packed_deref, swizzle_values, components); + if (this->mode == ir_var_shader_out) { + this->bitwise_assign_pack(swizzle, rvalue); + } else { + this->bitwise_assign_unpack(rvalue, swizzle); + } + return fine_location + components; + } +} + +/** + * Recursively pack or unpack a varying for which we need to iterate over its + * constituent elements, accessing each one using an ir_dereference_array. + * This takes care of both arrays and matrices, since ir_dereference_array + * treats a matrix like an array of its column vectors. + * + * \param gs_input_toplevel should be set to true if we are lowering geometry + * shader inputs, and we are currently lowering the whole input variable + * (i.e. we are lowering the array whose index selects the vertex). + * + * \param vertex_index: if we are lowering geometry shader inputs, and the + * level of the array that we are currently lowering is *not* the top level, + * then this indicates which vertex we are currently lowering. Otherwise it + * is ignored. 
+ */ +unsigned +lower_packed_varyings_visitor::lower_arraylike(ir_rvalue *rvalue, + unsigned array_size, + unsigned fine_location, + ir_variable *unpacked_var, + const char *name, + bool gs_input_toplevel, + unsigned vertex_index) +{ + for (unsigned i = 0; i < array_size; i++) { + if (i != 0) + rvalue = rvalue->clone(this->mem_ctx, NULL); + ir_constant *constant = new(this->mem_ctx) ir_constant(i); + ir_dereference_array *dereference_array = new(this->mem_ctx) + ir_dereference_array(rvalue, constant); + if (gs_input_toplevel) { + /* Geometry shader inputs are a special case. Instead of storing + * each element of the array at a different location, all elements + * are at the same location, but with a different vertex index. + */ + (void) this->lower_rvalue(dereference_array, fine_location, + unpacked_var, name, false, i); + } else { + char *subscripted_name + = ralloc_asprintf(this->mem_ctx, "%s[%d]", name, i); + fine_location = + this->lower_rvalue(dereference_array, fine_location, + unpacked_var, subscripted_name, + false, vertex_index); + } + } + return fine_location; +} + +/** + * Retrieve the packed varying corresponding to the given varying location. + * If no packed varying has been created for the given varying location yet, + * create it and add it to the shader before returning it. + * + * The newly created varying inherits its interpolation parameters from \c + * unpacked_var. Its base type is ivec4 if we are lowering a flat varying, + * vec4 otherwise. + * + * \param vertex_index: if we are lowering geometry shader inputs, then this + * indicates which vertex we are currently lowering. Otherwise it is ignored. 
+ */ +ir_dereference * +lower_packed_varyings_visitor::get_packed_varying_deref( + unsigned location, ir_variable *unpacked_var, const char *name, + unsigned vertex_index) +{ + unsigned slot = location - VARYING_SLOT_VAR0; + assert(slot < locations_used); + if (this->packed_varyings[slot] == NULL) { + char *packed_name = ralloc_asprintf(this->mem_ctx, "packed:%s", name); + const glsl_type *packed_type; + if (unpacked_var->data.interpolation == INTERP_QUALIFIER_FLAT) + packed_type = glsl_type::ivec4_type; + else + packed_type = glsl_type::vec4_type; + if (this->gs_input_vertices != 0) { + packed_type = + glsl_type::get_array_instance(packed_type, + this->gs_input_vertices); + } + ir_variable *packed_var = new(this->mem_ctx) + ir_variable(packed_type, packed_name, this->mode); + if (this->gs_input_vertices != 0) { + /* Prevent update_array_sizes() from messing with the size of the + * array. + */ + packed_var->data.max_array_access = this->gs_input_vertices - 1; + } + packed_var->data.centroid = unpacked_var->data.centroid; + packed_var->data.sample = unpacked_var->data.sample; + packed_var->data.patch = unpacked_var->data.patch; + packed_var->data.interpolation = unpacked_var->data.interpolation; + packed_var->data.location = location; + packed_var->data.precision = unpacked_var->data.precision; + packed_var->data.always_active_io = unpacked_var->data.always_active_io; + unpacked_var->insert_before(packed_var); + this->packed_varyings[slot] = packed_var; + } else { + /* For geometry shader inputs, only update the packed variable name the + * first time we visit each component. 
+ */ + if (this->gs_input_vertices == 0 || vertex_index == 0) { + ralloc_asprintf_append((char **) &this->packed_varyings[slot]->name, + ",%s", name); + } + } + + ir_dereference *deref = new(this->mem_ctx) + ir_dereference_variable(this->packed_varyings[slot]); + if (this->gs_input_vertices != 0) { + /* When lowering GS inputs, the packed variable is an array, so we need + * to dereference it using vertex_index. + */ + ir_constant *constant = new(this->mem_ctx) ir_constant(vertex_index); + deref = new(this->mem_ctx) ir_dereference_array(deref, constant); + } + return deref; +} + +bool +lower_packed_varyings_visitor::needs_lowering(ir_variable *var) +{ + /* Things composed of vec4's and varyings with explicitly assigned + * locations don't need lowering. Everything else does. + */ + if (var->data.explicit_location) + return false; + + const glsl_type *type = var->type->without_array(); + if (type->vector_elements == 4 && !type->is_double()) + return false; + return true; +} + + +/** + * Visitor that splices varying packing code before every use of EmitVertex() + * in a geometry shader. + */ +class lower_packed_varyings_gs_splicer : public ir_hierarchical_visitor +{ +public: + explicit lower_packed_varyings_gs_splicer(void *mem_ctx, + const exec_list *instructions); + + virtual ir_visitor_status visit_leave(ir_emit_vertex *ev); + +private: + /** + * Memory context used to allocate new instructions for the shader. + */ + void * const mem_ctx; + + /** + * Instructions that should be spliced into place before each EmitVertex() + * call. 
+ */ + const exec_list *instructions; +}; + + +lower_packed_varyings_gs_splicer::lower_packed_varyings_gs_splicer( + void *mem_ctx, const exec_list *instructions) + : mem_ctx(mem_ctx), instructions(instructions) +{ +} + + +ir_visitor_status +lower_packed_varyings_gs_splicer::visit_leave(ir_emit_vertex *ev) +{ + foreach_in_list(ir_instruction, ir, this->instructions) { + ev->insert_before(ir->clone(this->mem_ctx, NULL)); + } + return visit_continue; +} + + +void +lower_packed_varyings(void *mem_ctx, unsigned locations_used, + ir_variable_mode mode, unsigned gs_input_vertices, + gl_shader *shader) +{ + exec_list *instructions = shader->ir; + ir_function *main_func = shader->symbols->get_function("main"); + exec_list void_parameters; + ir_function_signature *main_func_sig + = main_func->matching_signature(NULL, &void_parameters, false); + exec_list new_instructions, new_variables; + lower_packed_varyings_visitor visitor(mem_ctx, locations_used, mode, + gs_input_vertices, + &new_instructions, + &new_variables); + visitor.run(shader); + if (mode == ir_var_shader_out) { + if (shader->Stage == MESA_SHADER_GEOMETRY) { + /* For geometry shaders, outputs need to be lowered before each call + * to EmitVertex() + */ + lower_packed_varyings_gs_splicer splicer(mem_ctx, &new_instructions); + + /* Add all the variables in first. 
*/ + main_func_sig->body.head->insert_before(&new_variables); + + /* Now update all the EmitVertex instances */ + splicer.run(instructions); + } else { + /* For other shader types, outputs need to be lowered at the end of + * main() + */ + main_func_sig->body.append_list(&new_variables); + main_func_sig->body.append_list(&new_instructions); + } + } else { + /* Shader inputs need to be lowered at the beginning of main() */ + main_func_sig->body.head->insert_before(&new_instructions); + main_func_sig->body.head->insert_before(&new_variables); + } +} diff --git a/src/compiler/glsl/lower_packing_builtins.cpp b/src/compiler/glsl/lower_packing_builtins.cpp new file mode 100644 index 0000000..7f18238 --- /dev/null +++ b/src/compiler/glsl/lower_packing_builtins.cpp @@ -0,0 +1,1412 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "ir.h" +#include "ir_builder.h" +#include "ir_optimization.h" +#include "ir_rvalue_visitor.h" + +namespace { + +using namespace ir_builder; + +/** + * A visitor that lowers built-in floating-point pack/unpack expressions + * such packSnorm2x16. + */ +class lower_packing_builtins_visitor : public ir_rvalue_visitor { +public: + /** + * \param op_mask is a bitmask of `enum lower_packing_builtins_op` + */ + explicit lower_packing_builtins_visitor(int op_mask) + : op_mask(op_mask), + progress(false) + { + /* Mutually exclusive options. */ + assert(!((op_mask & LOWER_PACK_HALF_2x16) && + (op_mask & LOWER_PACK_HALF_2x16_TO_SPLIT))); + + assert(!((op_mask & LOWER_UNPACK_HALF_2x16) && + (op_mask & LOWER_UNPACK_HALF_2x16_TO_SPLIT))); + + factory.instructions = &factory_instructions; + } + + virtual ~lower_packing_builtins_visitor() + { + assert(factory_instructions.is_empty()); + } + + bool get_progress() { return progress; } + + void handle_rvalue(ir_rvalue **rvalue) + { + if (!*rvalue) + return; + + ir_expression *expr = (*rvalue)->as_expression(); + if (!expr) + return; + + enum lower_packing_builtins_op lowering_op = + choose_lowering_op(expr->operation); + + if (lowering_op == LOWER_PACK_UNPACK_NONE) + return; + + setup_factory(ralloc_parent(expr)); + + ir_rvalue *op0 = expr->operands[0]; + ralloc_steal(factory.mem_ctx, op0); + + switch (lowering_op) { + case LOWER_PACK_SNORM_2x16: + *rvalue = lower_pack_snorm_2x16(op0); + break; + case LOWER_PACK_SNORM_4x8: + *rvalue = lower_pack_snorm_4x8(op0); + break; + case LOWER_PACK_UNORM_2x16: + *rvalue = lower_pack_unorm_2x16(op0); + break; + case LOWER_PACK_UNORM_4x8: + *rvalue = lower_pack_unorm_4x8(op0); + break; + case LOWER_PACK_HALF_2x16: + *rvalue = lower_pack_half_2x16(op0); + break; + case LOWER_PACK_HALF_2x16_TO_SPLIT: + *rvalue = split_pack_half_2x16(op0); + break; + case LOWER_UNPACK_SNORM_2x16: + *rvalue = lower_unpack_snorm_2x16(op0); + break; + case LOWER_UNPACK_SNORM_4x8: + *rvalue = 
lower_unpack_snorm_4x8(op0); + break; + case LOWER_UNPACK_UNORM_2x16: + *rvalue = lower_unpack_unorm_2x16(op0); + break; + case LOWER_UNPACK_UNORM_4x8: + *rvalue = lower_unpack_unorm_4x8(op0); + break; + case LOWER_UNPACK_HALF_2x16: + *rvalue = lower_unpack_half_2x16(op0); + break; + case LOWER_UNPACK_HALF_2x16_TO_SPLIT: + *rvalue = split_unpack_half_2x16(op0); + break; + case LOWER_PACK_UNPACK_NONE: + case LOWER_PACK_USE_BFI: + case LOWER_PACK_USE_BFE: + assert(!"not reached"); + break; + } + + teardown_factory(); + progress = true; + } + +private: + const int op_mask; + bool progress; + ir_factory factory; + exec_list factory_instructions; + + /** + * Determine the needed lowering operation by filtering \a expr_op + * through \ref op_mask. + */ + enum lower_packing_builtins_op + choose_lowering_op(ir_expression_operation expr_op) + { + /* C++ regards int and enum as fundamentally different types. + * So, we can't simply return from each case; we must cast the return + * value. + */ + int result; + + switch (expr_op) { + case ir_unop_pack_snorm_2x16: + result = op_mask & LOWER_PACK_SNORM_2x16; + break; + case ir_unop_pack_snorm_4x8: + result = op_mask & LOWER_PACK_SNORM_4x8; + break; + case ir_unop_pack_unorm_2x16: + result = op_mask & LOWER_PACK_UNORM_2x16; + break; + case ir_unop_pack_unorm_4x8: + result = op_mask & LOWER_PACK_UNORM_4x8; + break; + case ir_unop_pack_half_2x16: + result = op_mask & (LOWER_PACK_HALF_2x16 | LOWER_PACK_HALF_2x16_TO_SPLIT); + break; + case ir_unop_unpack_snorm_2x16: + result = op_mask & LOWER_UNPACK_SNORM_2x16; + break; + case ir_unop_unpack_snorm_4x8: + result = op_mask & LOWER_UNPACK_SNORM_4x8; + break; + case ir_unop_unpack_unorm_2x16: + result = op_mask & LOWER_UNPACK_UNORM_2x16; + break; + case ir_unop_unpack_unorm_4x8: + result = op_mask & LOWER_UNPACK_UNORM_4x8; + break; + case ir_unop_unpack_half_2x16: + result = op_mask & (LOWER_UNPACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16_TO_SPLIT); + break; + default: + result = 
LOWER_PACK_UNPACK_NONE; + break; + } + + return static_cast<enum lower_packing_builtins_op>(result); + } + + void + setup_factory(void *mem_ctx) + { + assert(factory.mem_ctx == NULL); + assert(factory.instructions->is_empty()); + + factory.mem_ctx = mem_ctx; + } + + void + teardown_factory() + { + base_ir->insert_before(factory.instructions); + assert(factory.instructions->is_empty()); + factory.mem_ctx = NULL; + } + + template <typename T> + ir_constant* + constant(T x) + { + return factory.constant(x); + } + + /** + * \brief Pack two uint16's into a single uint32. + * + * Interpret the given uvec2 as a uint16 pair. Pack the pair into a uint32 + * where the least significant bits specify the first element of the pair. + * Return the uint32. + */ + ir_rvalue* + pack_uvec2_to_uint(ir_rvalue *uvec2_rval) + { + assert(uvec2_rval->type == glsl_type::uvec2_type); + + /* uvec2 u = UVEC2_RVAL; */ + ir_variable *u = factory.make_temp(glsl_type::uvec2_type, + "tmp_pack_uvec2_to_uint"); + factory.emit(assign(u, uvec2_rval)); + + if (op_mask & LOWER_PACK_USE_BFI) { + return bitfield_insert(bit_and(swizzle_x(u), constant(0xffffu)), + swizzle_y(u), + constant(16u), + constant(16u)); + } + + /* return (u.y << 16) | (u.x & 0xffff); */ + return bit_or(lshift(swizzle_y(u), constant(16u)), + bit_and(swizzle_x(u), constant(0xffffu))); + } + + /** + * \brief Pack four uint8's into a single uint32. + * + * Interpret the given uvec4 as a uint8 4-tuple. Pack the 4-tuple into a + * uint32 where the least significant bits specify the first element of the + * 4-tuple. Return the uint32. 
+ */ + ir_rvalue* + pack_uvec4_to_uint(ir_rvalue *uvec4_rval) + { + assert(uvec4_rval->type == glsl_type::uvec4_type); + + ir_variable *u = factory.make_temp(glsl_type::uvec4_type, + "tmp_pack_uvec4_to_uint"); + + if (op_mask & LOWER_PACK_USE_BFI) { + /* uvec4 u = UVEC4_RVAL; */ + factory.emit(assign(u, uvec4_rval)); + + return bitfield_insert(bitfield_insert( + bitfield_insert( + bit_and(swizzle_x(u), constant(0xffu)), + swizzle_y(u), constant(8u), constant(8u)), + swizzle_z(u), constant(16u), constant(8u)), + swizzle_w(u), constant(24u), constant(8u)); + } + + /* uvec4 u = UVEC4_RVAL & 0xff */ + factory.emit(assign(u, bit_and(uvec4_rval, constant(0xffu)))); + + /* return (u.w << 24) | (u.z << 16) | (u.y << 8) | u.x; */ + return bit_or(bit_or(lshift(swizzle_w(u), constant(24u)), + lshift(swizzle_z(u), constant(16u))), + bit_or(lshift(swizzle_y(u), constant(8u)), + swizzle_x(u))); + } + + /** + * \brief Unpack a uint32 into two uint16's. + * + * Interpret the given uint32 as a uint16 pair where the uint32's least + * significant bits specify the pair's first element. Return the uint16 + * pair as a uvec2. + */ + ir_rvalue* + unpack_uint_to_uvec2(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + /* uint u = UINT_RVAL; */ + ir_variable *u = factory.make_temp(glsl_type::uint_type, + "tmp_unpack_uint_to_uvec2_u"); + factory.emit(assign(u, uint_rval)); + + /* uvec2 u2; */ + ir_variable *u2 = factory.make_temp(glsl_type::uvec2_type, + "tmp_unpack_uint_to_uvec2_u2"); + + /* u2.x = u & 0xffffu; */ + factory.emit(assign(u2, bit_and(u, constant(0xffffu)), WRITEMASK_X)); + + /* u2.y = u >> 16u; */ + factory.emit(assign(u2, rshift(u, constant(16u)), WRITEMASK_Y)); + + return deref(u2).val; + } + + /** + * \brief Unpack a uint32 into two int16's. + * + * Specifically each 16-bit value is sign-extended to the full width of an + * int32 on return. 
+ */ + ir_rvalue * + unpack_uint_to_ivec2(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + if (!(op_mask & LOWER_PACK_USE_BFE)) { + return rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)), + constant(16u)), + constant(16u)); + } + + ir_variable *i = factory.make_temp(glsl_type::int_type, + "tmp_unpack_uint_to_ivec2_i"); + factory.emit(assign(i, u2i(uint_rval))); + + /* ivec2 i2; */ + ir_variable *i2 = factory.make_temp(glsl_type::ivec2_type, + "tmp_unpack_uint_to_ivec2_i2"); + + factory.emit(assign(i2, bitfield_extract(i, constant(0), constant(16)), + WRITEMASK_X)); + factory.emit(assign(i2, bitfield_extract(i, constant(16), constant(16)), + WRITEMASK_Y)); + + return deref(i2).val; + } + + /** + * \brief Unpack a uint32 into four uint8's. + * + * Interpret the given uint32 as a uint8 4-tuple where the uint32's least + * significant bits specify the 4-tuple's first element. Return the uint8 + * 4-tuple as a uvec4. + */ + ir_rvalue* + unpack_uint_to_uvec4(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + /* uint u = UINT_RVAL; */ + ir_variable *u = factory.make_temp(glsl_type::uint_type, + "tmp_unpack_uint_to_uvec4_u"); + factory.emit(assign(u, uint_rval)); + + /* uvec4 u4; */ + ir_variable *u4 = factory.make_temp(glsl_type::uvec4_type, + "tmp_unpack_uint_to_uvec4_u4"); + + /* u4.x = u & 0xffu; */ + factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X)); + + if (op_mask & LOWER_PACK_USE_BFE) { + /* u4.y = bitfield_extract(u, 8, 8); */ + factory.emit(assign(u4, bitfield_extract(u, constant(8u), constant(8u)), + WRITEMASK_Y)); + + /* u4.z = bitfield_extract(u, 16, 8); */ + factory.emit(assign(u4, bitfield_extract(u, constant(16u), constant(8u)), + WRITEMASK_Z)); + } else { + /* u4.y = (u >> 8u) & 0xffu; */ + factory.emit(assign(u4, bit_and(rshift(u, constant(8u)), + constant(0xffu)), WRITEMASK_Y)); + + /* u4.z = (u >> 16u) & 0xffu; */ + factory.emit(assign(u4, bit_and(rshift(u, constant(16u)), 
+ constant(0xffu)), WRITEMASK_Z)); + } + + /* u4.w = (u >> 24u) */ + factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W)); + + return deref(u4).val; + } + + /** + * \brief Unpack a uint32 into four int8's. + * + * Specifically each 8-bit value is sign-extended to the full width of an + * int32 on return. + */ + ir_rvalue * + unpack_uint_to_ivec4(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + if (!(op_mask & LOWER_PACK_USE_BFE)) { + return rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)), + constant(24u)), + constant(24u)); + } + + ir_variable *i = factory.make_temp(glsl_type::int_type, + "tmp_unpack_uint_to_ivec4_i"); + factory.emit(assign(i, u2i(uint_rval))); + + /* ivec4 i4; */ + ir_variable *i4 = factory.make_temp(glsl_type::ivec4_type, + "tmp_unpack_uint_to_ivec4_i4"); + + factory.emit(assign(i4, bitfield_extract(i, constant(0), constant(8)), + WRITEMASK_X)); + factory.emit(assign(i4, bitfield_extract(i, constant(8), constant(8)), + WRITEMASK_Y)); + factory.emit(assign(i4, bitfield_extract(i, constant(16), constant(8)), + WRITEMASK_Z)); + factory.emit(assign(i4, bitfield_extract(i, constant(24), constant(8)), + WRITEMASK_W)); + + return deref(i4).val; + } + + /** + * \brief Lower a packSnorm2x16 expression. + * + * \param vec2_rval is packSnorm2x16's input + * \return packSnorm2x16's output as a uint rvalue + */ + ir_rvalue* + lower_pack_snorm_2x16(ir_rvalue *vec2_rval) + { + /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: + * + * highp uint packSnorm2x16(vec2 v) + * -------------------------------- + * First, converts each component of the normalized floating-point value + * v into 16-bit integer values. Then, the results are packed into the + * returned 32-bit unsigned integer. 
+ * + * The conversion for component c of v to fixed point is done as + * follows: + * + * packSnorm2x16: round(clamp(c, -1, +1) * 32767.0) + * + * The first component of the vector will be written to the least + * significant bits of the output; the last component will be written to + * the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return pack_uvec2_to_uint( + * uvec2(ivec2( + * round(clamp(VEC2_RVALUE, -1.0f, 1.0f) * 32767.0f)))); + * + * It is necessary to first convert the vec2 to ivec2 rather than directly + * converting vec2 to uvec2 because the latter conversion is undefined. + * From page 56 (62 of pdf) of the GLSL ES 3.00 spec: "It is undefined to + * convert a negative floating point value to an uint". + */ + assert(vec2_rval->type == glsl_type::vec2_type); + + ir_rvalue *result = pack_uvec2_to_uint( + i2u(f2i(round_even(mul(clamp(vec2_rval, + constant(-1.0f), + constant(1.0f)), + constant(32767.0f)))))); + + assert(result->type == glsl_type::uint_type); + return result; + } + + /** + * \brief Lower a packSnorm4x8 expression. + * + * \param vec4_rval is packSnorm4x8's input + * \return packSnorm4x8's output as a uint rvalue + */ + ir_rvalue* + lower_pack_snorm_4x8(ir_rvalue *vec4_rval) + { + /* From page 137 (143 of pdf) of the GLSL 4.30 spec: + * + * highp uint packSnorm4x8(vec4 v) + * ------------------------------- + * First, converts each component of the normalized floating-point value + * v into 8-bit integer values. Then, the results are packed into the + * returned 32-bit unsigned integer. + * + * The conversion for component c of v to fixed point is done as + * follows: + * + * packSnorm4x8: round(clamp(c, -1, +1) * 127.0) + * + * The first component of the vector will be written to the least + * significant bits of the output; the last component will be written to + * the most significant bits. 
+ * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return pack_uvec4_to_uint( + * uvec4(ivec4( + * round(clamp(VEC4_RVALUE, -1.0f, 1.0f) * 127.0f)))); + * + * It is necessary to first convert the vec4 to ivec4 rather than directly + * converting vec4 to uvec4 because the latter conversion is undefined. + * From page 87 (93 of pdf) of the GLSL 4.30 spec: "It is undefined to + * convert a negative floating point value to an uint". + */ + assert(vec4_rval->type == glsl_type::vec4_type); + + ir_rvalue *result = pack_uvec4_to_uint( + i2u(f2i(round_even(mul(clamp(vec4_rval, + constant(-1.0f), + constant(1.0f)), + constant(127.0f)))))); + + assert(result->type == glsl_type::uint_type); + return result; + } + + /** + * \brief Lower an unpackSnorm2x16 expression. + * + * \param uint_rval is unpackSnorm2x16's input + * \return unpackSnorm2x16's output as a vec2 rvalue + */ + ir_rvalue* + lower_unpack_snorm_2x16(ir_rvalue *uint_rval) + { + /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: + * + * highp vec2 unpackSnorm2x16 (highp uint p) + * ----------------------------------------- + * First, unpacks a single 32-bit unsigned integer p into a pair of + * 16-bit unsigned integers. Then, each component is converted to + * a normalized floating-point value to generate the returned + * two-component vector. + * + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackSnorm2x16: clamp(f / 32767.0, -1,+1) + * + * The first component of the returned vector will be extracted from the + * least significant bits of the input; the last component will be + * extracted from the most significant bits. 
+ * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return clamp( + * ((ivec2(unpack_uint_to_uvec2(UINT_RVALUE)) << 16) >> 16) / 32767.0f, + * -1.0f, 1.0f); + * + * The above IR may appear unnecessarily complex, but the intermediate + * conversion to ivec2 and the bit shifts are necessary to correctly unpack + * negative floats. + * + * To see why, consider packing and then unpacking vec2(-1.0, 0.0). + * packSnorm2x16 encodes -1.0 as the int16 0xffff. During unpacking, we + * place that int16 into an int32, which results in the *positive* integer + * 0x0000ffff. The int16's sign bit becomes, in the int32, the rather + * unimportant bit 16. We must now extend the int16's sign bit into bits + * 17-32, which is accomplished by left-shifting then right-shifting. + */ + + assert(uint_rval->type == glsl_type::uint_type); + + ir_rvalue *result = + clamp(div(i2f(unpack_uint_to_ivec2(uint_rval)), + constant(32767.0f)), + constant(-1.0f), + constant(1.0f)); + + assert(result->type == glsl_type::vec2_type); + return result; + } + + /** + * \brief Lower an unpackSnorm4x8 expression. + * + * \param uint_rval is unpackSnorm4x8's input + * \return unpackSnorm4x8's output as a vec4 rvalue + */ + ir_rvalue* + lower_unpack_snorm_4x8(ir_rvalue *uint_rval) + { + /* From page 137 (143 of pdf) of the GLSL 4.30 spec: + * + * highp vec4 unpackSnorm4x8 (highp uint p) + * ---------------------------------------- + * First, unpacks a single 32-bit unsigned integer p into four + * 8-bit unsigned integers. Then, each component is converted to + * a normalized floating-point value to generate the returned + * four-component vector. 
+ * + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackSnorm4x8: clamp(f / 127.0, -1, +1) + * + * The first component of the returned vector will be extracted from the + * least significant bits of the input; the last component will be + * extracted from the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return clamp( + * ((ivec4(unpack_uint_to_uvec4(UINT_RVALUE)) << 24) >> 24) / 127.0f, + * -1.0f, 1.0f); + * + * The above IR may appear unnecessarily complex, but the intermediate + * conversion to ivec4 and the bit shifts are necessary to correctly unpack + * negative floats. + * + * To see why, consider packing and then unpacking vec4(-1.0, 0.0, 0.0, + * 0.0). packSnorm4x8 encodes -1.0 as the int8 0xff. During unpacking, we + * place that int8 into an int32, which results in the *positive* integer + * 0x000000ff. The int8's sign bit becomes, in the int32, the rather + * unimportant bit 8. We must now extend the int8's sign bit into bits + * 9-32, which is accomplished by left-shifting then right-shifting. + */ + + assert(uint_rval->type == glsl_type::uint_type); + + ir_rvalue *result = + clamp(div(i2f(unpack_uint_to_ivec4(uint_rval)), + constant(127.0f)), + constant(-1.0f), + constant(1.0f)); + + assert(result->type == glsl_type::vec4_type); + return result; + } + + /** + * \brief Lower a packUnorm2x16 expression. + * + * \param vec2_rval is packUnorm2x16's input + * \return packUnorm2x16's output as a uint rvalue + */ + ir_rvalue* + lower_pack_unorm_2x16(ir_rvalue *vec2_rval) + { + /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: + * + * highp uint packUnorm2x16 (vec2 v) + * --------------------------------- + * First, converts each component of the normalized floating-point value + * v into 16-bit integer values. Then, the results are packed into the + * returned 32-bit unsigned integer. 
+ * + * The conversion for component c of v to fixed point is done as + * follows: + * + * packUnorm2x16: round(clamp(c, 0, +1) * 65535.0) + * + * The first component of the vector will be written to the least + * significant bits of the output; the last component will be written to + * the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return pack_uvec2_to_uint(uvec2( + * round(clamp(VEC2_RVALUE, 0.0f, 1.0f) * 65535.0f))); + * + * Here it is safe to directly convert the vec2 to uvec2 because the vec2 + * has been clamped to a non-negative range. + */ + + assert(vec2_rval->type == glsl_type::vec2_type); + + ir_rvalue *result = pack_uvec2_to_uint( + f2u(round_even(mul(saturate(vec2_rval), constant(65535.0f))))); + + assert(result->type == glsl_type::uint_type); + return result; + } + + /** + * \brief Lower a packUnorm4x8 expression. + * + * \param vec4_rval is packUnorm4x8's input + * \return packUnorm4x8's output as a uint rvalue + */ + ir_rvalue* + lower_pack_unorm_4x8(ir_rvalue *vec4_rval) + { + /* From page 137 (143 of pdf) of the GLSL 4.30 spec: + * + * highp uint packUnorm4x8 (vec4 v) + * -------------------------------- + * First, converts each component of the normalized floating-point value + * v into 8-bit integer values. Then, the results are packed into the + * returned 32-bit unsigned integer. + * + * The conversion for component c of v to fixed point is done as + * follows: + * + * packUnorm4x8: round(clamp(c, 0, +1) * 255.0) + * + * The first component of the vector will be written to the least + * significant bits of the output; the last component will be written to + * the most significant bits. 
+ * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return pack_uvec4_to_uint(uvec4( + * round(clamp(VEC4_RVALUE, 0.0f, 1.0f) * 255.0f))); + * + * Here it is safe to directly convert the vec4 to uvec4 because the vec4 + * has been clamped to a non-negative range. + */ + + assert(vec4_rval->type == glsl_type::vec4_type); + + ir_rvalue *result = pack_uvec4_to_uint( + f2u(round_even(mul(saturate(vec4_rval), constant(255.0f))))); + + assert(result->type == glsl_type::uint_type); + return result; + } + + /** + * \brief Lower an unpackUnorm2x16 expression. + * + * \param uint_rval is unpackUnorm2x16's input + * \return unpackUnorm2x16's output as a vec2 rvalue + */ + ir_rvalue* + lower_unpack_unorm_2x16(ir_rvalue *uint_rval) + { + /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec: + * + * highp vec2 unpackUnorm2x16 (highp uint p) + * ----------------------------------------- + * First, unpacks a single 32-bit unsigned integer p into a pair of + * 16-bit unsigned integers. Then, each component is converted to + * a normalized floating-point value to generate the returned + * two-component vector. + * + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackUnorm2x16: f / 65535.0 + * + * The first component of the returned vector will be extracted from the + * least significant bits of the input; the last component will be + * extracted from the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return vec2(unpack_uint_to_uvec2(UINT_RVALUE)) / 65535.0; + */ + + assert(uint_rval->type == glsl_type::uint_type); + + ir_rvalue *result = div(u2f(unpack_uint_to_uvec2(uint_rval)), + constant(65535.0f)); + + assert(result->type == glsl_type::vec2_type); + return result; + } + + /** + * \brief Lower an unpackUnorm4x8 expression. 
+ * + * \param uint_rval is unpackUnorm4x8's input + * \return unpackUnorm4x8's output as a vec4 rvalue + */ + ir_rvalue* + lower_unpack_unorm_4x8(ir_rvalue *uint_rval) + { + /* From page 137 (143 of pdf) of the GLSL 4.30 spec: + * + * highp vec4 unpackUnorm4x8 (highp uint p) + * ---------------------------------------- + * First, unpacks a single 32-bit unsigned integer p into four + * 8-bit unsigned integers. Then, each component is converted to + * a normalized floating-point value to generate the returned + * four-component vector. + * + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackUnorm4x8: f / 255.0 + * + * The first component of the returned vector will be extracted from the + * least significant bits of the input; the last component will be + * extracted from the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return vec4(unpack_uint_to_uvec4(UINT_RVALUE)) / 255.0; + */ + + assert(uint_rval->type == glsl_type::uint_type); + + ir_rvalue *result = div(u2f(unpack_uint_to_uvec4(uint_rval)), + constant(255.0f)); + + assert(result->type == glsl_type::vec4_type); + return result; + } + + /** + * \brief Lower the component-wise calculation of packHalf2x16. 
+ * + * \param f_rval is one component of packHalf2x16's input + * \param e_rval is the unshifted exponent bits of f_rval + * \param m_rval is the unshifted mantissa bits of f_rval + * + * \return a uint rvalue that encodes a float16 in its lower 16 bits + */ + ir_rvalue* + pack_half_1x16_nosign(ir_rvalue *f_rval, + ir_rvalue *e_rval, + ir_rvalue *m_rval) + { + assert(e_rval->type == glsl_type::uint_type); + assert(m_rval->type == glsl_type::uint_type); + + /* uint u16; */ + ir_variable *u16 = factory.make_temp(glsl_type::uint_type, + "tmp_pack_half_1x16_u16"); + + /* float f = FLOAT_RVAL; */ + ir_variable *f = factory.make_temp(glsl_type::float_type, + "tmp_pack_half_1x16_f"); + factory.emit(assign(f, f_rval)); + + /* uint e = E_RVAL; */ + ir_variable *e = factory.make_temp(glsl_type::uint_type, + "tmp_pack_half_1x16_e"); + factory.emit(assign(e, e_rval)); + + /* uint m = M_RVAL; */ + ir_variable *m = factory.make_temp(glsl_type::uint_type, + "tmp_pack_half_1x16_m"); + factory.emit(assign(m, m_rval)); + + /* Preliminaries + * ------------- + * + * For a float16, the bit layout is: + * + * sign: 15 + * exponent: 10:14 + * mantissa: 0:9 + * + * Let f16 be a float16 value. The sign, exponent, and mantissa + * determine its value thus: + * + * if e16 = 0 and m16 = 0, then zero: (-1)^s16 * 0 (1) + * if e16 = 0 and m16 != 0, then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10) (2) + * if 0 < e16 < 31, then normal: (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3) + * if e16 = 31 and m16 = 0, then infinite: (-1)^s16 * inf (4) + * if e16 = 31 and m16 != 0, then NaN (5) + * + * where 0 <= m16 < 2^10. + * + * For a float32, the bit layout is: + * + * sign: 31 + * exponent: 23:30 + * mantissa: 0:22 + * + * Let f32 be a float32 value. 
The sign, exponent, and mantissa + * determine its value thus: + * + * if e32 = 0 and m32 = 0, then zero: (-1)^s * 0 (10) + * if e32 = 0 and m32 != 0, then subnormal: (-1)^s * 2^(e32 - 126) * (m32 / 2^23) (11) + * if 0 < e32 < 255, then normal: (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12) + * if e32 = 255 and m32 = 0, then infinite: (-1)^s * inf (13) + * if e32 = 255 and m32 != 0, then NaN (14) + * + * where 0 <= m32 < 2^23. + * + * The minimum and maximum normal float16 values are + * + * min_norm16 = 2^(1 - 15) * (1 + 0 / 2^10) = 2^(-14) (20) + * max_norm16 = 2^(30 - 15) * (1 + 1023 / 2^10) (21) + * + * The step at max_norm16 is + * + * max_step16 = 2^5 (22) + * + * Observe that the float16 boundary values in equations 20-21 lie in the + * range of normal float32 values. + * + * + * Rounding Behavior + * ----------------- + * Not all float32 values can be exactly represented as a float16. We + * round all such intermediate float32 values to the nearest float16; if + * the float32 is exactly between two float16 values, we round to the one + * with an even mantissa. This rounding behavior has several benefits: + * + * - It has no sign bias. + * + * - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's + * GPU ISA. + * + * - By reproducing the behavior of the GPU (at least on Intel hardware), + * compile-time evaluation of constant packHalf2x16 GLSL expressions will + * result in the same value as if the expression were executed on the + * GPU. + * + * Calculation + * ----------- + * Our task is to compute s16, e16, m16 given f32. Since this function + * ignores the sign bit, assume that s32 = s16 = 0. There are several + * cases to consider. + */ + + factory.emit( + + /* Case 1) f32 is NaN + * + * The resultant f16 will also be NaN. 
+ */ + + /* if (e32 == 255 && m32 != 0) { */ + if_tree(logic_and(equal(e, constant(0xffu << 23u)), + logic_not(equal(m, constant(0u)))), + + assign(u16, constant(0x7fffu)), + + /* Case 2) f32 lies in the range [0, min_norm16). + * + * The resultant float16 will be either zero, subnormal, or normal. + * + * Solving + * + * f32 = min_norm16 (30) + * + * gives + * + * e32 = 113 and m32 = 0 (31) + * + * Therefore this case occurs if and only if + * + * e32 < 113 (32) + */ + + /* } else if (e32 < 113) { */ + if_tree(less(e, constant(113u << 23u)), + + /* u16 = uint(round_to_even(abs(f32) * float(1u << 24u))); */ + assign(u16, f2u(round_even(mul(expr(ir_unop_abs, f), + constant((float) (1 << 24)))))), + + /* Case 3) f32 lies in the range + * [min_norm16, max_norm16 + max_step16). + * + * The resultant float16 will be either normal or infinite. + * + * Solving + * + * f32 = max_norm16 + max_step16 (40) + * = 2^15 * (1 + 1023 / 2^10) + 2^5 (41) + * = 2^16 (42) + * gives + * + * e32 = 143 and m32 = 0 (43) + * + * We already solved the boundary condition f32 = min_norm16 above + * in equation 31. Therefore this case occurs if and only if + * + * 113 <= e32 and e32 < 143 + */ + + /* } else if (e32 < 143) { */ + if_tree(less(e, constant(143u << 23u)), + + /* The addition below handles the case where the mantissa rounds + * up to 1024 and bumps the exponent. + * + * u16 = ((e - (112u << 23u)) >> 13u) + * + round_to_even((float(m) / (1u << 13u)); + */ + assign(u16, add(rshift(sub(e, constant(112u << 23u)), + constant(13u)), + f2u(round_even( + div(u2f(m), constant((float) (1 << 13))))))), + + /* Case 4) f32 lies in the range [max_norm16 + max_step16, inf]. + * + * The resultant float16 will be infinite. + * + * The cases above caught all float32 values in the range + * [0, max_norm16 + max_step16), so this is the fall-through case. 
+ */ + + /* } else { */ + + assign(u16, constant(31u << 10u)))))); + + /* } */ + + return deref(u16).val; + } + + /** + * \brief Lower a packHalf2x16 expression. + * + * \param vec2_rval is packHalf2x16's input + * \return packHalf2x16's output as a uint rvalue + */ + ir_rvalue* + lower_pack_half_2x16(ir_rvalue *vec2_rval) + { + /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec: + * + * highp uint packHalf2x16 (mediump vec2 v) + * ---------------------------------------- + * Returns an unsigned integer obtained by converting the components of + * a two-component floating-point vector to the 16-bit floating-point + * representation found in the OpenGL ES Specification, and then packing + * these two 16-bit integers into a 32-bit unsigned integer. + * + * The first vector component specifies the 16 least- significant bits + * of the result; the second component specifies the 16 most-significant + * bits. + */ + + assert(vec2_rval->type == glsl_type::vec2_type); + + /* vec2 f = VEC2_RVAL; */ + ir_variable *f = factory.make_temp(glsl_type::vec2_type, + "tmp_pack_half_2x16_f"); + factory.emit(assign(f, vec2_rval)); + + /* uvec2 f32 = bitcast_f2u(f); */ + ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type, + "tmp_pack_half_2x16_f32"); + factory.emit(assign(f32, expr(ir_unop_bitcast_f2u, f))); + + /* uvec2 f16; */ + ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type, + "tmp_pack_half_2x16_f16"); + + /* Get f32's unshifted exponent bits. + * + * uvec2 e = f32 & 0x7f800000u; + */ + ir_variable *e = factory.make_temp(glsl_type::uvec2_type, + "tmp_pack_half_2x16_e"); + factory.emit(assign(e, bit_and(f32, constant(0x7f800000u)))); + + /* Get f32's unshifted mantissa bits. + * + * uvec2 m = f32 & 0x007fffffu; + */ + ir_variable *m = factory.make_temp(glsl_type::uvec2_type, + "tmp_pack_half_2x16_m"); + factory.emit(assign(m, bit_and(f32, constant(0x007fffffu)))); + + /* Set f16's exponent and mantissa bits. 
+ * + * f16.x = pack_half_1x16_nosign(f.x, e.x, m.x); + * f16.y = pack_half_1x16_nosign(f.y, e.y, m.y); + */ + factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_x(f), + swizzle_x(e), + swizzle_x(m)), + WRITEMASK_X)); + factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_y(f), + swizzle_y(e), + swizzle_y(m)), + WRITEMASK_Y)); + + /* Set f16's sign bits. + * + * f16 |= (f32 & (1u << 31u)) >> 16u; + */ + factory.emit( + assign(f16, bit_or(f16, + rshift(bit_and(f32, constant(1u << 31u)), + constant(16u))))); + + + /* return (f16.y << 16u) | f16.x; */ + ir_rvalue *result = bit_or(lshift(swizzle_y(f16), + constant(16u)), + swizzle_x(f16)); + + assert(result->type == glsl_type::uint_type); + return result; + } + + /** + * \brief Split packHalf2x16's vec2 operand into two floats. + * + * \param vec2_rval is packHalf2x16's input + * \return a uint rvalue + * + * Some code generators, such as the i965 fragment shader, require that all + * vector expressions be lowered to a sequence of scalar expressions. + * However, packHalf2x16 cannot be scalarized by the same mechanism as + * a true vector operation because its input and output have a differing + * number of vector components. + * + * This method scalarizes packHalf2x16 by transforming it from an unary + * operation having vector input to a binary operation having scalar input. + * That is, it transforms + * + * packHalf2x16(VEC2_RVAL); + * + * into + * + * vec2 v = VEC2_RVAL; + * return packHalf2x16_split(v.x, v.y); + */ + ir_rvalue* + split_pack_half_2x16(ir_rvalue *vec2_rval) + { + assert(vec2_rval->type == glsl_type::vec2_type); + + ir_variable *v = factory.make_temp(glsl_type::vec2_type, + "tmp_split_pack_half_2x16_v"); + factory.emit(assign(v, vec2_rval)); + + return expr(ir_binop_pack_half_2x16_split, swizzle_x(v), swizzle_y(v)); + } + + /** + * \brief Lower the component-wise calculation of unpackHalf2x16. 
+ * + * Given a uint that encodes a float16 in its lower 16 bits, this function + * returns a uint that encodes a float32 with the same value. The sign bit + * of the float16 is ignored. + * + * \param e_rval is the unshifted exponent bits of a float16 + * \param m_rval is the unshifted mantissa bits of a float16 + * \return a uint rvalue that encodes a float32 + */ + ir_rvalue* + unpack_half_1x16_nosign(ir_rvalue *e_rval, ir_rvalue *m_rval) + { + assert(e_rval->type == glsl_type::uint_type); + assert(m_rval->type == glsl_type::uint_type); + + /* uint u32; */ + ir_variable *u32 = factory.make_temp(glsl_type::uint_type, + "tmp_unpack_half_1x16_u32"); + + /* uint e = E_RVAL; */ + ir_variable *e = factory.make_temp(glsl_type::uint_type, + "tmp_unpack_half_1x16_e"); + factory.emit(assign(e, e_rval)); + + /* uint m = M_RVAL; */ + ir_variable *m = factory.make_temp(glsl_type::uint_type, + "tmp_unpack_half_1x16_m"); + factory.emit(assign(m, m_rval)); + + /* Preliminaries + * ------------- + * + * For a float16, the bit layout is: + * + * sign: 15 + * exponent: 10:14 + * mantissa: 0:9 + * + * Let f16 be a float16 value. The sign, exponent, and mantissa + * determine its value thus: + * + * if e16 = 0 and m16 = 0, then zero: (-1)^s16 * 0 (1) + * if e16 = 0 and m16!= 0, then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10) (2) + * if 0 < e16 < 31, then normal: (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3) + * if e16 = 31 and m16 = 0, then infinite: (-1)^s16 * inf (4) + * if e16 = 31 and m16 != 0, then NaN (5) + * + * where 0 <= m16 < 2^10. + * + * For a float32, the bit layout is: + * + * sign: 31 + * exponent: 23:30 + * mantissa: 0:22 + * + * Let f32 be a float32 value. 
The sign, exponent, and mantissa + * determine its value thus: + * + * if e32 = 0 and m32 = 0, then zero: (-1)^s * 0 (10) + * if e32 = 0 and m32 != 0, then subnormal: (-1)^s * 2^(e32 - 126) * (m32 / 2^23) (11) + * if 0 < e32 < 255, then normal: (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12) + * if e32 = 255 and m32 = 0, then infinite: (-1)^s * inf (13) + * if e32 = 255 and m32 != 0, then NaN (14) + * + * where 0 <= m32 < 2^23. + * + * Calculation + * ----------- + * Our task is to compute s32, e32, m32 given f16. Since this function + * ignores the sign bit, assume that s32 = s16 = 0. There are several + * cases consider. + */ + + factory.emit( + + /* Case 1) f16 is zero or subnormal. + * + * The simplest method of calcuating f32 in this case is + * + * f32 = f16 (20) + * = 2^(-14) * (m16 / 2^10) (21) + * = m16 / 2^(-24) (22) + */ + + /* if (e16 == 0) { */ + if_tree(equal(e, constant(0u)), + + /* u32 = bitcast_f2u(float(m) / float(1 << 24)); */ + assign(u32, expr(ir_unop_bitcast_f2u, + div(u2f(m), constant((float)(1 << 24))))), + + /* Case 2) f16 is normal. + * + * The equation + * + * f32 = f16 (30) + * 2^(e32 - 127) * (1 + m32 / 2^23) = (31) + * 2^(e16 - 15) * (1 + m16 / 2^10) + * + * can be decomposed into two + * + * 2^(e32 - 127) = 2^(e16 - 15) (32) + * 1 + m32 / 2^23 = 1 + m16 / 2^10 (33) + * + * which solve to + * + * e32 = e16 + 112 (34) + * m32 = m16 * 2^13 (35) + */ + + /* } else if (e16 < 31)) { */ + if_tree(less(e, constant(31u << 10u)), + + /* u32 = ((e + (112 << 10)) | m) << 13; + */ + assign(u32, lshift(bit_or(add(e, constant(112u << 10u)), m), + constant(13u))), + + + /* Case 3) f16 is infinite. */ + if_tree(equal(m, constant(0u)), + + assign(u32, constant(255u << 23u)), + + /* Case 4) f16 is NaN. */ + /* } else { */ + + assign(u32, constant(0x7fffffffu)))))); + + /* } */ + + return deref(u32).val; + } + + /** + * \brief Lower an unpackHalf2x16 expression. 
+ * + * \param uint_rval is unpackHalf2x16's input + * \return unpackHalf2x16's output as a vec2 rvalue + */ + ir_rvalue* + lower_unpack_half_2x16(ir_rvalue *uint_rval) + { + /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec: + * + * mediump vec2 unpackHalf2x16 (highp uint v) + * ------------------------------------------ + * Returns a two-component floating-point vector with components + * obtained by unpacking a 32-bit unsigned integer into a pair of 16-bit + * values, interpreting those values as 16-bit floating-point numbers + * according to the OpenGL ES Specification, and converting them to + * 32-bit floating-point values. + * + * The first component of the vector is obtained from the + * 16 least-significant bits of v; the second component is obtained + * from the 16 most-significant bits of v. + */ + assert(uint_rval->type == glsl_type::uint_type); + + /* uint u = RVALUE; + * uvec2 f16 = uvec2(u & 0xffff, u >> 16); + */ + ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type, + "tmp_unpack_half_2x16_f16"); + factory.emit(assign(f16, unpack_uint_to_uvec2(uint_rval))); + + /* uvec2 f32; */ + ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type, + "tmp_unpack_half_2x16_f32"); + + /* Get f16's unshifted exponent bits. + * + * uvec2 e = f16 & 0x7c00u; + */ + ir_variable *e = factory.make_temp(glsl_type::uvec2_type, + "tmp_unpack_half_2x16_e"); + factory.emit(assign(e, bit_and(f16, constant(0x7c00u)))); + + /* Get f16's unshifted mantissa bits. + * + * uvec2 m = f16 & 0x03ffu; + */ + ir_variable *m = factory.make_temp(glsl_type::uvec2_type, + "tmp_unpack_half_2x16_m"); + factory.emit(assign(m, bit_and(f16, constant(0x03ffu)))); + + /* Set f32's exponent and mantissa bits. 
+ * + * f32.x = unpack_half_1x16_nosign(e.x, m.x); + * f32.y = unpack_half_1x16_nosign(e.y, m.y); + */ + factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_x(e), + swizzle_x(m)), + WRITEMASK_X)); + factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_y(e), + swizzle_y(m)), + WRITEMASK_Y)); + + /* Set f32's sign bits. + * + * f32 |= (f16 & 0x8000u) << 16u; + */ + factory.emit(assign(f32, bit_or(f32, + lshift(bit_and(f16, + constant(0x8000u)), + constant(16u))))); + + /* return bitcast_u2f(f32); */ + ir_rvalue *result = expr(ir_unop_bitcast_u2f, f32); + assert(result->type == glsl_type::vec2_type); + return result; + } + + /** + * \brief Split unpackHalf2x16 into two operations. + * + * \param uint_rval is unpackHalf2x16's input + * \return a vec2 rvalue + * + * Some code generators, such as the i965 fragment shader, require that all + * vector expressions be lowered to a sequence of scalar expressions. + * However, unpackHalf2x16 cannot be scalarized by the same method as + * a true vector operation because the number of components of its input + * and output differ. + * + * This method scalarizes unpackHalf2x16 by transforming it from a single + * operation having vec2 output to a pair of operations each having float + * output. 
That is, it transforms + * + * unpackHalf2x16(UINT_RVAL) + * + * into + * + * uint u = UINT_RVAL; + * vec2 v; + * + * v.x = unpackHalf2x16_split_x(u); + * v.y = unpackHalf2x16_split_y(u); + * + * return v; + */ + ir_rvalue* + split_unpack_half_2x16(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + /* uint u = uint_rval; */ + ir_variable *u = factory.make_temp(glsl_type::uint_type, + "tmp_split_unpack_half_2x16_u"); + factory.emit(assign(u, uint_rval)); + + /* vec2 v; */ + ir_variable *v = factory.make_temp(glsl_type::vec2_type, + "tmp_split_unpack_half_2x16_v"); + + /* v.x = unpack_half_2x16_split_x(u); */ + factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_x, u), + WRITEMASK_X)); + + /* v.y = unpack_half_2x16_split_y(u); */ + factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_y, u), + WRITEMASK_Y)); + + return deref(v).val; + } +}; + +} // namespace anonymous + +/** + * \brief Lower the builtin packing functions. + * + * \param instructions is the IR instruction list to visit + * \param op_mask is a bitmask of `enum lower_packing_builtins_op`. + * \return the progress flag reported by the visitor 
+ */ +bool +lower_packing_builtins(exec_list *instructions, int op_mask) +{ + lower_packing_builtins_visitor v(op_mask); + visit_list_elements(&v, instructions, true); + return v.get_progress(); +} diff --git a/src/compiler/glsl/lower_shared_reference.cpp b/src/compiler/glsl/lower_shared_reference.cpp new file mode 100644 index 0000000..533cd92 --- /dev/null +++ b/src/compiler/glsl/lower_shared_reference.cpp @@ -0,0 +1,496 @@ +/* + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_shared_reference.cpp + * + * IR lower pass to replace dereferences of compute shader shared variables + * with intrinsic function calls. + * + * This relieves drivers of the responsibility of allocating space for the + * shared variables in the shared memory region. 
+ */ + +#include "lower_buffer_access.h" +#include "ir_builder.h" +#include "main/macros.h" +#include "util/list.h" +#include "glsl_parser_extras.h" + +using namespace ir_builder; + +namespace { + +/* Records the offset assigned to one shared variable. */ +struct var_offset { + struct list_head node; + const ir_variable *var; + unsigned offset; +}; + +/** + * Visitor that rewrites loads, stores and atomic operations on + * ir_var_shader_shared variables into intrinsic calls that take an offset, + * handing out an offset to each variable the first time it is referenced. + */ +class lower_shared_reference_visitor : + public lower_buffer_access::lower_buffer_access { +public: + + /* Initialize \c buffer_access_type and \c progress as well, so that + * neither member can ever be read while indeterminate. + */ + lower_shared_reference_visitor(struct gl_shader *shader) + : buffer_access_type(shared_load_access), + list_ctx(ralloc_context(NULL)), shader(shader), shared_size(0u), + progress(false) + { + list_inithead(&var_offsets); + } + + ~lower_shared_reference_visitor() + { + ralloc_free(list_ctx); + } + + enum { + shared_load_access, + shared_store_access, + shared_atomic_access, + } buffer_access_type; + + void insert_buffer_access(void *mem_ctx, ir_dereference *deref, + const glsl_type *type, ir_rvalue *offset, + unsigned mask, int channel); + + void handle_rvalue(ir_rvalue **rvalue); + ir_visitor_status visit_enter(ir_assignment *ir); + void handle_assignment(ir_assignment *ir); + + ir_call *lower_shared_atomic_intrinsic(ir_call *ir); + ir_call *check_for_shared_atomic_intrinsic(ir_call *ir); + ir_visitor_status visit_enter(ir_call *ir); + + unsigned get_shared_offset(const ir_variable *); + + ir_call *shared_load(void *mem_ctx, const struct glsl_type *type, + ir_rvalue *offset); + ir_call *shared_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset, + unsigned write_mask); + + void *list_ctx; + struct gl_shader *shader; + struct list_head var_offsets; + unsigned shared_size; + bool progress; +}; + +/** + * Return the offset assigned to \c var. The first request for a variable + * aligns \c shared_size to the type's std430 base alignment, records that as + * the variable's offset and sets \c shared_size to that offset plus the + * type's std430 size. + */ +unsigned +lower_shared_reference_visitor::get_shared_offset(const ir_variable *var) +{ + list_for_each_entry(var_offset, var_entry, &var_offsets, node) { + if (var_entry->var == var) + return var_entry->offset; + } + + struct var_offset *new_entry = rzalloc(list_ctx, struct var_offset); + list_add(&new_entry->node, &var_offsets); + new_entry->var = var; + + unsigned var_align = var->type->std430_base_alignment(false); + new_entry->offset 
= glsl_align(shared_size, var_align); + + unsigned var_size = var->type->std430_size(false); + shared_size = new_entry->offset + var_size; + + return new_entry->offset; +} + +/** + * Replace a read of a shared variable with a read of a temporary; the + * instructions that fill the temporary are inserted before \c base_ir. + */ +void +lower_shared_reference_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_dereference *deref = (*rvalue)->as_dereference(); + if (!deref) + return; + + ir_variable *var = deref->variable_referenced(); + if (!var || var->data.mode != ir_var_shader_shared) + return; + + buffer_access_type = shared_load_access; + + void *mem_ctx = ralloc_parent(shader->ir); + + ir_rvalue *offset = NULL; + unsigned const_offset = get_shared_offset(var); + bool row_major; + int matrix_columns; + assert(var->get_interface_type() == NULL); + const unsigned packing = GLSL_INTERFACE_PACKING_STD430; + + setup_buffer_access(mem_ctx, var, deref, + &offset, &const_offset, + &row_major, &matrix_columns, packing); + + /* Now that we've calculated the offset to the start of the + * dereference, walk over the type and emit loads into a temporary. 
+ */ + const glsl_type *type = (*rvalue)->type; + ir_variable *load_var = new(mem_ctx) ir_variable(type, + "shared_load_temp", + ir_var_temporary); + base_ir->insert_before(load_var); + + ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type, + "shared_load_temp_offset", + ir_var_temporary); + base_ir->insert_before(load_offset); + base_ir->insert_before(assign(load_offset, offset)); + + deref = new(mem_ctx) ir_dereference_variable(load_var); + + emit_access(mem_ctx, false, deref, load_offset, const_offset, row_major, + matrix_columns, packing, 0); + + *rvalue = deref; + + progress = true; +} + +/* If the LHS of \c ir is a shared variable, redirect the assignment to a + * temporary and ask emit_access() to write that temporary back to memory. + * Assignments to anything else are left untouched. + */ +void +lower_shared_reference_visitor::handle_assignment(ir_assignment *ir) +{ + if (!ir || !ir->lhs) + return; + + ir_rvalue *rvalue = ir->lhs->as_rvalue(); + if (!rvalue) + return; + + ir_dereference *deref = ir->lhs->as_dereference(); + if (!deref) + return; + + ir_variable *var = ir->lhs->variable_referenced(); + if (!var || var->data.mode != ir_var_shader_shared) + return; + + buffer_access_type = shared_store_access; + + /* We have a write to a shared variable, so declare a temporary and rewrite + * the assignment so that the temporary is the LHS. 
+ */ + void *mem_ctx = ralloc_parent(shader->ir); + + const glsl_type *type = rvalue->type; + ir_variable *store_var = new(mem_ctx) ir_variable(type, + "shared_store_temp", + ir_var_temporary); + base_ir->insert_before(store_var); + ir->lhs = new(mem_ctx) ir_dereference_variable(store_var); + + ir_rvalue *offset = NULL; + unsigned const_offset = get_shared_offset(var); + bool row_major; + int matrix_columns; + assert(var->get_interface_type() == NULL); + const unsigned packing = GLSL_INTERFACE_PACKING_STD430; + + setup_buffer_access(mem_ctx, var, deref, + &offset, &const_offset, + &row_major, &matrix_columns, packing); + + deref = new(mem_ctx) ir_dereference_variable(store_var); + + ir_variable *store_offset = new(mem_ctx) ir_variable(glsl_type::uint_type, + "shared_store_temp_offset", + ir_var_temporary); + base_ir->insert_before(store_offset); + base_ir->insert_before(assign(store_offset, offset)); + + /* Now we have to write the value assigned to the temporary back to memory */ + emit_access(mem_ctx, true, deref, store_offset, const_offset, row_major, + matrix_columns, packing, ir->write_mask); + + progress = true; +} + +/* Lower a store to a shared variable, if \c ir is one, before visiting the + * assignment's rvalues. + */ +ir_visitor_status +lower_shared_reference_visitor::visit_enter(ir_assignment *ir) +{ + handle_assignment(ir); + return rvalue_visit(ir); +} + +/* Emit one access at \c offset. For shared_store_access a store call is + * inserted after base_ir; for any other access type a load call and an + * assignment of its result to a clone of \c deref are inserted before + * base_ir. \c type is only used for loads and \c channel is not used. + */ +void +lower_shared_reference_visitor::insert_buffer_access(void *mem_ctx, + ir_dereference *deref, + const glsl_type *type, + ir_rvalue *offset, + unsigned mask, + int channel) +{ + if (buffer_access_type == shared_store_access) { + ir_call *store = shared_store(mem_ctx, deref, offset, mask); + base_ir->insert_after(store); + } else { + ir_call *load = shared_load(mem_ctx, type, offset); + base_ir->insert_before(load); + ir_rvalue *value = load->return_deref->as_rvalue()->clone(mem_ctx, NULL); + base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), + value)); + } +} + +/* Availability predicate handed to the intrinsic signatures created in this + * file: true only when state->stage is MESA_SHADER_COMPUTE. + */ +static bool +compute_shader_enabled(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_COMPUTE; +} + 
+/* Build a call to a newly created \c __intrinsic_store_shared signature + * taking (offset, value, write_mask). Clones of \c offset and \c deref are + * passed as arguments; the call has no return value. + */ +ir_call * +lower_shared_reference_visitor::shared_store(void *mem_ctx, + ir_rvalue *deref, + ir_rvalue *offset, + unsigned write_mask) +{ + exec_list sig_params; + + ir_variable *offset_ref = new(mem_ctx) + ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); + sig_params.push_tail(offset_ref); + + ir_variable *val_ref = new(mem_ctx) + ir_variable(deref->type, "value" , ir_var_function_in); + sig_params.push_tail(val_ref); + + ir_variable *writemask_ref = new(mem_ctx) + ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in); + sig_params.push_tail(writemask_ref); + + ir_function_signature *sig = new(mem_ctx) + ir_function_signature(glsl_type::void_type, compute_shader_enabled); + assert(sig); + sig->replace_parameters(&sig_params); + sig->is_intrinsic = true; + + ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_shared"); + f->add_signature(sig); + + exec_list call_params; + call_params.push_tail(offset->clone(mem_ctx, NULL)); + call_params.push_tail(deref->clone(mem_ctx, NULL)); + call_params.push_tail(new(mem_ctx) ir_constant(write_mask)); + return new(mem_ctx) ir_call(sig, NULL, &call_params); +} + +/* Build a call to a newly created \c __intrinsic_load_shared signature that + * takes an offset and returns \c type. The result is stored in a new + * temporary that is inserted before base_ir. + */ +ir_call * +lower_shared_reference_visitor::shared_load(void *mem_ctx, + const struct glsl_type *type, + ir_rvalue *offset) +{ + exec_list sig_params; + + ir_variable *offset_ref = new(mem_ctx) + ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in); + sig_params.push_tail(offset_ref); + + ir_function_signature *sig = + new(mem_ctx) ir_function_signature(type, compute_shader_enabled); + assert(sig); + sig->replace_parameters(&sig_params); + sig->is_intrinsic = true; + + ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_shared"); + f->add_signature(sig); + + ir_variable *result = new(mem_ctx) + ir_variable(type, "shared_load_result", ir_var_temporary); + base_ir->insert_before(result); + ir_dereference_variable *deref_result = new(mem_ctx) + ir_dereference_variable(result); + + exec_list call_params; + 
call_params.push_tail(offset->clone(mem_ctx, NULL)); + + return new(mem_ctx) ir_call(sig, deref_result, &call_params); +} + +/* Lowers the intrinsic call to a new internal intrinsic that replaces the + * shared variable in the first parameter with an offset. This involves + * creating the new internal intrinsic (i.e. the new function signature). + */ +ir_call * +lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir) +{ + /* Shared atomics usually have 2 parameters, the shared variable and an + * integer argument. The exception is CompSwap, which has an additional + * integer parameter. + */ + int param_count = ir->actual_parameters.length(); + assert(param_count == 2 || param_count == 3); + + /* First argument must be a scalar integer shared variable */ + exec_node *param = ir->actual_parameters.get_head(); + ir_instruction *inst = (ir_instruction *) param; + assert(inst->ir_type == ir_type_dereference_variable || + inst->ir_type == ir_type_dereference_array || + inst->ir_type == ir_type_dereference_record || + inst->ir_type == ir_type_swizzle); + + ir_rvalue *deref = (ir_rvalue *) inst; + assert(deref->type->is_scalar() && deref->type->is_integer()); + + ir_variable *var = deref->variable_referenced(); + assert(var); + + /* Compute the offset to the start of the dereference + */ + void *mem_ctx = ralloc_parent(shader->ir); + + ir_rvalue *offset = NULL; + unsigned const_offset = get_shared_offset(var); + bool row_major; + int matrix_columns; + assert(var->get_interface_type() == NULL); + const unsigned packing = GLSL_INTERFACE_PACKING_STD430; + buffer_access_type = shared_atomic_access; + + setup_buffer_access(mem_ctx, var, deref, + &offset, &const_offset, + &row_major, &matrix_columns, packing); + + assert(offset); + assert(!row_major); + assert(matrix_columns == 1); + + ir_rvalue *deref_offset = + add(offset, new(mem_ctx) ir_constant(const_offset)); + + /* Create the new internal function signature that will take an offset + * instead 
of a shared variable + */ + exec_list sig_params; + ir_variable *sig_param = new(mem_ctx) + ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); + sig_params.push_tail(sig_param); + + const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ? + glsl_type::int_type : glsl_type::uint_type; + sig_param = new(mem_ctx) + ir_variable(type, "data1", ir_var_function_in); + sig_params.push_tail(sig_param); + + if (param_count == 3) { + sig_param = new(mem_ctx) + ir_variable(type, "data2", ir_var_function_in); + sig_params.push_tail(sig_param); + } + + ir_function_signature *sig = + new(mem_ctx) ir_function_signature(deref->type, + compute_shader_enabled); + assert(sig); + sig->replace_parameters(&sig_params); + sig->is_intrinsic = true; + + /* Bounded write: the callee name comes from the IR, so never let it + * overrun the fixed-size buffer. + */ + char func_name[64]; + snprintf(func_name, sizeof(func_name), "%s_shared", ir->callee_name()); + ir_function *f = new(mem_ctx) ir_function(func_name); + f->add_signature(sig); + + /* Now, create the call to the internal intrinsic */ + exec_list call_params; + call_params.push_tail(deref_offset); + param = ir->actual_parameters.get_head()->get_next(); + ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue(); + call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL)); + if (param_count == 3) { + param = param->get_next(); + param_as_rvalue = ((ir_instruction *) param)->as_rvalue(); + call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL)); + } + ir_dereference_variable *return_deref = + ir->return_deref->clone(mem_ctx, NULL); + return new(mem_ctx) ir_call(sig, return_deref, &call_params); +} + +/* Return a lowered replacement call when \c ir has 2 or 3 parameters, its + * first parameter references a shared variable and its callee is one of the + * atomic intrinsics named below; otherwise return \c ir itself. + */ +ir_call * +lower_shared_reference_visitor::check_for_shared_atomic_intrinsic(ir_call *ir) +{ + exec_list& params = ir->actual_parameters; + + if (params.length() < 2 || params.length() > 3) + return ir; + + ir_rvalue *rvalue = + ((ir_instruction *) params.get_head())->as_rvalue(); + if (!rvalue) + return ir; + + ir_variable *var = rvalue->variable_referenced(); + if (!var || var->data.mode != ir_var_shader_shared) + return ir; 
+ + const char *callee = ir->callee_name(); + if (!strcmp("__intrinsic_atomic_add", callee) || + !strcmp("__intrinsic_atomic_min", callee) || + !strcmp("__intrinsic_atomic_max", callee) || + !strcmp("__intrinsic_atomic_and", callee) || + !strcmp("__intrinsic_atomic_or", callee) || + !strcmp("__intrinsic_atomic_xor", callee) || + !strcmp("__intrinsic_atomic_exchange", callee) || + !strcmp("__intrinsic_atomic_comp_swap", callee)) { + return lower_shared_atomic_intrinsic(ir); + } + + return ir; +} + +/* Replace a shared atomic intrinsic call with its lowered form; any other + * call is handed to rvalue_visit(). + */ +ir_visitor_status +lower_shared_reference_visitor::visit_enter(ir_call *ir) +{ + ir_call *new_ir = check_for_shared_atomic_intrinsic(ir); + if (new_ir != ir) { + progress = true; + base_ir->replace_with(new_ir); + return visit_continue_with_parent; + } + + return rvalue_visit(ir); +} + +} /* unnamed namespace */ + +/* Lower the shared variable references of a compute shader and store the + * resulting shared size in \c *shared_size. For any other stage the function + * returns without touching the shader or \c *shared_size. + */ +void +lower_shared_reference(struct gl_shader *shader, unsigned *shared_size) +{ + if (shader->Stage != MESA_SHADER_COMPUTE) + return; + + lower_shared_reference_visitor v(shader); + + /* Loop over the instructions lowering references, because taking a deref + * of a shared variable array using a shared variable dereference as the + * index will produce a collection of instructions all of which have cloned + * shared variable dereferences for that array index. 
+ */ + do { + v.progress = false; + visit_list_elements(&v, shader->ir); + } while (v.progress); + + *shared_size = v.shared_size; +} diff --git a/src/compiler/glsl/lower_subroutine.cpp b/src/compiler/glsl/lower_subroutine.cpp new file mode 100644 index 0000000..e80c1be --- /dev/null +++ b/src/compiler/glsl/lower_subroutine.cpp @@ -0,0 +1,123 @@ +/* + * Copyright © 2015 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_subroutine.cpp + * + * Lowers subroutine calls to an if ladder. 
+ */ + +#include "compiler/glsl_types.h" +#include "glsl_parser_extras.h" +#include "ir.h" +#include "ir_builder.h" + +using namespace ir_builder; +namespace { + +class lower_subroutine_visitor : public ir_hierarchical_visitor { +public: + lower_subroutine_visitor(struct _mesa_glsl_parse_state *state) + : state(state) + { + this->progress = false; + } + + ir_visitor_status visit_leave(ir_call *); + ir_call *call_clone(ir_call *call, ir_function_signature *callee); + bool progress; + struct _mesa_glsl_parse_state *state; +}; + +} + +/** + * Lower every call made through a subroutine variable in \c instructions. + * + * \return true if at least one such call was replaced + */ +bool +lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state) +{ + lower_subroutine_visitor v(state); + visit_list_elements(&v, instructions); + return v.progress; +} + +/* Create a call to \c callee that uses clones of \c call's return + * dereference (when it has one) and of its actual parameters. + */ +ir_call * +lower_subroutine_visitor::call_clone(ir_call *call, ir_function_signature *callee) +{ + void *mem_ctx = ralloc_parent(call); + ir_dereference_variable *new_return_ref = NULL; + if (call->return_deref != NULL) + new_return_ref = call->return_deref->clone(mem_ctx, NULL); + + exec_list new_parameters; + + foreach_in_list(ir_instruction, ir, &call->actual_parameters) { + new_parameters.push_tail(ir->clone(mem_ctx, NULL)); + } + + return new(mem_ctx) ir_call(callee, new_return_ref, &new_parameters); +} + +/* Replace a call through a subroutine variable with an if ladder that + * compares the variable (or the array index, when present) against the index + * of each compatible subroutine and calls the matching function. Calls + * without a subroutine variable are left alone. + */ +ir_visitor_status +lower_subroutine_visitor::visit_leave(ir_call *ir) +{ + if (!ir->sub_var) + return visit_continue; + + void *mem_ctx = ralloc_parent(ir); + ir_if *last_branch = NULL; + + for (int s = this->state->num_subroutines - 1; s >= 0; s--) { + ir_rvalue *var; + ir_constant *lc = new(mem_ctx)ir_constant(s); + ir_function *fn = this->state->subroutines[s]; + bool is_compat = false; + + for (int i = 0; i < fn->num_subroutine_types; i++) { + if (ir->sub_var->type->without_array() == fn->subroutine_types[i]) { + is_compat = true; + break; + } + } + if (is_compat == false) + continue; + + if (ir->array_idx != NULL) + var = ir->array_idx->clone(mem_ctx, NULL); + else + var = new(mem_ctx) ir_dereference_variable(ir->sub_var); + + 
ir_function_signature *sub_sig = + fn->exact_matching_signature(this->state, + &ir->actual_parameters); + + /* NOTE(review): sub_sig is used unchecked; confirm that + * exact_matching_signature() cannot return NULL for a compatible + * subroutine. + */ + ir_call *new_call = call_clone(ir, sub_sig); + if (!last_branch) + last_branch = if_tree(equal(subr_to_int(var), lc), new_call); + else + last_branch = if_tree(equal(subr_to_int(var), lc), new_call, last_branch); + } + if (last_branch) + ir->insert_before(last_branch); + ir->remove(); + + /* The original call has been removed from the IR, so report progress. */ + this->progress = true; + + return visit_continue; +} diff --git a/src/compiler/glsl/lower_tess_level.cpp b/src/compiler/glsl/lower_tess_level.cpp new file mode 100644 index 0000000..bed2553 --- /dev/null +++ b/src/compiler/glsl/lower_tess_level.cpp @@ -0,0 +1,459 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +/** + * \file lower_tess_level.cpp + * + * This pass accounts for the difference between the way gl_TessLevelOuter + * and gl_TessLevelInner are declared in standard GLSL (as arrays of + * floats), and the way they are frequently implemented in hardware (as a vec4 + * and vec2). + * + * The declaration of gl_TessLevel* is replaced with a declaration + * of gl_TessLevel*MESA, and any references to gl_TessLevel* are + * translated to refer to gl_TessLevel*MESA with the appropriate + * swizzling of array indices. For instance: + * + * gl_TessLevelOuter[i] + * + * is translated into: + * + * gl_TessLevelOuterMESA[i] + * + * Since some hardware may not internally represent gl_TessLevel* as a vec4 + * and a vec2, this lowering pass is optional. To enable it, set the + * LowerTessLevel flag in gl_shader_compiler_options to true. + */ + +#include "glsl_symbol_table.h" +#include "ir_rvalue_visitor.h" +#include "ir.h" +#include "program/prog_instruction.h" /* For WRITEMASK_* */ + +namespace { + +class lower_tess_level_visitor : public ir_rvalue_visitor { +public: + explicit lower_tess_level_visitor(gl_shader_stage shader_stage) + : progress(false), old_tess_level_outer_var(NULL), + old_tess_level_inner_var(NULL), new_tess_level_outer_var(NULL), + new_tess_level_inner_var(NULL), shader_stage(shader_stage) + { + } + + virtual ir_visitor_status visit(ir_variable *); + bool is_tess_level_array(ir_rvalue *ir); + ir_rvalue *lower_tess_level_array(ir_rvalue *ir); + virtual ir_visitor_status visit_leave(ir_assignment *); + void visit_new_assignment(ir_assignment *ir); + virtual ir_visitor_status visit_leave(ir_call *); + + virtual void handle_rvalue(ir_rvalue **rvalue); + + void fix_lhs(ir_assignment *); + + bool progress; + + /** + * Pointers to the declarations of gl_TessLevel*, if found. + */ + ir_variable *old_tess_level_outer_var; + ir_variable *old_tess_level_inner_var; + + /** + * Pointers to the newly-created gl_TessLevel*MESA variables. 
+ */ + ir_variable *new_tess_level_outer_var; + ir_variable *new_tess_level_inner_var; + + /** + * Type of shader we are compiling (e.g. MESA_SHADER_TESS_CTRL) + */ + const gl_shader_stage shader_stage; +}; + +} /* anonymous namespace */ + +/** + * Replace any declaration of gl_TessLevel* as an array of floats with a + * declaration of gl_TessLevel*MESA as a vec4 (outer) or vec2 (inner). + */ +ir_visitor_status +lower_tess_level_visitor::visit(ir_variable *ir) +{ + if ((!ir->name) || + ((strcmp(ir->name, "gl_TessLevelInner") != 0) && + (strcmp(ir->name, "gl_TessLevelOuter") != 0))) + return visit_continue; + + assert (ir->type->is_array()); + + if (strcmp(ir->name, "gl_TessLevelOuter") == 0) { + if (this->old_tess_level_outer_var) + return visit_continue; + + old_tess_level_outer_var = ir; + assert(ir->type->fields.array == glsl_type::float_type); + + /* Clone the old var so that we inherit all of its properties */ + new_tess_level_outer_var = ir->clone(ralloc_parent(ir), NULL); + + /* And change the properties that we need to change */ + new_tess_level_outer_var->name = ralloc_strdup(new_tess_level_outer_var, + "gl_TessLevelOuterMESA"); + new_tess_level_outer_var->type = glsl_type::vec4_type; + new_tess_level_outer_var->data.max_array_access = 0; + + ir->replace_with(new_tess_level_outer_var); + } else if (strcmp(ir->name, "gl_TessLevelInner") == 0) { + if (this->old_tess_level_inner_var) + return visit_continue; + + old_tess_level_inner_var = ir; + assert(ir->type->fields.array == glsl_type::float_type); + + /* Clone the old var so that we inherit all of its properties */ + new_tess_level_inner_var = ir->clone(ralloc_parent(ir), NULL); + + /* And change the properties that we need to change */ + new_tess_level_inner_var->name = ralloc_strdup(new_tess_level_inner_var, + "gl_TessLevelInnerMESA"); + new_tess_level_inner_var->type = glsl_type::vec2_type; + new_tess_level_inner_var->data.max_array_access = 0; + + ir->replace_with(new_tess_level_inner_var); + } else { + assert(0); + } + + 
this->progress = true; + + return visit_continue; +} + + +/** + * Determine whether the given rvalue describes an array of floats that + * needs to be lowered to a vec4 or vec2; that is, determine whether it + * matches one of the following patterns: + * + * - gl_TessLevelOuter + * - gl_TessLevelInner + */ +bool +lower_tess_level_visitor::is_tess_level_array(ir_rvalue *ir) +{ + if (!ir->type->is_array()) + return false; + if (ir->type->fields.array != glsl_type::float_type) + return false; + + if (this->old_tess_level_outer_var) { + if (ir->variable_referenced() == this->old_tess_level_outer_var) + return true; + } + if (this->old_tess_level_inner_var) { + if (ir->variable_referenced() == this->old_tess_level_inner_var) + return true; + } + return false; +} + + +/** + * If the given ir satisfies is_tess_level_array(), return new ir + * representing its lowered equivalent. That is, map: + * + * - gl_TessLevelOuter => gl_TessLevelOuterMESA + * - gl_TessLevelInner => gl_TessLevelInnerMESA + * + * Otherwise return NULL. 
+ */ +ir_rvalue * +lower_tess_level_visitor::lower_tess_level_array(ir_rvalue *ir) +{ + if (!ir->type->is_array()) + return NULL; + if (ir->type->fields.array != glsl_type::float_type) + return NULL; + + ir_variable **new_var = NULL; + + if (this->old_tess_level_outer_var) { + if (ir->variable_referenced() == this->old_tess_level_outer_var) + new_var = &this->new_tess_level_outer_var; + } + if (this->old_tess_level_inner_var) { + if (ir->variable_referenced() == this->old_tess_level_inner_var) + new_var = &this->new_tess_level_inner_var; + } + + if (new_var == NULL) + return NULL; + + assert(ir->as_dereference_variable()); + return new(ralloc_parent(ir)) ir_dereference_variable(*new_var); +} + + +void +lower_tess_level_visitor::handle_rvalue(ir_rvalue **rv) +{ + if (*rv == NULL) + return; + + ir_dereference_array *const array_deref = (*rv)->as_dereference_array(); + if (array_deref == NULL) + return; + + /* Replace any expression that indexes one of the floats in gl_TessLevel* + * with an expression that indexes into one of the vec4's + * gl_TessLevel*MESA and accesses the appropriate component. + */ + ir_rvalue *lowered_vec4 = + this->lower_tess_level_array(array_deref->array); + if (lowered_vec4 != NULL) { + this->progress = true; + void *mem_ctx = ralloc_parent(array_deref); + + ir_expression *const expr = + new(mem_ctx) ir_expression(ir_binop_vector_extract, + lowered_vec4, + array_deref->array_index); + + *rv = expr; + } +} + +void +lower_tess_level_visitor::fix_lhs(ir_assignment *ir) +{ + if (ir->lhs->ir_type != ir_type_expression) + return; + void *mem_ctx = ralloc_parent(ir); + ir_expression *const expr = (ir_expression *) ir->lhs; + + /* The expression must be of the form: + * + * (vector_extract gl_TessLevel*MESA, j). 
+ */ + assert(expr->operation == ir_binop_vector_extract); + assert(expr->operands[0]->ir_type == ir_type_dereference_variable); + assert((expr->operands[0]->type == glsl_type::vec4_type) || + (expr->operands[0]->type == glsl_type::vec2_type)); + + ir_dereference *const new_lhs = (ir_dereference *) expr->operands[0]; + + ir_constant *old_index_constant = expr->operands[1]->constant_expression_value(); + if (!old_index_constant) { + ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert, + expr->operands[0]->type, + new_lhs->clone(mem_ctx, NULL), + ir->rhs, + expr->operands[1]); + } + ir->set_lhs(new_lhs); + + if (old_index_constant) { + /* gl_TessLevel* is being accessed via a constant index. Don't bother + * creating a vector insert op. Just use a write mask. + */ + ir->write_mask = 1 << old_index_constant->get_int_component(0); + } else { + ir->write_mask = (1 << expr->operands[0]->type->vector_elements) - 1; + } +} + +/** + * Replace any assignment having a gl_TessLevel* (undereferenced) as + * its LHS or RHS with a sequence of assignments, one for each component of + * the array. Each of these assignments is lowered to refer to + * gl_TessLevel*MESA as appropriate. + */ +ir_visitor_status +lower_tess_level_visitor::visit_leave(ir_assignment *ir) +{ + /* First invoke the base class visitor. This causes handle_rvalue() to be + * called on ir->rhs and ir->condition. + */ + ir_rvalue_visitor::visit_leave(ir); + + if (this->is_tess_level_array(ir->lhs) || + this->is_tess_level_array(ir->rhs)) { + /* LHS or RHS of the assignment is the entire gl_TessLevel* array. + * Since we are + * reshaping gl_TessLevel* from an array of floats to a + * vec4, this isn't going to work as a bulk assignment anymore, so + * unroll it to element-by-element assignments and lower each of them. + * + * Note: to unroll into element-by-element assignments, we need to make + * clones of the LHS and RHS. This is safe because expressions and + * l-values are side-effect free. 
+ */ + void *ctx = ralloc_parent(ir); + int array_size = ir->lhs->type->array_size(); + for (int i = 0; i < array_size; ++i) { + ir_dereference_array *new_lhs = new(ctx) ir_dereference_array( + ir->lhs->clone(ctx, NULL), new(ctx) ir_constant(i)); + ir_dereference_array *new_rhs = new(ctx) ir_dereference_array( + ir->rhs->clone(ctx, NULL), new(ctx) ir_constant(i)); + this->handle_rvalue((ir_rvalue **) &new_rhs); + + /* Handle the LHS after creating the new assignment. This must + * happen in this order because handle_rvalue may replace the old LHS + * with an ir_expression of ir_binop_vector_extract. Since this is + * not a valid l-value, this will cause an assertion in the + * ir_assignment constructor to fail. + * + * If this occurs, replace the mangled LHS with a dereference of the + * vector, and replace the RHS with an ir_triop_vector_insert. + */ + ir_assignment *const assign = new(ctx) ir_assignment(new_lhs, new_rhs); + this->handle_rvalue((ir_rvalue **) &assign->lhs); + this->fix_lhs(assign); + + this->base_ir->insert_before(assign); + } + ir->remove(); + + return visit_continue; + } + + /* Handle the LHS as if it were an r-value. Normally + * rvalue_visit(ir_assignment *) only visits the RHS, but we need to lower + * expressions in the LHS as well. + * + * This may cause the LHS to get replaced with an ir_expression of + * ir_binop_vector_extract. If this occurs, replace it with a dereference + * of the vector, and replace the RHS with an ir_triop_vector_insert. + */ + handle_rvalue((ir_rvalue **)&ir->lhs); + this->fix_lhs(ir); + + return rvalue_visit(ir); +} + + +/** + * Set up base_ir properly and call visit_leave() on a newly created + * ir_assignment node. This is used in cases where we have to insert an + * ir_assignment in a place where we know the hierarchical visitor won't see + * it. 
+ */ +void +lower_tess_level_visitor::visit_new_assignment(ir_assignment *ir) +{ + ir_instruction *old_base_ir = this->base_ir; + this->base_ir = ir; + ir->accept(this); + this->base_ir = old_base_ir; +} + + +/** + * If a gl_TessLevel* variable appears as an argument in an ir_call + * expression, replace it with a temporary variable, and make sure the ir_call + * is preceded and/or followed by assignments that copy the contents of the + * temporary variable to and/or from gl_TessLevel*. Each of these + * assignments is then lowered to refer to gl_TessLevel*MESA. + */ +ir_visitor_status +lower_tess_level_visitor::visit_leave(ir_call *ir) +{ + void *ctx = ralloc_parent(ir); + + const exec_node *formal_param_node = ir->callee->parameters.head; + const exec_node *actual_param_node = ir->actual_parameters.head; + while (!actual_param_node->is_tail_sentinel()) { + ir_variable *formal_param = (ir_variable *) formal_param_node; + ir_rvalue *actual_param = (ir_rvalue *) actual_param_node; + + /* Advance formal_param_node and actual_param_node now so that we can + * safely replace actual_param with another node, if necessary, below. + */ + formal_param_node = formal_param_node->next; + actual_param_node = actual_param_node->next; + + if (!this->is_tess_level_array(actual_param)) + continue; + + /* User is trying to pass a whole gl_TessLevel* array to a function + * call. Since we are reshaping gl_TessLevel* from an array of floats + * to a vec4, this isn't going to work anymore, so use a temporary + * array instead. + */ + ir_variable *temp = new(ctx) ir_variable( + actual_param->type, "temp_tess_level", ir_var_temporary); + this->base_ir->insert_before(temp); + actual_param->replace_with( + new(ctx) ir_dereference_variable(temp)); + if (formal_param->data.mode == ir_var_function_in + || formal_param->data.mode == ir_var_function_inout) { + /* Copy from gl_TessLevel* to the temporary before the call. 
+ * Since we are going to insert this copy before the current + * instruction, we need to visit it afterwards to make sure it + * gets lowered. + */ + ir_assignment *new_assignment = new(ctx) ir_assignment( + new(ctx) ir_dereference_variable(temp), + actual_param->clone(ctx, NULL)); + this->base_ir->insert_before(new_assignment); + this->visit_new_assignment(new_assignment); + } + if (formal_param->data.mode == ir_var_function_out + || formal_param->data.mode == ir_var_function_inout) { + /* Copy from the temporary to gl_TessLevel* after the call. + * Since visit_list_elements() has already decided which + * instruction it's going to visit next, we need to visit + * afterwards to make sure it gets lowered. + */ + ir_assignment *new_assignment = new(ctx) ir_assignment( + actual_param->clone(ctx, NULL), + new(ctx) ir_dereference_variable(temp)); + this->base_ir->insert_after(new_assignment); + this->visit_new_assignment(new_assignment); + } + } + + return rvalue_visit(ir); +} + + +bool +lower_tess_level(gl_shader *shader) +{ + if ((shader->Stage != MESA_SHADER_TESS_CTRL) && + (shader->Stage != MESA_SHADER_TESS_EVAL)) + return false; + + lower_tess_level_visitor v(shader->Stage); + + visit_list_elements(&v, shader->ir); + + if (v.new_tess_level_outer_var) + shader->symbols->add_variable(v.new_tess_level_outer_var); + if (v.new_tess_level_inner_var) + shader->symbols->add_variable(v.new_tess_level_inner_var); + + return v.progress; +} diff --git a/src/compiler/glsl/lower_texture_projection.cpp b/src/compiler/glsl/lower_texture_projection.cpp new file mode 100644 index 0000000..95df106 --- /dev/null +++ b/src/compiler/glsl/lower_texture_projection.cpp @@ -0,0 +1,103 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, 
copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_texture_projection.cpp + * + * IR lower pass to perform the division of texture coordinates by the texture + * projector if present. + * + * Many GPUs have a texture sampling opcode that takes the projector + * and does the divide internally, thus the presence of the projector + * in the IR. For GPUs that don't, this saves the driver needing the + * logic for handling the divide. 
+ * + * \author Eric Anholt <eric@anholt.net> + */ + +#include "ir.h" + +namespace { + +class lower_texture_projection_visitor : public ir_hierarchical_visitor { +public: + lower_texture_projection_visitor() + { + progress = false; + } + + ir_visitor_status visit_leave(ir_texture *ir); + + bool progress; +}; + +} /* anonymous namespace */ + +ir_visitor_status +lower_texture_projection_visitor::visit_leave(ir_texture *ir) +{ + if (!ir->projector) + return visit_continue; + + void *mem_ctx = ralloc_parent(ir); + + ir_variable *var = new(mem_ctx) ir_variable(ir->projector->type, + "projector", ir_var_temporary); + base_ir->insert_before(var); + ir_dereference *deref = new(mem_ctx) ir_dereference_variable(var); + ir_expression *expr = new(mem_ctx) ir_expression(ir_unop_rcp, + ir->projector->type, + ir->projector, + NULL); + ir_assignment *assign = new(mem_ctx) ir_assignment(deref, expr, NULL); + base_ir->insert_before(assign); + + deref = new(mem_ctx) ir_dereference_variable(var); + ir->coordinate = new(mem_ctx) ir_expression(ir_binop_mul, + ir->coordinate->type, + ir->coordinate, + deref); + + if (ir->shadow_comparitor) { + deref = new(mem_ctx) ir_dereference_variable(var); + ir->shadow_comparitor = new(mem_ctx) ir_expression(ir_binop_mul, + ir->shadow_comparitor->type, + ir->shadow_comparitor, + deref); + } + + ir->projector = NULL; + + progress = true; + return visit_continue; +} + +bool +do_lower_texture_projection(exec_list *instructions) +{ + lower_texture_projection_visitor v; + + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/lower_ubo_reference.cpp b/src/compiler/glsl/lower_ubo_reference.cpp new file mode 100644 index 0000000..a172054 --- /dev/null +++ b/src/compiler/glsl/lower_ubo_reference.cpp @@ -0,0 +1,1042 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), 
+ * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_ubo_reference.cpp + * + * IR lower pass to replace dereferences of variables in a uniform + * buffer object with usage of ir_binop_ubo_load expressions, each of + * which can read data up to the size of a vec4. + * + * This relieves drivers of the responsibility to deal with tricky UBO + * layout issues like std140 structures and row_major matrices on + * their own. 
+ */ + +#include "lower_buffer_access.h" +#include "ir_builder.h" +#include "main/macros.h" +#include "glsl_parser_extras.h" + +using namespace ir_builder; + +namespace { +class lower_ubo_reference_visitor : + public lower_buffer_access::lower_buffer_access { +public: + lower_ubo_reference_visitor(struct gl_shader *shader) + : shader(shader) + { + } + + void handle_rvalue(ir_rvalue **rvalue); + ir_visitor_status visit_enter(ir_assignment *ir); + + void setup_for_load_or_store(void *mem_ctx, + ir_variable *var, + ir_rvalue *deref, + ir_rvalue **offset, + unsigned *const_offset, + bool *row_major, + int *matrix_columns, + unsigned packing); + ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type, + ir_rvalue *offset); + ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type, + ir_rvalue *offset); + + bool check_for_buffer_array_copy(ir_assignment *ir); + bool check_for_buffer_struct_copy(ir_assignment *ir); + void check_for_ssbo_store(ir_assignment *ir); + void write_to_memory(void *mem_ctx, ir_dereference *deref, ir_variable *var, + ir_variable *write_var, unsigned write_mask); + ir_call *ssbo_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset, + unsigned write_mask); + + enum { + ubo_load_access, + ssbo_load_access, + ssbo_store_access, + ssbo_unsized_array_length_access, + ssbo_atomic_access, + } buffer_access_type; + + void insert_buffer_access(void *mem_ctx, ir_dereference *deref, + const glsl_type *type, ir_rvalue *offset, + unsigned mask, int channel); + + ir_visitor_status visit_enter(class ir_expression *); + ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr); + void check_ssbo_unsized_array_length_expression(class ir_expression *); + void check_ssbo_unsized_array_length_assignment(ir_assignment *ir); + + ir_expression *process_ssbo_unsized_array_length(ir_rvalue **, + ir_dereference *, + ir_variable *); + ir_expression *emit_ssbo_get_buffer_size(void *mem_ctx); + + unsigned 
calculate_unsized_array_stride(ir_dereference *deref, + unsigned packing); + + ir_call *lower_ssbo_atomic_intrinsic(ir_call *ir); + ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir); + ir_visitor_status visit_enter(ir_call *ir); + + struct gl_shader *shader; + struct gl_uniform_buffer_variable *ubo_var; + ir_rvalue *uniform_block; + bool progress; +}; + +/** + * Determine the name of the interface block field + * + * This is the name of the specific member as it would appear in the + * \c gl_uniform_buffer_variable::Name field in the shader's + * \c UniformBlocks array. + */ +static const char * +interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d, + ir_rvalue **nonconst_block_index) +{ + *nonconst_block_index = NULL; + char *name_copy = NULL; + size_t base_length = 0; + + /* Loop back through the IR until we find the uniform block */ + ir_rvalue *ir = d; + while (ir != NULL) { + switch (ir->ir_type) { + case ir_type_dereference_variable: { + /* Exit loop */ + ir = NULL; + break; + } + + case ir_type_dereference_record: { + ir_dereference_record *r = (ir_dereference_record *) ir; + ir = r->record->as_dereference(); + + /* If we got here it means any previous array subscripts belong to + * block members and not the block itself so skip over them in the + * next pass. 
+ */ + d = ir; + break; + } + + case ir_type_dereference_array: { + ir_dereference_array *a = (ir_dereference_array *) ir; + ir = a->array->as_dereference(); + break; + } + + case ir_type_swizzle: { + ir_swizzle *s = (ir_swizzle *) ir; + ir = s->val->as_dereference(); + /* Skip swizzle in the next pass */ + d = ir; + break; + } + + default: + assert(!"Should not get here."); + break; + } + } + + while (d != NULL) { + switch (d->ir_type) { + case ir_type_dereference_variable: { + ir_dereference_variable *v = (ir_dereference_variable *) d; + if (name_copy != NULL && + v->var->is_interface_instance() && + v->var->type->is_array()) { + return name_copy; + } else { + *nonconst_block_index = NULL; + return base_name; + } + + break; + } + + case ir_type_dereference_array: { + ir_dereference_array *a = (ir_dereference_array *) d; + size_t new_length; + + if (name_copy == NULL) { + name_copy = ralloc_strdup(mem_ctx, base_name); + base_length = strlen(name_copy); + } + + /* For arrays of arrays we start at the innermost array and work our + * way out so we need to insert the subscript at the base of the + * name string rather than just attaching it to the end. 
+ */ + new_length = base_length; + ir_constant *const_index = a->array_index->as_constant(); + char *end = ralloc_strdup(NULL, &name_copy[new_length]); + if (!const_index) { + ir_rvalue *array_index = a->array_index; + if (array_index->type != glsl_type::uint_type) + array_index = i2u(array_index); + + if (a->array->type->is_array() && + a->array->type->fields.array->is_array()) { + ir_constant *base_size = new(mem_ctx) + ir_constant(a->array->type->fields.array->arrays_of_arrays_size()); + array_index = mul(array_index, base_size); + } + + if (*nonconst_block_index) { + *nonconst_block_index = add(*nonconst_block_index, array_index); + } else { + *nonconst_block_index = array_index; + } + + ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[0]%s", + end); + } else { + ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[%d]%s", + const_index->get_uint_component(0), + end); + } + ralloc_free(end); + + d = a->array->as_dereference(); + + break; + } + + default: + assert(!"Should not get here."); + break; + } + } + + assert(!"Should not get here."); + return NULL; +} + +void +lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx, + ir_variable *var, + ir_rvalue *deref, + ir_rvalue **offset, + unsigned *const_offset, + bool *row_major, + int *matrix_columns, + unsigned packing) +{ + /* Determine the name of the interface block */ + ir_rvalue *nonconst_block_index; + const char *const field_name = + interface_field_name(mem_ctx, (char *) var->get_interface_type()->name, + deref, &nonconst_block_index); + + /* Locate the block by interface name */ + unsigned num_blocks; + struct gl_uniform_block **blocks; + if (this->buffer_access_type != ubo_load_access) { + num_blocks = shader->NumShaderStorageBlocks; + blocks = shader->ShaderStorageBlocks; + } else { + num_blocks = shader->NumUniformBlocks; + blocks = shader->UniformBlocks; + } + this->uniform_block = NULL; + for (unsigned i = 0; i < num_blocks; i++) { + if (strcmp(field_name, blocks[i]->Name) == 
0) { + + ir_constant *index = new(mem_ctx) ir_constant(i); + + if (nonconst_block_index) { + this->uniform_block = add(nonconst_block_index, index); + } else { + this->uniform_block = index; + } + + this->ubo_var = var->is_interface_instance() + ? &blocks[i]->Uniforms[0] : &blocks[i]->Uniforms[var->data.location]; + + break; + } + } + + assert(this->uniform_block); + + *const_offset = ubo_var->Offset; + + setup_buffer_access(mem_ctx, var, deref, offset, const_offset, row_major, + matrix_columns, packing); +} + +void +lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_dereference *deref = (*rvalue)->as_dereference(); + if (!deref) + return; + + ir_variable *var = deref->variable_referenced(); + if (!var || !var->is_in_buffer_block()) + return; + + void *mem_ctx = ralloc_parent(shader->ir); + + ir_rvalue *offset = NULL; + unsigned const_offset; + bool row_major; + int matrix_columns; + unsigned packing = var->get_interface_type()->interface_packing; + + this->buffer_access_type = + var->is_in_shader_storage_block() ? + ssbo_load_access : ubo_load_access; + + /* Compute the offset to the start of the dereference as well as other + * information we need to configure the load + */ + setup_for_load_or_store(mem_ctx, var, deref, + &offset, &const_offset, + &row_major, &matrix_columns, + packing); + assert(offset); + + /* Now that we've calculated the offset to the start of the + * dereference, walk over the type and emit loads into a temporary. 
+ */ + const glsl_type *type = (*rvalue)->type; + ir_variable *load_var = new(mem_ctx) ir_variable(type, + "ubo_load_temp", + ir_var_temporary); + base_ir->insert_before(load_var); + + ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type, + "ubo_load_temp_offset", + ir_var_temporary); + base_ir->insert_before(load_offset); + base_ir->insert_before(assign(load_offset, offset)); + + deref = new(mem_ctx) ir_dereference_variable(load_var); + emit_access(mem_ctx, false, deref, load_offset, const_offset, + row_major, matrix_columns, packing, 0); + *rvalue = deref; + + progress = true; +} + +ir_expression * +lower_ubo_reference_visitor::ubo_load(void *mem_ctx, + const glsl_type *type, + ir_rvalue *offset) +{ + ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL); + return new(mem_ctx) + ir_expression(ir_binop_ubo_load, + type, + block_ref, + offset); + +} + +static bool +shader_storage_buffer_object(const _mesa_glsl_parse_state *state) +{ + return state->ARB_shader_storage_buffer_object_enable; +} + +ir_call * +lower_ubo_reference_visitor::ssbo_store(void *mem_ctx, + ir_rvalue *deref, + ir_rvalue *offset, + unsigned write_mask) +{ + exec_list sig_params; + + ir_variable *block_ref = new(mem_ctx) + ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in); + sig_params.push_tail(block_ref); + + ir_variable *offset_ref = new(mem_ctx) + ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); + sig_params.push_tail(offset_ref); + + ir_variable *val_ref = new(mem_ctx) + ir_variable(deref->type, "value" , ir_var_function_in); + sig_params.push_tail(val_ref); + + ir_variable *writemask_ref = new(mem_ctx) + ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in); + sig_params.push_tail(writemask_ref); + + ir_function_signature *sig = new(mem_ctx) + ir_function_signature(glsl_type::void_type, shader_storage_buffer_object); + assert(sig); + sig->replace_parameters(&sig_params); + sig->is_intrinsic = true; + + 
ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_ssbo"); + f->add_signature(sig); + + exec_list call_params; + call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL)); + call_params.push_tail(offset->clone(mem_ctx, NULL)); + call_params.push_tail(deref->clone(mem_ctx, NULL)); + call_params.push_tail(new(mem_ctx) ir_constant(write_mask)); + return new(mem_ctx) ir_call(sig, NULL, &call_params); +} + +ir_call * +lower_ubo_reference_visitor::ssbo_load(void *mem_ctx, + const struct glsl_type *type, + ir_rvalue *offset) +{ + exec_list sig_params; + + ir_variable *block_ref = new(mem_ctx) + ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in); + sig_params.push_tail(block_ref); + + ir_variable *offset_ref = new(mem_ctx) + ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in); + sig_params.push_tail(offset_ref); + + ir_function_signature *sig = + new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object); + assert(sig); + sig->replace_parameters(&sig_params); + sig->is_intrinsic = true; + + ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_ssbo"); + f->add_signature(sig); + + ir_variable *result = new(mem_ctx) + ir_variable(type, "ssbo_load_result", ir_var_temporary); + base_ir->insert_before(result); + ir_dereference_variable *deref_result = new(mem_ctx) + ir_dereference_variable(result); + + exec_list call_params; + call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL)); + call_params.push_tail(offset->clone(mem_ctx, NULL)); + + return new(mem_ctx) ir_call(sig, deref_result, &call_params); +} + +void +lower_ubo_reference_visitor::insert_buffer_access(void *mem_ctx, + ir_dereference *deref, + const glsl_type *type, + ir_rvalue *offset, + unsigned mask, + int channel) +{ + switch (this->buffer_access_type) { + case ubo_load_access: + base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), + ubo_load(mem_ctx, type, offset), + mask)); + break; + case ssbo_load_access: { + ir_call *load_ssbo 
= ssbo_load(mem_ctx, type, offset); + base_ir->insert_before(load_ssbo); + ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL); + ir_assignment *assignment = + assign(deref->clone(mem_ctx, NULL), value, mask); + base_ir->insert_before(assignment); + break; + } + case ssbo_store_access: + if (channel >= 0) { + base_ir->insert_after(ssbo_store(mem_ctx, + swizzle(deref, channel, 1), + offset, 1)); + } else { + base_ir->insert_after(ssbo_store(mem_ctx, deref, offset, mask)); + } + break; + default: + unreachable("invalid buffer_access_type in insert_buffer_access"); + } +} + +void +lower_ubo_reference_visitor::write_to_memory(void *mem_ctx, + ir_dereference *deref, + ir_variable *var, + ir_variable *write_var, + unsigned write_mask) +{ + ir_rvalue *offset = NULL; + unsigned const_offset; + bool row_major; + int matrix_columns; + unsigned packing = var->get_interface_type()->interface_packing; + + this->buffer_access_type = ssbo_store_access; + + /* Compute the offset to the start of the dereference as well as other + * information we need to configure the write + */ + setup_for_load_or_store(mem_ctx, var, deref, + &offset, &const_offset, + &row_major, &matrix_columns, + packing); + assert(offset); + + /* Now emit writes from the temporary to memory */ + ir_variable *write_offset = + new(mem_ctx) ir_variable(glsl_type::uint_type, + "ssbo_store_temp_offset", + ir_var_temporary); + + base_ir->insert_before(write_offset); + base_ir->insert_before(assign(write_offset, offset)); + + deref = new(mem_ctx) ir_dereference_variable(write_var); + emit_access(mem_ctx, true, deref, write_offset, const_offset, + row_major, matrix_columns, packing, write_mask); +} + +ir_visitor_status +lower_ubo_reference_visitor::visit_enter(ir_expression *ir) +{ + check_ssbo_unsized_array_length_expression(ir); + return rvalue_visit(ir); +} + +ir_expression * +lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression *expr) +{ + if (expr->operation != + 
ir_expression_operation(ir_unop_ssbo_unsized_array_length)) + return NULL; + + ir_rvalue *rvalue = expr->operands[0]->as_rvalue(); + if (!rvalue || + !rvalue->type->is_array() || !rvalue->type->is_unsized_array()) + return NULL; + + ir_dereference *deref = expr->operands[0]->as_dereference(); + if (!deref) + return NULL; + + ir_variable *var = expr->operands[0]->variable_referenced(); + if (!var || !var->is_in_shader_storage_block()) + return NULL; + return process_ssbo_unsized_array_length(&rvalue, deref, var); +} + +void +lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *ir) +{ + if (ir->operation == + ir_expression_operation(ir_unop_ssbo_unsized_array_length)) { + /* Don't replace this unop if it is found alone. It is going to be + * removed by the optimization passes or replaced if it is part of + * an ir_assignment or another ir_expression. + */ + return; + } + + for (unsigned i = 0; i < ir->get_num_operands(); i++) { + if (ir->operands[i]->ir_type != ir_type_expression) + continue; + ir_expression *expr = (ir_expression *) ir->operands[i]; + ir_expression *temp = calculate_ssbo_unsized_array_length(expr); + if (!temp) + continue; + + delete expr; + ir->operands[i] = temp; + } +} + +void +lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir) +{ + if (!ir->rhs || ir->rhs->ir_type != ir_type_expression) + return; + + ir_expression *expr = (ir_expression *) ir->rhs; + ir_expression *temp = calculate_ssbo_unsized_array_length(expr); + if (!temp) + return; + + delete expr; + ir->rhs = temp; + return; +} + +ir_expression * +lower_ubo_reference_visitor::emit_ssbo_get_buffer_size(void *mem_ctx) +{ + ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL); + return new(mem_ctx) ir_expression(ir_unop_get_buffer_size, + glsl_type::int_type, + block_ref); +} + +unsigned +lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref, + unsigned packing) +{ + unsigned 
array_stride = 0; + + switch (deref->ir_type) { + case ir_type_dereference_variable: + { + ir_dereference_variable *deref_var = (ir_dereference_variable *)deref; + const struct glsl_type *unsized_array_type = NULL; + /* An unsized array can be sized by other lowering passes, so pick + * the first field of the array which has the data type of the unsized + * array. + */ + unsized_array_type = deref_var->var->type->fields.array; + + /* Whether or not the field is row-major (because it might be a + * bvec2 or something) does not affect the array itself. We need + * to know whether an array element in its entirety is row-major. + */ + const bool array_row_major = + is_dereferenced_thing_row_major(deref_var); + + if (packing == GLSL_INTERFACE_PACKING_STD430) { + array_stride = unsized_array_type->std430_array_stride(array_row_major); + } else { + array_stride = unsized_array_type->std140_size(array_row_major); + array_stride = glsl_align(array_stride, 16); + } + break; + } + case ir_type_dereference_record: + { + ir_dereference_record *deref_record = (ir_dereference_record *) deref; + ir_dereference *interface_deref = + deref_record->record->as_dereference(); + assert(interface_deref != NULL); + const struct glsl_type *interface_type = interface_deref->type; + unsigned record_length = interface_type->length; + /* Unsized array is always the last element of the interface */ + const struct glsl_type *unsized_array_type = + interface_type->fields.structure[record_length - 1].type->fields.array; + + const bool array_row_major = + is_dereferenced_thing_row_major(deref_record); + + if (packing == GLSL_INTERFACE_PACKING_STD430) { + array_stride = unsized_array_type->std430_array_stride(array_row_major); + } else { + array_stride = unsized_array_type->std140_size(array_row_major); + array_stride = glsl_align(array_stride, 16); + } + break; + } + default: + unreachable("Unsupported dereference type"); + } + return array_stride; +} + +ir_expression * 
+lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue, + ir_dereference *deref, + ir_variable *var) +{ + void *mem_ctx = ralloc_parent(*rvalue); + + ir_rvalue *base_offset = NULL; + unsigned const_offset; + bool row_major; + int matrix_columns; + unsigned packing = var->get_interface_type()->interface_packing; + int unsized_array_stride = calculate_unsized_array_stride(deref, packing); + + this->buffer_access_type = ssbo_unsized_array_length_access; + + /* Compute the offset to the start of the dereference as well as other + * information we need to calculate the length. + */ + setup_for_load_or_store(mem_ctx, var, deref, + &base_offset, &const_offset, + &row_major, &matrix_columns, + packing); + /* array.length() = + * max((buffer_object_size - offset_of_array) / stride_of_array, 0) + */ + ir_expression *buffer_size = emit_ssbo_get_buffer_size(mem_ctx); + + ir_expression *offset_of_array = new(mem_ctx) + ir_expression(ir_binop_add, base_offset, + new(mem_ctx) ir_constant(const_offset)); + ir_expression *offset_of_array_int = new(mem_ctx) + ir_expression(ir_unop_u2i, offset_of_array); + + ir_expression *sub = new(mem_ctx) + ir_expression(ir_binop_sub, buffer_size, offset_of_array_int); + ir_expression *div = new(mem_ctx) + ir_expression(ir_binop_div, sub, + new(mem_ctx) ir_constant(unsized_array_stride)); + ir_expression *max = new(mem_ctx) + ir_expression(ir_binop_max, div, new(mem_ctx) ir_constant(0)); + + return max; +} + +void +lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir) +{ + if (!ir || !ir->lhs) + return; + + ir_rvalue *rvalue = ir->lhs->as_rvalue(); + if (!rvalue) + return; + + ir_dereference *deref = ir->lhs->as_dereference(); + if (!deref) + return; + + ir_variable *var = ir->lhs->variable_referenced(); + if (!var || !var->is_in_shader_storage_block()) + return; + + /* We have a write to a buffer variable, so declare a temporary and rewrite + * the assignment so that the temporary is the LHS. 
+ */ + void *mem_ctx = ralloc_parent(shader->ir); + + const glsl_type *type = rvalue->type; + ir_variable *write_var = new(mem_ctx) ir_variable(type, + "ssbo_store_temp", + ir_var_temporary); + base_ir->insert_before(write_var); + ir->lhs = new(mem_ctx) ir_dereference_variable(write_var); + + /* Now we have to write the value assigned to the temporary back to memory */ + write_to_memory(mem_ctx, deref, var, write_var, ir->write_mask); + progress = true; +} + +static bool +is_buffer_backed_variable(ir_variable *var) +{ + return var->is_in_buffer_block() || + var->data.mode == ir_var_shader_shared; +} + +bool +lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir) +{ + if (!ir || !ir->lhs || !ir->rhs) + return false; + + /* LHS and RHS must be arrays + * FIXME: arrays of arrays? + */ + if (!ir->lhs->type->is_array() || !ir->rhs->type->is_array()) + return false; + + /* RHS must be a buffer-backed variable. This is what can cause the problem + * since it would lead to a series of loads that need to live until we + * see the writes to the LHS. 
+ */ + ir_variable *rhs_var = ir->rhs->variable_referenced(); + if (!rhs_var || !is_buffer_backed_variable(rhs_var)) + return false; + + /* Split the array copy into individual element copies to reduce + * register pressure + */ + ir_dereference *rhs_deref = ir->rhs->as_dereference(); + if (!rhs_deref) + return false; + + ir_dereference *lhs_deref = ir->lhs->as_dereference(); + if (!lhs_deref) + return false; + + assert(lhs_deref->type->length == rhs_deref->type->length); + void *mem_ctx = ralloc_parent(shader->ir); + + for (unsigned i = 0; i < lhs_deref->type->length; i++) { + ir_dereference *lhs_i = + new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL), + new(mem_ctx) ir_constant(i)); + + ir_dereference *rhs_i = + new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL), + new(mem_ctx) ir_constant(i)); + ir->insert_after(assign(lhs_i, rhs_i)); + } + + ir->remove(); + progress = true; + return true; +} + +bool +lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment *ir) +{ + if (!ir || !ir->lhs || !ir->rhs) + return false; + + /* LHS and RHS must be records */ + if (!ir->lhs->type->is_record() || !ir->rhs->type->is_record()) + return false; + + /* RHS must be a buffer-backed variable. This is what can cause the problem + * since it would lead to a series of loads that need to live until we + * see the writes to the LHS. 
+ */ + ir_variable *rhs_var = ir->rhs->variable_referenced(); + if (!rhs_var || !is_buffer_backed_variable(rhs_var)) + return false; + + /* Split the struct copy into individual element copies to reduce + * register pressure + */ + ir_dereference *rhs_deref = ir->rhs->as_dereference(); + if (!rhs_deref) + return false; + + ir_dereference *lhs_deref = ir->lhs->as_dereference(); + if (!lhs_deref) + return false; + + assert(lhs_deref->type->record_compare(rhs_deref->type)); + void *mem_ctx = ralloc_parent(shader->ir); + + for (unsigned i = 0; i < lhs_deref->type->length; i++) { + const char *field_name = lhs_deref->type->fields.structure[i].name; + ir_dereference *lhs_field = + new(mem_ctx) ir_dereference_record(lhs_deref->clone(mem_ctx, NULL), + field_name); + ir_dereference *rhs_field = + new(mem_ctx) ir_dereference_record(rhs_deref->clone(mem_ctx, NULL), + field_name); + ir->insert_after(assign(lhs_field, rhs_field)); + } + + ir->remove(); + progress = true; + return true; +} + +ir_visitor_status +lower_ubo_reference_visitor::visit_enter(ir_assignment *ir) +{ + /* Array and struct copies could involve large amounts of load/store + * operations. To improve register pressure we want to special-case + * these and split them into individual element copies. + * This way we avoid emitting all the loads for the RHS first and + * all the writes for the LHS second and register usage is more + * efficient. + */ + if (check_for_buffer_array_copy(ir)) + return visit_continue_with_parent; + + if (check_for_buffer_struct_copy(ir)) + return visit_continue_with_parent; + + check_ssbo_unsized_array_length_assignment(ir); + check_for_ssbo_store(ir); + return rvalue_visit(ir); +} + +/* Lowers the intrinsic call to a new internal intrinsic that swaps the + * access to the buffer variable in the first parameter by an offset + * and block index. This involves creating the new internal intrinsic + * (i.e. the new function signature). 
+ */
ir_call *
lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
{
   /* SSBO atomics usually have 2 parameters, the buffer variable and an
    * integer argument. The exception is CompSwap, that has an additional
    * integer parameter.
    */
   int param_count = ir->actual_parameters.length();
   assert(param_count == 2 || param_count == 3);

   /* First argument must be a scalar integer buffer variable */
   exec_node *param = ir->actual_parameters.get_head();
   ir_instruction *inst = (ir_instruction *) param;
   assert(inst->ir_type == ir_type_dereference_variable ||
          inst->ir_type == ir_type_dereference_array ||
          inst->ir_type == ir_type_dereference_record ||
          inst->ir_type == ir_type_swizzle);

   ir_rvalue *deref = (ir_rvalue *) inst;
   assert(deref->type->is_scalar() && deref->type->is_integer());

   ir_variable *var = deref->variable_referenced();
   assert(var);

   /* Compute the offset to the start of the dereference and the
    * block index
    */
   void *mem_ctx = ralloc_parent(shader->ir);

   ir_rvalue *offset = NULL;
   unsigned const_offset;
   bool row_major;
   int matrix_columns;
   unsigned packing = var->get_interface_type()->interface_packing;

   this->buffer_access_type = ssbo_atomic_access;

   setup_for_load_or_store(mem_ctx, var, deref,
                           &offset, &const_offset,
                           &row_major, &matrix_columns,
                           packing);
   assert(offset);
   assert(!row_major);
   assert(matrix_columns == 1);

   ir_rvalue *deref_offset =
      add(offset, new(mem_ctx) ir_constant(const_offset));
   ir_rvalue *block_index = this->uniform_block->clone(mem_ctx, NULL);

   /* Create the new internal function signature that will take a block
    * index and offset instead of a buffer variable
    */
   exec_list sig_params;
   ir_variable *sig_param = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
   sig_params.push_tail(sig_param);

   sig_param = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
   sig_params.push_tail(sig_param);

   const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ?
      glsl_type::int_type : glsl_type::uint_type;
   sig_param = new(mem_ctx)
         ir_variable(type, "data1", ir_var_function_in);
   sig_params.push_tail(sig_param);

   if (param_count == 3) {
      sig_param = new(mem_ctx)
            ir_variable(type, "data2", ir_var_function_in);
      sig_params.push_tail(sig_param);
   }

   ir_function_signature *sig =
      new(mem_ctx) ir_function_signature(deref->type,
                                         shader_storage_buffer_object);
   assert(sig);
   sig->replace_parameters(&sig_params);
   sig->is_intrinsic = true;

   /* The internal intrinsic is named "<callee>_ssbo".  Format with a bounded
    * write so that an unexpectedly long callee name can never run past the
    * end of the stack buffer; truncation would yield a wrong name, so it is
    * asserted against.
    */
   char func_name[64];
   const int name_len = snprintf(func_name, sizeof(func_name), "%s_ssbo",
                                 ir->callee_name());
   assert(name_len >= 0 && (size_t) name_len < sizeof(func_name));
   (void) name_len;

   ir_function *f = new(mem_ctx) ir_function(func_name);
   f->add_signature(sig);

   /* Now, create the call to the internal intrinsic */
   exec_list call_params;
   call_params.push_tail(block_index);
   call_params.push_tail(deref_offset);
   param = ir->actual_parameters.get_head()->get_next();
   ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
   call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
   if (param_count == 3) {
      param = param->get_next();
      param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
      call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
   }
   ir_dereference_variable *return_deref =
      ir->return_deref->clone(mem_ctx, NULL);
   return new(mem_ctx) ir_call(sig, return_deref, &call_params);
}

/**
 * Returns a lowered replacement call when \c ir is one of the atomic
 * intrinsics operating on a shader storage block variable; otherwise returns
 * \c ir itself, unchanged.
 */
ir_call *
lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir)
{
   static const char *const atomic_intrinsics[] = {
      "__intrinsic_atomic_add",
      "__intrinsic_atomic_min",
      "__intrinsic_atomic_max",
      "__intrinsic_atomic_and",
      "__intrinsic_atomic_or",
      "__intrinsic_atomic_xor",
      "__intrinsic_atomic_exchange",
      "__intrinsic_atomic_comp_swap",
   };

   exec_list& params = ir->actual_parameters;

   /* length() walks the list, so count the parameters only once. */
   const unsigned param_count = params.length();
   if (param_count < 2 || param_count > 3)
      return ir;

   ir_rvalue *rvalue =
      ((ir_instruction *) params.get_head())->as_rvalue();
   if (!rvalue)
      return ir;

   ir_variable *var = rvalue->variable_referenced();
   if (!var || !var->is_in_shader_storage_block())
      return ir;

   const char *callee = ir->callee_name();
   const unsigned num_intrinsics =
      sizeof(atomic_intrinsics) / sizeof(atomic_intrinsics[0]);
   for (unsigned i = 0; i < num_intrinsics; i++) {
      if (strcmp(atomic_intrinsics[i], callee) == 0)
         return lower_ssbo_atomic_intrinsic(ir);
   }

   return ir;
}


ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_call *ir)
{
   ir_call *new_ir = check_for_ssbo_atomic_intrinsic(ir);
   if (new_ir != ir) {
      /* The call was an SSBO atomic: swap in the lowered call. */
      progress = true;
      base_ir->replace_with(new_ir);
      return visit_continue_with_parent;
   }

   return rvalue_visit(ir);
}


} /* unnamed namespace */

/**
 * Entry point of the pass: lowers buffer references in \c shader->ir,
 * re-running the visitor until it reports no further progress.
 */
void
lower_ubo_reference(struct gl_shader *shader)
{
   lower_ubo_reference_visitor v(shader);

   /* Loop over the instructions lowering references.  Lowering a deref of a
    * UBO array that uses a UBO dereference as its index produces a
    * collection of instructions which all carry cloned UBO dereferences for
    * that array index, so one walk over the list is not enough.
    */
   do {
      v.progress = false;
      visit_list_elements(&v, shader->ir);
   } while (v.progress);
}
diff --git a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp new file mode 100644 index 0000000..278d545 --- /dev/null +++ b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp @@ -0,0 +1,585 @@ +/* + * Copyright © 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_variable_index_to_cond_assign.cpp + * + * Turns non-constant indexing into array types to a series of + * conditional moves of each element into a temporary. + * + * Pre-DX10 GPUs often don't have a native way to do this operation, + * and this works around that. + * + * The lowering process proceeds as follows. Each non-constant index + * found in an r-value is converted to a canonical form \c array[i].
Each + * element of the array is conditionally assigned to a temporary by comparing + * \c i to a constant index. This is done by cloning the canonical form and + * replacing all occurrences of \c i with a constant. Each remaining occurrence + * of the canonical form in the IR is replaced with a dereference of the + * temporary variable. + * + * L-values with non-constant indices are handled similarly. In this case, + * the RHS of the assignment is assigned to a temporary. The non-constant + * index is replaced with the canonical form (just like for r-values). The + * temporary is conditionally assigned to each element of the canonical form + * by comparing \c i with each index. The same clone-and-replace scheme is + * used. + */ + +#include "ir.h" +#include "ir_rvalue_visitor.h" +#include "ir_optimization.h" +#include "compiler/glsl_types.h" +#include "main/macros.h" + +/** + * Generate a comparison value for a block of indices + * + * Lowering passes for non-constant indexing of arrays, matrices, or vectors + * can use this to generate blocks of index comparison values. + * + * \param instructions List where new instructions will be appended + * \param index \c ir_variable containing the desired index + * \param base Base value for this block of comparisons + * \param components Number of unique index values to compare. This must + * be in the range [1, 4]. + * \param mem_ctx ralloc memory context to be used for all allocations. + * + * \returns + * An \c ir_rvalue that \b must be cloned for each use in conditional + * assignments, etc. 
+ */ +ir_rvalue * +compare_index_block(exec_list *instructions, ir_variable *index, + unsigned base, unsigned components, void *mem_ctx) +{ + ir_rvalue *broadcast_index = new(mem_ctx) ir_dereference_variable(index); + + assert(index->type->is_scalar()); + assert(index->type->base_type == GLSL_TYPE_INT || index->type->base_type == GLSL_TYPE_UINT); + assert(components >= 1 && components <= 4); + + if (components > 1) { + const ir_swizzle_mask m = { 0, 0, 0, 0, components, false }; + broadcast_index = new(mem_ctx) ir_swizzle(broadcast_index, m); + } + + /* Compare the desired index value with the next block of four indices. + */ + ir_constant_data test_indices_data; + memset(&test_indices_data, 0, sizeof(test_indices_data)); + test_indices_data.i[0] = base; + test_indices_data.i[1] = base + 1; + test_indices_data.i[2] = base + 2; + test_indices_data.i[3] = base + 3; + + ir_constant *const test_indices = + new(mem_ctx) ir_constant(broadcast_index->type, + &test_indices_data); + + ir_rvalue *const condition_val = + new(mem_ctx) ir_expression(ir_binop_equal, + glsl_type::bvec(components), + broadcast_index, + test_indices); + + ir_variable *const condition = + new(mem_ctx) ir_variable(condition_val->type, + "dereference_condition", + ir_var_temporary); + instructions->push_tail(condition); + + ir_rvalue *const cond_deref = + new(mem_ctx) ir_dereference_variable(condition); + instructions->push_tail(new(mem_ctx) ir_assignment(cond_deref, condition_val, 0)); + + return cond_deref; +} + +static inline bool +is_array_or_matrix(const ir_rvalue *ir) +{ + return (ir->type->is_array() || ir->type->is_matrix()); +} + +namespace { +/** + * Replace a dereference of a variable with a specified r-value + * + * Each time a dereference of the specified value is replaced, the r-value + * tree is cloned. 
+ */ +class deref_replacer : public ir_rvalue_visitor { +public: + deref_replacer(const ir_variable *variable_to_replace, ir_rvalue *value) + : variable_to_replace(variable_to_replace), value(value), + progress(false) + { + assert(this->variable_to_replace != NULL); + assert(this->value != NULL); + } + + virtual void handle_rvalue(ir_rvalue **rvalue) + { + ir_dereference_variable *const dv = (*rvalue)->as_dereference_variable(); + + if ((dv != NULL) && (dv->var == this->variable_to_replace)) { + this->progress = true; + *rvalue = this->value->clone(ralloc_parent(*rvalue), NULL); + } + } + + const ir_variable *variable_to_replace; + ir_rvalue *value; + bool progress; +}; + +/** + * Find a variable index dereference of an array in an rvalue tree + */ +class find_variable_index : public ir_hierarchical_visitor { +public: + find_variable_index() + : deref(NULL) + { + /* empty */ + } + + virtual ir_visitor_status visit_enter(ir_dereference_array *ir) + { + if (is_array_or_matrix(ir->array) + && (ir->array_index->as_constant() == NULL)) { + this->deref = ir; + return visit_stop; + } + + return visit_continue; + } + + /** + * First array dereference found in the tree that has a non-constant index. + */ + ir_dereference_array *deref; +}; + +struct assignment_generator +{ + ir_instruction* base_ir; + ir_dereference *rvalue; + ir_variable *old_index; + bool is_write; + unsigned int write_mask; + ir_variable* var; + + assignment_generator() + : base_ir(NULL), + rvalue(NULL), + old_index(NULL), + is_write(false), + write_mask(0), + var(NULL) + { + } + + void generate(unsigned i, ir_rvalue* condition, exec_list *list) const + { + /* Just clone the rest of the deref chain when trying to get at the + * underlying variable. + */ + void *mem_ctx = ralloc_parent(base_ir); + + /* Clone the old r-value in its entirety. Then replace any occurances of + * the old variable index with the new constant index. 
+ */ + ir_dereference *element = this->rvalue->clone(mem_ctx, NULL); + ir_constant *const index = new(mem_ctx) ir_constant(i); + deref_replacer r(this->old_index, index); + element->accept(&r); + assert(r.progress); + + /* Generate a conditional assignment to (or from) the constant indexed + * array dereference. + */ + ir_rvalue *variable = new(mem_ctx) ir_dereference_variable(this->var); + ir_assignment *const assignment = (is_write) + ? new(mem_ctx) ir_assignment(element, variable, condition, write_mask) + : new(mem_ctx) ir_assignment(variable, element, condition); + + list->push_tail(assignment); + } +}; + +struct switch_generator +{ + /* make TFunction a template parameter if you need to use other generators */ + typedef assignment_generator TFunction; + const TFunction& generator; + + ir_variable* index; + unsigned linear_sequence_max_length; + unsigned condition_components; + + void *mem_ctx; + + switch_generator(const TFunction& generator, ir_variable *index, + unsigned linear_sequence_max_length, + unsigned condition_components) + : generator(generator), index(index), + linear_sequence_max_length(linear_sequence_max_length), + condition_components(condition_components) + { + this->mem_ctx = ralloc_parent(index); + } + + void linear_sequence(unsigned begin, unsigned end, exec_list *list) + { + if (begin == end) + return; + + /* If the array access is a read, read the first element of this subregion + * unconditionally. The remaining tests will possibly overwrite this + * value with one of the other array elements. + * + * This optimization cannot be done for writes because it will cause the + * first element of the subregion to be written possibly *in addition* to + * one of the other elements. 
+ */ + unsigned first; + if (!this->generator.is_write) { + this->generator.generate(begin, 0, list); + first = begin + 1; + } else { + first = begin; + } + + for (unsigned i = first; i < end; i += 4) { + const unsigned comps = MIN2(condition_components, end - i); + + ir_rvalue *const cond_deref = + compare_index_block(list, index, i, comps, this->mem_ctx); + + if (comps == 1) { + this->generator.generate(i, cond_deref->clone(this->mem_ctx, NULL), + list); + } else { + for (unsigned j = 0; j < comps; j++) { + ir_rvalue *const cond_swiz = + new(this->mem_ctx) ir_swizzle(cond_deref->clone(this->mem_ctx, NULL), + j, 0, 0, 0, 1); + + this->generator.generate(i + j, cond_swiz, list); + } + } + } + } + + void bisect(unsigned begin, unsigned end, exec_list *list) + { + unsigned middle = (begin + end) >> 1; + + assert(index->type->is_integer()); + + ir_constant *const middle_c = (index->type->base_type == GLSL_TYPE_UINT) + ? new(this->mem_ctx) ir_constant((unsigned)middle) + : new(this->mem_ctx) ir_constant((int)middle); + + + ir_dereference_variable *deref = + new(this->mem_ctx) ir_dereference_variable(this->index); + + ir_expression *less = + new(this->mem_ctx) ir_expression(ir_binop_less, glsl_type::bool_type, + deref, middle_c); + + ir_if *if_less = new(this->mem_ctx) ir_if(less); + + generate(begin, middle, &if_less->then_instructions); + generate(middle, end, &if_less->else_instructions); + + list->push_tail(if_less); + } + + void generate(unsigned begin, unsigned end, exec_list *list) + { + unsigned length = end - begin; + if (length <= this->linear_sequence_max_length) + return linear_sequence(begin, end, list); + else + return bisect(begin, end, list); + } +}; + +/** + * Visitor class for replacing expressions with ir_constant values. 
+ */

class variable_index_to_cond_assign_visitor : public ir_rvalue_visitor {
public:
   variable_index_to_cond_assign_visitor(gl_shader_stage stage,
                                         bool lower_input,
                                         bool lower_output,
                                         bool lower_temp,
                                         bool lower_uniform)
   {
      this->progress = false;
      this->stage = stage;
      this->lower_inputs = lower_input;
      this->lower_outputs = lower_output;
      this->lower_temps = lower_temp;
      this->lower_uniforms = lower_uniform;
   }

   bool progress;

   gl_shader_stage stage;
   bool lower_inputs;
   bool lower_outputs;
   bool lower_temps;
   bool lower_uniforms;

   /**
    * Decide, from the storage class of the indexed variable, whether this
    * pass has been asked to lower the dereference.
    */
   bool storage_type_needs_lowering(ir_dereference_array *deref) const
   {
      /* If a variable isn't eventually the target of this dereference, then
       * it must be a constant or some sort of anonymous temporary storage.
       *
       * FINISHME: Is this correct?  Most drivers treat arrays of constants as
       * FINISHME: uniforms.  It seems like this should do the same.
       */
      const ir_variable *const var = deref->array->variable_referenced();
      if (var == NULL)
         return this->lower_temps;

      switch (var->data.mode) {
      case ir_var_auto:
      case ir_var_temporary:
         return this->lower_temps;

      case ir_var_uniform:
      case ir_var_shader_storage:
         return this->lower_uniforms;

      case ir_var_shader_shared:
         return false;

      case ir_var_function_in:
      case ir_var_const_in:
         return this->lower_temps;

      case ir_var_shader_in:
         /* The input array size is unknown at compiler time for non-patch
          * inputs in TCS and TES. The arrays are sized to
          * the implementation-dependent limit "gl_MaxPatchVertices", but
          * the real size is stored in the "gl_PatchVerticesIn" built-in
          * uniform.
          *
          * The TCS input array size is specified by
          * glPatchParameteri(GL_PATCH_VERTICES).
          *
          * The TES input array size is specified by the "vertices" output
          * layout qualifier in TCS.
          */
         if ((stage == MESA_SHADER_TESS_CTRL ||
              stage == MESA_SHADER_TESS_EVAL) && !var->data.patch)
            return false;
         return this->lower_inputs;

      case ir_var_function_out:
         /* Function "out" parameters are ordinary local storage. */
         return this->lower_temps;

      case ir_var_shader_out:
         /* TCS non-patch outputs can only be indexed with "gl_InvocationID".
          * Other expressions are not allowed.
          *
          * This restriction is about shader outputs, so it is tested here
          * and not on function "out" parameters.
          */
         if (stage == MESA_SHADER_TESS_CTRL && !var->data.patch)
            return false;
         return this->lower_outputs;

      case ir_var_function_inout:
         return this->lower_temps;
      }

      assert(!"Should not get here.");
      return false;
   }

   /**
    * True when \c deref is a non-constant index into an array or matrix
    * whose storage class is selected for lowering.
    */
   bool needs_lowering(ir_dereference_array *deref) const
   {
      if (deref == NULL || deref->array_index->as_constant()
          || !is_array_or_matrix(deref->array))
         return false;

      return this->storage_type_needs_lowering(deref);
   }

   /**
    * Emit, before \c base_ir, the conditional-assignment sequence that
    * replaces one variable-indexed access.
    *
    * \param orig_deref   the array dereference with the non-constant index
    * \param orig_assign  the assignment being lowered when the access is a
    *                     write, NULL when it is a read
    * \param orig_base    the full dereference that is cloned per element
    *
    * \returns the temporary holding the value read (or the value written).
    */
   ir_variable *convert_dereference_array(ir_dereference_array *orig_deref,
                                          ir_assignment* orig_assign,
                                          ir_dereference *orig_base)
   {
      assert(is_array_or_matrix(orig_deref->array));

      const unsigned length = (orig_deref->array->type->is_array())
         ? orig_deref->array->type->length
         : orig_deref->array->type->matrix_columns;

      void *const mem_ctx = ralloc_parent(base_ir);

      /* Temporary storage for either the result of the dereference of
       * the array, or the RHS that's being assigned into the
       * dereference of the array.
       */
      ir_variable *var;

      if (orig_assign) {
         var = new(mem_ctx) ir_variable(orig_assign->rhs->type,
                                        "dereference_array_value",
                                        ir_var_temporary);
         base_ir->insert_before(var);

         ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(var);
         ir_assignment *assign = new(mem_ctx) ir_assignment(lhs,
                                                            orig_assign->rhs,
                                                            NULL);

         base_ir->insert_before(assign);
      } else {
         var = new(mem_ctx) ir_variable(orig_deref->type,
                                        "dereference_array_value",
                                        ir_var_temporary);
         base_ir->insert_before(var);
      }

      /* Store the index to a temporary to avoid reusing its tree. */
      ir_variable *index =
         new(mem_ctx) ir_variable(orig_deref->array_index->type,
                                  "dereference_array_index", ir_var_temporary);
      base_ir->insert_before(index);

      ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(index);
      ir_assignment *assign =
         new(mem_ctx) ir_assignment(lhs, orig_deref->array_index, NULL);
      base_ir->insert_before(assign);

      orig_deref->array_index = lhs->clone(mem_ctx, NULL);

      assignment_generator ag;
      ag.rvalue = orig_base;
      ag.base_ir = base_ir;
      ag.old_index = index;
      ag.var = var;
      if (orig_assign) {
         ag.is_write = true;
         ag.write_mask = orig_assign->write_mask;
      } else {
         ag.is_write = false;
      }

      switch_generator sg(ag, index, 4, 4);

      /* If the original assignment has a condition, respect that original
       * condition!  This is accomplished by wrapping the new conditional
       * assignments in an if-statement that uses the original condition.
       */
      if ((orig_assign != NULL) && (orig_assign->condition != NULL)) {
         /* No need to clone the condition because the IR that it hangs on is
          * going to be removed from the instruction sequence.
          */
         ir_if *if_stmt = new(mem_ctx) ir_if(orig_assign->condition);

         sg.generate(0, length, &if_stmt->then_instructions);
         base_ir->insert_before(if_stmt);
      } else {
         exec_list list;

         sg.generate(0, length, &list);
         base_ir->insert_before(&list);
      }

      return var;
   }

   virtual void handle_rvalue(ir_rvalue **pir)
   {
      /* Writes are handled as a whole in visit_leave(ir_assignment). */
      if (this->in_assignee)
         return;

      if (!*pir)
         return;

      ir_dereference_array* orig_deref = (*pir)->as_dereference_array();
      if (needs_lowering(orig_deref)) {
         ir_variable *var =
            convert_dereference_array(orig_deref, NULL, orig_deref);
         assert(var);
         *pir = new(ralloc_parent(base_ir)) ir_dereference_variable(var);
         this->progress = true;
      }
   }

   ir_visitor_status
   visit_leave(ir_assignment *ir)
   {
      ir_rvalue_visitor::visit_leave(ir);

      find_variable_index f;
      ir->lhs->accept(&f);

      if ((f.deref != NULL) && storage_type_needs_lowering(f.deref)) {
         /* The generated conditional assignments replace the original. */
         convert_dereference_array(f.deref, ir, ir->lhs);
         ir->remove();
         this->progress = true;
      }

      return visit_continue;
   }
};

} /* anonymous namespace */

/**
 * Entry point of the pass.
 *
 * \returns true when any dereference in \c instructions was lowered.
 */
bool
lower_variable_index_to_cond_assign(gl_shader_stage stage,
                                    exec_list *instructions,
                                    bool lower_input,
                                    bool lower_output,
                                    bool lower_temp,
                                    bool lower_uniform)
{
   variable_index_to_cond_assign_visitor v(stage,
                                           lower_input,
                                           lower_output,
                                           lower_temp,
                                           lower_uniform);

   /* Continue lowering until no progress is made.  If there are multiple
    * levels of indirection (e.g., non-constant indexing of array elements and
    * matrix columns of an array of matrix), each pass will only lower one
    * level of indirection.
    */
   bool progress_ever = false;
   do {
      v.progress = false;
      visit_list_elements(&v, instructions);
      progress_ever = v.progress || progress_ever;
   } while (v.progress);

   return progress_ever;
}
diff --git a/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp b/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp new file mode 100644 index 0000000..784db08 --- /dev/null +++ b/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp @@ -0,0 +1,239 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_vec_index_to_cond_assign.cpp + * + * Turns indexing into vector types to a series of conditional moves + * of each channel's swizzle into a temporary. + * + * Most GPUs don't have a native way to do this operation, and this + * works around that.
For drivers using both this pass and + * ir_vec_index_to_swizzle, there's a risk that this pass will happen + * before sufficient constant folding to find that the array index is + * constant. However, we hope that other optimization passes, + * particularly constant folding of assignment conditions and copy + * propagation, will result in the same code in the end. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_optimization.h" +#include "compiler/glsl_types.h" + +namespace { + +/** + * Visitor class for replacing expressions with ir_constant values. + */ + +class ir_vec_index_to_cond_assign_visitor : public ir_hierarchical_visitor { +public: + ir_vec_index_to_cond_assign_visitor() + { + progress = false; + } + + ir_rvalue *convert_vec_index_to_cond_assign(void *mem_ctx, + ir_rvalue *orig_vector, + ir_rvalue *orig_index, + const glsl_type *type); + + ir_rvalue *convert_vector_extract_to_cond_assign(ir_rvalue *ir); + + virtual ir_visitor_status visit_enter(ir_expression *); + virtual ir_visitor_status visit_enter(ir_swizzle *); + virtual ir_visitor_status visit_leave(ir_assignment *); + virtual ir_visitor_status visit_enter(ir_return *); + virtual ir_visitor_status visit_enter(ir_call *); + virtual ir_visitor_status visit_enter(ir_if *); + + bool progress; +}; + +} /* anonymous namespace */ + +ir_rvalue * +ir_vec_index_to_cond_assign_visitor::convert_vec_index_to_cond_assign(void *mem_ctx, + ir_rvalue *orig_vector, + ir_rvalue *orig_index, + const glsl_type *type) +{ + ir_assignment *assign, *value_assign; + ir_variable *index, *var, *value; + ir_dereference *deref, *deref_value; + unsigned i; + + + exec_list list; + + /* Store the index to a temporary to avoid reusing its tree. 
*/ + assert(orig_index->type == glsl_type::int_type || + orig_index->type == glsl_type::uint_type); + index = new(base_ir) ir_variable(orig_index->type, + "vec_index_tmp_i", + ir_var_temporary); + list.push_tail(index); + deref = new(base_ir) ir_dereference_variable(index); + assign = new(base_ir) ir_assignment(deref, orig_index, NULL); + list.push_tail(assign); + + /* Store the value inside a temp, thus avoiding matrixes duplication */ + value = new(base_ir) ir_variable(orig_vector->type, "vec_value_tmp", + ir_var_temporary); + list.push_tail(value); + deref_value = new(base_ir) ir_dereference_variable(value); + value_assign = new(base_ir) ir_assignment(deref_value, orig_vector); + list.push_tail(value_assign); + + /* Temporary where we store whichever value we swizzle out. */ + var = new(base_ir) ir_variable(type, "vec_index_tmp_v", + ir_var_temporary); + list.push_tail(var); + + /* Generate a single comparison condition "mask" for all of the components + * in the vector. + */ + ir_rvalue *const cond_deref = + compare_index_block(&list, index, 0, + orig_vector->type->vector_elements, + mem_ctx); + + /* Generate a conditional move of each vector element to the temp. */ + for (i = 0; i < orig_vector->type->vector_elements; i++) { + ir_rvalue *condition_swizzle = + new(base_ir) ir_swizzle(cond_deref->clone(mem_ctx, NULL), + i, 0, 0, 0, 1); + + /* Just clone the rest of the deref chain when trying to get at the + * underlying variable. + */ + ir_rvalue *swizzle = + new(base_ir) ir_swizzle(deref_value->clone(mem_ctx, NULL), + i, 0, 0, 0, 1); + + deref = new(base_ir) ir_dereference_variable(var); + assign = new(base_ir) ir_assignment(deref, swizzle, condition_swizzle); + list.push_tail(assign); + } + + /* Put all of the new instructions in the IR stream before the old + * instruction. 
+ */ + base_ir->insert_before(&list); + + this->progress = true; + return new(base_ir) ir_dereference_variable(var); +} + +ir_rvalue * +ir_vec_index_to_cond_assign_visitor::convert_vector_extract_to_cond_assign(ir_rvalue *ir) +{ + ir_expression *const expr = ir->as_expression(); + + if (expr == NULL || expr->operation != ir_binop_vector_extract) + return ir; + + return convert_vec_index_to_cond_assign(ralloc_parent(ir), + expr->operands[0], + expr->operands[1], + ir->type); +} + +ir_visitor_status +ir_vec_index_to_cond_assign_visitor::visit_enter(ir_expression *ir) +{ + unsigned int i; + + for (i = 0; i < ir->get_num_operands(); i++) { + ir->operands[i] = convert_vector_extract_to_cond_assign(ir->operands[i]); + } + + return visit_continue; +} + +ir_visitor_status +ir_vec_index_to_cond_assign_visitor::visit_enter(ir_swizzle *ir) +{ + /* Can't be hit from normal GLSL, since you can't swizzle a scalar (which + * the result of indexing a vector is. But maybe at some point we'll end up + * using swizzling of scalars for vector construction. 
+ */ + ir->val = convert_vector_extract_to_cond_assign(ir->val); + + return visit_continue; +} + +ir_visitor_status +ir_vec_index_to_cond_assign_visitor::visit_leave(ir_assignment *ir) +{ + ir->rhs = convert_vector_extract_to_cond_assign(ir->rhs); + + if (ir->condition) { + ir->condition = convert_vector_extract_to_cond_assign(ir->condition); + } + + return visit_continue; +} + +ir_visitor_status +ir_vec_index_to_cond_assign_visitor::visit_enter(ir_call *ir) +{ + foreach_in_list_safe(ir_rvalue, param, &ir->actual_parameters) { + ir_rvalue *new_param = convert_vector_extract_to_cond_assign(param); + + if (new_param != param) { + param->replace_with(new_param); + } + } + + return visit_continue; +} + +ir_visitor_status +ir_vec_index_to_cond_assign_visitor::visit_enter(ir_return *ir) +{ + if (ir->value) { + ir->value = convert_vector_extract_to_cond_assign(ir->value); + } + + return visit_continue; +} + +ir_visitor_status +ir_vec_index_to_cond_assign_visitor::visit_enter(ir_if *ir) +{ + ir->condition = convert_vector_extract_to_cond_assign(ir->condition); + + return visit_continue; +} + +bool +do_vec_index_to_cond_assign(exec_list *instructions) +{ + ir_vec_index_to_cond_assign_visitor v; + + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/lower_vec_index_to_swizzle.cpp b/src/compiler/glsl/lower_vec_index_to_swizzle.cpp new file mode 100644 index 0000000..8b18e95 --- /dev/null +++ b/src/compiler/glsl/lower_vec_index_to_swizzle.cpp @@ -0,0 +1,171 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the 
following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_vec_index_to_swizzle.cpp + * + * Turns constant indexing into vector types to swizzles. This will + * let other swizzle-aware optimization passes catch these constructs, + * and codegen backends not have to worry about this case. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_optimization.h" +#include "compiler/glsl_types.h" +#include "main/macros.h" + +/** + * Visitor class for replacing constant-index vector extracts with swizzles.
+ */ + +namespace { + +class ir_vec_index_to_swizzle_visitor : public ir_hierarchical_visitor { +public: + ir_vec_index_to_swizzle_visitor() + { + progress = false; + } + + ir_rvalue *convert_vector_extract_to_swizzle(ir_rvalue *val); + + virtual ir_visitor_status visit_enter(ir_expression *); + virtual ir_visitor_status visit_enter(ir_swizzle *); + virtual ir_visitor_status visit_enter(ir_assignment *); + virtual ir_visitor_status visit_enter(ir_return *); + virtual ir_visitor_status visit_enter(ir_call *); + virtual ir_visitor_status visit_enter(ir_if *); + + bool progress; +}; + +} /* anonymous namespace */ + +ir_rvalue * +ir_vec_index_to_swizzle_visitor::convert_vector_extract_to_swizzle(ir_rvalue *ir) +{ + ir_expression *const expr = ir->as_expression(); + if (expr == NULL || expr->operation != ir_binop_vector_extract) + return ir; + + ir_constant *const idx = expr->operands[1]->constant_expression_value(); + if (idx == NULL) + return ir; + + void *ctx = ralloc_parent(ir); + this->progress = true; + + /* Page 40 of the GLSL 1.20 spec says: + * + * "When indexing with non-constant expressions, behavior is undefined + * if the index is negative, or greater than or equal to the size of + * the vector." + * + * The quoted spec text mentions non-constant expressions, but this code + * operates on constants. These constants are the result of non-constant + * expressions that have been optimized to constants. The common case here + * is a loop counter from an unrolled loop that is used to index a vector. + * + * The ir_swizzle constructor gets angry if the index is negative or too + * large. For simplicity sake, just clamp the index to [0, size-1]. 
+ */ + const int i = CLAMP(idx->value.i[0], 0, + (int) expr->operands[0]->type->vector_elements - 1); + + return new(ctx) ir_swizzle(expr->operands[0], i, 0, 0, 0, 1); +} + +ir_visitor_status +ir_vec_index_to_swizzle_visitor::visit_enter(ir_expression *ir) +{ + unsigned int i; + + for (i = 0; i < ir->get_num_operands(); i++) { + ir->operands[i] = convert_vector_extract_to_swizzle(ir->operands[i]); + } + + return visit_continue; +} + +ir_visitor_status +ir_vec_index_to_swizzle_visitor::visit_enter(ir_swizzle *ir) +{ + /* Can't be hit from normal GLSL, since you can't swizzle a scalar (which + * the result of indexing a vector is. But maybe at some point we'll end up + * using swizzling of scalars for vector construction. + */ + ir->val = convert_vector_extract_to_swizzle(ir->val); + + return visit_continue; +} + +ir_visitor_status +ir_vec_index_to_swizzle_visitor::visit_enter(ir_assignment *ir) +{ + ir->rhs = convert_vector_extract_to_swizzle(ir->rhs); + + return visit_continue; +} + +ir_visitor_status +ir_vec_index_to_swizzle_visitor::visit_enter(ir_call *ir) +{ + foreach_in_list_safe(ir_rvalue, param, &ir->actual_parameters) { + ir_rvalue *new_param = convert_vector_extract_to_swizzle(param); + + if (new_param != param) { + param->replace_with(new_param); + } + } + + return visit_continue; +} + +ir_visitor_status +ir_vec_index_to_swizzle_visitor::visit_enter(ir_return *ir) +{ + if (ir->value) { + ir->value = convert_vector_extract_to_swizzle(ir->value); + } + + return visit_continue; +} + +ir_visitor_status +ir_vec_index_to_swizzle_visitor::visit_enter(ir_if *ir) +{ + ir->condition = convert_vector_extract_to_swizzle(ir->condition); + + return visit_continue; +} + +bool +do_vec_index_to_swizzle(exec_list *instructions) +{ + ir_vec_index_to_swizzle_visitor v; + + v.run(instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/lower_vector.cpp b/src/compiler/glsl/lower_vector.cpp new file mode 100644 index 0000000..a658410 --- /dev/null +++ 
b/src/compiler/glsl/lower_vector.cpp @@ -0,0 +1,228 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_vector.cpp + * IR lowering pass to remove some types of ir_quadop_vector + * + * \author Ian Romanick <ian.d.romanick@intel.com> + */ + +#include "ir.h" +#include "ir_rvalue_visitor.h" + +namespace { + +class lower_vector_visitor : public ir_rvalue_visitor { +public: + lower_vector_visitor() : dont_lower_swz(false), progress(false) + { + /* empty */ + } + + void handle_rvalue(ir_rvalue **rvalue); + + /** + * Should SWZ-like expressions be lowered? 
+ */ + bool dont_lower_swz; + + bool progress; +}; + +} /* anonymous namespace */ + +/** + * Determine if an IR expression tree looks like an extended swizzle + * + * Extended swizzles consist of access of a single vector source (with possible + * per component negation) and the constants -1, 0, or 1. + */ +bool +is_extended_swizzle(ir_expression *ir) +{ + /* Track any variables that are accessed by this expression. + */ + ir_variable *var = NULL; + + assert(ir->operation == ir_quadop_vector); + + for (unsigned i = 0; i < ir->type->vector_elements; i++) { + ir_rvalue *op = ir->operands[i]; + + while (op != NULL) { + switch (op->ir_type) { + case ir_type_constant: { + const ir_constant *const c = op->as_constant(); + + if (!c->is_one() && !c->is_zero() && !c->is_negative_one()) + return false; + + op = NULL; + break; + } + + case ir_type_dereference_variable: { + ir_dereference_variable *const d = (ir_dereference_variable *) op; + + if ((var != NULL) && (var != d->var)) + return false; + + var = d->var; + op = NULL; + break; + } + + case ir_type_expression: { + ir_expression *const ex = (ir_expression *) op; + + if (ex->operation != ir_unop_neg) + return false; + + op = ex->operands[0]; + break; + } + + case ir_type_swizzle: + op = ((ir_swizzle *) op)->val; + break; + + default: + return false; + } + } + } + + return true; +} + +void +lower_vector_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_expression *expr = (*rvalue)->as_expression(); + if ((expr == NULL) || (expr->operation != ir_quadop_vector)) + return; + + if (this->dont_lower_swz && is_extended_swizzle(expr)) + return; + + /* FINISHME: Is this the right thing to use for the ralloc context? + */ + void *const mem_ctx = expr; + + assert(expr->type->vector_elements == expr->get_num_operands()); + + /* Generate a temporary with the same type as the ir_quadop_operation. 
+ */ + ir_variable *const temp = + new(mem_ctx) ir_variable(expr->type, "vecop_tmp", ir_var_temporary); + + this->base_ir->insert_before(temp); + + /* Counter of the number of components collected so far. + */ + unsigned assigned; + + /* Write-mask in the destination that receives counted by 'assigned'. + */ + unsigned write_mask; + + + /* Generate upto four assignments to that variable. Try to group component + * assignments together: + * + * - All constant components can be assigned at once. + * - All assigments of components from a single variable with the same + * unary operator can be assigned at once. + */ + ir_constant_data d = { { 0 } }; + + assigned = 0; + write_mask = 0; + for (unsigned i = 0; i < expr->type->vector_elements; i++) { + const ir_constant *const c = expr->operands[i]->as_constant(); + + if (c == NULL) + continue; + + switch (expr->type->base_type) { + case GLSL_TYPE_UINT: d.u[assigned] = c->value.u[0]; break; + case GLSL_TYPE_INT: d.i[assigned] = c->value.i[0]; break; + case GLSL_TYPE_FLOAT: d.f[assigned] = c->value.f[0]; break; + case GLSL_TYPE_BOOL: d.b[assigned] = c->value.b[0]; break; + default: assert(!"Should not get here."); break; + } + + write_mask |= (1U << i); + assigned++; + } + + assert((write_mask == 0) == (assigned == 0)); + + /* If there were constant values, generate an assignment. + */ + if (assigned > 0) { + ir_constant *const c = + new(mem_ctx) ir_constant(glsl_type::get_instance(expr->type->base_type, + assigned, 1), + &d); + ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp); + ir_assignment *const assign = + new(mem_ctx) ir_assignment(lhs, c, NULL, write_mask); + + this->base_ir->insert_before(assign); + } + + /* FINISHME: This should try to coalesce assignments. 
+ */ + for (unsigned i = 0; i < expr->type->vector_elements; i++) { + if (expr->operands[i]->ir_type == ir_type_constant) + continue; + + ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp); + ir_assignment *const assign = + new(mem_ctx) ir_assignment(lhs, expr->operands[i], NULL, (1U << i)); + + this->base_ir->insert_before(assign); + assigned++; + } + + assert(assigned == expr->type->vector_elements); + + *rvalue = new(mem_ctx) ir_dereference_variable(temp); + this->progress = true; +} + +bool +lower_quadop_vector(exec_list *instructions, bool dont_lower_swz) +{ + lower_vector_visitor v; + + v.dont_lower_swz = dont_lower_swz; + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/lower_vector_derefs.cpp b/src/compiler/glsl/lower_vector_derefs.cpp new file mode 100644 index 0000000..4a5d6f0 --- /dev/null +++ b/src/compiler/glsl/lower_vector_derefs.cpp @@ -0,0 +1,104 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include "ir.h" +#include "ir_builder.h" +#include "ir_rvalue_visitor.h" +#include "ir_optimization.h" + +using namespace ir_builder; + +namespace { + +class vector_deref_visitor : public ir_rvalue_enter_visitor { +public: + vector_deref_visitor() + : progress(false) + { + } + + virtual ~vector_deref_visitor() + { + } + + virtual void handle_rvalue(ir_rvalue **rv); + virtual ir_visitor_status visit_enter(ir_assignment *ir); + + bool progress; +}; + +} /* anonymous namespace */ + +ir_visitor_status +vector_deref_visitor::visit_enter(ir_assignment *ir) +{ + if (!ir->lhs || ir->lhs->ir_type != ir_type_dereference_array) + return ir_rvalue_enter_visitor::visit_enter(ir); + + ir_dereference_array *const deref = (ir_dereference_array *) ir->lhs; + if (!deref->array->type->is_vector()) + return ir_rvalue_enter_visitor::visit_enter(ir); + + ir_dereference *const new_lhs = (ir_dereference *) deref->array; + ir->set_lhs(new_lhs); + + ir_constant *old_index_constant = deref->array_index->constant_expression_value(); + void *mem_ctx = ralloc_parent(ir); + if (!old_index_constant) { + ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert, + new_lhs->type, + new_lhs->clone(mem_ctx, NULL), + ir->rhs, + deref->array_index); + ir->write_mask = (1 << new_lhs->type->vector_elements) - 1; + } else { + ir->write_mask = 1 << old_index_constant->get_int_component(0); + } + + return ir_rvalue_enter_visitor::visit_enter(ir); +} + +void +vector_deref_visitor::handle_rvalue(ir_rvalue **rv) +{ + if (*rv == NULL || (*rv)->ir_type != ir_type_dereference_array) + return; + + ir_dereference_array *const deref = (ir_dereference_array *) *rv; + if (!deref->array->type->is_vector()) + return; + + void *mem_ctx = 
ralloc_parent(deref); + *rv = new(mem_ctx) ir_expression(ir_binop_vector_extract, + deref->array, + deref->array_index); +} + +bool +lower_vector_derefs(gl_shader *shader) +{ + vector_deref_visitor v; + + visit_list_elements(&v, shader->ir); + + return v.progress; +} diff --git a/src/compiler/glsl/lower_vector_insert.cpp b/src/compiler/glsl/lower_vector_insert.cpp new file mode 100644 index 0000000..26d31b0 --- /dev/null +++ b/src/compiler/glsl/lower_vector_insert.cpp @@ -0,0 +1,146 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#include "ir.h" +#include "ir_builder.h" +#include "ir_rvalue_visitor.h" +#include "ir_optimization.h" + +using namespace ir_builder; + +namespace { + +class vector_insert_visitor : public ir_rvalue_visitor { +public: + vector_insert_visitor(bool lower_nonconstant_index) + : progress(false), lower_nonconstant_index(lower_nonconstant_index) + { + factory.instructions = &factory_instructions; + } + + virtual ~vector_insert_visitor() + { + assert(factory_instructions.is_empty()); + } + + virtual void handle_rvalue(ir_rvalue **rv); + + ir_factory factory; + exec_list factory_instructions; + bool progress; + bool lower_nonconstant_index; +}; + +} /* anonymous namespace */ + +void +vector_insert_visitor::handle_rvalue(ir_rvalue **rv) +{ + if (*rv == NULL || (*rv)->ir_type != ir_type_expression) + return; + + ir_expression *const expr = (ir_expression *) *rv; + + if (likely(expr->operation != ir_triop_vector_insert)) + return; + + factory.mem_ctx = ralloc_parent(expr); + + ir_constant *const idx = expr->operands[2]->constant_expression_value(); + if (idx != NULL) { + /* Replace (vector_insert (vec) (scalar) (index)) with a dereference of + * a new temporary. The new temporary gets assigned as + * + * t = vec + * t.mask = scalar + * + * where mask is the component selected by index. + */ + ir_variable *const temp = + factory.make_temp(expr->operands[0]->type, "vec_tmp"); + + const int mask = 1 << idx->value.i[0]; + + factory.emit(assign(temp, expr->operands[0])); + factory.emit(assign(temp, expr->operands[1], mask)); + + this->progress = true; + *rv = new(factory.mem_ctx) ir_dereference_variable(temp); + } else if (this->lower_nonconstant_index) { + /* Replace (vector_insert (vec) (scalar) (index)) with a dereference of + * a new temporary. 
The new temporary gets assigned as + * + * t = vec + * if (index == 0) + * t.x = scalar + * if (index == 1) + * t.y = scalar + * if (index == 2) + * t.z = scalar + * if (index == 3) + * t.w = scalar + */ + ir_variable *const temp = + factory.make_temp(expr->operands[0]->type, "vec_tmp"); + + ir_variable *const src_temp = + factory.make_temp(expr->operands[1]->type, "src_temp"); + + factory.emit(assign(temp, expr->operands[0])); + factory.emit(assign(src_temp, expr->operands[1])); + + assert(expr->operands[2]->type == glsl_type::int_type || + expr->operands[2]->type == glsl_type::uint_type); + + for (unsigned i = 0; i < expr->type->vector_elements; i++) { + ir_constant *const cmp_index = + ir_constant::zero(factory.mem_ctx, expr->operands[2]->type); + cmp_index->value.u[0] = i; + + ir_variable *const cmp_result = + factory.make_temp(glsl_type::bool_type, "index_condition"); + + factory.emit(assign(cmp_result, + equal(expr->operands[2]->clone(factory.mem_ctx, + NULL), + cmp_index))); + + factory.emit(if_tree(cmp_result, + assign(temp, src_temp, WRITEMASK_X << i))); + } + + this->progress = true; + *rv = new(factory.mem_ctx) ir_dereference_variable(temp); + } + + base_ir->insert_before(factory.instructions); +} + +bool +lower_vector_insert(exec_list *instructions, bool lower_nonconstant_index) +{ + vector_insert_visitor v(lower_nonconstant_index); + + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/lower_vertex_id.cpp b/src/compiler/glsl/lower_vertex_id.cpp new file mode 100644 index 0000000..3da7a2f --- /dev/null +++ b/src/compiler/glsl/lower_vertex_id.cpp @@ -0,0 +1,144 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, 
sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_vertex_id.cpp + * + * There exists hardware, such as i965, that does not implement the OpenGL + * semantic for gl_VertexID. Instead, that hardware does not include the + * value of basevertex in the gl_VertexID value. To implement the OpenGL + * semantic, we'll have to convert gl_VertexID to + * gl_VertexIDMESA+gl_BaseVertexMESA.
+ */ + +#include "glsl_symbol_table.h" +#include "ir_hierarchical_visitor.h" +#include "ir.h" +#include "ir_builder.h" +#include "linker.h" +#include "program/prog_statevars.h" + +namespace { + +class lower_vertex_id_visitor : public ir_hierarchical_visitor { +public: + explicit lower_vertex_id_visitor(ir_function_signature *main_sig, + exec_list *ir_list) + : progress(false), VertexID(NULL), gl_VertexID(NULL), + gl_BaseVertex(NULL), main_sig(main_sig), ir_list(ir_list) + { + foreach_in_list(ir_instruction, ir, ir_list) { + ir_variable *const var = ir->as_variable(); + + if (var != NULL && var->data.mode == ir_var_system_value && + var->data.location == SYSTEM_VALUE_BASE_VERTEX) { + gl_BaseVertex = var; + break; + } + } + } + + virtual ir_visitor_status visit(ir_dereference_variable *); + + bool progress; + +private: + ir_variable *VertexID; + ir_variable *gl_VertexID; + ir_variable *gl_BaseVertex; + + ir_function_signature *main_sig; + exec_list *ir_list; +}; + +} /* anonymous namespace */ + +ir_visitor_status +lower_vertex_id_visitor::visit(ir_dereference_variable *ir) +{ + if (ir->var->data.mode != ir_var_system_value || + ir->var->data.location != SYSTEM_VALUE_VERTEX_ID) + return visit_continue; + + if (VertexID == NULL) { + const glsl_type *const int_t = glsl_type::int_type; + void *const mem_ctx = ralloc_parent(ir); + + VertexID = new(mem_ctx) ir_variable(int_t, "__VertexID", + ir_var_temporary); + ir_list->push_head(VertexID); + + gl_VertexID = new(mem_ctx) ir_variable(int_t, "gl_VertexIDMESA", + ir_var_system_value); + gl_VertexID->data.how_declared = ir_var_declared_implicitly; + gl_VertexID->data.read_only = true; + gl_VertexID->data.location = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; + gl_VertexID->data.explicit_location = true; + gl_VertexID->data.explicit_index = 0; + ir_list->push_head(gl_VertexID); + + if (gl_BaseVertex == NULL) { + gl_BaseVertex = new(mem_ctx) ir_variable(int_t, "gl_BaseVertex", + ir_var_system_value); + gl_BaseVertex->data.how_declared = 
ir_var_declared_implicitly; + gl_BaseVertex->data.read_only = true; + gl_BaseVertex->data.location = SYSTEM_VALUE_BASE_VERTEX; + gl_BaseVertex->data.explicit_location = true; + gl_BaseVertex->data.explicit_index = 0; + ir_list->push_head(gl_BaseVertex); + } + + ir_instruction *const inst = + ir_builder::assign(VertexID, + ir_builder::add(gl_VertexID, gl_BaseVertex)); + + main_sig->body.push_head(inst); + } + + ir->var = VertexID; + progress = true; + + return visit_continue; +} + +bool +lower_vertex_id(gl_shader *shader) +{ + /* gl_VertexID only exists in the vertex shader. + */ + if (shader->Stage != MESA_SHADER_VERTEX) + return false; + + ir_function_signature *const main_sig = + _mesa_get_main_function_signature(shader); + if (main_sig == NULL) { + assert(main_sig != NULL); + return false; + } + + lower_vertex_id_visitor v(main_sig, shader->ir); + + v.run(shader->ir); + + return v.progress; +} diff --git a/src/compiler/glsl/main.cpp b/src/compiler/glsl/main.cpp new file mode 100644 index 0000000..df93a01 --- /dev/null +++ b/src/compiler/glsl/main.cpp @@ -0,0 +1,431 @@ +/* + * Copyright © 2008, 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include <getopt.h> + +/** @file main.cpp + * + * This file is the main() routine and scaffolding for producing + * builtin_compiler (which doesn't include builtins itself and is used + * to generate the profile information for builtin_function.cpp), and + * for glsl_compiler (which does include builtins and can be used to + * offline compile GLSL code and examine the resulting GLSL IR. + */ + +#include "ast.h" +#include "glsl_parser_extras.h" +#include "ir_optimization.h" +#include "program.h" +#include "program/hash_table.h" +#include "loop_analysis.h" +#include "standalone_scaffolding.h" + +static int glsl_version = 330; + +static void +initialize_context(struct gl_context *ctx, gl_api api) +{ + initialize_context_to_defaults(ctx, api); + + /* The standalone compiler needs to claim support for almost + * everything in order to compile the built-in functions. 
+ */ + ctx->Const.GLSLVersion = glsl_version; + ctx->Extensions.ARB_ES3_compatibility = true; + ctx->Const.MaxComputeWorkGroupCount[0] = 65535; + ctx->Const.MaxComputeWorkGroupCount[1] = 65535; + ctx->Const.MaxComputeWorkGroupCount[2] = 65535; + ctx->Const.MaxComputeWorkGroupSize[0] = 1024; + ctx->Const.MaxComputeWorkGroupSize[1] = 1024; + ctx->Const.MaxComputeWorkGroupSize[2] = 64; + ctx->Const.MaxComputeWorkGroupInvocations = 1024; + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 16; + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxUniformComponents = 1024; + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxInputComponents = 0; /* not used */ + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxOutputComponents = 0; /* not used */ + + switch (ctx->Const.GLSLVersion) { + case 100: + ctx->Const.MaxClipPlanes = 0; + ctx->Const.MaxCombinedTextureImageUnits = 8; + ctx->Const.MaxDrawBuffers = 2; + ctx->Const.MinProgramTexelOffset = 0; + ctx->Const.MaxProgramTexelOffset = 0; + ctx->Const.MaxLights = 0; + ctx->Const.MaxTextureCoordUnits = 0; + ctx->Const.MaxTextureUnits = 8; + + ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 8; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 0; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 128 * 4; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */ + ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 32; + + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = + ctx->Const.MaxCombinedTextureImageUnits; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 16 * 4; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = + ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */ + + ctx->Const.MaxVarying = ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents / 4; + break; + case 110: + case 120: + ctx->Const.MaxClipPlanes = 6; + 
ctx->Const.MaxCombinedTextureImageUnits = 2; + ctx->Const.MaxDrawBuffers = 1; + ctx->Const.MinProgramTexelOffset = 0; + ctx->Const.MaxProgramTexelOffset = 0; + ctx->Const.MaxLights = 8; + ctx->Const.MaxTextureCoordUnits = 2; + ctx->Const.MaxTextureUnits = 2; + + ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 0; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 512; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */ + ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 32; + + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = + ctx->Const.MaxCombinedTextureImageUnits; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 64; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = + ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */ + + ctx->Const.MaxVarying = ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents / 4; + break; + case 130: + case 140: + ctx->Const.MaxClipPlanes = 8; + ctx->Const.MaxCombinedTextureImageUnits = 16; + ctx->Const.MaxDrawBuffers = 8; + ctx->Const.MinProgramTexelOffset = -8; + ctx->Const.MaxProgramTexelOffset = 7; + ctx->Const.MaxLights = 8; + ctx->Const.MaxTextureCoordUnits = 8; + ctx->Const.MaxTextureUnits = 2; + + ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 16; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 1024; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */ + ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 64; + + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 16; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 1024; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = + 
ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */ + + ctx->Const.MaxVarying = ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents / 4; + break; + case 150: + case 330: + ctx->Const.MaxClipPlanes = 8; + ctx->Const.MaxDrawBuffers = 8; + ctx->Const.MinProgramTexelOffset = -8; + ctx->Const.MaxProgramTexelOffset = 7; + ctx->Const.MaxLights = 8; + ctx->Const.MaxTextureCoordUnits = 8; + ctx->Const.MaxTextureUnits = 2; + + ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 16; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 1024; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */ + ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 64; + + ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 16; + ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxUniformComponents = 1024; + ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = + ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents; + ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128; + + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 16; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 1024; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = + ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */ + + ctx->Const.MaxCombinedTextureImageUnits = + ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits + + ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits + + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; + + ctx->Const.MaxGeometryOutputVertices = 256; + ctx->Const.MaxGeometryTotalOutputComponents = 1024; + + ctx->Const.MaxVarying = 60 / 4; + break; + case 300: + ctx->Const.MaxClipPlanes = 8; + 
ctx->Const.MaxCombinedTextureImageUnits = 32; + ctx->Const.MaxDrawBuffers = 4; + ctx->Const.MinProgramTexelOffset = -8; + ctx->Const.MaxProgramTexelOffset = 7; + ctx->Const.MaxLights = 0; + ctx->Const.MaxTextureCoordUnits = 0; + ctx->Const.MaxTextureUnits = 0; + + ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 16; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 1024; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */ + ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 16 * 4; + + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 16; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 224; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 15 * 4; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */ + + ctx->Const.MaxVarying = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents / 4; + break; + } + + ctx->Const.GenerateTemporaryNames = true; + ctx->Const.MaxPatchVertices = 32; + + ctx->Driver.NewShader = _mesa_new_shader; +} + +/* Returned string will have 'ctx' as its ralloc owner. 
*/
/*
 * Reads the whole of file_name into a NUL-terminated buffer obtained from
 * ralloc_size(ctx, ...).
 *
 * Returns NULL when the file cannot be opened, when its size cannot be
 * determined, when the allocation fails, or when fewer bytes than the
 * reported size can be read.  An empty file yields an empty string.
 */
static char *
load_text_file(void *ctx, const char *file_name)
{
   FILE *fp = fopen(file_name, "rb");
   if (!fp)
      return NULL;

   char *text = NULL;

   if (fseek(fp, 0L, SEEK_END) == 0) {
      /* ftell() reports failure as -1L.  Converting that straight to
       * size_t would make "size + 1" wrap around to zero, so reject it
       * before sizing the buffer.
       */
      const long end = ftell(fp);

      if (end >= 0 && fseek(fp, 0L, SEEK_SET) == 0) {
         const size_t size = (size_t) end;

         text = (char *) ralloc_size(ctx, size + 1);
         if (text != NULL) {
            /* A short read is treated as failure, so a single fread() is
             * enough; there is nothing to retry.
             */
            if (fread(text, 1, size, fp) == size) {
               text[size] = '\0';
            } else {
               /* The buffer came from ralloc, so it has to go back through
                * ralloc_free() rather than free().
                */
               ralloc_free(text);
               text = NULL;
            }
         }
      }
   }

   fclose(fp);

   return text;
}

/* Flags written by getopt_long() through the option table below. */
int dump_ast = 0;
int dump_hir = 0;
int dump_lir = 0;
int do_link = 0;

/* Long options accepted on the command line.  "--version" takes an argument
 * and is reported to the caller as 'v'; the others only set their flag.
 */
const struct option compiler_opts[] = {
   { "dump-ast", no_argument, &dump_ast, 1 },
   { "dump-hir", no_argument, &dump_hir, 1 },
   { "dump-lir", no_argument, &dump_lir, 1 },
   { "link", no_argument, &do_link, 1 },
   { "version", required_argument, NULL, 'v' },
   { NULL, 0, NULL, 0 }
};

/**
 * \brief Print proper usage and exit with failure.
+ */ +void +usage_fail(const char *name) +{ + + const char *header = + "usage: %s [options] <file.vert | file.tesc | file.tese | file.geom | file.frag | file.comp>\n" + "\n" + "Possible options are:\n"; + printf(header, name); + for (const struct option *o = compiler_opts; o->name != 0; ++o) { + printf(" --%s\n", o->name); + } + exit(EXIT_FAILURE); +} + + +void +compile_shader(struct gl_context *ctx, struct gl_shader *shader) +{ + struct _mesa_glsl_parse_state *state = + new(shader) _mesa_glsl_parse_state(ctx, shader->Stage, shader); + + _mesa_glsl_compile_shader(ctx, shader, dump_ast, dump_hir); + + /* Print out the resulting IR */ + if (!state->error && dump_lir) { + _mesa_print_ir(stdout, shader->ir, state); + } + + return; +} + +int +main(int argc, char **argv) +{ + int status = EXIT_SUCCESS; + struct gl_context local_ctx; + struct gl_context *ctx = &local_ctx; + bool glsl_es = false; + + int c; + int idx = 0; + while ((c = getopt_long(argc, argv, "", compiler_opts, &idx)) != -1) { + switch (c) { + case 'v': + glsl_version = strtol(optarg, NULL, 10); + switch (glsl_version) { + case 100: + case 300: + glsl_es = true; + break; + case 110: + case 120: + case 130: + case 140: + case 150: + case 330: + glsl_es = false; + break; + default: + fprintf(stderr, "Unrecognized GLSL version `%s'\n", optarg); + usage_fail(argv[0]); + break; + } + break; + default: + break; + } + } + + + if (argc <= optind) + usage_fail(argv[0]); + + initialize_context(ctx, (glsl_es) ? 
API_OPENGLES2 : API_OPENGL_COMPAT); + + struct gl_shader_program *whole_program; + + whole_program = rzalloc (NULL, struct gl_shader_program); + assert(whole_program != NULL); + whole_program->InfoLog = ralloc_strdup(whole_program, ""); + + /* Created just to avoid segmentation faults */ + whole_program->AttributeBindings = new string_to_uint_map; + whole_program->FragDataBindings = new string_to_uint_map; + whole_program->FragDataIndexBindings = new string_to_uint_map; + + for (/* empty */; argc > optind; optind++) { + whole_program->Shaders = + reralloc(whole_program, whole_program->Shaders, + struct gl_shader *, whole_program->NumShaders + 1); + assert(whole_program->Shaders != NULL); + + struct gl_shader *shader = rzalloc(whole_program, gl_shader); + + whole_program->Shaders[whole_program->NumShaders] = shader; + whole_program->NumShaders++; + + const unsigned len = strlen(argv[optind]); + if (len < 6) + usage_fail(argv[0]); + + const char *const ext = & argv[optind][len - 5]; + if (strncmp(".vert", ext, 5) == 0 || strncmp(".glsl", ext, 5) == 0) + shader->Type = GL_VERTEX_SHADER; + else if (strncmp(".tesc", ext, 5) == 0) + shader->Type = GL_TESS_CONTROL_SHADER; + else if (strncmp(".tese", ext, 5) == 0) + shader->Type = GL_TESS_EVALUATION_SHADER; + else if (strncmp(".geom", ext, 5) == 0) + shader->Type = GL_GEOMETRY_SHADER; + else if (strncmp(".frag", ext, 5) == 0) + shader->Type = GL_FRAGMENT_SHADER; + else if (strncmp(".comp", ext, 5) == 0) + shader->Type = GL_COMPUTE_SHADER; + else + usage_fail(argv[0]); + shader->Stage = _mesa_shader_enum_to_shader_stage(shader->Type); + + shader->Source = load_text_file(whole_program, argv[optind]); + if (shader->Source == NULL) { + printf("File \"%s\" does not exist.\n", argv[optind]); + exit(EXIT_FAILURE); + } + + compile_shader(ctx, shader); + + if (strlen(shader->InfoLog) > 0) + printf("Info log for %s:\n%s\n", argv[optind], shader->InfoLog); + + if (!shader->CompileStatus) { + status = EXIT_FAILURE; + break; + } + } + 
+ if ((status == EXIT_SUCCESS) && do_link) { + _mesa_clear_shader_program_data(whole_program); + + link_shaders(ctx, whole_program); + status = (whole_program->LinkStatus) ? EXIT_SUCCESS : EXIT_FAILURE; + + if (strlen(whole_program->InfoLog) > 0) + printf("Info log for linking:\n%s\n", whole_program->InfoLog); + } + + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) + ralloc_free(whole_program->_LinkedShaders[i]); + + delete whole_program->AttributeBindings; + delete whole_program->FragDataBindings; + delete whole_program->FragDataIndexBindings; + + ralloc_free(whole_program); + _mesa_glsl_release_types(); + _mesa_glsl_release_builtin_functions(); + + return status; +} diff --git a/src/compiler/glsl/opt_algebraic.cpp b/src/compiler/glsl/opt_algebraic.cpp new file mode 100644 index 0000000..1e58062 --- /dev/null +++ b/src/compiler/glsl/opt_algebraic.cpp @@ -0,0 +1,984 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_algebraic.cpp + * + * Takes advantage of association, commutivity, and other algebraic + * properties to simplify expressions. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_rvalue_visitor.h" +#include "ir_optimization.h" +#include "ir_builder.h" +#include "compiler/glsl_types.h" + +using namespace ir_builder; + +namespace { + +/** + * Visitor class for replacing expressions with ir_constant values. + */ + +class ir_algebraic_visitor : public ir_rvalue_visitor { +public: + ir_algebraic_visitor(bool native_integers, + const struct gl_shader_compiler_options *options) + : options(options) + { + this->progress = false; + this->mem_ctx = NULL; + this->native_integers = native_integers; + } + + virtual ~ir_algebraic_visitor() + { + } + + ir_rvalue *handle_expression(ir_expression *ir); + void handle_rvalue(ir_rvalue **rvalue); + bool reassociate_constant(ir_expression *ir1, + int const_index, + ir_constant *constant, + ir_expression *ir2); + void reassociate_operands(ir_expression *ir1, + int op1, + ir_expression *ir2, + int op2); + ir_rvalue *swizzle_if_required(ir_expression *expr, + ir_rvalue *operand); + + const struct gl_shader_compiler_options *options; + void *mem_ctx; + + bool native_integers; + bool progress; +}; + +} /* unnamed namespace */ + +static inline bool +is_vec_zero(ir_constant *ir) +{ + return (ir == NULL) ? false : ir->is_zero(); +} + +static inline bool +is_vec_one(ir_constant *ir) +{ + return (ir == NULL) ? false : ir->is_one(); +} + +static inline bool +is_vec_two(ir_constant *ir) +{ + return (ir == NULL) ? false : ir->is_value(2.0, 2); +} + +static inline bool +is_vec_four(ir_constant *ir) +{ + return (ir == NULL) ? 
false : ir->is_value(4.0, 4); +} + +static inline bool +is_vec_negative_one(ir_constant *ir) +{ + return (ir == NULL) ? false : ir->is_negative_one(); +} + +static inline bool +is_valid_vec_const(ir_constant *ir) +{ + if (ir == NULL) + return false; + + if (!ir->type->is_scalar() && !ir->type->is_vector()) + return false; + + return true; +} + +static inline bool +is_less_than_one(ir_constant *ir) +{ + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + + if (!is_valid_vec_const(ir)) + return false; + + unsigned component = 0; + for (int c = 0; c < ir->type->vector_elements; c++) { + if (ir->get_float_component(c) < 1.0f) + component++; + } + + return (component == ir->type->vector_elements); +} + +static inline bool +is_greater_than_zero(ir_constant *ir) +{ + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + + if (!is_valid_vec_const(ir)) + return false; + + unsigned component = 0; + for (int c = 0; c < ir->type->vector_elements; c++) { + if (ir->get_float_component(c) > 0.0f) + component++; + } + + return (component == ir->type->vector_elements); +} + +static void +update_type(ir_expression *ir) +{ + if (ir->operands[0]->type->is_vector()) + ir->type = ir->operands[0]->type; + else + ir->type = ir->operands[1]->type; +} + +/* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */ +static ir_expression * +try_replace_with_dot(ir_expression *expr0, ir_expression *expr1, void *mem_ctx) +{ + if (expr0 && expr0->operation == ir_binop_add && + expr0->type->is_float() && + expr1 && expr1->operation == ir_binop_add && + expr1->type->is_float()) { + ir_swizzle *x = expr0->operands[0]->as_swizzle(); + ir_swizzle *y = expr0->operands[1]->as_swizzle(); + ir_swizzle *z = expr1->operands[0]->as_swizzle(); + ir_swizzle *w = expr1->operands[1]->as_swizzle(); + + if (!x || x->mask.num_components != 1 || + !y || y->mask.num_components != 1 || + !z || z->mask.num_components != 1 || + !w || w->mask.num_components != 1) { + return NULL; + } + + bool swiz_seen[4] = {false, false, false, 
false}; + swiz_seen[x->mask.x] = true; + swiz_seen[y->mask.x] = true; + swiz_seen[z->mask.x] = true; + swiz_seen[w->mask.x] = true; + + if (!swiz_seen[0] || !swiz_seen[1] || + !swiz_seen[2] || !swiz_seen[3]) { + return NULL; + } + + if (x->val->equals(y->val) && + x->val->equals(z->val) && + x->val->equals(w->val)) { + return dot(x->val, new(mem_ctx) ir_constant(1.0f, 4)); + } + } + return NULL; +} + +void +ir_algebraic_visitor::reassociate_operands(ir_expression *ir1, + int op1, + ir_expression *ir2, + int op2) +{ + ir_rvalue *temp = ir2->operands[op2]; + ir2->operands[op2] = ir1->operands[op1]; + ir1->operands[op1] = temp; + + /* Update the type of ir2. The type of ir1 won't have changed -- + * base types matched, and at least one of the operands of the 2 + * binops is still a vector if any of them were. + */ + update_type(ir2); + + this->progress = true; +} + +/** + * Reassociates a constant down a tree of adds or multiplies. + * + * Consider (2 * (a * (b * 0.5))). We want to send up with a * b. + */ +bool +ir_algebraic_visitor::reassociate_constant(ir_expression *ir1, int const_index, + ir_constant *constant, + ir_expression *ir2) +{ + if (!ir2 || ir1->operation != ir2->operation) + return false; + + /* Don't want to even think about matrices. 
*/ + if (ir1->operands[0]->type->is_matrix() || + ir1->operands[1]->type->is_matrix() || + ir2->operands[0]->type->is_matrix() || + ir2->operands[1]->type->is_matrix()) + return false; + + ir_constant *ir2_const[2]; + ir2_const[0] = ir2->operands[0]->constant_expression_value(); + ir2_const[1] = ir2->operands[1]->constant_expression_value(); + + if (ir2_const[0] && ir2_const[1]) + return false; + + if (ir2_const[0]) { + reassociate_operands(ir1, const_index, ir2, 1); + return true; + } else if (ir2_const[1]) { + reassociate_operands(ir1, const_index, ir2, 0); + return true; + } + + if (reassociate_constant(ir1, const_index, constant, + ir2->operands[0]->as_expression())) { + update_type(ir2); + return true; + } + + if (reassociate_constant(ir1, const_index, constant, + ir2->operands[1]->as_expression())) { + update_type(ir2); + return true; + } + + return false; +} + +/* When eliminating an expression and just returning one of its operands, + * we may need to swizzle that operand out to a vector if the expression was + * vector type. 
+ */ +ir_rvalue * +ir_algebraic_visitor::swizzle_if_required(ir_expression *expr, + ir_rvalue *operand) +{ + if (expr->type->is_vector() && operand->type->is_scalar()) { + return new(mem_ctx) ir_swizzle(operand, 0, 0, 0, 0, + expr->type->vector_elements); + } else + return operand; +} + +ir_rvalue * +ir_algebraic_visitor::handle_expression(ir_expression *ir) +{ + ir_constant *op_const[4] = {NULL, NULL, NULL, NULL}; + ir_expression *op_expr[4] = {NULL, NULL, NULL, NULL}; + unsigned int i; + + if (ir->operation == ir_binop_mul && + ir->operands[0]->type->is_matrix() && + ir->operands[1]->type->is_vector()) { + ir_expression *matrix_mul = ir->operands[0]->as_expression(); + + if (matrix_mul && matrix_mul->operation == ir_binop_mul && + matrix_mul->operands[0]->type->is_matrix() && + matrix_mul->operands[1]->type->is_matrix()) { + + return mul(matrix_mul->operands[0], + mul(matrix_mul->operands[1], ir->operands[1])); + } + } + + assert(ir->get_num_operands() <= 4); + for (i = 0; i < ir->get_num_operands(); i++) { + if (ir->operands[i]->type->is_matrix()) + return ir; + + op_const[i] = ir->operands[i]->constant_expression_value(); + op_expr[i] = ir->operands[i]->as_expression(); + } + + if (this->mem_ctx == NULL) + this->mem_ctx = ralloc_parent(ir); + + switch (ir->operation) { + case ir_unop_bit_not: + if (op_expr[0] && op_expr[0]->operation == ir_unop_bit_not) + return op_expr[0]->operands[0]; + break; + + case ir_unop_abs: + if (op_expr[0] == NULL) + break; + + switch (op_expr[0]->operation) { + case ir_unop_abs: + case ir_unop_neg: + return abs(op_expr[0]->operands[0]); + default: + break; + } + break; + + case ir_unop_neg: + if (op_expr[0] == NULL) + break; + + if (op_expr[0]->operation == ir_unop_neg) { + return op_expr[0]->operands[0]; + } + break; + + case ir_unop_exp: + if (op_expr[0] == NULL) + break; + + if (op_expr[0]->operation == ir_unop_log) { + return op_expr[0]->operands[0]; + } + break; + + case ir_unop_log: + if (op_expr[0] == NULL) + break; + + if 
(op_expr[0]->operation == ir_unop_exp) { + return op_expr[0]->operands[0]; + } + break; + + case ir_unop_exp2: + if (op_expr[0] == NULL) + break; + + if (op_expr[0]->operation == ir_unop_log2) { + return op_expr[0]->operands[0]; + } + + if (!options->EmitNoPow && op_expr[0]->operation == ir_binop_mul) { + for (int log2_pos = 0; log2_pos < 2; log2_pos++) { + ir_expression *log2_expr = + op_expr[0]->operands[log2_pos]->as_expression(); + + if (log2_expr && log2_expr->operation == ir_unop_log2) { + return new(mem_ctx) ir_expression(ir_binop_pow, + ir->type, + log2_expr->operands[0], + op_expr[0]->operands[1 - log2_pos]); + } + } + } + break; + + case ir_unop_log2: + if (op_expr[0] == NULL) + break; + + if (op_expr[0]->operation == ir_unop_exp2) { + return op_expr[0]->operands[0]; + } + break; + + case ir_unop_f2i: + case ir_unop_f2u: + if (op_expr[0] && op_expr[0]->operation == ir_unop_trunc) { + return new(mem_ctx) ir_expression(ir->operation, + ir->type, + op_expr[0]->operands[0]); + } + break; + + case ir_unop_logic_not: { + enum ir_expression_operation new_op = ir_unop_logic_not; + + if (op_expr[0] == NULL) + break; + + switch (op_expr[0]->operation) { + case ir_binop_less: new_op = ir_binop_gequal; break; + case ir_binop_greater: new_op = ir_binop_lequal; break; + case ir_binop_lequal: new_op = ir_binop_greater; break; + case ir_binop_gequal: new_op = ir_binop_less; break; + case ir_binop_equal: new_op = ir_binop_nequal; break; + case ir_binop_nequal: new_op = ir_binop_equal; break; + case ir_binop_all_equal: new_op = ir_binop_any_nequal; break; + case ir_binop_any_nequal: new_op = ir_binop_all_equal; break; + + default: + /* The default case handler is here to silence a warning from GCC. 
+ */ + break; + } + + if (new_op != ir_unop_logic_not) { + return new(mem_ctx) ir_expression(new_op, + ir->type, + op_expr[0]->operands[0], + op_expr[0]->operands[1]); + } + + break; + } + + case ir_unop_saturate: + if (op_expr[0] && op_expr[0]->operation == ir_binop_add) { + ir_expression *b2f_0 = op_expr[0]->operands[0]->as_expression(); + ir_expression *b2f_1 = op_expr[0]->operands[1]->as_expression(); + + if (b2f_0 && b2f_0->operation == ir_unop_b2f && + b2f_1 && b2f_1->operation == ir_unop_b2f) { + return b2f(logic_or(b2f_0->operands[0], b2f_1->operands[0])); + } + } + break; + + case ir_binop_add: + if (is_vec_zero(op_const[0])) + return ir->operands[1]; + if (is_vec_zero(op_const[1])) + return ir->operands[0]; + + /* Reassociate addition of constants so that we can do constant + * folding. + */ + if (op_const[0] && !op_const[1]) + reassociate_constant(ir, 0, op_const[0], op_expr[1]); + if (op_const[1] && !op_const[0]) + reassociate_constant(ir, 1, op_const[1], op_expr[0]); + + /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */ + if (options->OptimizeForAOS) { + ir_expression *expr = try_replace_with_dot(op_expr[0], op_expr[1], + mem_ctx); + if (expr) + return expr; + } + + /* Replace (-x + y) * a + x and commutative variations with lrp(x, y, a). + * + * (-x + y) * a + x + * (x * -a) + (y * a) + x + * x + (x * -a) + (y * a) + * x * (1 - a) + y * a + * lrp(x, y, a) + */ + for (int mul_pos = 0; mul_pos < 2; mul_pos++) { + ir_expression *mul = op_expr[mul_pos]; + + if (!mul || mul->operation != ir_binop_mul) + continue; + + /* Multiply found on one of the operands. Now check for an + * inner addition operation. + */ + for (int inner_add_pos = 0; inner_add_pos < 2; inner_add_pos++) { + ir_expression *inner_add = + mul->operands[inner_add_pos]->as_expression(); + + if (!inner_add || inner_add->operation != ir_binop_add) + continue; + + /* Inner addition found on one of the operands. 
Now check for + * one of the operands of the inner addition to be the negative + * of x_operand. + */ + for (int neg_pos = 0; neg_pos < 2; neg_pos++) { + ir_expression *neg = + inner_add->operands[neg_pos]->as_expression(); + + if (!neg || neg->operation != ir_unop_neg) + continue; + + ir_rvalue *x_operand = ir->operands[1 - mul_pos]; + + if (!neg->operands[0]->equals(x_operand)) + continue; + + ir_rvalue *y_operand = inner_add->operands[1 - neg_pos]; + ir_rvalue *a_operand = mul->operands[1 - inner_add_pos]; + + if (x_operand->type != y_operand->type || + x_operand->type != a_operand->type) + continue; + + return lrp(x_operand, y_operand, a_operand); + } + } + } + + break; + + case ir_binop_sub: + if (is_vec_zero(op_const[0])) + return neg(ir->operands[1]); + if (is_vec_zero(op_const[1])) + return ir->operands[0]; + break; + + case ir_binop_mul: + if (is_vec_one(op_const[0])) + return ir->operands[1]; + if (is_vec_one(op_const[1])) + return ir->operands[0]; + + if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) + return ir_constant::zero(ir, ir->type); + + if (is_vec_negative_one(op_const[0])) + return neg(ir->operands[1]); + if (is_vec_negative_one(op_const[1])) + return neg(ir->operands[0]); + + if (op_expr[0] && op_expr[0]->operation == ir_unop_b2f && + op_expr[1] && op_expr[1]->operation == ir_unop_b2f) { + return b2f(logic_and(op_expr[0]->operands[0], op_expr[1]->operands[0])); + } + + /* Reassociate multiplication of constants so that we can do + * constant folding. 
+ */ + if (op_const[0] && !op_const[1]) + reassociate_constant(ir, 0, op_const[0], op_expr[1]); + if (op_const[1] && !op_const[0]) + reassociate_constant(ir, 1, op_const[1], op_expr[0]); + + /* Optimizes + * + * (mul (floor (add (abs x) 0.5) (sign x))) + * + * into + * + * (trunc (add x (mul (sign x) 0.5))) + */ + for (int i = 0; i < 2; i++) { + ir_expression *sign_expr = ir->operands[i]->as_expression(); + ir_expression *floor_expr = ir->operands[1 - i]->as_expression(); + + if (!sign_expr || sign_expr->operation != ir_unop_sign || + !floor_expr || floor_expr->operation != ir_unop_floor) + continue; + + ir_expression *add_expr = floor_expr->operands[0]->as_expression(); + if (!add_expr || add_expr->operation != ir_binop_add) + continue; + + for (int j = 0; j < 2; j++) { + ir_expression *abs_expr = add_expr->operands[j]->as_expression(); + if (!abs_expr || abs_expr->operation != ir_unop_abs) + continue; + + ir_constant *point_five = add_expr->operands[1 - j]->as_constant(); + if (!point_five || !point_five->is_value(0.5, 0)) + continue; + + if (abs_expr->operands[0]->equals(sign_expr->operands[0])) { + return trunc(add(abs_expr->operands[0], + mul(sign_expr, point_five))); + } + } + } + break; + + case ir_binop_div: + if (is_vec_one(op_const[0]) && ( + ir->type->base_type == GLSL_TYPE_FLOAT || + ir->type->base_type == GLSL_TYPE_DOUBLE)) { + return new(mem_ctx) ir_expression(ir_unop_rcp, + ir->operands[1]->type, + ir->operands[1], + NULL); + } + if (is_vec_one(op_const[1])) + return ir->operands[0]; + break; + + case ir_binop_dot: + if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) + return ir_constant::zero(mem_ctx, ir->type); + + for (int i = 0; i < 2; i++) { + if (!op_const[i]) + continue; + + unsigned components[4] = { 0 }, count = 0; + + for (unsigned c = 0; c < op_const[i]->type->vector_elements; c++) { + if (op_const[i]->is_zero()) + continue; + + components[count] = c; + count++; + } + + /* No channels had zero values; bail. 
*/ + if (count >= op_const[i]->type->vector_elements) + break; + + ir_expression_operation op = count == 1 ? + ir_binop_mul : ir_binop_dot; + + /* Swizzle both operands to remove the channels that were zero. */ + return new(mem_ctx) + ir_expression(op, ir->type, + new(mem_ctx) ir_swizzle(ir->operands[0], + components, count), + new(mem_ctx) ir_swizzle(ir->operands[1], + components, count)); + } + break; + + case ir_binop_less: + case ir_binop_lequal: + case ir_binop_greater: + case ir_binop_gequal: + case ir_binop_equal: + case ir_binop_nequal: + for (int add_pos = 0; add_pos < 2; add_pos++) { + ir_expression *add = op_expr[add_pos]; + + if (!add || add->operation != ir_binop_add) + continue; + + ir_constant *zero = op_const[1 - add_pos]; + if (!is_vec_zero(zero)) + continue; + + /* Depending of the zero position we want to optimize + * (0 cmp x+y) into (-x cmp y) or (x+y cmp 0) into (x cmp -y) + */ + if (add_pos == 1) { + return new(mem_ctx) ir_expression(ir->operation, + neg(add->operands[0]), + add->operands[1]); + } else { + return new(mem_ctx) ir_expression(ir->operation, + add->operands[0], + neg(add->operands[1])); + } + } + break; + + case ir_binop_all_equal: + case ir_binop_any_nequal: + if (ir->operands[0]->type->is_scalar() && + ir->operands[1]->type->is_scalar()) + return new(mem_ctx) ir_expression(ir->operation == ir_binop_all_equal + ? 
ir_binop_equal : ir_binop_nequal, + ir->operands[0], + ir->operands[1]); + break; + + case ir_binop_rshift: + case ir_binop_lshift: + /* 0 >> x == 0 */ + if (is_vec_zero(op_const[0])) + return ir->operands[0]; + /* x >> 0 == x */ + if (is_vec_zero(op_const[1])) + return ir->operands[0]; + break; + + case ir_binop_logic_and: + if (is_vec_one(op_const[0])) { + return ir->operands[1]; + } else if (is_vec_one(op_const[1])) { + return ir->operands[0]; + } else if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) { + return ir_constant::zero(mem_ctx, ir->type); + } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not && + op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) { + /* De Morgan's Law: + * (not A) and (not B) === not (A or B) + */ + return logic_not(logic_or(op_expr[0]->operands[0], + op_expr[1]->operands[0])); + } else if (ir->operands[0]->equals(ir->operands[1])) { + /* (a && a) == a */ + return ir->operands[0]; + } + break; + + case ir_binop_logic_xor: + if (is_vec_zero(op_const[0])) { + return ir->operands[1]; + } else if (is_vec_zero(op_const[1])) { + return ir->operands[0]; + } else if (is_vec_one(op_const[0])) { + return logic_not(ir->operands[1]); + } else if (is_vec_one(op_const[1])) { + return logic_not(ir->operands[0]); + } else if (ir->operands[0]->equals(ir->operands[1])) { + /* (a ^^ a) == false */ + return ir_constant::zero(mem_ctx, ir->type); + } + break; + + case ir_binop_logic_or: + if (is_vec_zero(op_const[0])) { + return ir->operands[1]; + } else if (is_vec_zero(op_const[1])) { + return ir->operands[0]; + } else if (is_vec_one(op_const[0]) || is_vec_one(op_const[1])) { + ir_constant_data data; + + for (unsigned i = 0; i < 16; i++) + data.b[i] = true; + + return new(mem_ctx) ir_constant(ir->type, &data); + } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not && + op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) { + /* De Morgan's Law: + * (not A) or (not B) === not (A and B) + */ + return 
logic_not(logic_and(op_expr[0]->operands[0], + op_expr[1]->operands[0])); + } else if (ir->operands[0]->equals(ir->operands[1])) { + /* (a || a) == a */ + return ir->operands[0]; + } + break; + + case ir_binop_pow: + /* 1^x == 1 */ + if (is_vec_one(op_const[0])) + return op_const[0]; + + /* x^1 == x */ + if (is_vec_one(op_const[1])) + return ir->operands[0]; + + /* pow(2,x) == exp2(x) */ + if (is_vec_two(op_const[0])) + return expr(ir_unop_exp2, ir->operands[1]); + + if (is_vec_two(op_const[1])) { + ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x", + ir_var_temporary); + base_ir->insert_before(x); + base_ir->insert_before(assign(x, ir->operands[0])); + return mul(x, x); + } + + if (is_vec_four(op_const[1])) { + ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x", + ir_var_temporary); + base_ir->insert_before(x); + base_ir->insert_before(assign(x, ir->operands[0])); + + ir_variable *squared = new(ir) ir_variable(ir->operands[1]->type, + "squared", + ir_var_temporary); + base_ir->insert_before(squared); + base_ir->insert_before(assign(squared, mul(x, x))); + return mul(squared, squared); + } + + break; + + case ir_binop_min: + case ir_binop_max: + if (ir->type->base_type != GLSL_TYPE_FLOAT || options->EmitNoSat) + break; + + /* Replace min(max) operations and its commutative combinations with + * a saturate operation + */ + for (int op = 0; op < 2; op++) { + ir_expression *inner_expr = op_expr[op]; + ir_constant *outer_const = op_const[1 - op]; + ir_expression_operation op_cond = (ir->operation == ir_binop_max) ? + ir_binop_min : ir_binop_max; + + if (!inner_expr || !outer_const || (inner_expr->operation != op_cond)) + continue; + + /* One of these has to be a constant */ + if (!inner_expr->operands[0]->as_constant() && + !inner_expr->operands[1]->as_constant()) + break; + + /* Found a min(max) combination. 
Now try to see if its operands + * meet our conditions that we can do just a single saturate operation + */ + for (int minmax_op = 0; minmax_op < 2; minmax_op++) { + ir_rvalue *x = inner_expr->operands[minmax_op]; + ir_rvalue *y = inner_expr->operands[1 - minmax_op]; + + ir_constant *inner_const = y->as_constant(); + if (!inner_const) + continue; + + /* min(max(x, 0.0), 1.0) is sat(x) */ + if (ir->operation == ir_binop_min && + inner_const->is_zero() && + outer_const->is_one()) + return saturate(x); + + /* max(min(x, 1.0), 0.0) is sat(x) */ + if (ir->operation == ir_binop_max && + inner_const->is_one() && + outer_const->is_zero()) + return saturate(x); + + /* min(max(x, 0.0), b) where b < 1.0 is sat(min(x, b)) */ + if (ir->operation == ir_binop_min && + inner_const->is_zero() && + is_less_than_one(outer_const)) + return saturate(expr(ir_binop_min, x, outer_const)); + + /* max(min(x, b), 0.0) where b < 1.0 is sat(min(x, b)) */ + if (ir->operation == ir_binop_max && + is_less_than_one(inner_const) && + outer_const->is_zero()) + return saturate(expr(ir_binop_min, x, inner_const)); + + /* max(min(x, 1.0), b) where b > 0.0 is sat(max(x, b)) */ + if (ir->operation == ir_binop_max && + inner_const->is_one() && + is_greater_than_zero(outer_const)) + return saturate(expr(ir_binop_max, x, outer_const)); + + /* min(max(x, b), 1.0) where b > 0.0 is sat(max(x, b)) */ + if (ir->operation == ir_binop_min && + is_greater_than_zero(inner_const) && + outer_const->is_one()) + return saturate(expr(ir_binop_max, x, inner_const)); + } + } + + break; + + case ir_unop_rcp: + if (op_expr[0] && op_expr[0]->operation == ir_unop_rcp) + return op_expr[0]->operands[0]; + + if (op_expr[0] && (op_expr[0]->operation == ir_unop_exp2 || + op_expr[0]->operation == ir_unop_exp)) { + return new(mem_ctx) ir_expression(op_expr[0]->operation, ir->type, + neg(op_expr[0]->operands[0])); + } + + /* While ir_to_mesa.cpp will lower sqrt(x) to rcp(rsq(x)), it does so at + * its IR level, so we can always apply 
this transformation. + */ + if (op_expr[0] && op_expr[0]->operation == ir_unop_rsq) + return sqrt(op_expr[0]->operands[0]); + + /* As far as we know, all backends are OK with rsq. */ + if (op_expr[0] && op_expr[0]->operation == ir_unop_sqrt) { + return rsq(op_expr[0]->operands[0]); + } + + break; + + case ir_triop_fma: + /* Operands are op0 * op1 + op2. */ + if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) { + return ir->operands[2]; + } else if (is_vec_zero(op_const[2])) { + return mul(ir->operands[0], ir->operands[1]); + } else if (is_vec_one(op_const[0])) { + return add(ir->operands[1], ir->operands[2]); + } else if (is_vec_one(op_const[1])) { + return add(ir->operands[0], ir->operands[2]); + } + break; + + case ir_triop_lrp: + /* Operands are (x, y, a). */ + if (is_vec_zero(op_const[2])) { + return ir->operands[0]; + } else if (is_vec_one(op_const[2])) { + return ir->operands[1]; + } else if (ir->operands[0]->equals(ir->operands[1])) { + return ir->operands[0]; + } else if (is_vec_zero(op_const[0])) { + return mul(ir->operands[1], ir->operands[2]); + } else if (is_vec_zero(op_const[1])) { + unsigned op2_components = ir->operands[2]->type->vector_elements; + ir_constant *one; + + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + one = new(mem_ctx) ir_constant(1.0f, op2_components); + break; + case GLSL_TYPE_DOUBLE: + one = new(mem_ctx) ir_constant(1.0, op2_components); + break; + default: + one = NULL; + unreachable("unexpected type"); + } + + return mul(ir->operands[0], add(one, neg(ir->operands[2]))); + } + break; + + case ir_triop_csel: + if (is_vec_one(op_const[0])) + return ir->operands[1]; + if (is_vec_zero(op_const[0])) + return ir->operands[2]; + break; + + default: + break; + } + + return ir; +} + +void +ir_algebraic_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_expression *expr = (*rvalue)->as_expression(); + if (!expr || expr->operation == ir_quadop_vector) + return; + + ir_rvalue *new_rvalue = 
handle_expression(expr); + if (new_rvalue == *rvalue) + return; + + /* If the expr used to be some vec OP scalar returning a vector, and the + * optimization gave us back a scalar, we still need to turn it into a + * vector. + */ + *rvalue = swizzle_if_required(expr, new_rvalue); + + this->progress = true; +} + +bool +do_algebraic(exec_list *instructions, bool native_integers, + const struct gl_shader_compiler_options *options) +{ + ir_algebraic_visitor v(native_integers, options); + + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/opt_array_splitting.cpp b/src/compiler/glsl/opt_array_splitting.cpp new file mode 100644 index 0000000..cceec6b --- /dev/null +++ b/src/compiler/glsl/opt_array_splitting.cpp @@ -0,0 +1,408 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +/** + * \file opt_array_splitting.cpp + * + * If an array is always dereferenced with a constant index, then + * split it apart into its elements, making it more amenable to other + * optimization passes. + * + * This skips uniform/varying arrays, which would need careful + * handling due to their ir->location fields tying them to the GL API + * and other shader stages. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_rvalue_visitor.h" +#include "compiler/glsl_types.h" + +static bool debug = false; + +namespace { + +namespace opt_array_splitting { + +class variable_entry : public exec_node +{ +public: + variable_entry(ir_variable *var) + { + this->var = var; + this->split = true; + this->declaration = false; + this->components = NULL; + this->mem_ctx = NULL; + if (var->type->is_array()) + this->size = var->type->length; + else + this->size = var->type->matrix_columns; + } + + ir_variable *var; /* The key: the variable's pointer. */ + unsigned size; /* array length or matrix columns */ + + /** Whether this array should be split or not. */ + bool split; + + /* If the variable had a decl we can work with in the instruction + * stream. We can't do splitting on function arguments, which + * don't get this variable set. + */ + bool declaration; + + ir_variable **components; + + /** ralloc_parent(this->var) -- the shader's talloc context. */ + void *mem_ctx; +}; + +} /* namespace */ + +using namespace opt_array_splitting; + +/** + * This class does a walk over the tree, coming up with the set of + * variables that could be split by looking to see if they are arrays + * that are only ever constant-index dereferenced. 
+ */ +class ir_array_reference_visitor : public ir_hierarchical_visitor { +public: + ir_array_reference_visitor(void) + { + this->mem_ctx = ralloc_context(NULL); + this->variable_list.make_empty(); + } + + ~ir_array_reference_visitor(void) + { + ralloc_free(mem_ctx); + } + + bool get_split_list(exec_list *instructions, bool linked); + + virtual ir_visitor_status visit(ir_variable *); + virtual ir_visitor_status visit(ir_dereference_variable *); + virtual ir_visitor_status visit_enter(ir_dereference_array *); + virtual ir_visitor_status visit_enter(ir_function_signature *); + + variable_entry *get_variable_entry(ir_variable *var); + + /* List of variable_entry */ + exec_list variable_list; + + void *mem_ctx; +}; + +} /* namespace */ + +variable_entry * +ir_array_reference_visitor::get_variable_entry(ir_variable *var) +{ + assert(var); + + if (var->data.mode != ir_var_auto && + var->data.mode != ir_var_temporary) + return NULL; + + if (!(var->type->is_array() || var->type->is_matrix())) + return NULL; + + /* If the array hasn't been sized yet, we can't split it. After + * linking, this should be resolved. + */ + if (var->type->is_unsized_array()) + return NULL; + + foreach_in_list(variable_entry, entry, &this->variable_list) { + if (entry->var == var) + return entry; + } + + variable_entry *entry = new(mem_ctx) variable_entry(var); + this->variable_list.push_tail(entry); + return entry; +} + + +ir_visitor_status +ir_array_reference_visitor::visit(ir_variable *ir) +{ + variable_entry *entry = this->get_variable_entry(ir); + + if (entry) + entry->declaration = true; + + return visit_continue; +} + +ir_visitor_status +ir_array_reference_visitor::visit(ir_dereference_variable *ir) +{ + variable_entry *entry = this->get_variable_entry(ir->var); + + /* If we made it to here without seeing an ir_dereference_array, + * then the dereference of this array didn't have a constant index + * (see the visit_continue_with_parent below), so we can't split + * the variable. 
+ */ + if (entry) + entry->split = false; + + return visit_continue; +} + +ir_visitor_status +ir_array_reference_visitor::visit_enter(ir_dereference_array *ir) +{ + ir_dereference_variable *deref = ir->array->as_dereference_variable(); + if (!deref) + return visit_continue; + + variable_entry *entry = this->get_variable_entry(deref->var); + + /* If the access to the array has a variable index, we wouldn't + * know which split variable this dereference should go to. + */ + if (entry && !ir->array_index->as_constant()) + entry->split = false; + + /* If the index is also array dereference, visit index. */ + if (ir->array_index->as_dereference_array()) + visit_enter(ir->array_index->as_dereference_array()); + + return visit_continue_with_parent; +} + +ir_visitor_status +ir_array_reference_visitor::visit_enter(ir_function_signature *ir) +{ + /* We don't have logic for array-splitting function arguments, + * so just look at the body instructions and not the parameter + * declarations. + */ + visit_list_elements(this, &ir->body); + return visit_continue_with_parent; +} + +bool +ir_array_reference_visitor::get_split_list(exec_list *instructions, + bool linked) +{ + visit_list_elements(this, instructions); + + /* If the shaders aren't linked yet, we can't mess with global + * declarations, which need to be matched by name across shaders. + */ + if (!linked) { + foreach_in_list(ir_instruction, node, instructions) { + ir_variable *var = node->as_variable(); + if (var) { + variable_entry *entry = get_variable_entry(var); + if (entry) + entry->remove(); + } + } + } + + /* Trim out variables we found that we can't split. 
*/ + foreach_in_list_safe(variable_entry, entry, &variable_list) { + if (debug) { + printf("array %s@%p: decl %d, split %d\n", + entry->var->name, (void *) entry->var, entry->declaration, + entry->split); + } + + if (!(entry->declaration && entry->split)) { + entry->remove(); + } + } + + return !variable_list.is_empty(); +} + +/** + * This class rewrites the dereferences of arrays that have been split + * to use the newly created ir_variables for each component. + */ +class ir_array_splitting_visitor : public ir_rvalue_visitor { +public: + ir_array_splitting_visitor(exec_list *vars) + { + this->variable_list = vars; + } + + virtual ~ir_array_splitting_visitor() + { + } + + virtual ir_visitor_status visit_leave(ir_assignment *); + + void split_deref(ir_dereference **deref); + void handle_rvalue(ir_rvalue **rvalue); + variable_entry *get_splitting_entry(ir_variable *var); + + exec_list *variable_list; +}; + +variable_entry * +ir_array_splitting_visitor::get_splitting_entry(ir_variable *var) +{ + assert(var); + + foreach_in_list(variable_entry, entry, this->variable_list) { + if (entry->var == var) { + return entry; + } + } + + return NULL; +} + +void +ir_array_splitting_visitor::split_deref(ir_dereference **deref) +{ + ir_dereference_array *deref_array = (*deref)->as_dereference_array(); + if (!deref_array) + return; + + ir_dereference_variable *deref_var = deref_array->array->as_dereference_variable(); + if (!deref_var) + return; + ir_variable *var = deref_var->var; + + variable_entry *entry = get_splitting_entry(var); + if (!entry) + return; + + ir_constant *constant = deref_array->array_index->as_constant(); + assert(constant); + + if (constant->value.i[0] >= 0 && constant->value.i[0] < (int)entry->size) { + *deref = new(entry->mem_ctx) + ir_dereference_variable(entry->components[constant->value.i[0]]); + } else { + /* There was a constant array access beyond the end of the + * array. This might have happened due to constant folding + * after the initial parse. 
This produces an undefined value, + * but shouldn't crash. Just give them an uninitialized + * variable. + */ + ir_variable *temp = new(entry->mem_ctx) ir_variable(deref_array->type, + "undef", + ir_var_temporary); + entry->components[0]->insert_before(temp); + *deref = new(entry->mem_ctx) ir_dereference_variable(temp); + } +} + +void +ir_array_splitting_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_dereference *deref = (*rvalue)->as_dereference(); + + if (!deref) + return; + + split_deref(&deref); + *rvalue = deref; +} + +ir_visitor_status +ir_array_splitting_visitor::visit_leave(ir_assignment *ir) +{ + /* The normal rvalue visitor skips the LHS of assignments, but we + * need to process those just the same. + */ + ir_rvalue *lhs = ir->lhs; + + handle_rvalue(&lhs); + ir->lhs = lhs->as_dereference(); + + ir->lhs->accept(this); + + handle_rvalue(&ir->rhs); + ir->rhs->accept(this); + + if (ir->condition) { + handle_rvalue(&ir->condition); + ir->condition->accept(this); + } + + return visit_continue; +} + +bool +optimize_split_arrays(exec_list *instructions, bool linked) +{ + ir_array_reference_visitor refs; + if (!refs.get_split_list(instructions, linked)) + return false; + + void *mem_ctx = ralloc_context(NULL); + + /* Replace the decls of the arrays to be split with their split + * components. 
+ */ + foreach_in_list(variable_entry, entry, &refs.variable_list) { + const struct glsl_type *type = entry->var->type; + const struct glsl_type *subtype; + + if (type->is_matrix()) + subtype = type->column_type(); + else + subtype = type->fields.array; + + entry->mem_ctx = ralloc_parent(entry->var); + + entry->components = ralloc_array(mem_ctx, + ir_variable *, + entry->size); + + for (unsigned int i = 0; i < entry->size; i++) { + const char *name = ralloc_asprintf(mem_ctx, "%s_%d", + entry->var->name, i); + + entry->components[i] = + new(entry->mem_ctx) ir_variable(subtype, name, ir_var_temporary); + entry->var->insert_before(entry->components[i]); + } + + entry->var->remove(); + } + + ir_array_splitting_visitor split(&refs.variable_list); + visit_list_elements(&split, instructions); + + if (debug) + _mesa_print_ir(stdout, instructions, NULL); + + ralloc_free(mem_ctx); + + return true; + +} diff --git a/src/compiler/glsl/opt_conditional_discard.cpp b/src/compiler/glsl/opt_conditional_discard.cpp new file mode 100644 index 0000000..1ca8803 --- /dev/null +++ b/src/compiler/glsl/opt_conditional_discard.cpp @@ -0,0 +1,81 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_conditional_discard.cpp + * + * Replace + * + * if (cond) discard; + * + * with + * + * (discard <condition>) + */ + +#include "compiler/glsl_types.h" +#include "ir.h" + +namespace { + +class opt_conditional_discard_visitor : public ir_hierarchical_visitor { +public: + opt_conditional_discard_visitor() + { + progress = false; + } + + ir_visitor_status visit_leave(ir_if *); + + bool progress; +}; + +} /* anonymous namespace */ + +bool +opt_conditional_discard(exec_list *instructions) +{ + opt_conditional_discard_visitor v; + v.run(instructions); + return v.progress; +} + +ir_visitor_status +opt_conditional_discard_visitor::visit_leave(ir_if *ir) +{ + /* Look for "if (...) discard" with no else clause or extra statements. */ + if (ir->then_instructions.is_empty() || + !ir->then_instructions.head->next->is_tail_sentinel() || + !((ir_instruction *) ir->then_instructions.head)->as_discard() || + !ir->else_instructions.is_empty()) + return visit_continue; + + /* Move the condition and replace the ir_if with the ir_discard. 
*/ + ir_discard *discard = (ir_discard *) ir->then_instructions.head; + discard->condition = ir->condition; + ir->replace_with(discard); + + progress = true; + + return visit_continue; +} diff --git a/src/compiler/glsl/opt_constant_folding.cpp b/src/compiler/glsl/opt_constant_folding.cpp new file mode 100644 index 0000000..150a17b --- /dev/null +++ b/src/compiler/glsl/opt_constant_folding.cpp @@ -0,0 +1,190 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_constant_folding.cpp + * Replace constant-valued expressions with references to constant values. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_rvalue_visitor.h" +#include "ir_optimization.h" +#include "compiler/glsl_types.h" + +namespace { + +/** + * Visitor class for replacing expressions with ir_constant values. 
+ */ + +class ir_constant_folding_visitor : public ir_rvalue_visitor { +public: + ir_constant_folding_visitor() + { + this->progress = false; + } + + virtual ~ir_constant_folding_visitor() + { + /* empty */ + } + + virtual ir_visitor_status visit_enter(ir_discard *ir); + virtual ir_visitor_status visit_enter(ir_assignment *ir); + virtual ir_visitor_status visit_enter(ir_call *ir); + + virtual void handle_rvalue(ir_rvalue **rvalue); + + bool progress; +}; + +} /* unnamed namespace */ + +void +ir_constant_folding_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (*rvalue == NULL || (*rvalue)->ir_type == ir_type_constant) + return; + + /* Note that we do rvalue visitoring on leaving. So if an + * expression has a non-constant operand, no need to go looking + * down it to find if it's constant. This cuts the time of this + * pass down drastically. + */ + ir_expression *expr = (*rvalue)->as_expression(); + if (expr) { + for (unsigned int i = 0; i < expr->get_num_operands(); i++) { + if (!expr->operands[i]->as_constant()) + return; + } + } + + /* Ditto for swizzles. */ + ir_swizzle *swiz = (*rvalue)->as_swizzle(); + if (swiz && !swiz->val->as_constant()) + return; + + ir_constant *constant = (*rvalue)->constant_expression_value(); + if (constant) { + *rvalue = constant; + this->progress = true; + } else { + (*rvalue)->accept(this); + } +} + +ir_visitor_status +ir_constant_folding_visitor::visit_enter(ir_discard *ir) +{ + if (ir->condition) { + ir->condition->accept(this); + handle_rvalue(&ir->condition); + + ir_constant *const_val = ir->condition->as_constant(); + /* If the condition is constant, either remove the condition or + * remove the never-executed assignment. 
+ */ + if (const_val) { + if (const_val->value.b[0]) + ir->condition = NULL; + else + ir->remove(); + this->progress = true; + } + } + + return visit_continue_with_parent; +} + +ir_visitor_status +ir_constant_folding_visitor::visit_enter(ir_assignment *ir) +{ + ir->rhs->accept(this); + handle_rvalue(&ir->rhs); + + if (ir->condition) { + ir->condition->accept(this); + handle_rvalue(&ir->condition); + + ir_constant *const_val = ir->condition->as_constant(); + /* If the condition is constant, either remove the condition or + * remove the never-executed assignment. + */ + if (const_val) { + if (const_val->value.b[0]) + ir->condition = NULL; + else + ir->remove(); + this->progress = true; + } + } + + /* Don't descend into the LHS because we want it to stay as a + * variable dereference. FINISHME: We probably should to get array + * indices though. + */ + return visit_continue_with_parent; +} + +ir_visitor_status +ir_constant_folding_visitor::visit_enter(ir_call *ir) +{ + /* Attempt to constant fold parameters */ + foreach_two_lists(formal_node, &ir->callee->parameters, + actual_node, &ir->actual_parameters) { + ir_rvalue *param_rval = (ir_rvalue *) actual_node; + ir_variable *sig_param = (ir_variable *) formal_node; + + if (sig_param->data.mode == ir_var_function_in + || sig_param->data.mode == ir_var_const_in) { + ir_rvalue *new_param = param_rval; + + handle_rvalue(&new_param); + if (new_param != param_rval) { + param_rval->replace_with(new_param); + } + } + } + + /* Next, see if the call can be replaced with an assignment of a constant */ + ir_constant *const_val = ir->constant_expression_value(); + + if (const_val != NULL) { + ir_assignment *assignment = + new(ralloc_parent(ir)) ir_assignment(ir->return_deref, const_val); + ir->replace_with(assignment); + } + + return visit_continue_with_parent; +} + +bool +do_constant_folding(exec_list *instructions) +{ + ir_constant_folding_visitor constant_folding; + + visit_list_elements(&constant_folding, instructions); + + 
 return constant_folding.progress; +} diff --git a/src/compiler/glsl/opt_constant_propagation.cpp b/src/compiler/glsl/opt_constant_propagation.cpp new file mode 100644 index 0000000..416ba16 --- /dev/null +++ b/src/compiler/glsl/opt_constant_propagation.cpp @@ -0,0 +1,524 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_constant_propagation.cpp + * + * Tracks assignments of constants to channels of variables, and replaces + * usage of those constant channels with direct usage of the constants. + * + * This can lead to constant folding and algebraic optimizations in + * those later expressions, while causing no increase in instruction + * count (due to constants being generally free to load from a + * constant push buffer or as instruction immediate values) and + * possibly reducing register pressure.
+ */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_rvalue_visitor.h" +#include "ir_basic_block.h" +#include "ir_optimization.h" +#include "compiler/glsl_types.h" +#include "util/hash_table.h" + +namespace { + +class acp_entry : public exec_node +{ +public: + acp_entry(ir_variable *var, unsigned write_mask, ir_constant *constant) + { + assert(var); + assert(constant); + this->var = var; + this->write_mask = write_mask; + this->constant = constant; + this->initial_values = write_mask; + } + + acp_entry(const acp_entry *src) + { + this->var = src->var; + this->write_mask = src->write_mask; + this->constant = src->constant; + this->initial_values = src->initial_values; + } + + ir_variable *var; + ir_constant *constant; + unsigned write_mask; + + /** Mask of values initially available in the constant. */ + unsigned initial_values; +}; + + +class kill_entry : public exec_node +{ +public: + kill_entry(ir_variable *var, unsigned write_mask) + { + assert(var); + this->var = var; + this->write_mask = write_mask; + } + + ir_variable *var; + unsigned write_mask; +}; + +class ir_constant_propagation_visitor : public ir_rvalue_visitor { +public: + ir_constant_propagation_visitor() + { + progress = false; + killed_all = false; + mem_ctx = ralloc_context(0); + this->acp = new(mem_ctx) exec_list; + this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + } + ~ir_constant_propagation_visitor() + { + ralloc_free(mem_ctx); + } + + virtual ir_visitor_status visit_enter(class ir_loop *); + virtual ir_visitor_status visit_enter(class ir_function_signature *); + virtual ir_visitor_status visit_enter(class ir_function *); + virtual ir_visitor_status visit_leave(class ir_assignment *); + virtual ir_visitor_status visit_enter(class ir_call *); + virtual ir_visitor_status visit_enter(class ir_if *); + + void add_constant(ir_assignment *ir); + void constant_folding(ir_rvalue **rvalue); + void constant_propagation(ir_rvalue **rvalue); + void 
kill(ir_variable *ir, unsigned write_mask); + void handle_if_block(exec_list *instructions); + void handle_rvalue(ir_rvalue **rvalue); + + /** List of acp_entry: The available constants to propagate */ + exec_list *acp; + + /** + * List of kill_entry: The masks of variables whose values were + * killed in this block. + */ + hash_table *kills; + + bool progress; + + bool killed_all; + + void *mem_ctx; +}; + + +void +ir_constant_propagation_visitor::constant_folding(ir_rvalue **rvalue) { + + if (*rvalue == NULL || (*rvalue)->ir_type == ir_type_constant) + return; + + /* Note that we visit rvalues one leaving. So if an expression has a + * non-constant operand, no need to go looking down it to find if it's + * constant. This cuts the time of this pass down drastically. + */ + ir_expression *expr = (*rvalue)->as_expression(); + if (expr) { + for (unsigned int i = 0; i < expr->get_num_operands(); i++) { + if (!expr->operands[i]->as_constant()) + return; + } + } + + /* Ditto for swizzles. */ + ir_swizzle *swiz = (*rvalue)->as_swizzle(); + if (swiz && !swiz->val->as_constant()) + return; + + ir_constant *constant = (*rvalue)->constant_expression_value(); + if (constant) { + *rvalue = constant; + this->progress = true; + } +} + +void +ir_constant_propagation_visitor::constant_propagation(ir_rvalue **rvalue) { + + if (this->in_assignee || !*rvalue) + return; + + const glsl_type *type = (*rvalue)->type; + if (!type->is_scalar() && !type->is_vector()) + return; + + ir_swizzle *swiz = NULL; + ir_dereference_variable *deref = (*rvalue)->as_dereference_variable(); + if (!deref) { + swiz = (*rvalue)->as_swizzle(); + if (!swiz) + return; + + deref = swiz->val->as_dereference_variable(); + if (!deref) + return; + } + + ir_constant_data data; + memset(&data, 0, sizeof(data)); + + for (unsigned int i = 0; i < type->components(); i++) { + int channel; + acp_entry *found = NULL; + + if (swiz) { + switch (i) { + case 0: channel = swiz->mask.x; break; + case 1: channel = swiz->mask.y; 
break; + case 2: channel = swiz->mask.z; break; + case 3: channel = swiz->mask.w; break; + default: assert(!"shouldn't be reached"); channel = 0; break; + } + } else { + channel = i; + } + + foreach_in_list(acp_entry, entry, this->acp) { + if (entry->var == deref->var && entry->write_mask & (1 << channel)) { + found = entry; + break; + } + } + + if (!found) + return; + + int rhs_channel = 0; + for (int j = 0; j < 4; j++) { + if (j == channel) + break; + if (found->initial_values & (1 << j)) + rhs_channel++; + } + + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + data.f[i] = found->constant->value.f[rhs_channel]; + break; + case GLSL_TYPE_DOUBLE: + data.d[i] = found->constant->value.d[rhs_channel]; + break; + case GLSL_TYPE_INT: + data.i[i] = found->constant->value.i[rhs_channel]; + break; + case GLSL_TYPE_UINT: + data.u[i] = found->constant->value.u[rhs_channel]; + break; + case GLSL_TYPE_BOOL: + data.b[i] = found->constant->value.b[rhs_channel]; + break; + default: + assert(!"not reached"); + break; + } + } + + *rvalue = new(ralloc_parent(deref)) ir_constant(type, &data); + this->progress = true; +} + +void +ir_constant_propagation_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + constant_propagation(rvalue); + constant_folding(rvalue); +} + +ir_visitor_status +ir_constant_propagation_visitor::visit_enter(ir_function_signature *ir) +{ + /* Treat entry into a function signature as a completely separate + * block. Any instructions at global scope will be shuffled into + * main() at link time, so they're irrelevant to us. 
+ */ + exec_list *orig_acp = this->acp; + hash_table *orig_kills = this->kills; + bool orig_killed_all = this->killed_all; + + this->acp = new(mem_ctx) exec_list; + this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + this->killed_all = false; + + visit_list_elements(this, &ir->body); + + this->kills = orig_kills; + this->acp = orig_acp; + this->killed_all = orig_killed_all; + + return visit_continue_with_parent; +} + +ir_visitor_status +ir_constant_propagation_visitor::visit_leave(ir_assignment *ir) +{ + constant_folding(&ir->rhs); + + if (this->in_assignee) + return visit_continue; + + unsigned kill_mask = ir->write_mask; + if (ir->lhs->as_dereference_array()) { + /* The LHS of the assignment uses an array indexing operator (e.g. v[i] + * = ...;). Since we only try to constant propagate vectors and + * scalars, this means that either (a) array indexing is being used to + * select a vector component, or (b) the variable in question is neither + * a scalar or a vector, so we don't care about it. In the former case, + * we want to kill the whole vector, since in general we can't predict + * which vector component will be selected by array indexing. In the + * latter case, it doesn't matter what we do, so go ahead and kill the + * whole variable anyway. + * + * Note that if the array index is constant (e.g. v[2] = ...;), we could + * in principle be smarter, but we don't need to, because a future + * optimization pass will convert it to a simple assignment with the + * correct mask. 
+ */ + kill_mask = ~0; + } + kill(ir->lhs->variable_referenced(), kill_mask); + + add_constant(ir); + + return visit_continue; +} + +ir_visitor_status +ir_constant_propagation_visitor::visit_enter(ir_function *ir) +{ + (void) ir; + return visit_continue; +} + +ir_visitor_status +ir_constant_propagation_visitor::visit_enter(ir_call *ir) +{ + /* Do constant propagation on call parameters, but skip any out params */ + foreach_two_lists(formal_node, &ir->callee->parameters, + actual_node, &ir->actual_parameters) { + ir_variable *sig_param = (ir_variable *) formal_node; + ir_rvalue *param = (ir_rvalue *) actual_node; + if (sig_param->data.mode != ir_var_function_out + && sig_param->data.mode != ir_var_function_inout) { + ir_rvalue *new_param = param; + handle_rvalue(&new_param); + if (new_param != param) + param->replace_with(new_param); + else + param->accept(this); + } + } + + /* Since we're unlinked, we don't (necssarily) know the side effects of + * this call. So kill all copies. + */ + acp->make_empty(); + this->killed_all = true; + + return visit_continue_with_parent; +} + +void +ir_constant_propagation_visitor::handle_if_block(exec_list *instructions) +{ + exec_list *orig_acp = this->acp; + hash_table *orig_kills = this->kills; + bool orig_killed_all = this->killed_all; + + this->acp = new(mem_ctx) exec_list; + this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + this->killed_all = false; + + /* Populate the initial acp with a constant of the original */ + foreach_in_list(acp_entry, a, orig_acp) { + this->acp->push_tail(new(this->mem_ctx) acp_entry(a)); + } + + visit_list_elements(this, instructions); + + if (this->killed_all) { + orig_acp->make_empty(); + } + + hash_table *new_kills = this->kills; + this->kills = orig_kills; + this->acp = orig_acp; + this->killed_all = this->killed_all || orig_killed_all; + + hash_entry *htk; + hash_table_foreach(new_kills, htk) { + kill_entry *k = (kill_entry *) htk->data; + 
kill(k->var, k->write_mask); + } +} + +ir_visitor_status +ir_constant_propagation_visitor::visit_enter(ir_if *ir) +{ + ir->condition->accept(this); + handle_rvalue(&ir->condition); + + handle_if_block(&ir->then_instructions); + handle_if_block(&ir->else_instructions); + + /* handle_if_block() already descended into the children. */ + return visit_continue_with_parent; +} + +ir_visitor_status +ir_constant_propagation_visitor::visit_enter(ir_loop *ir) +{ + exec_list *orig_acp = this->acp; + hash_table *orig_kills = this->kills; + bool orig_killed_all = this->killed_all; + + /* FINISHME: For now, the initial acp for loops is totally empty. + * We could go through once, then go through again with the acp + * cloned minus the killed entries after the first run through. + */ + this->acp = new(mem_ctx) exec_list; + this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + this->killed_all = false; + + visit_list_elements(this, &ir->body_instructions); + + if (this->killed_all) { + orig_acp->make_empty(); + } + + hash_table *new_kills = this->kills; + this->kills = orig_kills; + this->acp = orig_acp; + this->killed_all = this->killed_all || orig_killed_all; + + hash_entry *htk; + hash_table_foreach(new_kills, htk) { + kill_entry *k = (kill_entry *) htk->data; + kill(k->var, k->write_mask); + } + + /* already descended into the children. */ + return visit_continue_with_parent; +} + +void +ir_constant_propagation_visitor::kill(ir_variable *var, unsigned write_mask) +{ + assert(var != NULL); + + /* We don't track non-vectors. */ + if (!var->type->is_vector() && !var->type->is_scalar()) + return; + + /* Remove any entries currently in the ACP for this kill. */ + foreach_in_list_safe(acp_entry, entry, this->acp) { + if (entry->var == var) { + entry->write_mask &= ~write_mask; + if (entry->write_mask == 0) + entry->remove(); + } + } + + /* Add this writemask of the variable to the list of killed + * variables in this block. 
+ */ + hash_entry *kill_hash_entry = _mesa_hash_table_search(this->kills, var); + if (kill_hash_entry) { + kill_entry *entry = (kill_entry *) kill_hash_entry->data; + entry->write_mask |= write_mask; + return; + } + /* Not already in the list. Make new entry. */ + _mesa_hash_table_insert(this->kills, var, + new(this->mem_ctx) kill_entry(var, write_mask)); +} + +/** + * Adds an entry to the available constant list if it's a plain assignment + * of a constant to a variable: the assignment must be unconditional, have + * a non-zero write mask, a variable dereference as its LHS and an + * ir_constant as its RHS. + */ +void +ir_constant_propagation_visitor::add_constant(ir_assignment *ir) +{ + acp_entry *entry; + + if (ir->condition) + return; + + if (!ir->write_mask) + return; + + ir_dereference_variable *deref = ir->lhs->as_dereference_variable(); + ir_constant *constant = ir->rhs->as_constant(); + + if (!deref || !constant) + return; + + /* Only do constant propagation on vectors and scalars. Constant + * matrices, arrays, or structures would require more work elsewhere. + */ + if (!deref->var->type->is_vector() && !deref->var->type->is_scalar()) + return; + + /* We can't do constant propagation on buffer or shared variables: since + * the underlying memory storage is shared across multiple threads we + * can't be sure that the variable value isn't modified between this + * assignment and the next instruction where its value is read. + */ + if (deref->var->data.mode == ir_var_shader_storage || + deref->var->data.mode == ir_var_shader_shared) + return; + + entry = new(this->mem_ctx) acp_entry(deref->var, ir->write_mask, constant); + this->acp->push_tail(entry); +} + +} /* unnamed namespace */ + +/** + * Does a constant propagation pass on the code present in the instruction stream.
+ */ +bool +do_constant_propagation(exec_list *instructions) +{ + ir_constant_propagation_visitor v; + + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/opt_constant_variable.cpp b/src/compiler/glsl/opt_constant_variable.cpp new file mode 100644 index 0000000..3ddb129 --- /dev/null +++ b/src/compiler/glsl/opt_constant_variable.cpp @@ -0,0 +1,218 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_constant_variable.cpp + * + * Marks variables assigned a single constant value over the course + * of the program as constant. + * + * The goal here is to trigger further constant folding and then dead + * code elimination. This is common with vector/matrix constructors + * and calls to builtin functions. 
+ */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_optimization.h" +#include "compiler/glsl_types.h" +#include "util/hash_table.h" + +namespace { + +struct assignment_entry { + int assignment_count; + ir_variable *var; + ir_constant *constval; + bool our_scope; +}; + +class ir_constant_variable_visitor : public ir_hierarchical_visitor { +public: + virtual ir_visitor_status visit_enter(ir_dereference_variable *); + virtual ir_visitor_status visit(ir_variable *); + virtual ir_visitor_status visit_enter(ir_assignment *); + virtual ir_visitor_status visit_enter(ir_call *); + + struct hash_table *ht; +}; + +} /* unnamed namespace */ + +static struct assignment_entry * +get_assignment_entry(ir_variable *var, struct hash_table *ht) +{ + struct hash_entry *hte = _mesa_hash_table_search(ht, var); + struct assignment_entry *entry; + + if (hte) { + entry = (struct assignment_entry *) hte->data; + } else { + entry = (struct assignment_entry *) calloc(1, sizeof(*entry)); + entry->var = var; + _mesa_hash_table_insert(ht, var, entry); + } + + return entry; +} + +ir_visitor_status +ir_constant_variable_visitor::visit(ir_variable *ir) +{ + struct assignment_entry *entry = get_assignment_entry(ir, this->ht); + entry->our_scope = true; + return visit_continue; +} + +/* Skip derefs of variables so that we can detect declarations. */ +ir_visitor_status +ir_constant_variable_visitor::visit_enter(ir_dereference_variable *ir) +{ + (void)ir; + return visit_continue_with_parent; +} + +ir_visitor_status +ir_constant_variable_visitor::visit_enter(ir_assignment *ir) +{ + ir_constant *constval; + struct assignment_entry *entry; + + entry = get_assignment_entry(ir->lhs->variable_referenced(), this->ht); + assert(entry); + entry->assignment_count++; + + /* If it's already constant, don't do the work. */ + if (entry->var->constant_value) + return visit_continue; + + /* OK, now find if we actually have all the right conditions for + * this to be a constant value assigned to the var. 
+ */ + if (ir->condition) + return visit_continue; + + ir_variable *var = ir->whole_variable_written(); + if (!var) + return visit_continue; + + /* Ignore buffer variables, since the underlying storage is shared + * and we can't be sure that this variable won't be written by another + * thread. + */ + if (var->data.mode == ir_var_shader_storage || + var->data.mode == ir_var_shader_shared) + return visit_continue; + + constval = ir->rhs->constant_expression_value(); + if (!constval) + return visit_continue; + + /* Mark this entry as having a constant assignment (if the + * assignment count doesn't go >1). do_constant_variable will fix + * up the variable with the constant value later. + */ + entry->constval = constval; + + return visit_continue; +} + +ir_visitor_status +ir_constant_variable_visitor::visit_enter(ir_call *ir) +{ + /* Mark any out parameters as assigned to */ + foreach_two_lists(formal_node, &ir->callee->parameters, + actual_node, &ir->actual_parameters) { + ir_rvalue *param_rval = (ir_rvalue *) actual_node; + ir_variable *param = (ir_variable *) formal_node; + + if (param->data.mode == ir_var_function_out || + param->data.mode == ir_var_function_inout) { + ir_variable *var = param_rval->variable_referenced(); + struct assignment_entry *entry; + + assert(var); + entry = get_assignment_entry(var, this->ht); + entry->assignment_count++; + } + } + + /* Mark the return storage as having been assigned to */ + if (ir->return_deref != NULL) { + ir_variable *var = ir->return_deref->variable_referenced(); + struct assignment_entry *entry; + + assert(var); + entry = get_assignment_entry(var, this->ht); + entry->assignment_count++; + } + + return visit_continue; +} + +/** + * Does a constant variable pass on the code present in the instruction + * stream: each variable that is declared in the stream, assigned exactly + * once, and assigned a constant value is marked as constant. + * + * \return true if any variable was marked.
+ */ +bool +do_constant_variable(exec_list *instructions) +{ + bool progress = false; + ir_constant_variable_visitor v; + + v.ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + v.run(instructions); + + struct hash_entry *hte; + hash_table_foreach(v.ht, hte) { + struct assignment_entry *entry = (struct assignment_entry *) hte->data; + + if (entry->assignment_count == 1 && entry->constval && entry->our_scope) { + entry->var->constant_value = entry->constval; + progress = true; + } + hte->data = NULL; + free(entry); + } + _mesa_hash_table_destroy(v.ht, NULL); + + return progress; +} + +bool +do_constant_variable_unlinked(exec_list *instructions) +{ + bool progress = false; + + foreach_in_list(ir_instruction, ir, instructions) { + ir_function *f = ir->as_function(); + if (f) { + foreach_in_list(ir_function_signature, sig, &f->signatures) { + if (do_constant_variable(&sig->body)) + progress = true; + } + } + } + + return progress; +} diff --git a/src/compiler/glsl/opt_copy_propagation.cpp b/src/compiler/glsl/opt_copy_propagation.cpp new file mode 100644 index 0000000..310708d --- /dev/null +++ b/src/compiler/glsl/opt_copy_propagation.cpp @@ -0,0 +1,352 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_copy_propagation.cpp + * + * Moves usage of recently-copied variables to the previous copy of + * the variable. + * + * This should reduce the number of MOV instructions in the generated + * programs unless copy propagation is also done on the LIR, and may + * help anyway by triggering other optimizations that live in the HIR. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_basic_block.h" +#include "ir_optimization.h" +#include "compiler/glsl_types.h" + +namespace { + +/** One available copy: \c lhs was assigned the whole value of \c rhs. */ +class acp_entry : public exec_node +{ +public: + acp_entry(ir_variable *lhs, ir_variable *rhs) + { + assert(lhs); + assert(rhs); + this->lhs = lhs; + this->rhs = rhs; + } + + ir_variable *lhs; + ir_variable *rhs; +}; + + +/** One variable whose value was overwritten in the current block. */ +class kill_entry : public exec_node +{ +public: + kill_entry(ir_variable *var) + { + assert(var); + this->var = var; + } + + ir_variable *var; +}; + +class ir_copy_propagation_visitor : public ir_hierarchical_visitor { +public: + ir_copy_propagation_visitor() + { + progress = false; + /* Must start out false: visit_enter(ir_function_signature), + * handle_if_block() and visit_enter(ir_loop) all read killed_all + * before anything else has written it. + */ + killed_all = false; + mem_ctx = ralloc_context(0); + this->acp = new(mem_ctx) exec_list; + this->kills = new(mem_ctx) exec_list; + } + ~ir_copy_propagation_visitor() + { + ralloc_free(mem_ctx); + } + + virtual ir_visitor_status visit(class ir_dereference_variable *); + virtual ir_visitor_status visit_enter(class ir_loop *); + virtual ir_visitor_status visit_enter(class ir_function_signature *); + virtual ir_visitor_status visit_enter(class ir_function *); + virtual ir_visitor_status visit_leave(class
ir_assignment *); + virtual ir_visitor_status visit_enter(class ir_call *); + virtual ir_visitor_status visit_enter(class ir_if *); + + void add_copy(ir_assignment *ir); + void kill(ir_variable *ir); + void handle_if_block(exec_list *instructions); + + /** List of acp_entry: The available copies to propagate */ + exec_list *acp; + /** + * List of kill_entry: The variables whose values were killed in this + * block. + */ + exec_list *kills; + + /** Set once any dereference has been rewritten or any self-copy flagged. */ + bool progress; + + /** Set when a call was visited in the current block, killing all copies. */ + bool killed_all; + + /** ralloc context owning the visitor's lists; freed by the destructor. */ + void *mem_ctx; +}; + +} /* unnamed namespace */ + +ir_visitor_status +ir_copy_propagation_visitor::visit_enter(ir_function_signature *ir) +{ + /* Treat entry into a function signature as a completely separate + * block. Any instructions at global scope will be shuffled into + * main() at link time, so they're irrelevant to us. + */ + exec_list *orig_acp = this->acp; + exec_list *orig_kills = this->kills; + bool orig_killed_all = this->killed_all; + + this->acp = new(mem_ctx) exec_list; + this->kills = new(mem_ctx) exec_list; + this->killed_all = false; + + visit_list_elements(this, &ir->body); + + ralloc_free(this->acp); + ralloc_free(this->kills); + + this->kills = orig_kills; + this->acp = orig_acp; + this->killed_all = orig_killed_all; + + return visit_continue_with_parent; +} + +ir_visitor_status +ir_copy_propagation_visitor::visit_leave(ir_assignment *ir) +{ + kill(ir->lhs->variable_referenced()); + + add_copy(ir); + + return visit_continue; +} + +ir_visitor_status +ir_copy_propagation_visitor::visit_enter(ir_function *ir) +{ + (void) ir; + return visit_continue; +} + +/** + * Replaces dereferences of ACP LHS variables with the ACP RHS variables + * they were copied from. + * + * This is where the actual copy propagation occurs. Note that the + * rewriting of ir_dereference means that the ir_dereference instance + * must not be shared by multiple IR operations!
+ */ +ir_visitor_status +ir_copy_propagation_visitor::visit(ir_dereference_variable *ir) +{ + if (this->in_assignee) + return visit_continue; + + ir_variable *var = ir->var; + + foreach_in_list(acp_entry, entry, this->acp) { + if (var == entry->lhs) { + ir->var = entry->rhs; + this->progress = true; + break; + } + } + + return visit_continue; +} + + +/** + * Propagates copies into the non-out actual parameters of a call, then + * empties the ACP and sets killed_all. + */ +ir_visitor_status +ir_copy_propagation_visitor::visit_enter(ir_call *ir) +{ + /* Do copy propagation on call parameters, but skip any out params */ + foreach_two_lists(formal_node, &ir->callee->parameters, + actual_node, &ir->actual_parameters) { + ir_variable *sig_param = (ir_variable *) formal_node; + /* Named "param" so that it does not shadow the ir_call parameter. */ + ir_rvalue *param = (ir_rvalue *) actual_node; + if (sig_param->data.mode != ir_var_function_out + && sig_param->data.mode != ir_var_function_inout) { + param->accept(this); + } + } + + /* Since we're unlinked, we don't (necessarily) know the side effects of + * this call. So kill all copies. + */ + acp->make_empty(); + this->killed_all = true; + + return visit_continue_with_parent; +} + +void +ir_copy_propagation_visitor::handle_if_block(exec_list *instructions) +{ + exec_list *orig_acp = this->acp; + exec_list *orig_kills = this->kills; + bool orig_killed_all = this->killed_all; + + this->acp = new(mem_ctx) exec_list; + this->kills = new(mem_ctx) exec_list; + this->killed_all = false; + + /* Populate the initial acp with a copy of the original */ + foreach_in_list(acp_entry, a, orig_acp) { + this->acp->push_tail(new(this->acp) acp_entry(a->lhs, a->rhs)); + } + + visit_list_elements(this, instructions); + + if (this->killed_all) { + orig_acp->make_empty(); + } + + exec_list *new_kills = this->kills; + this->kills = orig_kills; + ralloc_free(this->acp); + this->acp = orig_acp; + this->killed_all = this->killed_all || orig_killed_all; + + foreach_in_list(kill_entry, k, new_kills) { + kill(k->var); + } + + ralloc_free(new_kills); +} + +ir_visitor_status +ir_copy_propagation_visitor::visit_enter(ir_if *ir) +{ +
ir->condition->accept(this); + + handle_if_block(&ir->then_instructions); + handle_if_block(&ir->else_instructions); + + /* handle_if_block() already descended into the children. */ + return visit_continue_with_parent; +} + +ir_visitor_status +ir_copy_propagation_visitor::visit_enter(ir_loop *ir) +{ + exec_list *orig_acp = this->acp; + exec_list *orig_kills = this->kills; + bool orig_killed_all = this->killed_all; + + /* FINISHME: For now, the initial acp for loops is totally empty. + * We could go through once, then go through again with the acp + * cloned minus the killed entries after the first run through. + */ + this->acp = new(mem_ctx) exec_list; + this->kills = new(mem_ctx) exec_list; + this->killed_all = false; + + visit_list_elements(this, &ir->body_instructions); + + if (this->killed_all) { + orig_acp->make_empty(); + } + + exec_list *new_kills = this->kills; + this->kills = orig_kills; + ralloc_free(this->acp); + this->acp = orig_acp; + this->killed_all = this->killed_all || orig_killed_all; + + foreach_in_list(kill_entry, k, new_kills) { + kill(k->var); + } + + ralloc_free(new_kills); + + /* already descended into the children. */ + return visit_continue_with_parent; +} + +void +ir_copy_propagation_visitor::kill(ir_variable *var) +{ + assert(var != NULL); + + /* Remove any entries currently in the ACP for this kill. */ + foreach_in_list_safe(acp_entry, entry, acp) { + if (entry->lhs == var || entry->rhs == var) { + entry->remove(); + } + } + + /* Add the LHS variable to the list of killed variables in this block. + */ + this->kills->push_tail(new(this->kills) kill_entry(var)); +} + +/** + * Adds an entry to the available copy list if it's a plain assignment + * of a variable to a variable. 
+ */ +void +ir_copy_propagation_visitor::add_copy(ir_assignment *ir) +{ + acp_entry *entry; + + if (ir->condition) + return; + + ir_variable *lhs_var = ir->whole_variable_written(); + ir_variable *rhs_var = ir->rhs->whole_variable_referenced(); + + if ((lhs_var != NULL) && (rhs_var != NULL)) { + if (lhs_var == rhs_var) { + /* This is a dumb assignment, but we've conveniently noticed + * it here. Removing it now would mess up the loop iteration + * calling us. Just flag it to not execute, and someone else + * will clean up the mess. + */ + ir->condition = new(ralloc_parent(ir)) ir_constant(false); + this->progress = true; + } else if (lhs_var->data.mode != ir_var_shader_storage && + lhs_var->data.mode != ir_var_shader_shared) { + entry = new(this->acp) acp_entry(lhs_var, rhs_var); + this->acp->push_tail(entry); + } + } +} + +/** + * Does a copy propagation pass on the code present in the instruction stream. + */ +bool +do_copy_propagation(exec_list *instructions) +{ + ir_copy_propagation_visitor v; + + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/opt_copy_propagation_elements.cpp b/src/compiler/glsl/opt_copy_propagation_elements.cpp new file mode 100644 index 0000000..a679180 --- /dev/null +++ b/src/compiler/glsl/opt_copy_propagation_elements.cpp @@ -0,0 +1,509 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the 
+ * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_copy_propagation_elements.cpp + * + * Replaces usage of recently-copied components of variables with the + * previous copy of the variable. + * + * This pass can be compared with opt_copy_propagation, which operates + * on arbitrary whole-variable copies. However, in order to handle + * the copy propagation of swizzled variables or writemasked writes, + * we want to track things on a channel-wise basis. I found that + * trying to mix the swizzled/writemasked support here with the + * whole-variable stuff in opt_copy_propagation.cpp just made a mess, + * so this is separate despite the ACP handling being somewhat + * similar. + * + * This should reduce the number of MOV instructions in the generated + * programs unless copy propagation is also done on the LIR, and may + * help anyway by triggering other optimizations that live in the HIR.
+ */ + +#include "ir.h" +#include "ir_rvalue_visitor.h" +#include "ir_basic_block.h" +#include "ir_optimization.h" +#include "compiler/glsl_types.h" + +static bool debug = false; + +namespace { + +class acp_entry : public exec_node +{ +public: + acp_entry(ir_variable *lhs, ir_variable *rhs, int write_mask, int swizzle[4]) + { + this->lhs = lhs; + this->rhs = rhs; + this->write_mask = write_mask; + memcpy(this->swizzle, swizzle, sizeof(this->swizzle)); + } + + acp_entry(acp_entry *a) + { + this->lhs = a->lhs; + this->rhs = a->rhs; + this->write_mask = a->write_mask; + memcpy(this->swizzle, a->swizzle, sizeof(this->swizzle)); + } + + ir_variable *lhs; + ir_variable *rhs; + unsigned int write_mask; + int swizzle[4]; +}; + + +class kill_entry : public exec_node +{ +public: + kill_entry(ir_variable *var, int write_mask) + { + this->var = var; + this->write_mask = write_mask; + } + + ir_variable *var; + unsigned int write_mask; +}; + +class ir_copy_propagation_elements_visitor : public ir_rvalue_visitor { +public: + ir_copy_propagation_elements_visitor() + { + this->progress = false; + this->killed_all = false; + this->mem_ctx = ralloc_context(NULL); + this->shader_mem_ctx = NULL; + this->acp = new(mem_ctx) exec_list; + this->kills = new(mem_ctx) exec_list; + } + ~ir_copy_propagation_elements_visitor() + { + ralloc_free(mem_ctx); + } + + virtual ir_visitor_status visit_enter(class ir_loop *); + virtual ir_visitor_status visit_enter(class ir_function_signature *); + virtual ir_visitor_status visit_leave(class ir_assignment *); + virtual ir_visitor_status visit_enter(class ir_call *); + virtual ir_visitor_status visit_enter(class ir_if *); + virtual ir_visitor_status visit_leave(class ir_swizzle *); + + void handle_rvalue(ir_rvalue **rvalue); + + void add_copy(ir_assignment *ir); + void kill(kill_entry *k); + void handle_if_block(exec_list *instructions); + + /** List of acp_entry: The available copies to propagate */ + exec_list *acp; + /** + * List of kill_entry: The 
variables whose values were killed in this + * block. + */ + exec_list *kills; + + bool progress; + + bool killed_all; + + /* Context for our local data structures. */ + void *mem_ctx; + /* Context for allocating new shader nodes. */ + void *shader_mem_ctx; +}; + +} /* unnamed namespace */ + +ir_visitor_status +ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir) +{ + /* Treat entry into a function signature as a completely separate + * block. Any instructions at global scope will be shuffled into + * main() at link time, so they're irrelevant to us. + */ + exec_list *orig_acp = this->acp; + exec_list *orig_kills = this->kills; + bool orig_killed_all = this->killed_all; + + this->acp = new(mem_ctx) exec_list; + this->kills = new(mem_ctx) exec_list; + this->killed_all = false; + + visit_list_elements(this, &ir->body); + + ralloc_free(this->acp); + ralloc_free(this->kills); + + this->kills = orig_kills; + this->acp = orig_acp; + this->killed_all = orig_killed_all; + + return visit_continue_with_parent; +} + +ir_visitor_status +ir_copy_propagation_elements_visitor::visit_leave(ir_assignment *ir) +{ + ir_dereference_variable *lhs = ir->lhs->as_dereference_variable(); + ir_variable *var = ir->lhs->variable_referenced(); + + if (var->type->is_scalar() || var->type->is_vector()) { + kill_entry *k; + + if (lhs) + k = new(this->kills) kill_entry(var, ir->write_mask); + else + k = new(this->kills) kill_entry(var, ~0); /* LHS is not a plain variable dereference: kill every channel */ + + kill(k); + } + + add_copy(ir); + + return visit_continue; +} + +ir_visitor_status +ir_copy_propagation_elements_visitor::visit_leave(ir_swizzle *) +{ + /* Don't visit the values of swizzles since they are handled while + * visiting the swizzle itself. + */ + return visit_continue; +} + +/** + * Replaces dereferences of ACP LHS variables (optionally swizzled) with a + * swizzle of the ACP RHS variable their channels were copied from. + * + * This is where the actual copy propagation occurs.
Note that the + * rewriting of ir_dereference means that the ir_dereference instance + * must not be shared by multiple IR operations! + */ +void +ir_copy_propagation_elements_visitor::handle_rvalue(ir_rvalue **ir) +{ + int swizzle_chan[4]; + ir_dereference_variable *deref_var; + ir_variable *source[4] = {NULL, NULL, NULL, NULL}; + int source_chan[4] = {0, 0, 0, 0}; + int chans; + bool noop_swizzle = true; + + if (!*ir) + return; + + ir_swizzle *swizzle = (*ir)->as_swizzle(); + if (swizzle) { + deref_var = swizzle->val->as_dereference_variable(); + if (!deref_var) + return; + + swizzle_chan[0] = swizzle->mask.x; + swizzle_chan[1] = swizzle->mask.y; + swizzle_chan[2] = swizzle->mask.z; + swizzle_chan[3] = swizzle->mask.w; + chans = swizzle->type->vector_elements; + } else { + deref_var = (*ir)->as_dereference_variable(); + if (!deref_var) + return; + + swizzle_chan[0] = 0; + swizzle_chan[1] = 1; + swizzle_chan[2] = 2; + swizzle_chan[3] = 3; + chans = deref_var->type->vector_elements; + } + + if (this->in_assignee) + return; + + ir_variable *var = deref_var->var; + + /* Try to find ACP entries covering swizzle_chan[], hoping they're + * the same source variable. + */ + foreach_in_list(acp_entry, entry, this->acp) { + if (var == entry->lhs) { + for (int c = 0; c < chans; c++) { + if (entry->write_mask & (1 << swizzle_chan[c])) { + source[c] = entry->rhs; + source_chan[c] = entry->swizzle[swizzle_chan[c]]; + + if (source_chan[c] != swizzle_chan[c]) + noop_swizzle = false; + } + } + } + } + + /* Make sure all channels are copying from the same source variable. */ + if (!source[0]) + return; + for (int c = 1; c < chans; c++) { + if (source[c] != source[0]) + return; + } + + if (!shader_mem_ctx) + shader_mem_ctx = ralloc_parent(deref_var); + + /* Don't pointlessly replace the rvalue with itself (or a noop swizzle + * of itself, which would just be deleted by opt_noop_swizzle). 
+ */ + if (source[0] == var && noop_swizzle) + return; + + if (debug) { + printf("Copy propagation from:\n"); + (*ir)->print(); + } + + deref_var = new(shader_mem_ctx) ir_dereference_variable(source[0]); + *ir = new(shader_mem_ctx) ir_swizzle(deref_var, + source_chan[0], + source_chan[1], + source_chan[2], + source_chan[3], + chans); + progress = true; + + if (debug) { + printf("to:\n"); + (*ir)->print(); + printf("\n"); + } +} + + +/** + * Propagates copies into the non-out actual parameters of a call, then + * empties the ACP and sets killed_all. + */ +ir_visitor_status +ir_copy_propagation_elements_visitor::visit_enter(ir_call *ir) +{ + /* Do copy propagation on call parameters, but skip any out params */ + foreach_two_lists(formal_node, &ir->callee->parameters, + actual_node, &ir->actual_parameters) { + ir_variable *sig_param = (ir_variable *) formal_node; + /* Named "param" so that it does not shadow the ir_call parameter. */ + ir_rvalue *param = (ir_rvalue *) actual_node; + if (sig_param->data.mode != ir_var_function_out + && sig_param->data.mode != ir_var_function_inout) { + param->accept(this); + } + } + + /* Since we're unlinked, we don't (necessarily) know the side effects of + * this call. So kill all copies.
+ */ + acp->make_empty(); + this->killed_all = true; + + return visit_continue_with_parent; +} + +void +ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions) +{ + exec_list *orig_acp = this->acp; + exec_list *orig_kills = this->kills; + bool orig_killed_all = this->killed_all; + + this->acp = new(mem_ctx) exec_list; + this->kills = new(mem_ctx) exec_list; + this->killed_all = false; + + /* Populate the initial acp with a copy of the original */ + foreach_in_list(acp_entry, a, orig_acp) { + this->acp->push_tail(new(this->acp) acp_entry(a)); + } + + visit_list_elements(this, instructions); + + if (this->killed_all) { + orig_acp->make_empty(); + } + + exec_list *new_kills = this->kills; + this->kills = orig_kills; + ralloc_free(this->acp); + this->acp = orig_acp; + this->killed_all = this->killed_all || orig_killed_all; + + /* Move the new kills into the parent block's list, removing them + * from the parent's ACP list in the process. + */ + foreach_in_list_safe(kill_entry, k, new_kills) { + kill(k); + } + + ralloc_free(new_kills); +} + +ir_visitor_status +ir_copy_propagation_elements_visitor::visit_enter(ir_if *ir) +{ + ir->condition->accept(this); + + handle_if_block(&ir->then_instructions); + handle_if_block(&ir->else_instructions); + + /* handle_if_block() already descended into the children. */ + return visit_continue_with_parent; +} + +ir_visitor_status +ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir) +{ + exec_list *orig_acp = this->acp; + exec_list *orig_kills = this->kills; + bool orig_killed_all = this->killed_all; + + /* FINISHME: For now, the initial acp for loops is totally empty. + * We could go through once, then go through again with the acp + * cloned minus the killed entries after the first run through. 
+ */ + this->acp = new(mem_ctx) exec_list; + this->kills = new(mem_ctx) exec_list; + this->killed_all = false; + + visit_list_elements(this, &ir->body_instructions); + + if (this->killed_all) { + orig_acp->make_empty(); + } + + exec_list *new_kills = this->kills; + this->kills = orig_kills; + ralloc_free(this->acp); + this->acp = orig_acp; + this->killed_all = this->killed_all || orig_killed_all; + + foreach_in_list_safe(kill_entry, k, new_kills) { + kill(k); + } + + ralloc_free(new_kills); + + /* already descended into the children. */ + return visit_continue_with_parent; +} + +/* Remove any entries currently in the ACP for this kill. */ +void +ir_copy_propagation_elements_visitor::kill(kill_entry *k) +{ + foreach_in_list_safe(acp_entry, entry, acp) { + if (entry->lhs == k->var) { + entry->write_mask = entry->write_mask & ~k->write_mask; + if (entry->write_mask == 0) { + entry->remove(); + continue; + } + } + if (entry->rhs == k->var) { + entry->remove(); + } + } + + /* If we were on a list, remove ourselves before inserting */ + if (k->next) + k->remove(); + + ralloc_steal(this->kills, k); + this->kills->push_tail(k); +} + +/** + * Adds directly-copied channels between vector variables to the available + * copy propagation list. 
+ */ +void +ir_copy_propagation_elements_visitor::add_copy(ir_assignment *ir) +{ + acp_entry *entry; + int orig_swizzle[4] = {0, 1, 2, 3}; + int swizzle[4]; + + if (ir->condition) + return; + + ir_dereference_variable *lhs = ir->lhs->as_dereference_variable(); + if (!lhs || !(lhs->type->is_scalar() || lhs->type->is_vector())) + return; + + ir_dereference_variable *rhs = ir->rhs->as_dereference_variable(); + if (!rhs) { + ir_swizzle *swiz = ir->rhs->as_swizzle(); + if (!swiz) + return; + + rhs = swiz->val->as_dereference_variable(); + if (!rhs) + return; + + orig_swizzle[0] = swiz->mask.x; + orig_swizzle[1] = swiz->mask.y; + orig_swizzle[2] = swiz->mask.z; + orig_swizzle[3] = swiz->mask.w; + } + + /* Move the swizzle channels out to the positions they match in the + * destination. We don't want to have to rewrite the swizzle[] + * array every time we clear a bit of the write_mask. + */ + int j = 0; + for (int i = 0; i < 4; i++) { + if (ir->write_mask & (1 << i)) + swizzle[i] = orig_swizzle[j++]; + } + + int write_mask = ir->write_mask; + if (lhs->var == rhs->var) { + /* If this is a copy from the variable to itself, then we need + * to be sure not to include the updated channels from this + * instruction in the set of new source channels to be + * copy-propagated from. 
+ */ + for (int i = 0; i < 4; i++) { + if (ir->write_mask & (1 << orig_swizzle[i])) + write_mask &= ~(1 << i); + } + } + + entry = new(this->mem_ctx) acp_entry(lhs->var, rhs->var, write_mask, + swizzle); + this->acp->push_tail(entry); +} + +bool +do_copy_propagation_elements(exec_list *instructions) +{ + ir_copy_propagation_elements_visitor v; + + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/opt_dead_builtin_variables.cpp b/src/compiler/glsl/opt_dead_builtin_variables.cpp new file mode 100644 index 0000000..03e5789 --- /dev/null +++ b/src/compiler/glsl/opt_dead_builtin_variables.cpp @@ -0,0 +1,103 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_optimization.h" + +/** + * Pre-linking, optimize unused built-in variables + * + * Uniforms, constants, system values, inputs (vertex shader only), and + * outputs (fragment shader only) that are not used can be removed. + */ +void +optimize_dead_builtin_variables(exec_list *instructions, + enum ir_variable_mode other) +{ + foreach_in_list_safe(ir_variable, var, instructions) { + if (var->ir_type != ir_type_variable || var->data.used) + continue; + + if (var->data.mode != ir_var_uniform + && var->data.mode != ir_var_auto + && var->data.mode != ir_var_system_value + && var->data.mode != other) + continue; + + /* So that linker rules can later be enforced, we cannot elimate + * variables that were redeclared in the shader code. + */ + if ((var->data.mode == other || var->data.mode == ir_var_system_value) + && var->data.how_declared != ir_var_declared_implicitly) + continue; + + if (!is_gl_identifier(var->name)) + continue; + + /* gl_ModelViewProjectionMatrix and gl_Vertex are special because they + * are used by ftransform. No other built-in variable is used by a + * built-in function. The forward declarations of these variables in + * the built-in function shader does not have the "state slot" + * information, so removing these variables from the user shader will + * cause problems later. + * + * For compute shaders, gl_GlobalInvocationID has some dependencies, so + * we avoid removing these dependencies. + * + * We also avoid removing gl_GlobalInvocationID at this stage because it + * might be used by a linked shader. In this case it still needs to be + * initialized by the main function. 
+ * + * gl_GlobalInvocationID = + * gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID + * + * Similarly, we initialize gl_LocalInvocationIndex in the main function: + * + * gl_LocalInvocationIndex = + * gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + + * gl_LocalInvocationID.y * gl_WorkGroupSize.x + + * gl_LocalInvocationID.x; + * + * Matrix uniforms with "Transpose" are not eliminated because there's + * an optimization pass that can turn references to the regular matrix + * into references to the transpose matrix. Eliminating the transpose + * matrix would cause that pass to generate references to undeclareds + * variables (thank you, ir_validate). + * + * It doesn't seem worth the effort to track when the transpose could be + * eliminated (i.e., when the non-transpose was eliminated). + */ + if (strcmp(var->name, "gl_ModelViewProjectionMatrix") == 0 + || strcmp(var->name, "gl_Vertex") == 0 + || strcmp(var->name, "gl_WorkGroupID") == 0 + || strcmp(var->name, "gl_WorkGroupSize") == 0 + || strcmp(var->name, "gl_LocalInvocationID") == 0 + || strcmp(var->name, "gl_GlobalInvocationID") == 0 + || strcmp(var->name, "gl_LocalInvocationIndex") == 0 + || strstr(var->name, "Transpose") != NULL) + continue; + + var->remove(); + } +} diff --git a/src/compiler/glsl/opt_dead_builtin_varyings.cpp b/src/compiler/glsl/opt_dead_builtin_varyings.cpp new file mode 100644 index 0000000..37bcbcc --- /dev/null +++ b/src/compiler/glsl/opt_dead_builtin_varyings.cpp @@ -0,0 +1,606 @@ +/* + * Copyright © 2013 Marek Olšák <maraeo@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject 
to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_dead_builtin_varyings.cpp + * + * This eliminates the built-in shader outputs which are either not written + * at all or not used by the next stage. It also eliminates unused elements + * of gl_TexCoord inputs, which reduces the overall varying usage. + * The varyings handled here are the primary and secondary color, the fog, + * and the texture coordinates (gl_TexCoord). + * + * This pass is necessary, because the Mesa GLSL linker cannot eliminate + * built-in varyings like it eliminates user-defined varyings, because + * the built-in varyings have pre-assigned locations. Also, the elimination + * of unused gl_TexCoord elements requires its own lowering pass anyway. + * + * It's implemented by replacing all occurrences of dead varyings with + * temporary variables, which creates dead code. It is recommended to run + * a dead-code elimination pass after this. + * + * If any texture coordinate slots can be eliminated, the gl_TexCoord array is + * broken down into separate vec4 variables with locations equal to + * VARYING_SLOT_TEX0 + i. + * + * The same is done for the gl_FragData fragment shader output. 
+ */ + +#include "main/core.h" /* for snprintf and ARRAY_SIZE */ +#include "ir.h" +#include "ir_rvalue_visitor.h" +#include "ir_optimization.h" +#include "ir_print_visitor.h" +#include "compiler/glsl_types.h" +#include "link_varyings.h" + +namespace { + +/** + * This obtains detailed information about built-in varyings from shader code. + */ +class varying_info_visitor : public ir_hierarchical_visitor { +public: + /* "mode" can be either ir_var_shader_in or ir_var_shader_out */ + varying_info_visitor(ir_variable_mode mode, bool find_frag_outputs = false) + : lower_texcoord_array(true), + texcoord_array(NULL), + texcoord_usage(0), + find_frag_outputs(find_frag_outputs), + lower_fragdata_array(true), + fragdata_array(NULL), + fragdata_usage(0), + color_usage(0), + tfeedback_color_usage(0), + fog(NULL), + has_fog(false), + tfeedback_has_fog(false), + mode(mode) + { + memset(color, 0, sizeof(color)); + memset(backcolor, 0, sizeof(backcolor)); + } + + virtual ir_visitor_status visit_enter(ir_dereference_array *ir) + { + ir_variable *var = ir->variable_referenced(); + + if (!var || var->data.mode != this->mode || !var->type->is_array()) + return visit_continue; + + if (this->find_frag_outputs && var->data.location == FRAG_RESULT_DATA0) { + this->fragdata_array = var; + + ir_constant *index = ir->array_index->as_constant(); + if (index == NULL) { + /* This is variable indexing. */ + this->fragdata_usage |= (1 << var->type->array_size()) - 1; + this->lower_fragdata_array = false; + } + else { + this->fragdata_usage |= 1 << index->get_uint_component(0); + /* Don't lower fragdata array if the output variable + * is not a float variable (or float vector) because it will + * generate wrong register assignments because of different + * data types. 
+ */ + if (var->type->gl_type != GL_FLOAT && + var->type->gl_type != GL_FLOAT_VEC2 && + var->type->gl_type != GL_FLOAT_VEC3 && + var->type->gl_type != GL_FLOAT_VEC4) + this->lower_fragdata_array = false; + } + + /* Don't visit the leaves of ir_dereference_array. */ + return visit_continue_with_parent; + } + + if (!this->find_frag_outputs && var->data.location == VARYING_SLOT_TEX0) { + this->texcoord_array = var; + + ir_constant *index = ir->array_index->as_constant(); + if (index == NULL) { + /* There is variable indexing, we can't lower the texcoord array. + */ + this->texcoord_usage |= (1 << var->type->array_size()) - 1; + this->lower_texcoord_array = false; + } + else { + this->texcoord_usage |= 1 << index->get_uint_component(0); + } + + /* Don't visit the leaves of ir_dereference_array. */ + return visit_continue_with_parent; + } + + return visit_continue; + } + + virtual ir_visitor_status visit(ir_dereference_variable *ir) + { + ir_variable *var = ir->variable_referenced(); + + if (var->data.mode != this->mode || !var->type->is_array()) + return visit_continue; + + if (this->find_frag_outputs && var->data.location == FRAG_RESULT_DATA0) { + /* This is a whole array dereference. */ + this->fragdata_usage |= (1 << var->type->array_size()) - 1; + this->lower_fragdata_array = false; + return visit_continue; + } + + if (!this->find_frag_outputs && var->data.location == VARYING_SLOT_TEX0) { + /* This is a whole array dereference like "gl_TexCoord = x;", + * there's probably no point in lowering that. + */ + this->texcoord_usage |= (1 << var->type->array_size()) - 1; + this->lower_texcoord_array = false; + } + return visit_continue; + } + + virtual ir_visitor_status visit(ir_variable *var) + { + if (var->data.mode != this->mode) + return visit_continue; + + /* Nothing to do here for fragment outputs. */ + if (this->find_frag_outputs) + return visit_continue; + + /* Handle colors and fog. 
*/ + switch (var->data.location) { + case VARYING_SLOT_COL0: + this->color[0] = var; + this->color_usage |= 1; + break; + case VARYING_SLOT_COL1: + this->color[1] = var; + this->color_usage |= 2; + break; + case VARYING_SLOT_BFC0: + this->backcolor[0] = var; + this->color_usage |= 1; + break; + case VARYING_SLOT_BFC1: + this->backcolor[1] = var; + this->color_usage |= 2; + break; + case VARYING_SLOT_FOGC: + this->fog = var; + this->has_fog = true; + break; + } + + return visit_continue; + } + + void get(exec_list *ir, + unsigned num_tfeedback_decls, + tfeedback_decl *tfeedback_decls) + { + /* Handle the transform feedback varyings. */ + for (unsigned i = 0; i < num_tfeedback_decls; i++) { + if (!tfeedback_decls[i].is_varying()) + continue; + + unsigned location = tfeedback_decls[i].get_location(); + + switch (location) { + case VARYING_SLOT_COL0: + case VARYING_SLOT_BFC0: + this->tfeedback_color_usage |= 1; + break; + case VARYING_SLOT_COL1: + case VARYING_SLOT_BFC1: + this->tfeedback_color_usage |= 2; + break; + case VARYING_SLOT_FOGC: + this->tfeedback_has_fog = true; + break; + default: + if (location >= VARYING_SLOT_TEX0 && + location <= VARYING_SLOT_TEX7) { + this->lower_texcoord_array = false; + } + } + } + + /* Process the shader. 
*/ + visit_list_elements(this, ir); + + if (!this->texcoord_array) { + this->lower_texcoord_array = false; + } + if (!this->fragdata_array) { + this->lower_fragdata_array = false; + } + } + + bool lower_texcoord_array; + ir_variable *texcoord_array; + unsigned texcoord_usage; /* bitmask */ + + bool find_frag_outputs; /* false if it's looking for varyings */ + bool lower_fragdata_array; + ir_variable *fragdata_array; + unsigned fragdata_usage; /* bitmask */ + + ir_variable *color[2]; + ir_variable *backcolor[2]; + unsigned color_usage; /* bitmask */ + unsigned tfeedback_color_usage; /* bitmask */ + + ir_variable *fog; + bool has_fog; + bool tfeedback_has_fog; + + ir_variable_mode mode; +}; + + +/** + * This replaces unused varyings with temporary variables. + * + * If "ir" is the producer, the "external" usage should come from + * the consumer. It also works the other way around. If either one is + * missing, set the "external" usage to a full mask. + */ +class replace_varyings_visitor : public ir_rvalue_visitor { +public: + replace_varyings_visitor(struct gl_shader *sha, + const varying_info_visitor *info, + unsigned external_texcoord_usage, + unsigned external_color_usage, + bool external_has_fog) + : shader(sha), info(info), new_fog(NULL) + { + void *const ctx = shader->ir; + + memset(this->new_fragdata, 0, sizeof(this->new_fragdata)); + memset(this->new_texcoord, 0, sizeof(this->new_texcoord)); + memset(this->new_color, 0, sizeof(this->new_color)); + memset(this->new_backcolor, 0, sizeof(this->new_backcolor)); + + const char *mode_str = + info->mode == ir_var_shader_in ? "in" : "out"; + + /* Handle texcoord outputs. + * + * We're going to break down the gl_TexCoord array into separate + * variables. First, add declarations of the new variables all + * occurrences of gl_TexCoord will be replaced with. 
+ */ + if (info->lower_texcoord_array) { + prepare_array(shader->ir, this->new_texcoord, + ARRAY_SIZE(this->new_texcoord), + VARYING_SLOT_TEX0, "TexCoord", mode_str, + info->texcoord_usage, external_texcoord_usage); + } + + /* Handle gl_FragData in the same way like gl_TexCoord. */ + if (info->lower_fragdata_array) { + prepare_array(shader->ir, this->new_fragdata, + ARRAY_SIZE(this->new_fragdata), + FRAG_RESULT_DATA0, "FragData", mode_str, + info->fragdata_usage, (1 << MAX_DRAW_BUFFERS) - 1); + } + + /* Create dummy variables which will replace set-but-unused color and + * fog outputs. + */ + external_color_usage |= info->tfeedback_color_usage; + + for (int i = 0; i < 2; i++) { + char name[32]; + + if (!(external_color_usage & (1 << i))) { + if (info->color[i]) { + snprintf(name, 32, "gl_%s_FrontColor%i_dummy", mode_str, i); + this->new_color[i] = + new (ctx) ir_variable(glsl_type::vec4_type, name, + ir_var_temporary); + } + + if (info->backcolor[i]) { + snprintf(name, 32, "gl_%s_BackColor%i_dummy", mode_str, i); + this->new_backcolor[i] = + new (ctx) ir_variable(glsl_type::vec4_type, name, + ir_var_temporary); + } + } + } + + if (!external_has_fog && !info->tfeedback_has_fog && + info->fog) { + char name[32]; + + snprintf(name, 32, "gl_%s_FogFragCoord_dummy", mode_str); + this->new_fog = new (ctx) ir_variable(glsl_type::float_type, name, + ir_var_temporary); + } + + /* Now do the replacing. */ + visit_list_elements(this, shader->ir); + } + + void prepare_array(exec_list *ir, + ir_variable **new_var, + int max_elements, unsigned start_location, + const char *var_name, const char *mode_str, + unsigned usage, unsigned external_usage) + { + void *const ctx = ir; + + for (int i = max_elements-1; i >= 0; i--) { + if (usage & (1 << i)) { + char name[32]; + + if (!(external_usage & (1 << i))) { + /* This varying is unused in the next stage. Declare + * a temporary instead of an output. 
*/ + snprintf(name, 32, "gl_%s_%s%i_dummy", mode_str, var_name, i); + new_var[i] = + new (ctx) ir_variable(glsl_type::vec4_type, name, + ir_var_temporary); + } + else { + snprintf(name, 32, "gl_%s_%s%i", mode_str, var_name, i); + new_var[i] = + new(ctx) ir_variable(glsl_type::vec4_type, name, + this->info->mode); + new_var[i]->data.location = start_location + i; + new_var[i]->data.explicit_location = true; + new_var[i]->data.explicit_index = 0; + } + + ir->head->insert_before(new_var[i]); + } + } + } + + virtual ir_visitor_status visit(ir_variable *var) + { + /* Remove the gl_TexCoord array. */ + if (this->info->lower_texcoord_array && + var == this->info->texcoord_array) { + var->remove(); + } + + /* Remove the gl_FragData array. */ + if (this->info->lower_fragdata_array && + var == this->info->fragdata_array) { + + /* Clone variable for program resource list before it is removed. */ + if (!shader->fragdata_arrays) + shader->fragdata_arrays = new (shader) exec_list; + + shader->fragdata_arrays->push_tail(var->clone(shader, NULL)); + + var->remove(); + } + + /* Replace set-but-unused color and fog outputs with dummy variables. */ + for (int i = 0; i < 2; i++) { + if (var == this->info->color[i] && this->new_color[i]) { + var->replace_with(this->new_color[i]); + } + if (var == this->info->backcolor[i] && + this->new_backcolor[i]) { + var->replace_with(this->new_backcolor[i]); + } + } + + if (var == this->info->fog && this->new_fog) { + var->replace_with(this->new_fog); + } + + return visit_continue; + } + + virtual void handle_rvalue(ir_rvalue **rvalue) + { + if (!*rvalue) + return; + + void *ctx = ralloc_parent(*rvalue); + + /* Replace an array dereference gl_TexCoord[i] with a single + * variable dereference representing gl_TexCoord[i]. 
+ */ + if (this->info->lower_texcoord_array) { + /* gl_TexCoord[i] occurrence */ + ir_dereference_array *const da = (*rvalue)->as_dereference_array(); + + if (da && da->variable_referenced() == + this->info->texcoord_array) { + unsigned i = da->array_index->as_constant()->get_uint_component(0); + + *rvalue = new(ctx) ir_dereference_variable(this->new_texcoord[i]); + return; + } + } + + /* Same for gl_FragData. */ + if (this->info->lower_fragdata_array) { + /* gl_FragData[i] occurrence */ + ir_dereference_array *const da = (*rvalue)->as_dereference_array(); + + if (da && da->variable_referenced() == this->info->fragdata_array) { + unsigned i = da->array_index->as_constant()->get_uint_component(0); + + *rvalue = new(ctx) ir_dereference_variable(this->new_fragdata[i]); + return; + } + } + + /* Replace set-but-unused color and fog outputs with dummy variables. */ + ir_dereference_variable *const dv = (*rvalue)->as_dereference_variable(); + if (!dv) + return; + + ir_variable *var = dv->variable_referenced(); + + for (int i = 0; i < 2; i++) { + if (var == this->info->color[i] && this->new_color[i]) { + *rvalue = new(ctx) ir_dereference_variable(this->new_color[i]); + return; + } + if (var == this->info->backcolor[i] && + this->new_backcolor[i]) { + *rvalue = new(ctx) ir_dereference_variable(this->new_backcolor[i]); + return; + } + } + + if (var == this->info->fog && this->new_fog) { + *rvalue = new(ctx) ir_dereference_variable(this->new_fog); + } + } + + virtual ir_visitor_status visit_leave(ir_assignment *ir) + { + handle_rvalue(&ir->rhs); + handle_rvalue(&ir->condition); + + /* We have to use set_lhs when changing the LHS of an assignment. 
*/ + ir_rvalue *lhs = ir->lhs; + + handle_rvalue(&lhs); + if (lhs != ir->lhs) { + ir->set_lhs(lhs); + } + + return visit_continue; + } + +private: + struct gl_shader *shader; + const varying_info_visitor *info; + ir_variable *new_fragdata[MAX_DRAW_BUFFERS]; + ir_variable *new_texcoord[MAX_TEXTURE_COORD_UNITS]; + ir_variable *new_color[2]; + ir_variable *new_backcolor[2]; + ir_variable *new_fog; +}; + +} /* anonymous namespace */ + +static void +lower_texcoord_array(struct gl_shader *shader, const varying_info_visitor *info) +{ + replace_varyings_visitor(shader, info, + (1 << MAX_TEXTURE_COORD_UNITS) - 1, + 1 | 2, true); +} + +static void +lower_fragdata_array(struct gl_shader *shader) +{ + varying_info_visitor info(ir_var_shader_out, true); + info.get(shader->ir, 0, NULL); + + replace_varyings_visitor(shader, &info, 0, 0, 0); +} + + +void +do_dead_builtin_varyings(struct gl_context *ctx, + gl_shader *producer, gl_shader *consumer, + unsigned num_tfeedback_decls, + tfeedback_decl *tfeedback_decls) +{ + /* Lower the gl_FragData array to separate variables. */ + if (consumer && consumer->Stage == MESA_SHADER_FRAGMENT) { + lower_fragdata_array(consumer); + } + + /* Lowering of built-in varyings has no effect with the core context and + * GLES2, because they are not available there. + */ + if (ctx->API == API_OPENGL_CORE || + ctx->API == API_OPENGLES2) { + return; + } + + /* Information about built-in varyings. */ + varying_info_visitor producer_info(ir_var_shader_out); + varying_info_visitor consumer_info(ir_var_shader_in); + + if (producer) { + producer_info.get(producer->ir, num_tfeedback_decls, tfeedback_decls); + + if (!consumer) { + /* At least eliminate unused gl_TexCoord elements. */ + if (producer_info.lower_texcoord_array) { + lower_texcoord_array(producer, &producer_info); + } + return; + } + } + + if (consumer) { + consumer_info.get(consumer->ir, 0, NULL); + + if (!producer) { + /* At least eliminate unused gl_TexCoord elements. 
*/ + if (consumer_info.lower_texcoord_array) { + lower_texcoord_array(consumer, &consumer_info); + } + return; + } + } + + /* Eliminate the outputs unused by the consumer. */ + if (producer_info.lower_texcoord_array || + producer_info.color_usage || + producer_info.has_fog) { + replace_varyings_visitor(producer, + &producer_info, + consumer_info.texcoord_usage, + consumer_info.color_usage, + consumer_info.has_fog); + } + + /* The gl_TexCoord fragment shader inputs can be initialized + * by GL_COORD_REPLACE, so we can't eliminate them. + * + * This doesn't prevent elimination of the gl_TexCoord elements which + * are not read by the fragment shader. We want to eliminate those anyway. + */ + if (consumer->Stage == MESA_SHADER_FRAGMENT) { + producer_info.texcoord_usage = (1 << MAX_TEXTURE_COORD_UNITS) - 1; + } + + /* Eliminate the inputs uninitialized by the producer. */ + if (consumer_info.lower_texcoord_array || + consumer_info.color_usage || + consumer_info.has_fog) { + replace_varyings_visitor(consumer, + &consumer_info, + producer_info.texcoord_usage, + producer_info.color_usage, + producer_info.has_fog); + } +} diff --git a/src/compiler/glsl/opt_dead_code.cpp b/src/compiler/glsl/opt_dead_code.cpp new file mode 100644 index 0000000..dbdb7de --- /dev/null +++ b/src/compiler/glsl/opt_dead_code.cpp @@ -0,0 +1,197 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of 
the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_dead_code.cpp + * + * Eliminates dead assignments and variable declarations from the code. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_variable_refcount.h" +#include "compiler/glsl_types.h" +#include "util/hash_table.h" + +static bool debug = false; + +/** + * Do a dead code pass over instructions and everything that instructions + * references. + * + * Note that this will remove assignments to globals, so it is not suitable + * for usage on an unlinked instruction stream. + */ +bool +do_dead_code(exec_list *instructions, bool uniform_locations_assigned) +{ + ir_variable_refcount_visitor v; + bool progress = false; + + v.run(instructions); + + struct hash_entry *e; + hash_table_foreach(v.ht, e) { + ir_variable_refcount_entry *entry = (ir_variable_refcount_entry *)e->data; + + /* Since each assignment is a reference, the refereneced count must be + * greater than or equal to the assignment count. If they are equal, + * then all of the references are assignments, and the variable is + * dead. + * + * Note that if the variable is neither assigned nor referenced, both + * counts will be zero and will be caught by the equality test. + */ + assert(entry->referenced_count >= entry->assigned_count); + + if (debug) { + printf("%s@%p: %d refs, %d assigns, %sdeclared in our scope\n", + entry->var->name, (void *) entry->var, + entry->referenced_count, entry->assigned_count, + entry->declaration ? 
"" : "not "); + } + + if ((entry->referenced_count > entry->assigned_count) + || !entry->declaration) + continue; + + /* Section 7.4.1 (Shader Interface Matching) of the OpenGL 4.5 + * (Core Profile) spec says: + * + * "With separable program objects, interfaces between shader + * stages may involve the outputs from one program object and the + * inputs from a second program object. For such interfaces, it is + * not possible to detect mismatches at link time, because the + * programs are linked separately. When each such program is + * linked, all inputs or outputs interfacing with another program + * stage are treated as active." + */ + if (entry->var->data.always_active_io) + continue; + + if (!entry->assign_list.is_empty()) { + /* Remove all the dead assignments to the variable we found. + * Don't do so if it's a shader or function output, though. + */ + if (entry->var->data.mode != ir_var_function_out && + entry->var->data.mode != ir_var_function_inout && + entry->var->data.mode != ir_var_shader_out && + entry->var->data.mode != ir_var_shader_storage) { + + while (!entry->assign_list.is_empty()) { + struct assignment_entry *assignment_entry = + exec_node_data(struct assignment_entry, + entry->assign_list.head, link); + + assignment_entry->assign->remove(); + + if (debug) { + printf("Removed assignment to %s@%p\n", + entry->var->name, (void *) entry->var); + } + + assignment_entry->link.remove(); + free(assignment_entry); + } + progress = true; + } + } + + if (entry->assign_list.is_empty()) { + /* If there are no assignments or references to the variable left, + * then we can remove its declaration. + */ + + /* uniform initializers are precious, and could get used by another + * stage. Also, once uniform locations have been assigned, the + * declaration cannot be deleted. 
+ */ + if (entry->var->data.mode == ir_var_uniform || + entry->var->data.mode == ir_var_shader_storage) { + if (uniform_locations_assigned || entry->var->constant_initializer) + continue; + + /* Section 2.11.6 (Uniform Variables) of the OpenGL ES 3.0.3 spec + * says: + * + * "All members of a named uniform block declared with a + * shared or std140 layout qualifier are considered active, + * even if they are not referenced in any shader in the + * program. The uniform block itself is also considered + * active, even if no member of the block is referenced." + * + * If the variable is in a uniform block with one of those + * layouts, do not eliminate it. + */ + if (entry->var->is_in_buffer_block()) { + if (entry->var->get_interface_type()->interface_packing != + GLSL_INTERFACE_PACKING_PACKED) + continue; + } + + if (entry->var->type->is_subroutine()) + continue; + } + + entry->var->remove(); + progress = true; + + if (debug) { + printf("Removed declaration of %s@%p\n", + entry->var->name, (void *) entry->var); + } + } + } + + return progress; +} + +/** + * Does a dead code pass on the functions present in the instruction stream. + * + * This is suitable for use while the program is not linked, as it will + * ignore variable declarations (and the assignments to them) for variables + * with global scope. + */ +bool +do_dead_code_unlinked(exec_list *instructions) +{ + bool progress = false; + + foreach_in_list(ir_instruction, ir, instructions) { + ir_function *f = ir->as_function(); + if (f) { + foreach_in_list(ir_function_signature, sig, &f->signatures) { + /* The setting of the uniform_locations_assigned flag here is + * irrelevent. If there is a uniform declaration encountered + * inside the body of the function, something has already gone + * terribly, terribly wrong. 
+ */ + if (do_dead_code(&sig->body, false)) + progress = true; + } + } + } + + return progress; +} diff --git a/src/compiler/glsl/opt_dead_code_local.cpp b/src/compiler/glsl/opt_dead_code_local.cpp new file mode 100644 index 0000000..d38fd2b --- /dev/null +++ b/src/compiler/glsl/opt_dead_code_local.cpp @@ -0,0 +1,336 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_dead_code_local.cpp + * + * Eliminates local dead assignments from the code. + * + * This operates on basic blocks, tracking assignments and finding if + * they're used before the variable is completely reassigned. + * + * Compare this to ir_dead_code.cpp, which operates globally looking + * for assignments to variables that are never read. 
+ */ + +#include "ir.h" +#include "ir_basic_block.h" +#include "ir_optimization.h" +#include "compiler/glsl_types.h" + +static bool debug = false; + +namespace { + +class assignment_entry : public exec_node +{ +public: + assignment_entry(ir_variable *lhs, ir_assignment *ir) + { + assert(lhs); + assert(ir); + this->lhs = lhs; + this->ir = ir; + this->unused = ir->write_mask; + } + + ir_variable *lhs; + ir_assignment *ir; + + /* bitmask of xyzw channels written that haven't been used so far. */ + int unused; +}; + +class kill_for_derefs_visitor : public ir_hierarchical_visitor { +public: + kill_for_derefs_visitor(exec_list *assignments) + { + this->assignments = assignments; + } + + void use_channels(ir_variable *const var, int used) + { + foreach_in_list_safe(assignment_entry, entry, this->assignments) { + if (entry->lhs == var) { + if (var->type->is_scalar() || var->type->is_vector()) { + if (debug) + printf("used %s (0x%01x - 0x%01x)\n", entry->lhs->name, + entry->unused, used & 0xf); + entry->unused &= ~used; + if (!entry->unused) + entry->remove(); + } else { + if (debug) + printf("used %s\n", entry->lhs->name); + entry->remove(); + } + } + } + } + + virtual ir_visitor_status visit(ir_dereference_variable *ir) + { + use_channels(ir->var, ~0); + + return visit_continue; + } + + virtual ir_visitor_status visit(ir_swizzle *ir) + { + ir_dereference_variable *deref = ir->val->as_dereference_variable(); + if (!deref) + return visit_continue; + + int used = 0; + used |= 1 << ir->mask.x; + used |= 1 << ir->mask.y; + used |= 1 << ir->mask.z; + used |= 1 << ir->mask.w; + + use_channels(deref->var, used); + + return visit_continue_with_parent; + } + + virtual ir_visitor_status visit_leave(ir_emit_vertex *) + { + /* For the purpose of dead code elimination, emitting a vertex counts as + * "reading" all of the currently assigned output variables. 
+ */ + foreach_in_list_safe(assignment_entry, entry, this->assignments) { + if (entry->lhs->data.mode == ir_var_shader_out) { + if (debug) + printf("kill %s\n", entry->lhs->name); + entry->remove(); + } + } + + return visit_continue; + } + +private: + exec_list *assignments; +}; + +class array_index_visit : public ir_hierarchical_visitor { +public: + array_index_visit(ir_hierarchical_visitor *v) + { + this->visitor = v; + } + + virtual ir_visitor_status visit_enter(class ir_dereference_array *ir) + { + ir->array_index->accept(visitor); + return visit_continue; + } + + static void run(ir_instruction *ir, ir_hierarchical_visitor *v) + { + array_index_visit top_visit(v); + ir->accept(& top_visit); + } + + ir_hierarchical_visitor *visitor; +}; + +} /* unnamed namespace */ + +/** + * Drops earlier assignments that this one makes dead and records this one + * as a removal candidate. Returns true if any IR was removed or rewritten. + */ +static bool +process_assignment(void *ctx, ir_assignment *ir, exec_list *assignments) +{ + ir_variable *var = NULL; + bool progress = false; + kill_for_derefs_visitor v(assignments); + + /* Kill assignment entries for things used to produce this assignment. */ + ir->rhs->accept(&v); + if (ir->condition) { + ir->condition->accept(&v); + } + + /* Kill assignment entries used as array indices. + */ + array_index_visit::run(ir->lhs, &v); + var = ir->lhs->variable_referenced(); + assert(var); + + /* Now, check if we did a whole-variable assignment. */ + if (!ir->condition) { + ir_dereference_variable *deref_var = ir->lhs->as_dereference_variable(); + + /* If it's a vector type, we can do per-channel elimination of + * use of the RHS. 
+ */ + if (deref_var && (deref_var->var->type->is_scalar() || + deref_var->var->type->is_vector())) { + + if (debug) + printf("looking for %s.0x%01x to remove\n", var->name, + ir->write_mask); + + foreach_in_list_safe(assignment_entry, entry, assignments) { + if (entry->lhs != var) + continue; + + /* Skip if the assignment we're trying to eliminate isn't a plain + * variable deref. */ + if (entry->ir->lhs->ir_type != ir_type_dereference_variable) + continue; + + int remove = entry->unused & ir->write_mask; + if (debug) { + printf("%s 0x%01x - 0x%01x = 0x%01x\n", + var->name, + entry->ir->write_mask, + remove, entry->ir->write_mask & ~remove); + } + if (remove) { + progress = true; + + if (debug) { + printf("rewriting:\n "); + entry->ir->print(); + printf("\n"); + } + + entry->ir->write_mask &= ~remove; + entry->unused &= ~remove; + if (entry->ir->write_mask == 0) { + /* Delete the dead assignment. */ + entry->ir->remove(); + entry->remove(); + } else { + void *mem_ctx = ralloc_parent(entry->ir); + /* Reswizzle the RHS arguments according to the new + * write_mask. + */ + unsigned components[4]; + unsigned channels = 0; + unsigned next = 0; + + for (int i = 0; i < 4; i++) { + if ((entry->ir->write_mask | remove) & (1 << i)) { + if (!(remove & (1 << i))) + components[channels++] = next; + next++; + } + } + + entry->ir->rhs = new(mem_ctx) ir_swizzle(entry->ir->rhs, + components, + channels); + if (debug) { + printf("to:\n "); + entry->ir->print(); + printf("\n"); + } + } + } + } + } else if (ir->whole_variable_written() != NULL) { + /* We did a whole-variable assignment. So, any instruction in + * the assignment list with the same LHS is dead. 
+ */ + if (debug) + printf("looking for %s to remove\n", var->name); + foreach_in_list_safe(assignment_entry, entry, assignments) { + if (entry->lhs == var) { + if (debug) + printf("removing %s\n", var->name); + entry->ir->remove(); + entry->remove(); + progress = true; + } + } + } + } + + /* Add this instruction to the assignment list available to be removed. */ + assignment_entry *entry = new(ctx) assignment_entry(var, ir); + assignments->push_tail(entry); + + if (debug) { + printf("add %s\n", var->name); + + printf("current entries\n"); + foreach_in_list(assignment_entry, entry, assignments) { + printf(" %s (0x%01x)\n", entry->lhs->name, entry->unused); + } + } + + return progress; +} + +static void +dead_code_local_basic_block(ir_instruction *first, + ir_instruction *last, + void *data) +{ + ir_instruction *ir, *ir_next; + /* List of assignment_entry */ + exec_list assignments; + bool *out_progress = (bool *)data; + bool progress = false; + + void *ctx = ralloc_context(NULL); + /* Safe looping, since process_assignment can remove instructions */ + for (ir = first, ir_next = (ir_instruction *)first->next;; + ir = ir_next, ir_next = (ir_instruction *)ir->next) { + ir_assignment *ir_assign = ir->as_assignment(); + + if (debug) { + ir->print(); + printf("\n"); + } + + if (ir_assign) { + progress = process_assignment(ctx, ir_assign, &assignments) || progress; + } else { + kill_for_derefs_visitor kill(&assignments); + ir->accept(&kill); + } + + if (ir == last) + break; + } + *out_progress = progress || *out_progress; /* keep earlier blocks' progress */ + ralloc_free(ctx); +} + +/** + * Does a local dead code pass on the code present in the instruction stream. 
+ */ +bool +do_dead_code_local(exec_list *instructions) +{ + bool progress = false; + + call_for_basic_blocks(instructions, dead_code_local_basic_block, &progress); + + return progress; +} diff --git a/src/compiler/glsl/opt_dead_functions.cpp b/src/compiler/glsl/opt_dead_functions.cpp new file mode 100644 index 0000000..2e90b65 --- /dev/null +++ b/src/compiler/glsl/opt_dead_functions.cpp @@ -0,0 +1,152 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_dead_functions.cpp + * + * Eliminates unused functions from the linked program. 
+ */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_expression_flattening.h" +#include "compiler/glsl_types.h" + +namespace { + +class signature_entry : public exec_node +{ +public: + signature_entry(ir_function_signature *sig) + { + this->signature = sig; + this->used = false; + } + + ir_function_signature *signature; + bool used; +}; + +class ir_dead_functions_visitor : public ir_hierarchical_visitor { +public: + ir_dead_functions_visitor() + { + this->mem_ctx = ralloc_context(NULL); + } + + ~ir_dead_functions_visitor() + { + ralloc_free(this->mem_ctx); + } + + virtual ir_visitor_status visit_enter(ir_function_signature *); + virtual ir_visitor_status visit_enter(ir_call *); + + signature_entry *get_signature_entry(ir_function_signature *var); + + /* List of signature_entry */ + exec_list signature_list; + void *mem_ctx; +}; + +} /* unnamed namespace */ + +signature_entry * +ir_dead_functions_visitor::get_signature_entry(ir_function_signature *sig) +{ + foreach_in_list(signature_entry, entry, &this->signature_list) { + if (entry->signature == sig) + return entry; + } + + signature_entry *entry = new(mem_ctx) signature_entry(sig); + this->signature_list.push_tail(entry); + return entry; +} + + +ir_visitor_status +ir_dead_functions_visitor::visit_enter(ir_function_signature *ir) +{ + signature_entry *entry = this->get_signature_entry(ir); + + if (strcmp(ir->function_name(), "main") == 0) { + entry->used = true; + } + + + + return visit_continue; +} + + +ir_visitor_status +ir_dead_functions_visitor::visit_enter(ir_call *ir) +{ + signature_entry *entry = this->get_signature_entry(ir->callee); + + entry->used = true; + + return visit_continue; +} + +bool +do_dead_functions(exec_list *instructions) +{ + ir_dead_functions_visitor v; + bool progress = false; + + visit_list_elements(&v, instructions); + + /* Now that we've figured out which function signatures are used, remove + * the unused ones, and remove function definitions that have no more + * 
signatures. + */ + foreach_in_list_safe(signature_entry, entry, &v.signature_list) { + if (!entry->used) { + entry->signature->remove(); + delete entry->signature; + progress = true; + } + delete(entry); + } + + /* We don't just do this above when we nuked a signature because of + * const pointers. + */ + foreach_in_list_safe(ir_instruction, ir, instructions) { + ir_function *func = ir->as_function(); + + if (func && func->signatures.is_empty()) { + /* At this point (post-linking), the symbol table is no + * longer in use, so not removing the function from the + * symbol table should be OK. + */ + func->remove(); + delete func; + progress = true; + } + } + + return progress; +} diff --git a/src/compiler/glsl/opt_flatten_nested_if_blocks.cpp b/src/compiler/glsl/opt_flatten_nested_if_blocks.cpp new file mode 100644 index 0000000..c702102 --- /dev/null +++ b/src/compiler/glsl/opt_flatten_nested_if_blocks.cpp @@ -0,0 +1,103 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_flatten_nested_if_blocks.cpp + * + * Flattens nested if blocks such as: + * + * if (x) { + * if (y) { + * ... + * } + * } + * + * into a single if block with a combined condition: + * + * if (x && y) { + * ... + * } + */ + +#include "ir.h" +#include "ir_builder.h" + +using namespace ir_builder; + +namespace { + +class nested_if_flattener : public ir_hierarchical_visitor { +public: + nested_if_flattener() + { + progress = false; + } + + ir_visitor_status visit_leave(ir_if *); + ir_visitor_status visit_enter(ir_assignment *); + + bool progress; +}; + +} /* unnamed namespace */ + +/* We only care about the top level "if" instructions, so don't + * descend into expressions. + */ +ir_visitor_status +nested_if_flattener::visit_enter(ir_assignment *ir) +{ + (void) ir; + return visit_continue_with_parent; +} + +bool +opt_flatten_nested_if_blocks(exec_list *instructions) +{ + nested_if_flattener v; + + v.run(instructions); + return v.progress; +} + + +ir_visitor_status +nested_if_flattener::visit_leave(ir_if *ir) +{ + /* Only handle a single ir_if within the then clause of an ir_if. No extra + * instructions, no else clauses, nothing. 
+ */ + if (ir->then_instructions.is_empty() || !ir->else_instructions.is_empty()) + return visit_continue; + + ir_if *inner = ((ir_instruction *) ir->then_instructions.head)->as_if(); + if (!inner || !inner->next->is_tail_sentinel() || + !inner->else_instructions.is_empty()) + return visit_continue; + + ir->condition = logic_and(ir->condition, inner->condition); + inner->then_instructions.move_nodes_to(&ir->then_instructions); + + progress = true; + return visit_continue; +} diff --git a/src/compiler/glsl/opt_flip_matrices.cpp b/src/compiler/glsl/opt_flip_matrices.cpp new file mode 100644 index 0000000..04c6170 --- /dev/null +++ b/src/compiler/glsl/opt_flip_matrices.cpp @@ -0,0 +1,123 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +/** + * \file opt_flip_matrices.cpp + * + * Convert (matrix * vector) operations to (vector * matrixTranspose), + * which can be done using dot products rather than multiplies and adds. + * On some hardware, this is more efficient. + * + * This currently only does the conversion for built-in matrices which + * already have transposed equivalents. Namely, gl_ModelViewProjectionMatrix + * and gl_TextureMatrix. + */ +#include "ir.h" +#include "ir_optimization.h" +#include "main/macros.h" + +namespace { +class matrix_flipper : public ir_hierarchical_visitor { +public: + matrix_flipper(exec_list *instructions) + { + progress = false; + mvp_transpose = NULL; + texmat_transpose = NULL; + + foreach_in_list(ir_instruction, ir, instructions) { + ir_variable *var = ir->as_variable(); + if (!var) + continue; + if (strcmp(var->name, "gl_ModelViewProjectionMatrixTranspose") == 0) + mvp_transpose = var; + if (strcmp(var->name, "gl_TextureMatrixTranspose") == 0) + texmat_transpose = var; + } + } + + ir_visitor_status visit_enter(ir_expression *ir); + + bool progress; + +private: + ir_variable *mvp_transpose; + ir_variable *texmat_transpose; +}; +} + +ir_visitor_status +matrix_flipper::visit_enter(ir_expression *ir) +{ + if (ir->operation != ir_binop_mul || + !ir->operands[0]->type->is_matrix() || + !ir->operands[1]->type->is_vector()) + return visit_continue; + + ir_variable *mat_var = ir->operands[0]->variable_referenced(); + if (!mat_var) + return visit_continue; + + if (mvp_transpose && + strcmp(mat_var->name, "gl_ModelViewProjectionMatrix") == 0) { +#ifndef NDEBUG + ir_dereference_variable *deref = ir->operands[0]->as_dereference_variable(); + assert(deref && deref->var == mat_var); +#endif + + void *mem_ctx = ralloc_parent(ir); + + ir->operands[0] = ir->operands[1]; + ir->operands[1] = new(mem_ctx) ir_dereference_variable(mvp_transpose); + + progress = true; + } else if (texmat_transpose && + strcmp(mat_var->name, "gl_TextureMatrix") == 0) { + ir_dereference_array 
*array_ref = ir->operands[0]->as_dereference_array(); + assert(array_ref != NULL); + ir_dereference_variable *var_ref = array_ref->array->as_dereference_variable(); + assert(var_ref && var_ref->var == mat_var); + + ir->operands[0] = ir->operands[1]; + ir->operands[1] = array_ref; + + var_ref->var = texmat_transpose; + + texmat_transpose->data.max_array_access = + MAX2(texmat_transpose->data.max_array_access, mat_var->data.max_array_access); + + progress = true; + } + + return visit_continue; +} + +bool +opt_flip_matrices(struct exec_list *instructions) +{ + matrix_flipper v(instructions); + + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/opt_function_inlining.cpp b/src/compiler/glsl/opt_function_inlining.cpp new file mode 100644 index 0000000..19f5fae --- /dev/null +++ b/src/compiler/glsl/opt_function_inlining.cpp @@ -0,0 +1,360 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_function_inlining.cpp + * + * Replaces calls to functions with the body of the function. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_function_inlining.h" +#include "ir_expression_flattening.h" +#include "compiler/glsl_types.h" +#include "program/hash_table.h" + +static void +do_variable_replacement(exec_list *instructions, + ir_variable *orig, + ir_dereference *repl); + +namespace { + +class ir_function_inlining_visitor : public ir_hierarchical_visitor { +public: + ir_function_inlining_visitor() + { + progress = false; + } + + virtual ~ir_function_inlining_visitor() + { + /* empty */ + } + + virtual ir_visitor_status visit_enter(ir_expression *); + virtual ir_visitor_status visit_enter(ir_call *); + virtual ir_visitor_status visit_enter(ir_return *); + virtual ir_visitor_status visit_enter(ir_texture *); + virtual ir_visitor_status visit_enter(ir_swizzle *); + + bool progress; +}; + +} /* unnamed namespace */ + +bool +do_function_inlining(exec_list *instructions) +{ + ir_function_inlining_visitor v; + + v.run(instructions); + + return v.progress; +} + +static void +replace_return_with_assignment(ir_instruction *ir, void *data) +{ + void *ctx = ralloc_parent(ir); + ir_dereference *orig_deref = (ir_dereference *) data; + ir_return *ret = ir->as_return(); + + if (ret) { + if (ret->value) { + ir_rvalue *lhs = orig_deref->clone(ctx, NULL); + ret->replace_with(new(ctx) ir_assignment(lhs, ret->value, NULL)); + } else { + /* un-valued return has to be the last return, or we shouldn't + * have reached here. (see can_inline()). 
+ */ + assert(ret->next->is_tail_sentinel()); + ret->remove(); + } + } +} + +void +ir_call::generate_inline(ir_instruction *next_ir) +{ + void *ctx = ralloc_parent(this); + ir_variable **parameters; + unsigned num_parameters; + int i; + struct hash_table *ht; + + ht = hash_table_ctor(0, hash_table_pointer_hash, hash_table_pointer_compare); + + num_parameters = this->callee->parameters.length(); + parameters = new ir_variable *[num_parameters]; + + /* Generate the declarations for the parameters to our inlined code, + * and set up the mapping of real function body variables to ours. + */ + i = 0; + foreach_two_lists(formal_node, &this->callee->parameters, + actual_node, &this->actual_parameters) { + ir_variable *sig_param = (ir_variable *) formal_node; + ir_rvalue *param = (ir_rvalue *) actual_node; + + /* Generate a new variable for the parameter. */ + if (sig_param->type->contains_opaque()) { + /* For opaque types, we want the inlined variable references + * referencing the passed in variable, since that will have + * the location information, which an assignment of an opaque + * variable wouldn't. Fix it up below. + */ + parameters[i] = NULL; + } else { + parameters[i] = sig_param->clone(ctx, ht); + parameters[i]->data.mode = ir_var_auto; + + /* Remove the read-only decoration because we're going to write + * directly to this variable. If the cloned variable is left + * read-only and the inlined function is inside a loop, the loop + * analysis code will get confused. + */ + parameters[i]->data.read_only = false; + next_ir->insert_before(parameters[i]); + } + + /* Move the actual param into our param variable if it's an 'in' type. 
*/ + if (parameters[i] && (sig_param->data.mode == ir_var_function_in || + sig_param->data.mode == ir_var_const_in || + sig_param->data.mode == ir_var_function_inout)) { + ir_assignment *assign; + + assign = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(parameters[i]), + param, NULL); + next_ir->insert_before(assign); + } + + ++i; + } + + exec_list new_instructions; + + /* Generate the inlined body of the function to a new list */ + foreach_in_list(ir_instruction, ir, &callee->body) { + ir_instruction *new_ir = ir->clone(ctx, ht); + + new_instructions.push_tail(new_ir); + visit_tree(new_ir, replace_return_with_assignment, this->return_deref); + } + + /* If any opaque types were passed in, replace any deref of the + * opaque variable with a deref of the argument. + */ + foreach_two_lists(formal_node, &this->callee->parameters, + actual_node, &this->actual_parameters) { + ir_rvalue *const param = (ir_rvalue *) actual_node; + ir_variable *sig_param = (ir_variable *) formal_node; + + if (sig_param->type->contains_opaque()) { + ir_dereference *deref = param->as_dereference(); + + assert(deref); + do_variable_replacement(&new_instructions, sig_param, deref); + } + } + + /* Now push those new instructions in. */ + next_ir->insert_before(&new_instructions); + + /* Copy back the value of any 'out' parameters from the function body + * variables to our own. + */ + i = 0; + foreach_two_lists(formal_node, &this->callee->parameters, + actual_node, &this->actual_parameters) { + ir_rvalue *const param = (ir_rvalue *) actual_node; + const ir_variable *const sig_param = (ir_variable *) formal_node; + + /* Move our param variable into the actual param if it's an 'out' type. 
*/ + if (parameters[i] && (sig_param->data.mode == ir_var_function_out || + sig_param->data.mode == ir_var_function_inout)) { + ir_assignment *assign; + + assign = new(ctx) ir_assignment(param->clone(ctx, NULL)->as_rvalue(), + new(ctx) ir_dereference_variable(parameters[i]), + NULL); + next_ir->insert_before(assign); + } + + ++i; + } + + delete [] parameters; + + hash_table_dtor(ht); +} + + +ir_visitor_status +ir_function_inlining_visitor::visit_enter(ir_expression *ir) +{ + (void) ir; + return visit_continue_with_parent; +} + + +ir_visitor_status +ir_function_inlining_visitor::visit_enter(ir_return *ir) +{ + (void) ir; + return visit_continue_with_parent; +} + + +ir_visitor_status +ir_function_inlining_visitor::visit_enter(ir_texture *ir) +{ + (void) ir; + return visit_continue_with_parent; +} + + +ir_visitor_status +ir_function_inlining_visitor::visit_enter(ir_swizzle *ir) +{ + (void) ir; + return visit_continue_with_parent; +} + + +ir_visitor_status +ir_function_inlining_visitor::visit_enter(ir_call *ir) +{ + if (can_inline(ir)) { + ir->generate_inline(ir); + ir->remove(); + this->progress = true; + } + + return visit_continue; +} + + +/** + * Replaces references to the "orig" variable with a clone of "repl." + * + * From the spec, opaque types can appear in the tree as function + * (non-out) parameters and as the result of array indexing and + * structure field selection. In our builtin implementation, they + * also appear in the sampler field of an ir_tex instruction. 
+ */ + +class ir_variable_replacement_visitor : public ir_hierarchical_visitor { +public: + ir_variable_replacement_visitor(ir_variable *orig, ir_dereference *repl) + { + this->orig = orig; + this->repl = repl; + } + + virtual ~ir_variable_replacement_visitor() + { + } + + virtual ir_visitor_status visit_leave(ir_call *); + virtual ir_visitor_status visit_leave(ir_dereference_array *); + virtual ir_visitor_status visit_leave(ir_dereference_record *); + virtual ir_visitor_status visit_leave(ir_texture *); + + void replace_deref(ir_dereference **deref); + void replace_rvalue(ir_rvalue **rvalue); + + ir_variable *orig; + ir_dereference *repl; +}; + +void +ir_variable_replacement_visitor::replace_deref(ir_dereference **deref) +{ + ir_dereference_variable *deref_var = (*deref)->as_dereference_variable(); + if (deref_var && deref_var->var == this->orig) { + *deref = this->repl->clone(ralloc_parent(*deref), NULL); + } +} + +void +ir_variable_replacement_visitor::replace_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_dereference *deref = (*rvalue)->as_dereference(); + + if (!deref) + return; + + replace_deref(&deref); + *rvalue = deref; +} + +ir_visitor_status +ir_variable_replacement_visitor::visit_leave(ir_texture *ir) +{ + replace_deref(&ir->sampler); + + return visit_continue; +} + +ir_visitor_status +ir_variable_replacement_visitor::visit_leave(ir_dereference_array *ir) +{ + replace_rvalue(&ir->array); + return visit_continue; +} + +ir_visitor_status +ir_variable_replacement_visitor::visit_leave(ir_dereference_record *ir) +{ + replace_rvalue(&ir->record); + return visit_continue; +} + +ir_visitor_status +ir_variable_replacement_visitor::visit_leave(ir_call *ir) +{ + foreach_in_list_safe(ir_rvalue, param, &ir->actual_parameters) { + ir_rvalue *new_param = param; + replace_rvalue(&new_param); + + if (new_param != param) { + param->replace_with(new_param); + } + } + return visit_continue; +} + +static void +do_variable_replacement(exec_list *instructions, 
+ ir_variable *orig, + ir_dereference *repl) +{ + ir_variable_replacement_visitor v(orig, repl); + + visit_list_elements(&v, instructions); +} diff --git a/src/compiler/glsl/opt_if_simplification.cpp b/src/compiler/glsl/opt_if_simplification.cpp new file mode 100644 index 0000000..e05f031 --- /dev/null +++ b/src/compiler/glsl/opt_if_simplification.cpp @@ -0,0 +1,126 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_if_simplification.cpp + * + * Moves constant branches of if statements out to the surrounding + * instruction stream, and inverts if conditionals to avoid empty + * "then" blocks. 
+ */ + +#include "ir.h" + +namespace { + +class ir_if_simplification_visitor : public ir_hierarchical_visitor { +public: + ir_if_simplification_visitor() + { + this->made_progress = false; + } + + ir_visitor_status visit_leave(ir_if *); + ir_visitor_status visit_enter(ir_assignment *); + + bool made_progress; +}; + +} /* unnamed namespace */ + +/* We only care about the top level "if" instructions, so don't + * descend into expressions. + */ +ir_visitor_status +ir_if_simplification_visitor::visit_enter(ir_assignment *ir) +{ + (void) ir; + return visit_continue_with_parent; +} + +bool +do_if_simplification(exec_list *instructions) +{ + ir_if_simplification_visitor v; + + v.run(instructions); + return v.made_progress; +} + + +ir_visitor_status +ir_if_simplification_visitor::visit_leave(ir_if *ir) +{ + /* If the if statement has nothing on either side, remove it. */ + if (ir->then_instructions.is_empty() && + ir->else_instructions.is_empty()) { + ir->remove(); + this->made_progress = true; + return visit_continue; + } + + /* FINISHME: Ideally there would be a way to note that the condition results + * FINISHME: in a constant before processing both of the other subtrees. + * FINISHME: This can probably be done with some flags, but it would take + * FINISHME: some work to get right. + */ + ir_constant *condition_constant = ir->condition->constant_expression_value(); + if (condition_constant) { + /* Move the contents of the one branch of the conditional + * that matters out. 
+ */ + if (condition_constant->value.b[0]) { + ir->insert_before(&ir->then_instructions); + } else { + ir->insert_before(&ir->else_instructions); + } + ir->remove(); + this->made_progress = true; + return visit_continue; + } + + /* Turn: + * + * if (cond) { + * } else { + * do_work(); + * } + * + * into : + * + * if (!cond) + * do_work(); + * + * which avoids control flow for "else" (which is usually more + * expensive than normal operations), and the "not" can usually be + * folded into the generation of "cond" anyway. + */ + if (ir->then_instructions.is_empty()) { + ir->condition = new(ralloc_parent(ir->condition)) + ir_expression(ir_unop_logic_not, ir->condition); + ir->else_instructions.move_nodes_to(&ir->then_instructions); + this->made_progress = true; + } + + return visit_continue; +} diff --git a/src/compiler/glsl/opt_minmax.cpp b/src/compiler/glsl/opt_minmax.cpp new file mode 100644 index 0000000..29482ee --- /dev/null +++ b/src/compiler/glsl/opt_minmax.cpp @@ -0,0 +1,488 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_minmax.cpp + * + * Drop operands from an expression tree of only min/max operations if they + * can be proven to not contribute to the final result. + * + * The algorithm is similar to alpha-beta pruning on a minmax search. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_rvalue_visitor.h" +#include "ir_optimization.h" +#include "ir_builder.h" +#include "program/prog_instruction.h" +#include "compiler/glsl_types.h" +#include "main/macros.h" + +using namespace ir_builder; + +namespace { + +enum compare_components_result { + LESS, + LESS_OR_EQUAL, + EQUAL, + GREATER_OR_EQUAL, + GREATER, + MIXED +}; + +class minmax_range { +public: + minmax_range(ir_constant *low = NULL, ir_constant *high = NULL) + { + this->low = low; + this->high = high; + } + + /* low is the lower limit of the range, high is the higher limit. NULL on + * low means negative infinity (unlimited) and on high positive infinity + * (unlimited). Because of the two interpretations of the value NULL, + * arbitrary comparison between ir_constants is impossible. + */ + ir_constant *low; + ir_constant *high; +}; + +class ir_minmax_visitor : public ir_rvalue_enter_visitor { +public: + ir_minmax_visitor() + : progress(false) + { + } + + ir_rvalue *prune_expression(ir_expression *expr, minmax_range baserange); + + void handle_rvalue(ir_rvalue **rvalue); + + bool progress; +}; + +/* + * Returns LESS if all vector components of `a' are strictly lower than of `b', + * GREATER if all vector components of `a' are strictly greater than of `b', + * MIXED if some vector components of `a' are strictly lower than of `b' while + * others are strictly greater, or EQUAL otherwise. 
+ */ +static enum compare_components_result +compare_components(ir_constant *a, ir_constant *b) +{ + assert(a != NULL); + assert(b != NULL); + + assert(a->type->base_type == b->type->base_type); + + unsigned a_inc = a->type->is_scalar() ? 0 : 1; + unsigned b_inc = b->type->is_scalar() ? 0 : 1; + unsigned components = MAX2(a->type->components(), b->type->components()); + + bool foundless = false; + bool foundgreater = false; + bool foundequal = false; + + for (unsigned i = 0, c0 = 0, c1 = 0; + i < components; + c0 += a_inc, c1 += b_inc, ++i) { + switch (a->type->base_type) { + case GLSL_TYPE_UINT: + if (a->value.u[c0] < b->value.u[c1]) + foundless = true; + else if (a->value.u[c0] > b->value.u[c1]) + foundgreater = true; + else + foundequal = true; + break; + case GLSL_TYPE_INT: + if (a->value.i[c0] < b->value.i[c1]) + foundless = true; + else if (a->value.i[c0] > b->value.i[c1]) + foundgreater = true; + else + foundequal = true; + break; + case GLSL_TYPE_FLOAT: + if (a->value.f[c0] < b->value.f[c1]) + foundless = true; + else if (a->value.f[c0] > b->value.f[c1]) + foundgreater = true; + else + foundequal = true; + break; + case GLSL_TYPE_DOUBLE: + if (a->value.d[c0] < b->value.d[c1]) + foundless = true; + else if (a->value.d[c0] > b->value.d[c1]) + foundgreater = true; + else + foundequal = true; + break; + default: + unreachable("not reached"); + } + } + + if (foundless && foundgreater) { + /* Some components are strictly lower, others are strictly greater */ + return MIXED; + } + + if (foundequal) { + /* It is not mixed, but it is not strictly lower or greater */ + if (foundless) + return LESS_OR_EQUAL; + if (foundgreater) + return GREATER_OR_EQUAL; + return EQUAL; + } + + /* All components are strictly lower or strictly greater */ + return foundless ? 
LESS : GREATER; +} + +static ir_constant * +combine_constant(bool ismin, ir_constant *a, ir_constant *b) +{ + void *mem_ctx = ralloc_parent(a); + ir_constant *c = a->clone(mem_ctx, NULL); + for (unsigned i = 0; i < c->type->components(); i++) { + switch (c->type->base_type) { + case GLSL_TYPE_UINT: + if ((ismin && b->value.u[i] < c->value.u[i]) || + (!ismin && b->value.u[i] > c->value.u[i])) + c->value.u[i] = b->value.u[i]; + break; + case GLSL_TYPE_INT: + if ((ismin && b->value.i[i] < c->value.i[i]) || + (!ismin && b->value.i[i] > c->value.i[i])) + c->value.i[i] = b->value.i[i]; + break; + case GLSL_TYPE_FLOAT: + if ((ismin && b->value.f[i] < c->value.f[i]) || + (!ismin && b->value.f[i] > c->value.f[i])) + c->value.f[i] = b->value.f[i]; + break; + case GLSL_TYPE_DOUBLE: + if ((ismin && b->value.d[i] < c->value.d[i]) || + (!ismin && b->value.d[i] > c->value.d[i])) + c->value.d[i] = b->value.d[i]; + break; + default: + assert(!"not reached"); + } + } + return c; +} + +static ir_constant * +smaller_constant(ir_constant *a, ir_constant *b) +{ + assert(a != NULL); + assert(b != NULL); + + enum compare_components_result ret = compare_components(a, b); + if (ret == MIXED) + return combine_constant(true, a, b); + else if (ret < EQUAL) + return a; + else + return b; +} + +static ir_constant * +larger_constant(ir_constant *a, ir_constant *b) +{ + assert(a != NULL); + assert(b != NULL); + + enum compare_components_result ret = compare_components(a, b); + if (ret == MIXED) + return combine_constant(false, a, b); + else if (ret < EQUAL) + return b; + else + return a; +} + +/* Combines two ranges by doing an element-wise min() / max() depending on the + * operation. + */ +static minmax_range +combine_range(minmax_range r0, minmax_range r1, bool ismin) +{ + minmax_range ret; + + if (!r0.low) { + ret.low = ismin ? r0.low : r1.low; + } else if (!r1.low) { + ret.low = ismin ? r1.low : r0.low; + } else { + ret.low = ismin ? 
smaller_constant(r0.low, r1.low) : + larger_constant(r0.low, r1.low); + } + + if (!r0.high) { + ret.high = ismin ? r1.high : r0.high; + } else if (!r1.high) { + ret.high = ismin ? r0.high : r1.high; + } else { + ret.high = ismin ? smaller_constant(r0.high, r1.high) : + larger_constant(r0.high, r1.high); + } + + return ret; +} + +/* Returns a range so that lower limit is the larger of the two lower limits, + * and higher limit is the smaller of the two higher limits. + */ +static minmax_range +range_intersection(minmax_range r0, minmax_range r1) +{ + minmax_range ret; + + if (!r0.low) + ret.low = r1.low; + else if (!r1.low) + ret.low = r0.low; + else + ret.low = larger_constant(r0.low, r1.low); + + if (!r0.high) + ret.high = r1.high; + else if (!r1.high) + ret.high = r0.high; + else + ret.high = smaller_constant(r0.high, r1.high); + + return ret; +} + +static minmax_range +get_range(ir_rvalue *rval) +{ + ir_expression *expr = rval->as_expression(); + if (expr && (expr->operation == ir_binop_min || + expr->operation == ir_binop_max)) { + minmax_range r0 = get_range(expr->operands[0]); + minmax_range r1 = get_range(expr->operands[1]); + return combine_range(r0, r1, expr->operation == ir_binop_min); + } + + ir_constant *c = rval->as_constant(); + if (c) { + return minmax_range(c, c); + } + + return minmax_range(); +} + +/** + * Prunes a min/max expression considering the base range of the parent + * min/max expression. + * + * @param baserange the range that the parents of this min/max expression + * in the min/max tree will clamp its value to. + */ +ir_rvalue * +ir_minmax_visitor::prune_expression(ir_expression *expr, minmax_range baserange) +{ + assert(expr->operation == ir_binop_min || + expr->operation == ir_binop_max); + + bool ismin = expr->operation == ir_binop_min; + minmax_range limits[2]; + + /* Recurse to get the ranges for each of the subtrees of this + * expression. 
We need to do this as a separate step because we need to + * know the ranges of each of the subtrees before we prune either one. + * Consider something like this: + * + * max + * / \ + * max max + * / \ / \ + * 3 a b 2 + * + * We would like to prune away the max on the bottom-right, but to do so + * we need to know the range of the expression on the left beforehand, + * and there's no guarantee that we will visit either subtree in a + * particular order. + */ + for (unsigned i = 0; i < 2; ++i) + limits[i] = get_range(expr->operands[i]); + + for (unsigned i = 0; i < 2; ++i) { + bool is_redundant = false; + + enum compare_components_result cr = LESS; + if (ismin) { + /* If this operand will always be greater than the other one, it's + * redundant. + */ + if (limits[i].low && limits[1 - i].high) { + cr = compare_components(limits[i].low, limits[1 - i].high); + if (cr >= EQUAL && cr != MIXED) + is_redundant = true; + } + /* If this operand is always greater than baserange, then even if + * it's smaller than the other one it'll get clamped, so it's + * redundant. + */ + if (!is_redundant && limits[i].low && baserange.high) { + cr = compare_components(limits[i].low, baserange.high); + if (cr >= EQUAL && cr != MIXED) + is_redundant = true; + } + } else { + /* If this operand will always be lower than the other one, it's + * redundant. + */ + if (limits[i].high && limits[1 - i].low) { + cr = compare_components(limits[i].high, limits[1 - i].low); + if (cr <= EQUAL) + is_redundant = true; + } + /* If this operand is always lower than baserange, then even if + * it's greater than the other one it'll get clamped, so it's + * redundant. + */ + if (!is_redundant && limits[i].high && baserange.low) { + cr = compare_components(limits[i].high, baserange.low); + if (cr <= EQUAL) + is_redundant = true; + } + } + + if (is_redundant) { + progress = true; + + /* Recurse if necessary. 
*/ + ir_expression *op_expr = expr->operands[1 - i]->as_expression(); + if (op_expr && (op_expr->operation == ir_binop_min || + op_expr->operation == ir_binop_max)) { + return prune_expression(op_expr, baserange); + } + + return expr->operands[1 - i]; + } else if (cr == MIXED) { + /* If we have mixed vector operands, we can try to resolve the minmax + * expression by doing a component-wise minmax: + * + * min min + * / \ / \ + * min a ===> [1,1] a + * / \ + * [1,3] [3,1] + * + */ + ir_constant *a = expr->operands[0]->as_constant(); + ir_constant *b = expr->operands[1]->as_constant(); + if (a && b) + return combine_constant(ismin, a, b); + } + } + + /* Now recurse to operands giving them the proper baserange. The baserange + * to pass is the intersection of our baserange and the other operand's + * limit with one of the ranges unlimited. If we can't compute a valid + * intersection, we use the current baserange. + */ + for (unsigned i = 0; i < 2; ++i) { + ir_expression *op_expr = expr->operands[i]->as_expression(); + if (op_expr && (op_expr->operation == ir_binop_min || + op_expr->operation == ir_binop_max)) { + /* We can only compute a new baserange for this operand if we managed + * to compute a valid range for the other operand. + */ + if (ismin) + limits[1 - i].low = NULL; + else + limits[1 - i].high = NULL; + minmax_range base = range_intersection(limits[1 - i], baserange); + expr->operands[i] = prune_expression(op_expr, base); + } + } + + /* If we got here we could not discard any of the operands of the minmax + * expression, but we can still try to resolve the expression if both + * operands are constant. We do this after the loop above, to make sure + * that if our operands are minmax expressions we have tried to prune them + * first (hopefully reducing them to constants). 
+ */ + ir_constant *a = expr->operands[0]->as_constant(); + ir_constant *b = expr->operands[1]->as_constant(); + if (a && b) + return combine_constant(ismin, a, b); + + return expr; +} + +static ir_rvalue * +swizzle_if_required(ir_expression *expr, ir_rvalue *rval) +{ + if (expr->type->is_vector() && rval->type->is_scalar()) { + return swizzle(rval, SWIZZLE_XXXX, expr->type->vector_elements); + } else { + return rval; + } +} + +void +ir_minmax_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_expression *expr = (*rvalue)->as_expression(); + if (!expr || (expr->operation != ir_binop_min && + expr->operation != ir_binop_max)) + return; + + ir_rvalue *new_rvalue = prune_expression(expr, minmax_range()); + if (new_rvalue == *rvalue) + return; + + /* If the expression type is a vector and the optimization leaves a scalar + * as the result, we need to turn it into a vector. + */ + *rvalue = swizzle_if_required(expr, new_rvalue); + + progress = true; +} + +} + +bool +do_minmax_prune(exec_list *instructions) +{ + ir_minmax_visitor v; + + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/opt_noop_swizzle.cpp b/src/compiler/glsl/opt_noop_swizzle.cpp new file mode 100644 index 0000000..41890ab --- /dev/null +++ b/src/compiler/glsl/opt_noop_swizzle.cpp @@ -0,0 +1,83 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or 
substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_noop_swizzle.cpp + * + * If a swizzle doesn't change the order or count of components, then + * remove the swizzle so that other optimization passes see the value + * behind it. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_rvalue_visitor.h" +#include "compiler/glsl_types.h" + +namespace { + +class ir_noop_swizzle_visitor : public ir_rvalue_visitor { +public: + ir_noop_swizzle_visitor() + { + this->progress = false; + } + + void handle_rvalue(ir_rvalue **rvalue); + bool progress; +}; + +} /* unnamed namespace */ + +void +ir_noop_swizzle_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_swizzle *swiz = (*rvalue)->as_swizzle(); + if (!swiz || swiz->type != swiz->val->type) + return; + + int elems = swiz->val->type->vector_elements; + if (swiz->mask.x != 0) + return; + if (elems >= 2 && swiz->mask.y != 1) + return; + if (elems >= 3 && swiz->mask.z != 2) + return; + if (elems >= 4 && swiz->mask.w != 3) + return; + + this->progress = true; + *rvalue = swiz->val; +} + +bool +do_noop_swizzle(exec_list *instructions) +{ + ir_noop_swizzle_visitor v; + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/opt_rebalance_tree.cpp b/src/compiler/glsl/opt_rebalance_tree.cpp new file mode 100644 index 0000000..095f2d7 --- /dev/null +++ b/src/compiler/glsl/opt_rebalance_tree.cpp @@ -0,0 +1,321 @@ +/* + * Copyright © 2014 Intel Corporation + * + 
* Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_rebalance_tree.cpp + * + * Rebalances a reduction expression tree. + * + * For reduction operations (e.g., x + y + z + w) we generate an expression + * tree like + * + * + + * / \ + * + w + * / \ + * + z + * / \ + * x y + * + * which we can rebalance into + * + * + + * / \ + * / \ + * + + + * / \ / \ + * x y z w + * + * to get a better instruction scheduling. + * + * See "Tree Rebalancing in Optimal Editor Time and Space" by Quentin F. Stout + * and Bette L. Warren. + * + * Also see http://penguin.ewu.edu/~trolfe/DSWpaper/ for a very readable + * explanation of the tree_to_vine() (rightward rotation) and + * vine_to_tree() (leftward rotation) algorithms.
+ */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_rvalue_visitor.h" +#include "ir_optimization.h" +#include "main/macros.h" /* for MAX2 */ + +/* The DSW algorithm generates a degenerate tree (really, a linked list) in + * tree_to_vine(). We'd rather not leave a binary expression with only one + * operand, so trivial modifications (the ternary operators below) are needed + * to ensure that we only rotate around the ir_expression nodes of the tree. + */ +static unsigned +tree_to_vine(ir_expression *root) +{ + unsigned size = 0; + ir_rvalue *vine_tail = root; + ir_rvalue *remainder = root->operands[1]; + + while (remainder != NULL) { + ir_expression *remainder_temp = remainder->as_expression(); + ir_expression *remainder_left = remainder_temp ? + remainder_temp->operands[0]->as_expression() : NULL; + + if (remainder_left == NULL) { + /* move vine_tail down one */ + vine_tail = remainder; + remainder = remainder->as_expression() ? + ((ir_expression *)remainder)->operands[1] : NULL; + size++; + } else { + /* rotate */ + ir_expression *tempptr = remainder_left; + ((ir_expression *)remainder)->operands[0] = tempptr->operands[1]; + tempptr->operands[1] = remainder; + remainder = tempptr; + ((ir_expression *)vine_tail)->operands[1] = tempptr; + } + } + + return size; +} + +static void +compression(ir_expression *root, unsigned count) +{ + ir_expression *scanner = root; + + for (unsigned i = 0; i < count; i++) { + ir_expression *child = (ir_expression *)scanner->operands[1]; + scanner->operands[1] = child->operands[1]; + scanner = (ir_expression *)scanner->operands[1]; + child->operands[1] = scanner->operands[0]; + scanner->operands[0] = child; + } +} + +static void +vine_to_tree(ir_expression *root, unsigned size) +{ + int n = size - 1; + for (int m = n / 2; m > 0; m = n / 2) { + compression(root, m); + n -= m + 1; + } +} + +namespace { + +class ir_rebalance_visitor : public ir_rvalue_enter_visitor { +public: + ir_rebalance_visitor() + { + progress = false; + } 
+ + void handle_rvalue(ir_rvalue **rvalue); + + bool progress; +}; + +struct is_reduction_data { + ir_expression_operation operation; + const glsl_type *type; + unsigned num_expr; + bool is_reduction; + bool contains_constant; +}; + +} /* anonymous namespace */ + +static bool +is_reduction_operation(ir_expression_operation operation) +{ + switch (operation) { + case ir_binop_add: + case ir_binop_mul: + case ir_binop_bit_and: + case ir_binop_bit_xor: + case ir_binop_bit_or: + case ir_binop_logic_and: + case ir_binop_logic_xor: + case ir_binop_logic_or: + case ir_binop_min: + case ir_binop_max: + return true; + default: + return false; + } +} + +/* Note that this function does not attempt to recognize that reduction trees + * are already balanced. + * + * We return false from this function for a number of reasons other than an + * expression tree not being a mathematical reduction. Namely, + * + * - if the tree contains multiple constants that we may be able to combine. + * - if the tree contains matrices: + * - they might contain vec4's with many constant components that we can + * simplify after splitting. + * - applying the matrix chain ordering optimization is more than just + * balancing an expression tree. + * - if the tree contains operations on multiple types. + * - if the tree contains ir_dereference_{array,record}, since foo[a+b] + c + * would trick the visiting pass. + */ +static void +is_reduction(ir_instruction *ir, void *data) +{ + struct is_reduction_data *ird = (struct is_reduction_data *)data; + if (!ird->is_reduction) + return; + + /* We don't want to balance a tree that contains multiple constants, since + * we'll be able to constant fold them if they're not in separate subtrees. + */ + if (ir->as_constant()) { + if (ird->contains_constant) { + ird->is_reduction = false; + } + ird->contains_constant = true; + return; + } + + /* Array/record dereferences have subtrees that are not part of the expr + * tree we're balancing. 
Skip trees containing them. + */ + if (ir->ir_type == ir_type_dereference_array || + ir->ir_type == ir_type_dereference_record) { + ird->is_reduction = false; + return; + } + + ir_expression *expr = ir->as_expression(); + if (!expr) + return; + + /* Non-constant matrices might still contain constant vec4 that we can + * constant fold once split up. Handling matrices will need some more + * work. + */ + if (expr->type->is_matrix() || + expr->operands[0]->type->is_matrix() || + (expr->operands[1] && expr->operands[1]->type->is_matrix())) { + ird->is_reduction = false; + return; + } + + if (ird->type != NULL && ird->type != expr->type) { + ird->is_reduction = false; + return; + } + ird->type = expr->type; + + ird->num_expr++; + if (is_reduction_operation(expr->operation)) { + if (ird->operation != 0 && ird->operation != expr->operation) + ird->is_reduction = false; + ird->operation = expr->operation; + } else { + ird->is_reduction = false; + } +} + +static ir_rvalue * +handle_expression(ir_expression *expr) +{ + struct is_reduction_data ird; + ird.operation = (ir_expression_operation)0; + ird.type = NULL; + ird.num_expr = 0; + ird.is_reduction = true; + ird.contains_constant = false; + + visit_tree(expr, is_reduction, (void *)&ird); + + if (ird.is_reduction && ird.num_expr > 2) { + ir_constant z = ir_constant(0.0f); + ir_expression pseudo_root = ir_expression(ir_binop_add, &z, expr); + + unsigned size = tree_to_vine(&pseudo_root); + vine_to_tree(&pseudo_root, size); + + expr = (ir_expression *)pseudo_root.operands[1]; + } + return expr; +} + +static void +update_types(ir_instruction *ir, void *) +{ + ir_expression *expr = ir->as_expression(); + if (!expr) + return; + + const glsl_type *const new_type = + glsl_type::get_instance(expr->type->base_type, + MAX2(expr->operands[0]->type->vector_elements, + expr->operands[1]->type->vector_elements), + 1); + assert(new_type != glsl_type::error_type); + expr->type = new_type; +} + +void 
+ir_rebalance_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_expression *expr = (*rvalue)->as_expression(); + if (!expr || !is_reduction_operation(expr->operation)) + return; + + ir_rvalue *new_rvalue = handle_expression(expr); + + /* If we failed to rebalance the tree (e.g., because it wasn't a reduction, + * or some other set of cases) new_rvalue will point to the same root as + * before. + * + * Similarly, if the tree rooted at *rvalue was a reduction and was already + * balanced, the algorithm will rearrange the tree but will ultimately + * return an identical tree, so this check will handle that as well and + * will not set progress = true. + */ + if (new_rvalue == *rvalue) + return; + + visit_tree(new_rvalue, NULL, NULL, update_types); + + *rvalue = new_rvalue; + this->progress = true; +} + +bool +do_rebalance_tree(exec_list *instructions) +{ + ir_rebalance_visitor v; + + v.run(instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/opt_redundant_jumps.cpp b/src/compiler/glsl/opt_redundant_jumps.cpp new file mode 100644 index 0000000..ee384d0 --- /dev/null +++ b/src/compiler/glsl/opt_redundant_jumps.cpp @@ -0,0 +1,124 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_redundant_jumps.cpp + * Remove certain types of redundant jumps + */ + +#include "ir.h" + +namespace { + +class redundant_jumps_visitor : public ir_hierarchical_visitor { +public: + redundant_jumps_visitor() + { + this->progress = false; + } + + virtual ir_visitor_status visit_leave(ir_if *); + virtual ir_visitor_status visit_leave(ir_loop *); + virtual ir_visitor_status visit_enter(ir_assignment *); + + bool progress; +}; + +} /* unnamed namespace */ + +/* We only care about the top level instructions, so don't descend + * into expressions. + */ +ir_visitor_status +redundant_jumps_visitor::visit_enter(ir_assignment *) +{ + return visit_continue_with_parent; +} + +ir_visitor_status +redundant_jumps_visitor::visit_leave(ir_if *ir) +{ + /* If the last instruction in both branches is a 'break' or a 'continue', + * pull it out of the branches and insert it after the if-statement. Note + * that both must be the same type (either 'break' or 'continue').
+ */ + ir_instruction *const last_then = + (ir_instruction *) ir->then_instructions.get_tail(); + ir_instruction *const last_else = + (ir_instruction *) ir->else_instructions.get_tail(); + + if ((last_then == NULL) || (last_else == NULL)) + return visit_continue; + + if ((last_then->ir_type != ir_type_loop_jump) + || (last_else->ir_type != ir_type_loop_jump)) + return visit_continue; + + ir_loop_jump *const then_jump = (ir_loop_jump *) last_then; + ir_loop_jump *const else_jump = (ir_loop_jump *) last_else; + + if (then_jump->mode != else_jump->mode) + return visit_continue; + + then_jump->remove(); + else_jump->remove(); + this->progress = true; + + ir->insert_after(then_jump); + + /* If both branches of the if-statement are now empty, remove the + * if-statement. + */ + if (ir->then_instructions.is_empty() && ir->else_instructions.is_empty()) + ir->remove(); + + return visit_continue; +} + + +ir_visitor_status +redundant_jumps_visitor::visit_leave(ir_loop *ir) +{ + /* If the last instruction of a loop body is a 'continue', remove it.
+ */ + ir_instruction *const last = + (ir_instruction *) ir->body_instructions.get_tail(); + + if (last && (last->ir_type == ir_type_loop_jump) + && (((ir_loop_jump *) last)->mode == ir_loop_jump::jump_continue)) { + last->remove(); + this->progress = true; + } + + return visit_continue; +} + + +bool +optimize_redundant_jumps(exec_list *instructions) +{ + redundant_jumps_visitor v; + + v.run(instructions); + return v.progress; +} diff --git a/src/compiler/glsl/opt_structure_splitting.cpp b/src/compiler/glsl/opt_structure_splitting.cpp new file mode 100644 index 0000000..0d18a2f --- /dev/null +++ b/src/compiler/glsl/opt_structure_splitting.cpp @@ -0,0 +1,367 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +/** + * \file opt_structure_splitting.cpp + * + * If a structure is only ever referenced by its components, then + * split those components out to individual variables so they can be + * handled normally by other optimization passes. + * + * This skips structures like uniforms, which need to be accessible as + * structures for their access by the GL. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_rvalue_visitor.h" +#include "compiler/glsl_types.h" + +namespace { + +static bool debug = false; + +class variable_entry : public exec_node +{ +public: + variable_entry(ir_variable *var) + { + this->var = var; + this->whole_structure_access = 0; + this->declaration = false; + this->components = NULL; + this->mem_ctx = NULL; + } + + ir_variable *var; /* The key: the variable's pointer. */ + + /** Number of times the variable is referenced, including assignments. */ + unsigned whole_structure_access; + + /* If the variable had a decl we can work with in the instruction + * stream. We can't do splitting on function arguments, which + * don't get this variable set. + */ + bool declaration; + + ir_variable **components; + + /** ralloc_parent(this->var) -- the shader's ralloc context. 
*/ + void *mem_ctx; +}; + + +class ir_structure_reference_visitor : public ir_hierarchical_visitor { +public: + ir_structure_reference_visitor(void) + { + this->mem_ctx = ralloc_context(NULL); + this->variable_list.make_empty(); + } + + ~ir_structure_reference_visitor(void) + { + ralloc_free(mem_ctx); + } + + virtual ir_visitor_status visit(ir_variable *); + virtual ir_visitor_status visit(ir_dereference_variable *); + virtual ir_visitor_status visit_enter(ir_dereference_record *); + virtual ir_visitor_status visit_enter(ir_assignment *); + virtual ir_visitor_status visit_enter(ir_function_signature *); + + variable_entry *get_variable_entry(ir_variable *var); + + /* List of variable_entry */ + exec_list variable_list; + + void *mem_ctx; +}; + +variable_entry * +ir_structure_reference_visitor::get_variable_entry(ir_variable *var) +{ + assert(var); + + if (!var->type->is_record() || + var->data.mode == ir_var_uniform || var->data.mode == ir_var_shader_storage || + var->data.mode == ir_var_shader_in || var->data.mode == ir_var_shader_out) + return NULL; + + foreach_in_list(variable_entry, entry, &this->variable_list) { + if (entry->var == var) + return entry; + } + + variable_entry *entry = new(mem_ctx) variable_entry(var); + this->variable_list.push_tail(entry); + return entry; +} + + +ir_visitor_status +ir_structure_reference_visitor::visit(ir_variable *ir) +{ + variable_entry *entry = this->get_variable_entry(ir); + + if (entry) + entry->declaration = true; + + return visit_continue; +} + +ir_visitor_status +ir_structure_reference_visitor::visit(ir_dereference_variable *ir) +{ + ir_variable *const var = ir->variable_referenced(); + variable_entry *entry = this->get_variable_entry(var); + + if (entry) + entry->whole_structure_access++; + + return visit_continue; +} + +ir_visitor_status +ir_structure_reference_visitor::visit_enter(ir_dereference_record *ir) +{ + (void) ir; + /* Don't descend into the ir_dereference_variable below. 
*/ + return visit_continue_with_parent; +} + +ir_visitor_status +ir_structure_reference_visitor::visit_enter(ir_assignment *ir) +{ + /* If there are no structure references yet, no need to bother with + * processing the expression tree. + */ + if (this->variable_list.is_empty()) + return visit_continue_with_parent; + + if (ir->lhs->as_dereference_variable() && + ir->rhs->as_dereference_variable() && + !ir->condition) { + /* We'll split copies of a structure to copies of components, so don't + * descend to the ir_dereference_variables. + */ + return visit_continue_with_parent; + } + return visit_continue; +} + +ir_visitor_status +ir_structure_reference_visitor::visit_enter(ir_function_signature *ir) +{ + /* We don't have logic for structure-splitting function arguments, + * so just look at the body instructions and not the parameter + * declarations. + */ + visit_list_elements(this, &ir->body); + return visit_continue_with_parent; +} + +class ir_structure_splitting_visitor : public ir_rvalue_visitor { +public: + ir_structure_splitting_visitor(exec_list *vars) + { + this->variable_list = vars; + } + + virtual ~ir_structure_splitting_visitor() + { + } + + virtual ir_visitor_status visit_leave(ir_assignment *); + + void split_deref(ir_dereference **deref); + void handle_rvalue(ir_rvalue **rvalue); + variable_entry *get_splitting_entry(ir_variable *var); + + exec_list *variable_list; +}; + +variable_entry * +ir_structure_splitting_visitor::get_splitting_entry(ir_variable *var) +{ + assert(var); + + if (!var->type->is_record()) + return NULL; + + foreach_in_list(variable_entry, entry, this->variable_list) { + if (entry->var == var) { + return entry; + } + } + + return NULL; +} + +void +ir_structure_splitting_visitor::split_deref(ir_dereference **deref) +{ + if ((*deref)->ir_type != ir_type_dereference_record) + return; + + ir_dereference_record *deref_record = (ir_dereference_record *)*deref; + ir_dereference_variable *deref_var = 
deref_record->record->as_dereference_variable(); + if (!deref_var) + return; + + variable_entry *entry = get_splitting_entry(deref_var->var); + if (!entry) + return; + + unsigned int i; + for (i = 0; i < entry->var->type->length; i++) { + if (strcmp(deref_record->field, + entry->var->type->fields.structure[i].name) == 0) + break; + } + assert(i != entry->var->type->length); + + *deref = new(entry->mem_ctx) ir_dereference_variable(entry->components[i]); +} + +void +ir_structure_splitting_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_dereference *deref = (*rvalue)->as_dereference(); + + if (!deref) + return; + + split_deref(&deref); + *rvalue = deref; +} + +ir_visitor_status +ir_structure_splitting_visitor::visit_leave(ir_assignment *ir) +{ + ir_dereference_variable *lhs_deref = ir->lhs->as_dereference_variable(); + ir_dereference_variable *rhs_deref = ir->rhs->as_dereference_variable(); + variable_entry *lhs_entry = lhs_deref ? get_splitting_entry(lhs_deref->var) : NULL; + variable_entry *rhs_entry = rhs_deref ? get_splitting_entry(rhs_deref->var) : NULL; + const glsl_type *type = ir->rhs->type; + + if ((lhs_entry || rhs_entry) && !ir->condition) { + for (unsigned int i = 0; i < type->length; i++) { + ir_dereference *new_lhs, *new_rhs; + void *mem_ctx = lhs_entry ? 
lhs_entry->mem_ctx : rhs_entry->mem_ctx; + + if (lhs_entry) { + new_lhs = new(mem_ctx) ir_dereference_variable(lhs_entry->components[i]); + } else { + new_lhs = new(mem_ctx) + ir_dereference_record(ir->lhs->clone(mem_ctx, NULL), + type->fields.structure[i].name); + } + + if (rhs_entry) { + new_rhs = new(mem_ctx) ir_dereference_variable(rhs_entry->components[i]); + } else { + new_rhs = new(mem_ctx) + ir_dereference_record(ir->rhs->clone(mem_ctx, NULL), + type->fields.structure[i].name); + } + + ir->insert_before(new(mem_ctx) ir_assignment(new_lhs, + new_rhs, + NULL)); + } + ir->remove(); + } else { + handle_rvalue(&ir->rhs); + split_deref(&ir->lhs); + } + + handle_rvalue(&ir->condition); + + return visit_continue; +} + +} /* unnamed namespace */ + +bool +do_structure_splitting(exec_list *instructions) +{ + ir_structure_reference_visitor refs; + + visit_list_elements(&refs, instructions); + + /* Trim out variables we can't split. */ + foreach_in_list_safe(variable_entry, entry, &refs.variable_list) { + if (debug) { + printf("structure %s@%p: decl %d, whole_access %d\n", + entry->var->name, (void *) entry->var, entry->declaration, + entry->whole_structure_access); + } + + if (!entry->declaration || entry->whole_structure_access) { + entry->remove(); + } + } + + if (refs.variable_list.is_empty()) + return false; + + void *mem_ctx = ralloc_context(NULL); + + /* Replace the decls of the structures to be split with their split + * components. 
+ */ + foreach_in_list_safe(variable_entry, entry, &refs.variable_list) { + const struct glsl_type *type = entry->var->type; + + entry->mem_ctx = ralloc_parent(entry->var); + + entry->components = ralloc_array(mem_ctx, + ir_variable *, + type->length); + + for (unsigned int i = 0; i < entry->var->type->length; i++) { + const char *name = ralloc_asprintf(mem_ctx, "%s_%s", + entry->var->name, + type->fields.structure[i].name); + + entry->components[i] = + new(entry->mem_ctx) ir_variable(type->fields.structure[i].type, + name, + ir_var_temporary); + entry->var->insert_before(entry->components[i]); + } + + entry->var->remove(); + } + + ir_structure_splitting_visitor split(&refs.variable_list); + visit_list_elements(&split, instructions); + + ralloc_free(mem_ctx); + + return true; +} diff --git a/src/compiler/glsl/opt_swizzle_swizzle.cpp b/src/compiler/glsl/opt_swizzle_swizzle.cpp new file mode 100644 index 0000000..7285474 --- /dev/null +++ b/src/compiler/glsl/opt_swizzle_swizzle.cpp @@ -0,0 +1,97 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_swizzle_swizzle.cpp + * + * Eliminates the second swizzle in a swizzle chain. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_optimization.h" +#include "compiler/glsl_types.h" + +namespace { + +class ir_swizzle_swizzle_visitor : public ir_hierarchical_visitor { +public: + ir_swizzle_swizzle_visitor() + { + progress = false; + } + + virtual ir_visitor_status visit_enter(ir_swizzle *); + + bool progress; +}; + +} /* unnamed namespace */ + +ir_visitor_status +ir_swizzle_swizzle_visitor::visit_enter(ir_swizzle *ir) +{ + int mask2[4]; + + ir_swizzle *swiz2 = ir->val->as_swizzle(); + if (!swiz2) + return visit_continue; + + memset(&mask2, 0, sizeof(mask2)); + if (swiz2->mask.num_components >= 1) + mask2[0] = swiz2->mask.x; + if (swiz2->mask.num_components >= 2) + mask2[1] = swiz2->mask.y; + if (swiz2->mask.num_components >= 3) + mask2[2] = swiz2->mask.z; + if (swiz2->mask.num_components >= 4) + mask2[3] = swiz2->mask.w; + + if (ir->mask.num_components >= 1) + ir->mask.x = mask2[ir->mask.x]; + if (ir->mask.num_components >= 2) + ir->mask.y = mask2[ir->mask.y]; + if (ir->mask.num_components >= 3) + ir->mask.z = mask2[ir->mask.z]; + if (ir->mask.num_components >= 4) + ir->mask.w = mask2[ir->mask.w]; + + ir->val = swiz2->val; + + this->progress = true; + + return visit_continue; +} + +/** + * Does a copy propagation pass on the code present in the instruction stream. 
+ */ +bool +do_swizzle_swizzle(exec_list *instructions) +{ + ir_swizzle_swizzle_visitor v; + + v.run(instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/opt_tree_grafting.cpp b/src/compiler/glsl/opt_tree_grafting.cpp new file mode 100644 index 0000000..83effb7 --- /dev/null +++ b/src/compiler/glsl/opt_tree_grafting.cpp @@ -0,0 +1,404 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_tree_grafting.cpp + * + * Takes assignments to variables that are dereferenced only once and + * pastes the RHS expression into where the variable is dereferenced. + * + * In the process of various operations like function inlining and + * tertiary op handling, we'll end up with our expression trees having + * been chopped up into a series of assignments of short expressions + * to temps. 
Other passes like ir_algebraic.cpp would prefer to see + * the deepest expression trees they can to try to optimize them. + * + * This is a lot like copy propagaton. In comparison, copy + * propagation only acts on plain copies, not arbitrary expressions on + * the RHS. Generally, we wouldn't want to go pasting some + * complicated expression everywhere it got used, though, so we don't + * handle expressions in that pass. + * + * The hard part is making sure we don't move an expression across + * some other assignments that would change the value of the + * expression. So we split this into two passes: First, find the + * variables in our scope which are written to once and read once, and + * then go through basic blocks seeing if we find an opportunity to + * move those expressions safely. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_variable_refcount.h" +#include "ir_basic_block.h" +#include "ir_optimization.h" +#include "compiler/glsl_types.h" + +namespace { + +static bool debug = false; + +class ir_tree_grafting_visitor : public ir_hierarchical_visitor { +public: + ir_tree_grafting_visitor(ir_assignment *graft_assign, + ir_variable *graft_var) + { + this->progress = false; + this->graft_assign = graft_assign; + this->graft_var = graft_var; + } + + virtual ir_visitor_status visit_leave(class ir_assignment *); + virtual ir_visitor_status visit_enter(class ir_call *); + virtual ir_visitor_status visit_enter(class ir_expression *); + virtual ir_visitor_status visit_enter(class ir_function *); + virtual ir_visitor_status visit_enter(class ir_function_signature *); + virtual ir_visitor_status visit_enter(class ir_if *); + virtual ir_visitor_status visit_enter(class ir_loop *); + virtual ir_visitor_status visit_enter(class ir_swizzle *); + virtual ir_visitor_status visit_enter(class ir_texture *); + + ir_visitor_status check_graft(ir_instruction *ir, ir_variable *var); + + bool do_graft(ir_rvalue **rvalue); + + bool progress; + ir_variable 
*graft_var; + ir_assignment *graft_assign; +}; + +struct find_deref_info { + ir_variable *var; + bool found; +}; + +void +dereferences_variable_callback(ir_instruction *ir, void *data) +{ + struct find_deref_info *info = (struct find_deref_info *)data; + ir_dereference_variable *deref = ir->as_dereference_variable(); + + if (deref && deref->var == info->var) + info->found = true; +} + +static bool +dereferences_variable(ir_instruction *ir, ir_variable *var) +{ + struct find_deref_info info; + + info.var = var; + info.found = false; + + visit_tree(ir, dereferences_variable_callback, &info); + + return info.found; +} + +bool +ir_tree_grafting_visitor::do_graft(ir_rvalue **rvalue) +{ + if (!*rvalue) + return false; + + ir_dereference_variable *deref = (*rvalue)->as_dereference_variable(); + + if (!deref || deref->var != this->graft_var) + return false; + + if (debug) { + fprintf(stderr, "GRAFTING:\n"); + this->graft_assign->fprint(stderr); + fprintf(stderr, "\n"); + fprintf(stderr, "TO:\n"); + (*rvalue)->fprint(stderr); + fprintf(stderr, "\n"); + } + + this->graft_assign->remove(); + *rvalue = this->graft_assign->rhs; + + this->progress = true; + return true; +} + +ir_visitor_status +ir_tree_grafting_visitor::visit_enter(ir_loop *ir) +{ + (void)ir; + /* Do not traverse into the body of the loop since that is a + * different basic block. + */ + return visit_stop; +} + +/** + * Check if we can continue grafting after writing to a variable. If the + * expression we're trying to graft references the variable, we must stop. + * + * \param ir An instruction that writes to a variable. + * \param var The variable being updated. 
+ */ +ir_visitor_status +ir_tree_grafting_visitor::check_graft(ir_instruction *ir, ir_variable *var) +{ + if (dereferences_variable(this->graft_assign->rhs, var)) { + if (debug) { + fprintf(stderr, "graft killed by: "); + ir->fprint(stderr); + fprintf(stderr, "\n"); + } + return visit_stop; + } + + return visit_continue; +} + +ir_visitor_status +ir_tree_grafting_visitor::visit_leave(ir_assignment *ir) +{ + if (do_graft(&ir->rhs) || + do_graft(&ir->condition)) + return visit_stop; + + /* If this assignment updates a variable used in the assignment + * we're trying to graft, then we're done. + */ + return check_graft(ir, ir->lhs->variable_referenced()); +} + +ir_visitor_status +ir_tree_grafting_visitor::visit_enter(ir_function *ir) +{ + (void) ir; + return visit_continue_with_parent; +} + +ir_visitor_status +ir_tree_grafting_visitor::visit_enter(ir_function_signature *ir) +{ + (void)ir; + return visit_continue_with_parent; +} + +ir_visitor_status +ir_tree_grafting_visitor::visit_enter(ir_call *ir) +{ + foreach_two_lists(formal_node, &ir->callee->parameters, + actual_node, &ir->actual_parameters) { + ir_variable *sig_param = (ir_variable *) formal_node; + ir_rvalue *ir = (ir_rvalue *) actual_node; + ir_rvalue *new_ir = ir; + + if (sig_param->data.mode != ir_var_function_in + && sig_param->data.mode != ir_var_const_in) { + if (check_graft(ir, sig_param) == visit_stop) + return visit_stop; + continue; + } + + if (do_graft(&new_ir)) { + ir->replace_with(new_ir); + return visit_stop; + } + } + + if (ir->return_deref && check_graft(ir, ir->return_deref->var) == visit_stop) + return visit_stop; + + return visit_continue; +} + +ir_visitor_status +ir_tree_grafting_visitor::visit_enter(ir_expression *ir) +{ + for (unsigned int i = 0; i < ir->get_num_operands(); i++) { + if (do_graft(&ir->operands[i])) + return visit_stop; + } + + return visit_continue; +} + +ir_visitor_status +ir_tree_grafting_visitor::visit_enter(ir_if *ir) +{ + if (do_graft(&ir->condition)) + return 
visit_stop; + + /* Do not traverse into the body of the if-statement since that is a + * different basic block. + */ + return visit_continue_with_parent; +} + +ir_visitor_status +ir_tree_grafting_visitor::visit_enter(ir_swizzle *ir) +{ + if (do_graft(&ir->val)) + return visit_stop; + + return visit_continue; +} + +ir_visitor_status +ir_tree_grafting_visitor::visit_enter(ir_texture *ir) +{ + if (do_graft(&ir->coordinate) || + do_graft(&ir->projector) || + do_graft(&ir->offset) || + do_graft(&ir->shadow_comparitor)) + return visit_stop; + + switch (ir->op) { + case ir_tex: + case ir_lod: + case ir_query_levels: + case ir_texture_samples: + case ir_samples_identical: + break; + case ir_txb: + if (do_graft(&ir->lod_info.bias)) + return visit_stop; + break; + case ir_txf: + case ir_txl: + case ir_txs: + if (do_graft(&ir->lod_info.lod)) + return visit_stop; + break; + case ir_txf_ms: + if (do_graft(&ir->lod_info.sample_index)) + return visit_stop; + break; + case ir_txd: + if (do_graft(&ir->lod_info.grad.dPdx) || + do_graft(&ir->lod_info.grad.dPdy)) + return visit_stop; + break; + case ir_tg4: + if (do_graft(&ir->lod_info.component)) + return visit_stop; + break; + } + + return visit_continue; +} + +struct tree_grafting_info { + ir_variable_refcount_visitor *refs; + bool progress; +}; + +static bool +try_tree_grafting(ir_assignment *start, + ir_variable *lhs_var, + ir_instruction *bb_last) +{ + ir_tree_grafting_visitor v(start, lhs_var); + + if (debug) { + fprintf(stderr, "trying to graft: "); + lhs_var->fprint(stderr); + fprintf(stderr, "\n"); + } + + for (ir_instruction *ir = (ir_instruction *)start->next; + ir != bb_last->next; + ir = (ir_instruction *)ir->next) { + + if (debug) { + fprintf(stderr, "- "); + ir->fprint(stderr); + fprintf(stderr, "\n"); + } + + ir_visitor_status s = ir->accept(&v); + if (s == visit_stop) + return v.progress; + } + + return false; +} + +static void +tree_grafting_basic_block(ir_instruction *bb_first, + ir_instruction *bb_last, + void 
*data) +{ + struct tree_grafting_info *info = (struct tree_grafting_info *)data; + ir_instruction *ir, *next; + + for (ir = bb_first, next = (ir_instruction *)ir->next; + ir != bb_last->next; + ir = next, next = (ir_instruction *)ir->next) { + ir_assignment *assign = ir->as_assignment(); + + if (!assign) + continue; + + ir_variable *lhs_var = assign->whole_variable_written(); + if (!lhs_var) + continue; + + if (lhs_var->data.mode == ir_var_function_out || + lhs_var->data.mode == ir_var_function_inout || + lhs_var->data.mode == ir_var_shader_out || + lhs_var->data.mode == ir_var_shader_storage) + continue; + + ir_variable_refcount_entry *entry = info->refs->get_variable_entry(lhs_var); + + if (!entry->declaration || + entry->assigned_count != 1 || + entry->referenced_count != 2) + continue; + + /* Found a possibly graftable assignment. Now, walk through the + * rest of the BB seeing if the deref is here, and if nothing interfered with + * pasting its expression's values in between. + */ + info->progress |= try_tree_grafting(assign, lhs_var, bb_last); + } +} + +} /* unnamed namespace */ + +/** + * Does a copy propagation pass on the code present in the instruction stream. 
+ */ +bool +do_tree_grafting(exec_list *instructions) +{ + ir_variable_refcount_visitor refs; + struct tree_grafting_info info; + + info.progress = false; + info.refs = &refs; + + visit_list_elements(info.refs, instructions); + + call_for_basic_blocks(instructions, tree_grafting_basic_block, &info); + + return info.progress; +} diff --git a/src/compiler/glsl/opt_vectorize.cpp b/src/compiler/glsl/opt_vectorize.cpp new file mode 100644 index 0000000..88318cd --- /dev/null +++ b/src/compiler/glsl/opt_vectorize.cpp @@ -0,0 +1,407 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_vectorize.cpp + * + * Combines scalar assignments of the same expression (modulo swizzle) to + * multiple channels of the same variable into a single vectorized expression + * and assignment. + * + * Many generated shaders contain scalarized code. 
That is, they contain + * + * r1.x = log2(v0.x); + * r1.y = log2(v0.y); + * r1.z = log2(v0.z); + * + * rather than + * + * r1.xyz = log2(v0.xyz); + * + * We look for consecutive assignments of the same expression (modulo swizzle) + * to each channel of the same variable. + * + * For instance, we want to convert these three scalar operations + * + * (assign (x) (var_ref r1) (expression float log2 (swiz x (var_ref v0)))) + * (assign (y) (var_ref r1) (expression float log2 (swiz y (var_ref v0)))) + * (assign (z) (var_ref r1) (expression float log2 (swiz z (var_ref v0)))) + * + * into a single vector operation + * + * (assign (xyz) (var_ref r1) (expression vec3 log2 (swiz xyz (var_ref v0)))) + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_optimization.h" +#include "compiler/glsl_types.h" +#include "program/prog_instruction.h" + +namespace { + +class ir_vectorize_visitor : public ir_hierarchical_visitor { +public: + void clear() + { + assignment[0] = NULL; + assignment[1] = NULL; + assignment[2] = NULL; + assignment[3] = NULL; + current_assignment = NULL; + last_assignment = NULL; + channels = 0; + has_swizzle = false; + } + + ir_vectorize_visitor() + { + clear(); + progress = false; + } + + virtual ir_visitor_status visit_enter(ir_assignment *); + virtual ir_visitor_status visit_enter(ir_swizzle *); + virtual ir_visitor_status visit_enter(ir_dereference_array *); + virtual ir_visitor_status visit_enter(ir_expression *); + virtual ir_visitor_status visit_enter(ir_if *); + virtual ir_visitor_status visit_enter(ir_loop *); + virtual ir_visitor_status visit_enter(ir_texture *); + + virtual ir_visitor_status visit_leave(ir_assignment *); + + void try_vectorize(); + + ir_assignment *assignment[4]; + ir_assignment *current_assignment, *last_assignment; + unsigned channels; + bool has_swizzle; + + bool progress; +}; + +} /* unnamed namespace */ + +/** + * Rewrites the swizzles and types of a right-hand side of an assignment. 
+ * + * From the example above, this function would be called (by visit_tree()) on + * the nodes of the tree (expression float log2 (swiz z (var_ref v0))), + * rewriting it into (expression vec3 log2 (swiz xyz (var_ref v0))). + * + * The function operates on ir_expressions (and their operands) and ir_swizzles. + * For expressions it sets a new type and swizzles any non-expression and non- + * swizzle scalar operands into appropriately sized vector arguments. For + * example, if combining + * + * (assign (x) (var_ref r1) (expression float + (swiz x (var_ref v0)) (var_ref v1))) + * (assign (y) (var_ref r1) (expression float + (swiz y (var_ref v0)) (var_ref v1))) + * + * where v1 is a scalar, rewrite_swizzle() would insert a swizzle on + * (var_ref v1) such that the final result was + * + * (assign (xy) (var_ref r1) (expression vec2 + (swiz xy (var_ref v0)) + * (swiz xx (var_ref v1)))) + * + * For swizzles, it sets a new type, and if the variable being swizzled is a + * vector it overwrites the swizzle mask with the ir_swizzle_mask passed as the + * data parameter. If the swizzled variable is scalar, then the swizzle was + * added by an earlier call to rewrite_swizzle() on an expression, so the + * mask should not be modified.
+ */ +static void +rewrite_swizzle(ir_instruction *ir, void *data) +{ + ir_swizzle_mask *mask = (ir_swizzle_mask *)data; + + switch (ir->ir_type) { + case ir_type_swizzle: { + ir_swizzle *swz = (ir_swizzle *)ir; + if (swz->val->type->is_vector()) { + swz->mask = *mask; + } + swz->type = glsl_type::get_instance(swz->type->base_type, + mask->num_components, 1); + break; + } + case ir_type_expression: { + ir_expression *expr = (ir_expression *)ir; + expr->type = glsl_type::get_instance(expr->type->base_type, + mask->num_components, 1); + for (unsigned i = 0; i < 4; i++) { + if (expr->operands[i]) { + ir_rvalue *rval = expr->operands[i]->as_rvalue(); + if (rval && rval->type->is_scalar() && + !rval->as_expression() && !rval->as_swizzle()) { + expr->operands[i] = new(ir) ir_swizzle(rval, 0, 0, 0, 0, + mask->num_components); + } + } + } + break; + } + default: + break; + } +} + +/** + * Attempt to vectorize the previously saved assignments, and clear them from + * consideration. + * + * If the assignments are able to be combined, it modifies in-place the last + * assignment seen to be an equivalent vector form of the scalar assignments. + * It then removes the other now obsolete scalar assignments. + */ +void +ir_vectorize_visitor::try_vectorize() +{ + if (this->last_assignment && this->channels > 1) { + ir_swizzle_mask mask = {0, 0, 0, 0, channels, 0}; + + this->last_assignment->write_mask = 0; + + for (unsigned i = 0, j = 0; i < 4; i++) { + if (this->assignment[i]) { + this->last_assignment->write_mask |= 1 << i; + + if (this->assignment[i] != this->last_assignment) { + this->assignment[i]->remove(); + } + + switch (j) { + case 0: mask.x = i; break; + case 1: mask.y = i; break; + case 2: mask.z = i; break; + case 3: mask.w = i; break; + } + + j++; + } + } + + visit_tree(this->last_assignment->rhs, rewrite_swizzle, &mask); + + this->progress = true; + } + clear(); +} + +/** + * Returns whether the write mask is a single channel. 
+ */ +static bool +single_channel_write_mask(unsigned write_mask) +{ + return write_mask != 0 && (write_mask & (write_mask - 1)) == 0; +} + +/** + * Translates single-channeled write mask to single-channeled swizzle. + */ +static unsigned +write_mask_to_swizzle(unsigned write_mask) +{ + switch (write_mask) { + case WRITEMASK_X: return SWIZZLE_X; + case WRITEMASK_Y: return SWIZZLE_Y; + case WRITEMASK_Z: return SWIZZLE_Z; + case WRITEMASK_W: return SWIZZLE_W; + } + unreachable("not reached"); +} + +/** + * Returns whether a single-channeled write mask matches a swizzle. + */ +static bool +write_mask_matches_swizzle(unsigned write_mask, + const ir_swizzle *swz) +{ + return ((write_mask == WRITEMASK_X && swz->mask.x == SWIZZLE_X) || + (write_mask == WRITEMASK_Y && swz->mask.x == SWIZZLE_Y) || + (write_mask == WRITEMASK_Z && swz->mask.x == SWIZZLE_Z) || + (write_mask == WRITEMASK_W && swz->mask.x == SWIZZLE_W)); +} + +/** + * Upon entering an ir_assignment, attempt to vectorize the currently tracked + * assignments if the current assignment is not suitable. Keep a pointer to + * the current assignment. + */ +ir_visitor_status +ir_vectorize_visitor::visit_enter(ir_assignment *ir) +{ + ir_dereference *lhs = this->last_assignment != NULL ? + this->last_assignment->lhs : NULL; + ir_rvalue *rhs = this->last_assignment != NULL ? + this->last_assignment->rhs : NULL; + + if (ir->condition || + this->channels >= 4 || + !single_channel_write_mask(ir->write_mask) || + this->assignment[write_mask_to_swizzle(ir->write_mask)] != NULL || + (lhs && !ir->lhs->equals(lhs)) || + (rhs && !ir->rhs->equals(rhs, ir_type_swizzle))) { + try_vectorize(); + } + + this->current_assignment = ir; + + return visit_continue; +} + +/** + * Upon entering an ir_swizzle, set ::has_swizzle if we're visiting from an + * ir_assignment (i.e., that ::current_assignment is set) and the swizzle mask + * matches the current assignment's write mask. 
+ * + * If the write mask doesn't match the swizzle mask, remove the current + * assignment from further consideration. + */ +ir_visitor_status +ir_vectorize_visitor::visit_enter(ir_swizzle *ir) +{ + if (this->current_assignment) { + if (write_mask_matches_swizzle(this->current_assignment->write_mask, ir)) { + this->has_swizzle = true; + } else { + this->current_assignment = NULL; + } + } + return visit_continue; +} + +/* Upon entering an ir_array_dereference, remove the current assignment from + * further consideration. Since the index of an array dereference must scalar, + * we are not able to vectorize it. + * + * FINISHME: If all of scalar indices are identical we could vectorize. + */ +ir_visitor_status +ir_vectorize_visitor::visit_enter(ir_dereference_array *) +{ + this->current_assignment = NULL; + return visit_continue_with_parent; +} + +/** + * Upon entering an ir_expression, remove the current assignment from further + * consideration if the expression operates horizontally on vectors. + */ +ir_visitor_status +ir_vectorize_visitor::visit_enter(ir_expression *ir) +{ + if (ir->is_horizontal()) { + this->current_assignment = NULL; + return visit_continue_with_parent; + } + return visit_continue; +} + +/* Since there is no statement to visit between the "then" and "else" + * instructions try to vectorize before, in between, and after them to avoid + * combining statements from different basic blocks. + */ +ir_visitor_status +ir_vectorize_visitor::visit_enter(ir_if *ir) +{ + try_vectorize(); + + visit_list_elements(this, &ir->then_instructions); + try_vectorize(); + + visit_list_elements(this, &ir->else_instructions); + try_vectorize(); + + return visit_continue_with_parent; +} + +/* Since there is no statement to visit between the instructions in the body of + * the loop and the instructions after it try to vectorize before and after the + * body to avoid combining statements from different basic blocks. 
+ */ +ir_visitor_status +ir_vectorize_visitor::visit_enter(ir_loop *ir) +{ + try_vectorize(); + + visit_list_elements(this, &ir->body_instructions); + try_vectorize(); + + return visit_continue_with_parent; +} + +/** + * Upon entering an ir_texture, remove the current assignment from + * further consideration. Vectorizing multiple texture lookups into one + * is wrong. + */ +ir_visitor_status +ir_vectorize_visitor::visit_enter(ir_texture *) +{ + this->current_assignment = NULL; + return visit_continue_with_parent; +} + +/** + * Upon leaving an ir_assignment, save a pointer to it in ::assignment[] if + * the swizzle mask(s) found were appropriate. Also save a pointer in + * ::last_assignment so that we can compare future assignments with it. + * + * Finally, clear ::current_assignment and ::has_swizzle. + */ +ir_visitor_status +ir_vectorize_visitor::visit_leave(ir_assignment *ir) +{ + if (this->has_swizzle && this->current_assignment) { + assert(this->current_assignment == ir); + + unsigned channel = write_mask_to_swizzle(this->current_assignment->write_mask); + this->assignment[channel] = ir; + this->channels++; + + this->last_assignment = this->current_assignment; + } + this->current_assignment = NULL; + this->has_swizzle = false; + return visit_continue; +} + +/** + * Combines scalar assignments of the same expression (modulo swizzle) to + * multiple channels of the same variable into a single vectorized expression + * and assignment. + */ +bool +do_vectorize(exec_list *instructions) +{ + ir_vectorize_visitor v; + + v.run(instructions); + + /* Try to vectorize the last assignments seen. */ + v.try_vectorize(); + + return v.progress; +} diff --git a/src/compiler/glsl/program.h b/src/compiler/glsl/program.h new file mode 100644 index 0000000..64f5463 --- /dev/null +++ b/src/compiler/glsl/program.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. 
+ * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "main/core.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern void +_mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader, + bool dump_ast, bool dump_hir); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +extern void +link_shaders(struct gl_context *ctx, struct gl_shader_program *prog); + +extern void +build_program_resource_list(struct gl_shader_program *shProg); + +extern void +linker_error(struct gl_shader_program *prog, const char *fmt, ...) + PRINTFLIKE(2, 3); + +extern void +linker_warning(struct gl_shader_program *prog, const char *fmt, ...) 
+ PRINTFLIKE(2, 3); + +extern long +parse_program_resource_name(const GLchar *name, + const GLchar **out_base_name_end); diff --git a/src/compiler/glsl/s_expression.cpp b/src/compiler/glsl/s_expression.cpp new file mode 100644 index 0000000..f82e155 --- /dev/null +++ b/src/compiler/glsl/s_expression.cpp @@ -0,0 +1,218 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdio.h> +#include <math.h> +#include "s_expression.h" + +s_symbol::s_symbol(const char *str, size_t n) +{ + /* Assume the given string is already nul-terminated and in memory that + * will live as long as this node. 
+ */ + assert(str[n] == '\0'); + this->str = str; +} + +s_list::s_list() +{ +} + +static void +skip_whitespace(const char *&src, char *&symbol_buffer) +{ + size_t n = strspn(src, " \v\t\r\n"); + src += n; + symbol_buffer += n; + /* Also skip Scheme-style comments: semi-colon 'til end of line */ + if (src[0] == ';') { + n = strcspn(src, "\n"); + src += n; + symbol_buffer += n; + skip_whitespace(src, symbol_buffer); + } +} + +static s_expression * +read_atom(void *ctx, const char *&src, char *&symbol_buffer) +{ + s_expression *expr = NULL; + + skip_whitespace(src, symbol_buffer); + + size_t n = strcspn(src, "( \v\t\r\n);"); + if (n == 0) + return NULL; // no atom + + // Check for the special symbol '+INF', which means +Infinity. Note: C99 + // requires strtof to parse '+INF' as +Infinity, but we still support some + // non-C99-compliant compilers (e.g. MSVC). + if (n == 4 && strncmp(src, "+INF", 4) == 0) { + expr = new(ctx) s_float(INFINITY); + } else { + // Check if the atom is a number. + char *float_end = NULL; + float f = _mesa_strtof(src, &float_end); + if (float_end != src) { + char *int_end = NULL; + int i = strtol(src, &int_end, 10); + // If strtof matched more characters, it must have a decimal part + if (float_end > int_end) + expr = new(ctx) s_float(f); + else + expr = new(ctx) s_int(i); + } else { + // Not a number; return a symbol. 
+ symbol_buffer[n] = '\0'; + expr = new(ctx) s_symbol(symbol_buffer, n); + } + } + + src += n; + symbol_buffer += n; + + return expr; +} + +static s_expression * +__read_expression(void *ctx, const char *&src, char *&symbol_buffer) +{ + s_expression *atom = read_atom(ctx, src, symbol_buffer); + if (atom != NULL) + return atom; + + skip_whitespace(src, symbol_buffer); + if (src[0] == '(') { + ++src; + ++symbol_buffer; + + s_list *list = new(ctx) s_list; + s_expression *expr; + + while ((expr = __read_expression(ctx, src, symbol_buffer)) != NULL) { + list->subexpressions.push_tail(expr); + } + skip_whitespace(src, symbol_buffer); + if (src[0] != ')') { + printf("Unclosed expression (check your parenthesis).\n"); + return NULL; + } + ++src; + ++symbol_buffer; + return list; + } + return NULL; +} + +s_expression * +s_expression::read_expression(void *ctx, const char *&src) +{ + assert(src != NULL); + + /* When we encounter a Symbol, we need to save a nul-terminated copy of + * the string. However, ralloc_strndup'ing every individual Symbol is + * extremely expensive. We could avoid this by simply overwriting the + * next character (guaranteed to be whitespace, parens, or semicolon) with + * a nul-byte. But overwriting non-whitespace would mess up parsing. + * + * So, just copy the whole buffer ahead of time. Walk both, leaving the + * original source string unmodified, and altering the copy to contain the + * necessary nul-bytes whenever we encounter a symbol. 
+ */ + char *symbol_buffer = ralloc_strdup(ctx, src); + return __read_expression(ctx, src, symbol_buffer); +} + +void s_int::print() +{ + printf("%d", this->val); +} + +void s_float::print() +{ + printf("%f", this->val); +} + +void s_symbol::print() +{ + printf("%s", this->str); +} + +void s_list::print() +{ + printf("("); + foreach_in_list(s_expression, expr, &this->subexpressions) { + expr->print(); + if (!expr->next->is_tail_sentinel()) + printf(" "); + } + printf(")"); +} + +// -------------------------------------------------- + +/** + * Test \c expr against this pattern element. + * + * For the typed kinds (EXPR, LIST, SYMBOL, NUMBER, INT) a successful match + * stores \c expr through the pointer captured by the constructor. On a type + * mismatch the caller's storage is left untouched and is never read, so it + * may safely be uninitialized. For STRING the expression must be a symbol + * whose text equals the literal; nothing is stored. + * + * \param expr expression to test; must not be NULL for the typed kinds. + * \return true if \c expr matched, false otherwise. + */ +bool +s_pattern::match(s_expression *expr) +{ + switch (type) + { + case EXPR: + /* Any expression matches. */ + *p_expr = expr; + return true; + case LIST: + if (!expr->is_list()) + return false; + *p_list = (s_list *) expr; + return true; + case SYMBOL: + if (!expr->is_symbol()) + return false; + *p_symbol = (s_symbol *) expr; + return true; + case NUMBER: + if (!expr->is_number()) + return false; + *p_number = (s_number *) expr; + return true; + case INT: + if (!expr->is_int()) + return false; + *p_int = (s_int *) expr; + return true; + case STRING: { + s_symbol *sym = SX_AS_SYMBOL(expr); + return sym != NULL && strcmp(sym->value(), literal) == 0; + } + } + + /* Not reached for any valid pattern type. */ + return false; +} + +bool +s_match(s_expression *top, unsigned n, s_pattern *pattern, bool partial) +{ + s_list *list = SX_AS_LIST(top); + if (list == NULL) + return false; + + unsigned i = 0; + foreach_in_list(s_expression, expr, &list->subexpressions) { + if (i >= n) + return partial; /* More actual items than the pattern expected */ + + if (expr == NULL || !pattern[i].match(expr)) + return false; + + i++; + } + + if (i < n) + return false; /* Less actual items than the pattern expected */ + + return true; +} diff --git a/src/compiler/glsl/s_expression.h b/src/compiler/glsl/s_expression.h new file mode 100644 index 0000000..f0dffb1 --- /dev/null +++ b/src/compiler/glsl/s_expression.h @@ -0,0 +1,180 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation 
files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#pragma once +#ifndef S_EXPRESSION_H +#define S_EXPRESSION_H + +#include "main/core.h" /* for Elements */ +#include "util/strtod.h" +#include "list.h" + +/* Type-safe downcasting macros (also safe to pass NULL). + * + * Each one yields a pointer to the requested s_* class when the argument is + * non-NULL and its is_*() predicate returns true, and NULL otherwise. + * + * The expansion is a single, fully parenthesized expression, so the result + * can be used as an operand of a larger expression (comparison, negation, + * member access). The argument is evaluated more than once; do not pass an + * expression with side effects. + */ +#define SX_AS_(t,x) (((x) && ((s_expression*) (x))->is_##t()) ? ((s_##t*) (x)) \ + : NULL) +#define SX_AS_LIST(x) SX_AS_(list, x) +#define SX_AS_SYMBOL(x) SX_AS_(symbol, x) +#define SX_AS_NUMBER(x) SX_AS_(number, x) +#define SX_AS_INT(x) SX_AS_(int, x) + +/* Pattern matching macros */ +#define MATCH(list, pat) s_match(list, ARRAY_SIZE(pat), pat, false) +#define PARTIAL_MATCH(list, pat) s_match(list, ARRAY_SIZE(pat), pat, true) + +/* For our purposes, S-Expressions are: + * - <int> + * - <float> + * - symbol + * - (expr1 expr2 ... exprN) where exprN is an S-Expression + * + * Unlike LISP/Scheme, we do not support (foo . bar) pairs. + */ +class s_expression : public exec_node +{ +public: + /** + * Read an S-Expression from the given string. 
+ * Advances the supplied pointer to just after the expression read. + * + * Any allocation will be performed with 'ctx' as the ralloc owner. + */ + static s_expression *read_expression(void *ctx, const char *&src); + + /** + * Print out an S-Expression. Useful for debugging. + */ + virtual void print() = 0; + + virtual bool is_list() const { return false; } + virtual bool is_symbol() const { return false; } + virtual bool is_number() const { return false; } + virtual bool is_int() const { return false; } + +protected: + s_expression() { } +}; + +/* Atoms */ + +class s_number : public s_expression +{ +public: + bool is_number() const { return true; } + + virtual float fvalue() = 0; + +protected: + s_number() { } +}; + +class s_int : public s_number +{ +public: + s_int(int x) : val(x) { } + + bool is_int() const { return true; } + + float fvalue() { return float(this->val); } + int value() { return this->val; } + + void print(); + +private: + int val; +}; + +class s_float : public s_number +{ +public: + s_float(float x) : val(x) { } + + float fvalue() { return this->val; } + + void print(); + +private: + float val; +}; + +class s_symbol : public s_expression +{ +public: + s_symbol(const char *, size_t); + + bool is_symbol() const { return true; } + + const char *value() { return this->str; } + + void print(); + +private: + const char *str; +}; + +/* Lists of expressions: (expr1 ... exprN) */ +class s_list : public s_expression +{ +public: + s_list(); + + virtual bool is_list() const { return true; } + + void print(); + + exec_list subexpressions; +}; + +// ------------------------------------------------------------ + +/** + * Part of a pattern to match - essentially a record holding a pointer to the + * storage for the component to match, along with the appropriate type. 
+ */ +class s_pattern { +public: + s_pattern(s_expression *&s) : p_expr(&s), type(EXPR) { } + s_pattern(s_list *&s) : p_list(&s), type(LIST) { } + s_pattern(s_symbol *&s) : p_symbol(&s), type(SYMBOL) { } + s_pattern(s_number *&s) : p_number(&s), type(NUMBER) { } + s_pattern(s_int *&s) : p_int(&s), type(INT) { } + s_pattern(const char *str) : literal(str), type(STRING) { } + + bool match(s_expression *expr); + +private: + union { + s_expression **p_expr; + s_list **p_list; + s_symbol **p_symbol; + s_number **p_number; + s_int **p_int; + const char *literal; + }; + enum { EXPR, LIST, SYMBOL, NUMBER, INT, STRING } type; +}; + +bool +s_match(s_expression *top, unsigned n, s_pattern *pattern, bool partial); + +#endif /* S_EXPRESSION_H */ diff --git a/src/compiler/glsl/standalone_scaffolding.cpp b/src/compiler/glsl/standalone_scaffolding.cpp new file mode 100644 index 0000000..d5d214b --- /dev/null +++ b/src/compiler/glsl/standalone_scaffolding.cpp @@ -0,0 +1,221 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* This file declares stripped-down versions of functions that + * normally exist outside of the glsl folder, so that they can be used + * when running the GLSL compiler standalone (for unit testing or + * compiling builtins). + */ + +#include "standalone_scaffolding.h" + +#include <assert.h> +#include <stdio.h> +#include <string.h> +#include "util/ralloc.h" +#include "util/strtod.h" + +void +_mesa_warning(struct gl_context *ctx, const char *fmt, ...) +{ + va_list vargs; + (void) ctx; + + va_start(vargs, fmt); + + /* This output is not thread-safe, but that's good enough for the + * standalone compiler. + */ + fprintf(stderr, "Mesa warning: "); + vfprintf(stderr, fmt, vargs); + fprintf(stderr, "\n"); + + va_end(vargs); +} + +void +_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr, + struct gl_shader *sh) +{ + (void) ctx; + *ptr = sh; +} + +void +_mesa_shader_debug(struct gl_context *, GLenum, GLuint *, + const char *) +{ +} + +struct gl_shader * +_mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type) +{ + struct gl_shader *shader; + + (void) ctx; + + assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER); + shader = rzalloc(NULL, struct gl_shader); + if (shader) { + shader->Type = type; + shader->Stage = _mesa_shader_enum_to_shader_stage(type); + shader->Name = name; + shader->RefCount = 1; + } + return shader; +} + +void +_mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh) +{ + free((void *)sh->Source); + free(sh->Label); + ralloc_free(sh); +} + +void +_mesa_clear_shader_program_data(struct gl_shader_program *shProg) +{ + unsigned i; + + shProg->NumUniformStorage = 0; + shProg->UniformStorage = NULL; + shProg->NumUniformRemapTable = 
0; + shProg->UniformRemapTable = NULL; + shProg->UniformHash = NULL; + + ralloc_free(shProg->InfoLog); + shProg->InfoLog = ralloc_strdup(shProg, ""); + + ralloc_free(shProg->BufferInterfaceBlocks); + shProg->BufferInterfaceBlocks = NULL; + shProg->NumBufferInterfaceBlocks = 0; + + ralloc_free(shProg->UniformBlocks); + shProg->UniformBlocks = NULL; + shProg->NumUniformBlocks = 0; + + ralloc_free(shProg->ShaderStorageBlocks); + shProg->ShaderStorageBlocks = NULL; + shProg->NumShaderStorageBlocks = 0; + + for (i = 0; i < MESA_SHADER_STAGES; i++) { + ralloc_free(shProg->InterfaceBlockStageIndex[i]); + shProg->InterfaceBlockStageIndex[i] = NULL; + } + + ralloc_free(shProg->UboInterfaceBlockIndex); + shProg->UboInterfaceBlockIndex = NULL; + ralloc_free(shProg->SsboInterfaceBlockIndex); + shProg->SsboInterfaceBlockIndex = NULL; + + ralloc_free(shProg->AtomicBuffers); + shProg->AtomicBuffers = NULL; + shProg->NumAtomicBuffers = 0; +} + +void initialize_context_to_defaults(struct gl_context *ctx, gl_api api) +{ + memset(ctx, 0, sizeof(*ctx)); + + ctx->API = api; + + ctx->Extensions.dummy_false = false; + ctx->Extensions.dummy_true = true; + ctx->Extensions.ARB_compute_shader = true; + ctx->Extensions.ARB_conservative_depth = true; + ctx->Extensions.ARB_draw_instanced = true; + ctx->Extensions.ARB_ES2_compatibility = true; + ctx->Extensions.ARB_ES3_compatibility = true; + ctx->Extensions.ARB_explicit_attrib_location = true; + ctx->Extensions.ARB_fragment_coord_conventions = true; + ctx->Extensions.ARB_fragment_layer_viewport = true; + ctx->Extensions.ARB_gpu_shader5 = true; + ctx->Extensions.ARB_gpu_shader_fp64 = true; + ctx->Extensions.ARB_sample_shading = true; + ctx->Extensions.ARB_shader_bit_encoding = true; + ctx->Extensions.ARB_shader_draw_parameters = true; + ctx->Extensions.ARB_shader_stencil_export = true; + ctx->Extensions.ARB_shader_subroutine = true; + ctx->Extensions.ARB_shader_texture_lod = true; + ctx->Extensions.ARB_shading_language_420pack = true; + 
ctx->Extensions.ARB_shading_language_packing = true; + ctx->Extensions.ARB_tessellation_shader = true; + ctx->Extensions.ARB_texture_cube_map_array = true; + ctx->Extensions.ARB_texture_gather = true; + ctx->Extensions.ARB_texture_multisample = true; + ctx->Extensions.ARB_texture_query_levels = true; + ctx->Extensions.ARB_texture_query_lod = true; + ctx->Extensions.ARB_uniform_buffer_object = true; + ctx->Extensions.ARB_viewport_array = true; + + ctx->Extensions.OES_EGL_image_external = true; + ctx->Extensions.OES_standard_derivatives = true; + + ctx->Extensions.EXT_shader_integer_mix = true; + ctx->Extensions.EXT_texture_array = true; + + ctx->Extensions.NV_texture_rectangle = true; + + ctx->Const.GLSLVersion = 120; + + /* 1.20 minimums. */ + ctx->Const.MaxLights = 8; + ctx->Const.MaxClipPlanes = 6; + ctx->Const.MaxTextureUnits = 2; + ctx->Const.MaxTextureCoordUnits = 2; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16; + + ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 512; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 32; + ctx->Const.MaxVarying = 8; /* == gl_MaxVaryingFloats / 4 */ + ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 0; + ctx->Const.MaxCombinedTextureImageUnits = 2; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 2; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 64; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 32; + + ctx->Const.MaxDrawBuffers = 1; + ctx->Const.MaxComputeWorkGroupCount[0] = 65535; + ctx->Const.MaxComputeWorkGroupCount[1] = 65535; + ctx->Const.MaxComputeWorkGroupCount[2] = 65535; + ctx->Const.MaxComputeWorkGroupSize[0] = 1024; + ctx->Const.MaxComputeWorkGroupSize[1] = 1024; + ctx->Const.MaxComputeWorkGroupSize[2] = 64; + ctx->Const.MaxComputeWorkGroupInvocations = 1024; + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 16; + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxUniformComponents = 1024; + 
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxInputComponents = 0; /* not used */ + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxOutputComponents = 0; /* not used */ + + /* Set up default shader compiler options. */ + struct gl_shader_compiler_options options; + memset(&options, 0, sizeof(options)); + options.MaxUnrollIterations = 32; + options.MaxIfDepth = UINT_MAX; + + for (int sh = 0; sh < MESA_SHADER_STAGES; ++sh) + memcpy(&ctx->Const.ShaderCompilerOptions[sh], &options, sizeof(options)); + + _mesa_locale_init(); +} diff --git a/src/compiler/glsl/standalone_scaffolding.h b/src/compiler/glsl/standalone_scaffolding.h new file mode 100644 index 0000000..f853a18 --- /dev/null +++ b/src/compiler/glsl/standalone_scaffolding.h @@ -0,0 +1,90 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +/* This file declares stripped-down versions of functions that + * normally exist outside of the glsl folder, so that they can be used + * when running the GLSL compiler standalone (for unit testing or + * compiling builtins). + */ + +#pragma once +#ifndef STANDALONE_SCAFFOLDING_H +#define STANDALONE_SCAFFOLDING_H + +#include <assert.h> +#include "main/mtypes.h" + +extern "C" void +_mesa_warning(struct gl_context *ctx, const char *fmtString, ... ); + +extern "C" void +_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr, + struct gl_shader *sh); + +extern "C" struct gl_shader * +_mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type); + +extern "C" void +_mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh); + +extern "C" void +_mesa_clear_shader_program_data(struct gl_shader_program *); + +extern "C" void +_mesa_shader_debug(struct gl_context *ctx, GLenum type, GLuint *id, + const char *msg); + +static inline gl_shader_stage +_mesa_shader_enum_to_shader_stage(GLenum v) +{ + switch (v) { + case GL_VERTEX_SHADER: + return MESA_SHADER_VERTEX; + case GL_FRAGMENT_SHADER: + return MESA_SHADER_FRAGMENT; + case GL_GEOMETRY_SHADER: + return MESA_SHADER_GEOMETRY; + case GL_TESS_CONTROL_SHADER: + return MESA_SHADER_TESS_CTRL; + case GL_TESS_EVALUATION_SHADER: + return MESA_SHADER_TESS_EVAL; + case GL_COMPUTE_SHADER: + return MESA_SHADER_COMPUTE; + default: + assert(!"bad value in _mesa_shader_enum_to_shader_stage()"); + return MESA_SHADER_VERTEX; + } +} + +/** + * Initialize the given gl_context structure to a reasonable set of + * defaults representing the minimum capabilities required by the + * OpenGL spec. + * + * This is used when compiling builtin functions and in testing, when + * we don't have a connection to an actual driver. 
+ */ +void initialize_context_to_defaults(struct gl_context *ctx, gl_api api); + + +#endif /* STANDALONE_SCAFFOLDING_H */ diff --git a/src/compiler/glsl/test.cpp b/src/compiler/glsl/test.cpp new file mode 100644 index 0000000..b1ff92e --- /dev/null +++ b/src/compiler/glsl/test.cpp @@ -0,0 +1,78 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file test.cpp + * + * Standalone tests for the GLSL compiler. + * + * This file provides a standalone executable which can be used to + * test components of the GLSL. + * + * Each test is a function with the same signature as main(). The + * main function interprets its first argument as the name of the test + * to run, strips out that argument, and then calls the test function. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "test_optpass.h" + +/** + * Print proper usage and exit with failure. + * + * \param name program name shown in the usage line (callers pass argv[0]). + * + * The message goes to stdout; the function ends by calling + * exit(EXIT_FAILURE). + */ +static void +usage_fail(const char *name) +{ + printf("*** usage: %s <command> <options>\n", name); + printf("\n"); + printf("Possible commands are:\n"); + printf(" optpass: test an optimization pass in isolation\n"); + exit(EXIT_FAILURE); +} +/** + * Remove the command name (argv[1]) from the argument vector and return it. + * + * \param argc in/out argument count; decremented by one. + * \param argv argument vector; the entries after argv[1] are shifted down + * by one slot. The move covers the new *argc entries starting + * at argv[2], which reaches the old argv[argc] slot. + * \return the pointer that was stored in argv[1]. + * + * If fewer than two arguments are present, usage_fail() is called instead. + */ +static const char *extract_command_from_argv(int *argc, char **argv) +{ + if (*argc < 2) { + usage_fail(argv[0]); + } + const char *command = argv[1]; + --*argc; + memmove(&argv[1], &argv[2], (*argc) * sizeof(argv[1])); + return command; +} +/** + * Program entry point: run the test named by the first argument. + * + * The command name is stripped from argv before the test function is called, + * so the test sees argc/argv without it. Only the optpass command is handled + * here; any other name goes to usage_fail(). + */ +int main(int argc, char **argv) +{ + const char *command = extract_command_from_argv(&argc, argv); + if (strcmp(command, "optpass") == 0) { + return test_optpass(argc, argv); + } else { + usage_fail(argv[0]); + } + + /* Execution should never reach here. */ + return EXIT_FAILURE; +} diff --git a/src/compiler/glsl/test_optpass.cpp b/src/compiler/glsl/test_optpass.cpp new file mode 100644 index 0000000..fed1fab --- /dev/null +++ b/src/compiler/glsl/test_optpass.cpp @@ -0,0 +1,276 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file test_optpass.cpp + * + * Standalone test for optimization passes. + * + * This file provides the "optpass" command for the standalone + * glsl_test app. It accepts either GLSL or high-level IR as input, + * and performs the optimization passes specified on the command line. + * It outputs the IR, both before and after optimizations. + */ + +#include <string> +#include <iostream> +#include <sstream> +#include <getopt.h> + +#include "ast.h" +#include "ir_optimization.h" +#include "program.h" +#include "ir_reader.h" +#include "standalone_scaffolding.h" + +using namespace std; +/** + * Read standard input into a string. + * + * The read uses a NUL byte as the delimiter, so it stops at end of input or + * at the first NUL byte, whichever comes first. + */ +static string read_stdin_to_eof() +{ + stringbuf sb; + cin.get(sb, '\0'); + return sb.str(); +} + +static GLboolean +do_optimization(struct exec_list *ir, const char *optimization, + const struct gl_shader_compiler_options *options) +{ + int int_0; + int int_1; + int int_2; + int int_3; + int int_4; + + if (sscanf(optimization, "do_common_optimization ( %d ) ", &int_0) == 1) { + return do_common_optimization(ir, int_0 != 0, false, options, true); + } else if (strcmp(optimization, "do_algebraic") == 0) { + return do_algebraic(ir, true, options); + } else if (strcmp(optimization, "do_constant_folding") == 0) { + return do_constant_folding(ir); + } else if (strcmp(optimization, "do_constant_variable") == 0) { + return do_constant_variable(ir); + } else if (strcmp(optimization, "do_constant_variable_unlinked") == 0) { + return do_constant_variable_unlinked(ir); + } else if (strcmp(optimization, 
"do_copy_propagation") == 0) { + return do_copy_propagation(ir); + } else if (strcmp(optimization, "do_copy_propagation_elements") == 0) { + return do_copy_propagation_elements(ir); + } else if (strcmp(optimization, "do_constant_propagation") == 0) { + return do_constant_propagation(ir); + } else if (strcmp(optimization, "do_dead_code") == 0) { + return do_dead_code(ir, false); + } else if (strcmp(optimization, "do_dead_code_local") == 0) { + return do_dead_code_local(ir); + } else if (strcmp(optimization, "do_dead_code_unlinked") == 0) { + return do_dead_code_unlinked(ir); + } else if (strcmp(optimization, "do_dead_functions") == 0) { + return do_dead_functions(ir); + } else if (strcmp(optimization, "do_function_inlining") == 0) { + return do_function_inlining(ir); + } else if (sscanf(optimization, + "do_lower_jumps ( %d , %d , %d , %d , %d ) ", + &int_0, &int_1, &int_2, &int_3, &int_4) == 5) { + return do_lower_jumps(ir, int_0 != 0, int_1 != 0, int_2 != 0, + int_3 != 0, int_4 != 0); + } else if (strcmp(optimization, "do_lower_texture_projection") == 0) { + return do_lower_texture_projection(ir); + } else if (strcmp(optimization, "do_if_simplification") == 0) { + return do_if_simplification(ir); + } else if (sscanf(optimization, "lower_if_to_cond_assign ( %d ) ", + &int_0) == 1) { + return lower_if_to_cond_assign(ir, int_0); + } else if (strcmp(optimization, "do_mat_op_to_vec") == 0) { + return do_mat_op_to_vec(ir); + } else if (strcmp(optimization, "do_noop_swizzle") == 0) { + return do_noop_swizzle(ir); + } else if (strcmp(optimization, "do_structure_splitting") == 0) { + return do_structure_splitting(ir); + } else if (strcmp(optimization, "do_swizzle_swizzle") == 0) { + return do_swizzle_swizzle(ir); + } else if (strcmp(optimization, "do_tree_grafting") == 0) { + return do_tree_grafting(ir); + } else if (strcmp(optimization, "do_vec_index_to_cond_assign") == 0) { + return do_vec_index_to_cond_assign(ir); + } else if (strcmp(optimization, 
"do_vec_index_to_swizzle") == 0) { + return do_vec_index_to_swizzle(ir); + } else if (strcmp(optimization, "lower_discard") == 0) { + return lower_discard(ir); + } else if (sscanf(optimization, "lower_instructions ( %d ) ", + &int_0) == 1) { + return lower_instructions(ir, int_0); + } else if (strcmp(optimization, "lower_noise") == 0) { + return lower_noise(ir); + } else if (sscanf(optimization, "lower_variable_index_to_cond_assign " + "( %d , %d , %d , %d ) ", &int_0, &int_1, &int_2, + &int_3) == 4) { + return lower_variable_index_to_cond_assign(MESA_SHADER_VERTEX, ir, + int_0 != 0, int_1 != 0, + int_2 != 0, int_3 != 0); + } else if (sscanf(optimization, "lower_quadop_vector ( %d ) ", + &int_0) == 1) { + return lower_quadop_vector(ir, int_0 != 0); + } else if (strcmp(optimization, "optimize_redundant_jumps") == 0) { + return optimize_redundant_jumps(ir); + } else { + printf("Unrecognized optimization %s\n", optimization); + exit(EXIT_FAILURE); + return false; + } +} + +static GLboolean +do_optimization_passes(struct exec_list *ir, char **optimizations, + int num_optimizations, bool quiet, + const struct gl_shader_compiler_options *options) +{ + GLboolean overall_progress = false; + + for (int i = 0; i < num_optimizations; ++i) { + const char *optimization = optimizations[i]; + if (!quiet) { + printf("*** Running optimization %s...", optimization); + } + GLboolean progress = do_optimization(ir, optimization, options); + if (!quiet) { + printf("%s\n", progress ? 
"progress" : "no progress"); + } + validate_ir_tree(ir); + + overall_progress = overall_progress || progress; + } + + return overall_progress; +} + +int test_optpass(int argc, char **argv) +{ + int input_format_ir = 0; /* 0=glsl, 1=ir */ + int loop = 0; + int shader_type = GL_VERTEX_SHADER; + int quiet = 0; + + const struct option optpass_opts[] = { + { "input-ir", no_argument, &input_format_ir, 1 }, + { "input-glsl", no_argument, &input_format_ir, 0 }, + { "loop", no_argument, &loop, 1 }, + { "vertex-shader", no_argument, &shader_type, GL_VERTEX_SHADER }, + { "fragment-shader", no_argument, &shader_type, GL_FRAGMENT_SHADER }, + { "quiet", no_argument, &quiet, 1 }, + { NULL, 0, NULL, 0 } + }; + + int idx = 0; + int c; + while ((c = getopt_long(argc, argv, "", optpass_opts, &idx)) != -1) { + if (c != 0) { + printf("*** usage: %s optpass <optimizations> <options>\n", argv[0]); + printf("\n"); + printf("Possible options are:\n"); + printf(" --input-ir: input format is IR\n"); + printf(" --input-glsl: input format is GLSL (the default)\n"); + printf(" --loop: run optimizations repeatedly until no progress\n"); + printf(" --vertex-shader: test with a vertex shader (the default)\n"); + printf(" --fragment-shader: test with a fragment shader\n"); + exit(EXIT_FAILURE); + } + } + + struct gl_context local_ctx; + struct gl_context *ctx = &local_ctx; + initialize_context_to_defaults(ctx, API_OPENGL_COMPAT); + + ctx->Driver.NewShader = _mesa_new_shader; + ir_variable::temporaries_allocate_names = true; + + struct gl_shader *shader = rzalloc(NULL, struct gl_shader); + shader->Type = shader_type; + shader->Stage = _mesa_shader_enum_to_shader_stage(shader_type); + + string input = read_stdin_to_eof(); + + struct _mesa_glsl_parse_state *state + = new(shader) _mesa_glsl_parse_state(ctx, shader->Stage, shader); + + if (input_format_ir) { + shader->ir = new(shader) exec_list; + _mesa_glsl_initialize_types(state); + _mesa_glsl_read_ir(state, shader->ir, input.c_str(), true); + } else 
{ + shader->Source = input.c_str(); + const char *source = shader->Source; + state->error = glcpp_preprocess(state, &source, &state->info_log, + state->extensions, ctx) != 0; + + if (!state->error) { + _mesa_glsl_lexer_ctor(state, source); + _mesa_glsl_parse(state); + _mesa_glsl_lexer_dtor(state); + } + + shader->ir = new(shader) exec_list; + if (!state->error && !state->translation_unit.is_empty()) + _mesa_ast_to_hir(shader->ir, state); + } + + /* Print out the initial IR */ + if (!state->error && !quiet) { + printf("*** pre-optimization IR:\n"); + _mesa_print_ir(stdout, shader->ir, state); + printf("\n--\n"); + } + + /* Optimization passes */ + if (!state->error) { + GLboolean progress; + const struct gl_shader_compiler_options *options = + &ctx->Const.ShaderCompilerOptions[_mesa_shader_enum_to_shader_stage(shader_type)]; + do { + progress = do_optimization_passes(shader->ir, &argv[optind], + argc - optind, quiet != 0, options); + } while (loop && progress); + } + + /* Print out the resulting IR */ + if (!state->error) { + if (!quiet) { + printf("*** resulting IR:\n"); + } + _mesa_print_ir(stdout, shader->ir, state); + if (!quiet) { + printf("\n--\n"); + } + } + + if (state->error) { + printf("*** error(s) occurred:\n"); + printf("%s\n", state->info_log); + printf("--\n"); + } + + ralloc_free(state); + ralloc_free(shader); + + return state->error; +} + diff --git a/src/compiler/glsl/test_optpass.h b/src/compiler/glsl/test_optpass.h new file mode 100644 index 0000000..923ccf3 --- /dev/null +++ b/src/compiler/glsl/test_optpass.h @@ -0,0 +1,30 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom 
the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#pragma once +#ifndef TEST_OPTPASS_H +#define TEST_OPTPASS_H + +int test_optpass(int argc, char **argv); + +#endif /* TEST_OPTPASS_H */ diff --git a/src/compiler/glsl/tests/.gitignore b/src/compiler/glsl/tests/.gitignore new file mode 100644 index 0000000..13dcdc4 --- /dev/null +++ b/src/compiler/glsl/tests/.gitignore @@ -0,0 +1,5 @@ +blob-test +ralloc-test +uniform-initializer-test +sampler-types-test +general-ir-test diff --git a/src/compiler/glsl/tests/blob_test.c b/src/compiler/glsl/tests/blob_test.c new file mode 100644 index 0000000..4806029 --- /dev/null +++ b/src/compiler/glsl/tests/blob_test.c @@ -0,0 +1,320 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included 
in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* A collection of unit tests for blob.c */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> + +#include "util/ralloc.h" +#include "blob.h" + +#define bytes_test_str "bytes_test" +#define reserve_test_str "reserve_test" + +/* This placeholder must be the same length as the next overwrite_test_str */ +#define placeholder_str "XXXXXXXXXXXXXX" +#define overwrite_test_str "overwrite_test" +#define uint32_test 0x12345678 +#define uint32_placeholder 0xDEADBEEF +#define uint32_overwrite 0xA1B2C3D4 +#define uint64_test 0x1234567890ABCDEF +#define string_test_str "string_test" + +bool error = false; + +static void +expect_equal(uint64_t expected, uint64_t actual, const char *test) +{ + if (actual != expected) { + fprintf (stderr, "Error: Test '%s' failed: Expected=%ld, Actual=%ld\n", + test, expected, actual); + error = true; + } +} + +static void +expect_unequal(uint64_t expected, uint64_t actual, const char *test) +{ + if (actual == expected) { + fprintf (stderr, "Error: Test '%s' failed: Result=%ld, but expected something different.\n", + test, actual); + error = true; + } +} + +static void +expect_equal_str(const char *expected, const char *actual, const char *test) +{ + if (strcmp(expected, actual)) { + fprintf (stderr, "Error: Test '%s' failed:\n\t" + "Expected=\"%s\", Actual=\"%s\"\n", + test, expected, actual); + error = true; + } +} + +static void +expect_equal_bytes(uint8_t 
*expected, uint8_t *actual, + size_t num_bytes, const char *test) +{ + size_t i; + + if (memcmp(expected, actual, num_bytes)) { + fprintf (stderr, "Error: Test '%s' failed:\n\t", test); + + fprintf (stderr, "Expected=["); + for (i = 0; i < num_bytes; i++) { + if (i != 0) + fprintf(stderr, ", "); + fprintf(stderr, "0x%02x", expected[i]); + } + fprintf (stderr, "]"); + + fprintf (stderr, "Actual=["); + for (i = 0; i < num_bytes; i++) { + if (i != 0) + fprintf(stderr, ", "); + fprintf(stderr, "0x%02x", actual[i]); + } + fprintf (stderr, "]\n"); + + error = true; + } +} + +/* Test at least one call of each blob_write_foo and blob_read_foo function, + * verifying that we read out everything we wrote, that every bytes is + * consumed, and that the overrun bit is not set. + */ +static void +test_write_and_read_functions (void) +{ + void *ctx = ralloc_context(NULL); + struct blob *blob; + struct blob_reader reader; + uint8_t *reserved; + size_t str_offset, uint_offset; + uint8_t reserve_buf[sizeof(reserve_test_str)]; + + blob = blob_create(ctx); + + /*** Test blob by writing one of every possible kind of value. */ + + blob_write_bytes(blob, bytes_test_str, sizeof(bytes_test_str)); + + reserved = blob_reserve_bytes(blob, sizeof(reserve_test_str)); + memcpy(reserved, reserve_test_str, sizeof(reserve_test_str)); + + /* Write a placeholder, (to be replaced later via overwrite_bytes) */ + str_offset = blob->size; + blob_write_bytes(blob, placeholder_str, sizeof(placeholder_str)); + + blob_write_uint32(blob, uint32_test); + + /* Write a placeholder, (to be replaced later via overwrite_uint32) */ + uint_offset = blob->size; + blob_write_uint32(blob, uint32_placeholder); + + blob_write_uint64(blob, uint64_test); + + blob_write_intptr(blob, (intptr_t) blob); + + blob_write_string(blob, string_test_str); + + /* Finally, overwrite our placeholders. 
*/ + blob_overwrite_bytes(blob, str_offset, overwrite_test_str, + sizeof(overwrite_test_str)); + blob_overwrite_uint32(blob, uint_offset, uint32_overwrite); + + /*** Now read each value and verify. */ + blob_reader_init(&reader, blob->data, blob->size); + + expect_equal_str(bytes_test_str, + blob_read_bytes(&reader, sizeof(bytes_test_str)), + "blob_write/read_bytes"); + + blob_copy_bytes(&reader, reserve_buf, sizeof(reserve_buf)); + expect_equal_str(reserve_test_str, (char *) reserve_buf, + "blob_reserve_bytes/blob_copy_bytes"); + + expect_equal_str(overwrite_test_str, + blob_read_bytes(&reader, sizeof(overwrite_test_str)), + "blob_overwrite_bytes"); + + expect_equal(uint32_test, blob_read_uint32(&reader), + "blob_write/read_uint32"); + expect_equal(uint32_overwrite, blob_read_uint32(&reader), + "blob_overwrite_uint32"); + expect_equal(uint64_test, blob_read_uint64(&reader), + "blob_write/read_uint64"); + expect_equal((intptr_t) blob, blob_read_intptr(&reader), + "blob_write/read_intptr"); + expect_equal_str(string_test_str, blob_read_string(&reader), + "blob_write/read_string"); + + expect_equal(reader.end - reader.data, reader.current - reader.data, + "read_consumes_all_bytes"); + expect_equal(false, reader.overrun, "read_does_not_overrun"); + + ralloc_free(ctx); +} + +/* Test that data values are written and read with proper alignment. */ +static void +test_alignment(void) +{ + void *ctx = ralloc_context(NULL); + struct blob *blob; + struct blob_reader reader; + uint8_t bytes[] = "ABCDEFGHIJKLMNOP"; + size_t delta, last, num_bytes; + + blob = blob_create(ctx); + + /* First, write an intptr value to the blob and capture that size. This is + * the expected offset between any pair of intptr values (if written with + * alignment). + */ + blob_write_intptr(blob, (intptr_t) blob); + + delta = blob->size; + last = blob->size; + + /* Then loop doing the following: + * + * 1. Write an unaligned number of bytes + * 2. Verify that write results in an unaligned size + * 3. 
Write an intptr_t value + * 2. Verify that that write results in an aligned size + */ + for (num_bytes = 1; num_bytes < sizeof(intptr_t); num_bytes++) { + blob_write_bytes(blob, bytes, num_bytes); + + expect_unequal(delta, blob->size - last, "unaligned write of bytes"); + + blob_write_intptr(blob, (intptr_t) blob); + + expect_equal(2 * delta, blob->size - last, "aligned write of intptr"); + + last = blob->size; + } + + /* Finally, test that reading also does proper alignment. Since we know + * that values were written with all the right alignment, all we have to do + * here is verify that correct values are read. + */ + blob_reader_init(&reader, blob->data, blob->size); + + expect_equal((intptr_t) blob, blob_read_intptr(&reader), + "read of initial, aligned intptr_t"); + + for (num_bytes = 1; num_bytes < sizeof(intptr_t); num_bytes++) { + expect_equal_bytes(bytes, blob_read_bytes(&reader, num_bytes), + num_bytes, "unaligned read of bytes"); + expect_equal((intptr_t) blob, blob_read_intptr(&reader), + "aligned read of intptr_t"); + } + + ralloc_free(ctx); +} + +/* Test that we detect overrun. */ +static void +test_overrun(void) +{ + void *ctx =ralloc_context(NULL); + struct blob *blob; + struct blob_reader reader; + uint32_t value = 0xdeadbeef; + + blob = blob_create(ctx); + + blob_write_uint32(blob, value); + + blob_reader_init(&reader, blob->data, blob->size); + + expect_equal(value, blob_read_uint32(&reader), "read before overrun"); + expect_equal(false, reader.overrun, "overrun flag not set"); + expect_equal(0, blob_read_uint32(&reader), "read at overrun"); + expect_equal(true, reader.overrun, "overrun flag set"); + + ralloc_free(ctx); +} + +/* Test that we can read and write some large objects, (exercising the code in + * the blob_write functions to realloc blob->data. 
+ */ +static void +test_big_objects(void) +{ + void *ctx = ralloc_context(NULL); + struct blob *blob; + struct blob_reader reader; + int size = 1000; + int count = 1000; + size_t i; + char *buf; + + blob = blob_create(ctx); + + /* Initialize our buffer. */ + buf = ralloc_size(ctx, size); + for (i = 0; i < size; i++) { + buf[i] = i % 256; + } + + /* Write it many times. */ + for (i = 0; i < count; i++) { + blob_write_bytes(blob, buf, size); + } + + blob_reader_init(&reader, blob->data, blob->size); + + /* Read and verify it many times. */ + for (i = 0; i < count; i++) { + expect_equal_bytes((uint8_t *) buf, blob_read_bytes(&reader, size), size, + "read of large objects"); + } + + expect_equal(reader.end - reader.data, reader.current - reader.data, + "number of bytes read reading large objects"); + + expect_equal(false, reader.overrun, + "overrun flag not set reading large objects"); + + ralloc_free(ctx); +} + +int +main (void) +{ + test_write_and_read_functions (); + test_alignment (); + test_overrun (); + test_big_objects (); + + return error ? 1 : 0; +} diff --git a/src/compiler/glsl/tests/builtin_variable_test.cpp b/src/compiler/glsl/tests/builtin_variable_test.cpp new file mode 100644 index 0000000..11e384a --- /dev/null +++ b/src/compiler/glsl/tests/builtin_variable_test.cpp @@ -0,0 +1,393 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include <gtest/gtest.h> +#include "standalone_scaffolding.h" +#include "main/compiler.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "ir.h" +#include "glsl_parser_extras.h" +#include "glsl_symbol_table.h" + +class common_builtin : public ::testing::Test { +public: + common_builtin(GLenum shader_type) + : shader_type(shader_type) + { + /* empty */ + } + + virtual void SetUp(); + virtual void TearDown(); + + void string_starts_with_prefix(const char *str, const char *prefix); + void names_start_with_gl(); + void uniforms_and_system_values_dont_have_explicit_location(); + void constants_are_constant(); + void no_invalid_variable_modes(); + + GLenum shader_type; + struct _mesa_glsl_parse_state *state; + struct gl_shader *shader; + void *mem_ctx; + gl_context ctx; + exec_list ir; +}; + +void +common_builtin::SetUp() +{ + this->mem_ctx = ralloc_context(NULL); + this->ir.make_empty(); + + initialize_context_to_defaults(&this->ctx, API_OPENGL_COMPAT); + + this->shader = rzalloc(this->mem_ctx, gl_shader); + this->shader->Type = this->shader_type; + this->shader->Stage = _mesa_shader_enum_to_shader_stage(this->shader_type); + + this->state = + new(mem_ctx) _mesa_glsl_parse_state(&this->ctx, this->shader->Stage, + this->shader); + + _mesa_glsl_initialize_types(this->state); + _mesa_glsl_initialize_variables(&this->ir, this->state); +} + +void +common_builtin::TearDown() +{ + ralloc_free(this->mem_ctx); + this->mem_ctx = NULL; +} + +void 
+common_builtin::string_starts_with_prefix(const char *str, const char *prefix) +{ + const size_t len = strlen(prefix); + char *const name_prefix = new char[len + 1]; + + strncpy(name_prefix, str, len); + name_prefix[len] = '\0'; + EXPECT_STREQ(prefix, name_prefix) << "Bad name " << str; + + delete [] name_prefix; +} + +void +common_builtin::names_start_with_gl() +{ + foreach_in_list(ir_instruction, node, &this->ir) { + ir_variable *const var = node->as_variable(); + + string_starts_with_prefix(var->name, "gl_"); + } +} + +void +common_builtin::uniforms_and_system_values_dont_have_explicit_location() +{ + foreach_in_list(ir_instruction, node, &this->ir) { + ir_variable *const var = node->as_variable(); + + if (var->data.mode != ir_var_uniform && var->data.mode != ir_var_system_value) + continue; + + EXPECT_FALSE(var->data.explicit_location); + EXPECT_EQ(-1, var->data.location); + } +} + +void +common_builtin::constants_are_constant() +{ + foreach_in_list(ir_instruction, node, &this->ir) { + ir_variable *const var = node->as_variable(); + + if (var->data.mode != ir_var_auto) + continue; + + EXPECT_FALSE(var->data.explicit_location); + EXPECT_EQ(-1, var->data.location); + EXPECT_TRUE(var->data.read_only); + } +} + +void +common_builtin::no_invalid_variable_modes() +{ + foreach_in_list(ir_instruction, node, &this->ir) { + ir_variable *const var = node->as_variable(); + + switch (var->data.mode) { + case ir_var_auto: + case ir_var_uniform: + case ir_var_shader_in: + case ir_var_shader_out: + case ir_var_system_value: + break; + + default: + ADD_FAILURE() << "Built-in variable " << var->name + << " has an invalid mode " << int(var->data.mode); + break; + } + } +} + +/************************************************************/ + +class vertex_builtin : public common_builtin { +public: + vertex_builtin() + : common_builtin(GL_VERTEX_SHADER) + { + /* empty */ + } +}; + +TEST_F(vertex_builtin, names_start_with_gl) +{ + common_builtin::names_start_with_gl(); +} + 
+TEST_F(vertex_builtin, inputs_have_explicit_location) +{ + foreach_in_list(ir_instruction, node, &this->ir) { + ir_variable *const var = node->as_variable(); + + if (var->data.mode != ir_var_shader_in) + continue; + + EXPECT_TRUE(var->data.explicit_location); + EXPECT_NE(-1, var->data.location); + EXPECT_GT(VERT_ATTRIB_GENERIC0, var->data.location); + EXPECT_EQ(0u, var->data.location_frac); + } +} + +TEST_F(vertex_builtin, outputs_have_explicit_location) +{ + foreach_in_list(ir_instruction, node, &this->ir) { + ir_variable *const var = node->as_variable(); + + if (var->data.mode != ir_var_shader_out) + continue; + + EXPECT_TRUE(var->data.explicit_location); + EXPECT_NE(-1, var->data.location); + EXPECT_GT(VARYING_SLOT_VAR0, var->data.location); + EXPECT_EQ(0u, var->data.location_frac); + + /* Several varyings only exist in the fragment shader. Be sure that no + * outputs with these locations exist. + */ + EXPECT_NE(VARYING_SLOT_PNTC, var->data.location); + EXPECT_NE(VARYING_SLOT_FACE, var->data.location); + EXPECT_NE(VARYING_SLOT_PRIMITIVE_ID, var->data.location); + } +} + +TEST_F(vertex_builtin, uniforms_and_system_values_dont_have_explicit_location) +{ + common_builtin::uniforms_and_system_values_dont_have_explicit_location(); +} + +TEST_F(vertex_builtin, constants_are_constant) +{ + common_builtin::constants_are_constant(); +} + +TEST_F(vertex_builtin, no_invalid_variable_modes) +{ + common_builtin::no_invalid_variable_modes(); +} + +/********************************************************************/ + +class fragment_builtin : public common_builtin { +public: + fragment_builtin() + : common_builtin(GL_FRAGMENT_SHADER) + { + /* empty */ + } +}; + +TEST_F(fragment_builtin, names_start_with_gl) +{ + common_builtin::names_start_with_gl(); +} + +TEST_F(fragment_builtin, inputs_have_explicit_location) +{ + foreach_in_list(ir_instruction, node, &this->ir) { + ir_variable *const var = node->as_variable(); + + if (var->data.mode != ir_var_shader_in) + continue; + + 
/* Every built-in fragment shader output must carry an explicit location that
 * does not exceed FRAG_RESULT_DATA0 and has a zero location_frac.
 */
TEST_F(fragment_builtin, outputs_have_explicit_location)
{
   foreach_in_list(ir_instruction, inst, &this->ir) {
      ir_variable *const var = inst->as_variable();

      /* Only shader outputs are of interest here. */
      if (var->data.mode == ir_var_shader_out) {
         EXPECT_TRUE(var->data.explicit_location);
         EXPECT_NE(-1, var->data.location);

         /* gl_FragData[] has location FRAG_RESULT_DATA0.  Locations beyond
          * that are invalid.
          */
         EXPECT_GE(FRAG_RESULT_DATA0, var->data.location);

         EXPECT_EQ(0u, var->data.location_frac);
      }
   }
}
ASSERT_TRUE(var->type->is_array()); + + const glsl_type *const instance_type = var->type->fields.array; + + for (unsigned i = 0; i < instance_type->length; i++) { + const glsl_struct_field *const input = + &instance_type->fields.structure[i]; + + string_starts_with_prefix(input->name, "gl_"); + EXPECT_NE(-1, input->location); + EXPECT_GT(VARYING_SLOT_VAR0, input->location); + + /* Several varyings only exist in the fragment shader. Be sure + * that no inputs with these locations exist. + */ + EXPECT_NE(VARYING_SLOT_PNTC, input->location); + EXPECT_NE(VARYING_SLOT_FACE, input->location); + } + } else { + EXPECT_TRUE(var->data.explicit_location); + EXPECT_NE(-1, var->data.location); + EXPECT_GT(VARYING_SLOT_VAR0, var->data.location); + EXPECT_EQ(0u, var->data.location_frac); + } + + /* Several varyings only exist in the fragment shader. Be sure that no + * inputs with these locations exist. + */ + EXPECT_NE(VARYING_SLOT_PNTC, var->data.location); + EXPECT_NE(VARYING_SLOT_FACE, var->data.location); + } +} + +TEST_F(geometry_builtin, outputs_have_explicit_location) +{ + foreach_in_list(ir_instruction, node, &this->ir) { + ir_variable *const var = node->as_variable(); + + if (var->data.mode != ir_var_shader_out) + continue; + + EXPECT_TRUE(var->data.explicit_location); + EXPECT_NE(-1, var->data.location); + EXPECT_GT(VARYING_SLOT_VAR0, var->data.location); + EXPECT_EQ(0u, var->data.location_frac); + + /* Several varyings only exist in the fragment shader. Be sure that no + * outputs with these locations exist. 
+ */ + EXPECT_NE(VARYING_SLOT_PNTC, var->data.location); + EXPECT_NE(VARYING_SLOT_FACE, var->data.location); + } +} + +TEST_F(geometry_builtin, uniforms_and_system_values_dont_have_explicit_location) +{ + common_builtin::uniforms_and_system_values_dont_have_explicit_location(); +} + +TEST_F(geometry_builtin, constants_are_constant) +{ + common_builtin::constants_are_constant(); +} + +TEST_F(geometry_builtin, no_invalid_variable_modes) +{ + common_builtin::no_invalid_variable_modes(); +} diff --git a/src/compiler/glsl/tests/compare_ir b/src/compiler/glsl/tests/compare_ir new file mode 100755 index 0000000..a40fc81 --- /dev/null +++ b/src/compiler/glsl/tests/compare_ir @@ -0,0 +1,59 @@ +#!/usr/bin/env python +# coding=utf-8 +# +# Copyright © 2011 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Compare two files containing IR code. Ignore formatting differences +# and declaration order. 
import os
import os.path
import subprocess
import sys
import tempfile

from sexps import *

# Exactly two IR files must be named on the command line.
if len(sys.argv) != 3:
    # Call form rather than the print statement: identical output under
    # Python 2, and not a syntax error under Python 3.
    print('Usage: compare_ir <file1> <file2>')
    sys.exit(1)

# Parse both files and sort their declarations so that declaration order
# does not affect the comparison.
with open(sys.argv[1]) as f:
    ir1 = sort_decls(parse_sexp(f.read()))
with open(sys.argv[2]) as f:
    ir2 = sort_decls(parse_sexp(f.read()))

if ir1 == ir2:
    sys.exit(0)

# The IR differs: write both normalized forms to temporary files and show a
# unified diff of them.
file1, path1 = tempfile.mkstemp(os.path.basename(sys.argv[1]))
file2, path2 = tempfile.mkstemp(os.path.basename(sys.argv[2]))
try:
    # A text-mode file object accepts str on both Python 2 and 3 (os.write
    # needs bytes on 3), and the with-block closes the descriptor even when
    # the write fails.
    with os.fdopen(file1, 'w') as out:
        out.write('{0}\n'.format(sexp_to_string(ir1)))
    with os.fdopen(file2, 'w') as out:
        out.write('{0}\n'.format(sexp_to_string(ir2)))
    subprocess.call(['diff', '-u', path1, path2])
finally:
    os.remove(path1)
    os.remove(path2)
sys.exit(1)
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include <gtest/gtest.h> +#include "main/compiler.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "util/ralloc.h" +#include "uniform_initializer_utils.h" + +namespace linker { +extern void +copy_constant_to_storage(union gl_constant_value *storage, + const ir_constant *val, + const enum glsl_base_type base_type, + const unsigned int elements, + unsigned int boolean_true); +} + +class copy_constant_to_storage : public ::testing::Test { +public: + void int_test(unsigned rows); + void uint_test(unsigned rows); + void bool_test(unsigned rows); + void sampler_test(); + void float_test(unsigned columns, unsigned rows); + + virtual void SetUp(); + virtual void TearDown(); + + gl_constant_value storage[17]; + void *mem_ctx; +}; + +void +copy_constant_to_storage::SetUp() +{ + this->mem_ctx = ralloc_context(NULL); +} + +void +copy_constant_to_storage::TearDown() +{ + ralloc_free(this->mem_ctx); + this->mem_ctx = NULL; +} + +void +copy_constant_to_storage::int_test(unsigned rows) +{ + ir_constant *val; + generate_data(mem_ctx, GLSL_TYPE_INT, 1, rows, val); + + const unsigned red_zone_size = ARRAY_SIZE(storage) - val->type->components(); + fill_storage_array_with_sentinels(storage, + val->type->components(), + red_zone_size); + + linker::copy_constant_to_storage(storage, + val, + val->type->base_type, + val->type->components(), + 0xF00F); + + verify_data(storage, 0, val, red_zone_size, 0xF00F); +} + +void +copy_constant_to_storage::uint_test(unsigned rows) +{ + ir_constant *val; + generate_data(mem_ctx, GLSL_TYPE_UINT, 1, rows, val); + + const unsigned red_zone_size = ARRAY_SIZE(storage) - val->type->components(); + fill_storage_array_with_sentinels(storage, + val->type->components(), 
+ red_zone_size); + + linker::copy_constant_to_storage(storage, + val, + val->type->base_type, + val->type->components(), + 0xF00F); + + verify_data(storage, 0, val, red_zone_size, 0xF00F); +} + +void +copy_constant_to_storage::float_test(unsigned columns, unsigned rows) +{ + ir_constant *val; + generate_data(mem_ctx, GLSL_TYPE_FLOAT, columns, rows, val); + + const unsigned red_zone_size = ARRAY_SIZE(storage) - val->type->components(); + fill_storage_array_with_sentinels(storage, + val->type->components(), + red_zone_size); + + linker::copy_constant_to_storage(storage, + val, + val->type->base_type, + val->type->components(), + 0xF00F); + + verify_data(storage, 0, val, red_zone_size, 0xF00F); +} + +void +copy_constant_to_storage::bool_test(unsigned rows) +{ + ir_constant *val; + generate_data(mem_ctx, GLSL_TYPE_BOOL, 1, rows, val); + + const unsigned red_zone_size = ARRAY_SIZE(storage) - val->type->components(); + fill_storage_array_with_sentinels(storage, + val->type->components(), + red_zone_size); + + linker::copy_constant_to_storage(storage, + val, + val->type->base_type, + val->type->components(), + 0xF00F); + + verify_data(storage, 0, val, red_zone_size, 0xF00F); +} + +/** + * The only difference between this test and int_test is that the base type + * passed to \c linker::copy_constant_to_storage is hard-coded to \c + * GLSL_TYPE_SAMPLER instead of using the base type from the constant. 
+ */ +void +copy_constant_to_storage::sampler_test(void) +{ + ir_constant *val; + generate_data(mem_ctx, GLSL_TYPE_INT, 1, 1, val); + + const unsigned red_zone_size = ARRAY_SIZE(storage) - val->type->components(); + fill_storage_array_with_sentinels(storage, + val->type->components(), + red_zone_size); + + linker::copy_constant_to_storage(storage, + val, + GLSL_TYPE_SAMPLER, + val->type->components(), + 0xF00F); + + verify_data(storage, 0, val, red_zone_size, 0xF00F); +} + +TEST_F(copy_constant_to_storage, bool_uniform) +{ + bool_test(1); +} + +TEST_F(copy_constant_to_storage, bvec2_uniform) +{ + bool_test(2); +} + +TEST_F(copy_constant_to_storage, bvec3_uniform) +{ + bool_test(3); +} + +TEST_F(copy_constant_to_storage, bvec4_uniform) +{ + bool_test(4); +} + +TEST_F(copy_constant_to_storage, int_uniform) +{ + int_test(1); +} + +TEST_F(copy_constant_to_storage, ivec2_uniform) +{ + int_test(2); +} + +TEST_F(copy_constant_to_storage, ivec3_uniform) +{ + int_test(3); +} + +TEST_F(copy_constant_to_storage, ivec4_uniform) +{ + int_test(4); +} + +TEST_F(copy_constant_to_storage, uint_uniform) +{ + uint_test(1); +} + +TEST_F(copy_constant_to_storage, uvec2_uniform) +{ + uint_test(2); +} + +TEST_F(copy_constant_to_storage, uvec3_uniform) +{ + uint_test(3); +} + +TEST_F(copy_constant_to_storage, uvec4_uniform) +{ + uint_test(4); +} + +TEST_F(copy_constant_to_storage, float_uniform) +{ + float_test(1, 1); +} + +TEST_F(copy_constant_to_storage, vec2_uniform) +{ + float_test(1, 2); +} + +TEST_F(copy_constant_to_storage, vec3_uniform) +{ + float_test(1, 3); +} + +TEST_F(copy_constant_to_storage, vec4_uniform) +{ + float_test(1, 4); +} + +TEST_F(copy_constant_to_storage, mat2x2_uniform) +{ + float_test(2, 2); +} + +TEST_F(copy_constant_to_storage, mat2x3_uniform) +{ + float_test(2, 3); +} + +TEST_F(copy_constant_to_storage, mat2x4_uniform) +{ + float_test(2, 4); +} + +TEST_F(copy_constant_to_storage, mat3x2_uniform) +{ + float_test(3, 2); +} + +TEST_F(copy_constant_to_storage, 
mat3x3_uniform) +{ + float_test(3, 3); +} + +TEST_F(copy_constant_to_storage, mat3x4_uniform) +{ + float_test(3, 4); +} + +TEST_F(copy_constant_to_storage, mat4x2_uniform) +{ + float_test(4, 2); +} + +TEST_F(copy_constant_to_storage, mat4x3_uniform) +{ + float_test(4, 3); +} + +TEST_F(copy_constant_to_storage, mat4x4_uniform) +{ + float_test(4, 4); +} + +TEST_F(copy_constant_to_storage, sampler_uniform) +{ + sampler_test(); +} diff --git a/src/compiler/glsl/tests/general_ir_test.cpp b/src/compiler/glsl/tests/general_ir_test.cpp new file mode 100644 index 0000000..217305b --- /dev/null +++ b/src/compiler/glsl/tests/general_ir_test.cpp @@ -0,0 +1,80 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#include <gtest/gtest.h> +#include "main/compiler.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "ir.h" + +TEST(ir_variable_constructor, interface) +{ + void *mem_ctx = ralloc_context(NULL); + + static const glsl_struct_field f[] = { + glsl_struct_field(glsl_type::vec(4), "v") + }; + + const glsl_type *const interface = + glsl_type::get_interface_instance(f, + ARRAY_SIZE(f), + GLSL_INTERFACE_PACKING_STD140, + "simple_interface"); + + static const char name[] = "named_instance"; + + ir_variable *const v = + new(mem_ctx) ir_variable(interface, name, ir_var_uniform); + + EXPECT_STREQ(name, v->name); + EXPECT_NE(name, v->name); + EXPECT_EQ(interface, v->type); + EXPECT_EQ(interface, v->get_interface_type()); +} + +TEST(ir_variable_constructor, interface_array) +{ + void *mem_ctx = ralloc_context(NULL); + + static const glsl_struct_field f[] = { + glsl_struct_field(glsl_type::vec(4), "v") + }; + + const glsl_type *const interface = + glsl_type::get_interface_instance(f, + ARRAY_SIZE(f), + GLSL_INTERFACE_PACKING_STD140, + "simple_interface"); + + const glsl_type *const interface_array = + glsl_type::get_array_instance(interface, 2); + + static const char name[] = "array_instance"; + + ir_variable *const v = + new(mem_ctx) ir_variable(interface_array, name, ir_var_uniform); + + EXPECT_STREQ(name, v->name); + EXPECT_NE(name, v->name); + EXPECT_EQ(interface_array, v->type); + EXPECT_EQ(interface, v->get_interface_type()); +} diff --git a/src/compiler/glsl/tests/invalidate_locations_test.cpp b/src/compiler/glsl/tests/invalidate_locations_test.cpp new file mode 100644 index 0000000..ba94d7e --- /dev/null +++ b/src/compiler/glsl/tests/invalidate_locations_test.cpp @@ -0,0 +1,196 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without 
limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include <gtest/gtest.h> +#include "main/compiler.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "util/ralloc.h" +#include "ir.h" +#include "linker.h" + +/** + * \file invalidate_locations_test.cpp + * + * Test various aspects of linking shader stage inputs and outputs. 
+ */ + +class invalidate_locations : public ::testing::Test { +public: + virtual void SetUp(); + virtual void TearDown(); + + void *mem_ctx; + exec_list ir; +}; + +void +invalidate_locations::SetUp() +{ + this->mem_ctx = ralloc_context(NULL); + this->ir.make_empty(); +} + +void +invalidate_locations::TearDown() +{ + ralloc_free(this->mem_ctx); + this->mem_ctx = NULL; +} + +TEST_F(invalidate_locations, simple_vertex_in_generic) +{ + ir_variable *const var = + new(mem_ctx) ir_variable(glsl_type::vec(4), + "a", + ir_var_shader_in); + + EXPECT_FALSE(var->data.explicit_location); + EXPECT_EQ(-1, var->data.location); + + var->data.location = VERT_ATTRIB_GENERIC0; + var->data.location_frac = 2; + + ir.push_tail(var); + + link_invalidate_variable_locations(&ir); + + EXPECT_EQ(-1, var->data.location); + EXPECT_EQ(0u, var->data.location_frac); + EXPECT_FALSE(var->data.explicit_location); + EXPECT_TRUE(var->data.is_unmatched_generic_inout); +} + +TEST_F(invalidate_locations, explicit_location_vertex_in_generic) +{ + ir_variable *const var = + new(mem_ctx) ir_variable(glsl_type::vec(4), + "a", + ir_var_shader_in); + + EXPECT_FALSE(var->data.explicit_location); + EXPECT_EQ(-1, var->data.location); + + var->data.location = VERT_ATTRIB_GENERIC0; + var->data.explicit_location = true; + + ir.push_tail(var); + + link_invalidate_variable_locations(&ir); + + EXPECT_EQ(VERT_ATTRIB_GENERIC0, var->data.location); + EXPECT_EQ(0u, var->data.location_frac); + EXPECT_TRUE(var->data.explicit_location); + EXPECT_FALSE(var->data.is_unmatched_generic_inout); +} + +TEST_F(invalidate_locations, explicit_location_frac_vertex_in_generic) +{ + ir_variable *const var = + new(mem_ctx) ir_variable(glsl_type::vec(4), + "a", + ir_var_shader_in); + + EXPECT_FALSE(var->data.explicit_location); + EXPECT_EQ(-1, var->data.location); + + var->data.location = VERT_ATTRIB_GENERIC0; + var->data.location_frac = 2; + var->data.explicit_location = true; + + ir.push_tail(var); + + 
link_invalidate_variable_locations(&ir); + + EXPECT_EQ(VERT_ATTRIB_GENERIC0, var->data.location); + EXPECT_EQ(2u, var->data.location_frac); + EXPECT_TRUE(var->data.explicit_location); + EXPECT_FALSE(var->data.is_unmatched_generic_inout); +} + +TEST_F(invalidate_locations, vertex_in_builtin) +{ + ir_variable *const var = + new(mem_ctx) ir_variable(glsl_type::vec(4), + "gl_Vertex", + ir_var_shader_in); + + EXPECT_FALSE(var->data.explicit_location); + EXPECT_EQ(-1, var->data.location); + + var->data.location = VERT_ATTRIB_POS; + var->data.explicit_location = true; + + ir.push_tail(var); + + link_invalidate_variable_locations(&ir); + + EXPECT_EQ(VERT_ATTRIB_POS, var->data.location); + EXPECT_EQ(0u, var->data.location_frac); + EXPECT_TRUE(var->data.explicit_location); + EXPECT_FALSE(var->data.is_unmatched_generic_inout); +} + +TEST_F(invalidate_locations, simple_vertex_out_generic) +{ + ir_variable *const var = + new(mem_ctx) ir_variable(glsl_type::vec(4), + "a", + ir_var_shader_out); + + EXPECT_FALSE(var->data.explicit_location); + EXPECT_EQ(-1, var->data.location); + + var->data.location = VARYING_SLOT_VAR0; + + ir.push_tail(var); + + link_invalidate_variable_locations(&ir); + + EXPECT_EQ(-1, var->data.location); + EXPECT_EQ(0u, var->data.location_frac); + EXPECT_FALSE(var->data.explicit_location); + EXPECT_TRUE(var->data.is_unmatched_generic_inout); +} + +TEST_F(invalidate_locations, vertex_out_builtin) +{ + ir_variable *const var = + new(mem_ctx) ir_variable(glsl_type::vec(4), + "gl_FrontColor", + ir_var_shader_out); + + EXPECT_FALSE(var->data.explicit_location); + EXPECT_EQ(-1, var->data.location); + + var->data.location = VARYING_SLOT_COL0; + var->data.explicit_location = true; + + ir.push_tail(var); + + link_invalidate_variable_locations(&ir); + + EXPECT_EQ(VARYING_SLOT_COL0, var->data.location); + EXPECT_EQ(0u, var->data.location_frac); + EXPECT_TRUE(var->data.explicit_location); + EXPECT_FALSE(var->data.is_unmatched_generic_inout); +} diff --git 
a/src/compiler/glsl/tests/lower_jumps/.gitignore b/src/compiler/glsl/tests/lower_jumps/.gitignore new file mode 100644 index 0000000..e98df62 --- /dev/null +++ b/src/compiler/glsl/tests/lower_jumps/.gitignore @@ -0,0 +1,3 @@ +*.opt_test +*.expected +*.out diff --git a/src/compiler/glsl/tests/lower_jumps/create_test_cases.py b/src/compiler/glsl/tests/lower_jumps/create_test_cases.py new file mode 100644 index 0000000..3be1079 --- /dev/null +++ b/src/compiler/glsl/tests/lower_jumps/create_test_cases.py @@ -0,0 +1,643 @@ +# coding=utf-8 +# +# Copyright © 2011 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +import os +import os.path +import re +import subprocess +import sys + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) # For access to sexps.py, which is in parent dir +from sexps import * + +def make_test_case(f_name, ret_type, body): + """Create a simple optimization test case consisting of a single + function with the given name, return type, and body. + + Global declarations are automatically created for any undeclared + variables that are referenced by the function. All undeclared + variables are assumed to be floats. + """ + check_sexp(body) + declarations = {} + def make_declarations(sexp, already_declared = ()): + if isinstance(sexp, list): + if len(sexp) == 2 and sexp[0] == 'var_ref': + if sexp[1] not in already_declared: + declarations[sexp[1]] = [ + 'declare', ['in'], 'float', sexp[1]] + elif len(sexp) == 4 and sexp[0] == 'assign': + assert sexp[2][0] == 'var_ref' + if sexp[2][1] not in already_declared: + declarations[sexp[2][1]] = [ + 'declare', ['out'], 'float', sexp[2][1]] + make_declarations(sexp[3], already_declared) + else: + already_declared = set(already_declared) + for s in sexp: + if isinstance(s, list) and len(s) >= 4 and \ + s[0] == 'declare': + already_declared.add(s[3]) + else: + make_declarations(s, already_declared) + make_declarations(body) + return declarations.values() + \ + [['function', f_name, ['signature', ret_type, ['parameters'], body]]] + + +# The following functions can be used to build expressions. + +def const_float(value): + """Create an expression representing the given floating point value.""" + return ['constant', 'float', ['{0:.6f}'.format(value)]] + +def const_bool(value): + """Create an expression representing the given boolean value. + + If value is not a boolean, it is converted to a boolean. So, for + instance, const_bool(1) is equivalent to const_bool(True). 
+ """ + return ['constant', 'bool', ['{0}'.format(1 if value else 0)]] + +def gt_zero(var_name): + """Create Construct the expression var_name > 0""" + return ['expression', 'bool', '>', ['var_ref', var_name], const_float(0)] + + +# The following functions can be used to build complex control flow +# statements. All of these functions return statement lists (even +# those which only create a single statement), so that statements can +# be sequenced together using the '+' operator. + +def return_(value = None): + """Create a return statement.""" + if value is not None: + return [['return', value]] + else: + return [['return']] + +def break_(): + """Create a break statement.""" + return ['break'] + +def continue_(): + """Create a continue statement.""" + return ['continue'] + +def simple_if(var_name, then_statements, else_statements = None): + """Create a statement of the form + + if (var_name > 0.0) { + <then_statements> + } else { + <else_statements> + } + + else_statements may be omitted. + """ + if else_statements is None: + else_statements = [] + check_sexp(then_statements) + check_sexp(else_statements) + return [['if', gt_zero(var_name), then_statements, else_statements]] + +def loop(statements): + """Create a loop containing the given statements as its loop + body. + """ + check_sexp(statements) + return [['loop', statements]] + +def declare_temp(var_type, var_name): + """Create a declaration of the form + + (declare (temporary) <var_type> <var_name) + """ + return [['declare', ['temporary'], var_type, var_name]] + +def assign_x(var_name, value): + """Create a statement that assigns <value> to the variable + <var_name>. The assignment uses the mask (x). 
+ """ + check_sexp(value) + return [['assign', ['x'], ['var_ref', var_name], value]] + +def complex_if(var_prefix, statements): + """Create a statement of the form + + if (<var_prefix>a > 0.0) { + if (<var_prefix>b > 0.0) { + <statements> + } + } + + This is useful in testing jump lowering, because if <statements> + ends in a jump, lower_jumps.cpp won't try to combine this + construct with the code that follows it, as it might do for a + simple if. + + All variables used in the if statement are prefixed with + var_prefix. This can be used to ensure uniqueness. + """ + check_sexp(statements) + return simple_if(var_prefix + 'a', simple_if(var_prefix + 'b', statements)) + +def declare_execute_flag(): + """Create the statements that lower_jumps.cpp uses to declare and + initialize the temporary boolean execute_flag. + """ + return declare_temp('bool', 'execute_flag') + \ + assign_x('execute_flag', const_bool(True)) + +def declare_return_flag(): + """Create the statements that lower_jumps.cpp uses to declare and + initialize the temporary boolean return_flag. + """ + return declare_temp('bool', 'return_flag') + \ + assign_x('return_flag', const_bool(False)) + +def declare_return_value(): + """Create the statements that lower_jumps.cpp uses to declare and + initialize the temporary variable return_value. Assume that + return_value is a float. + """ + return declare_temp('float', 'return_value') + +def declare_break_flag(): + """Create the statements that lower_jumps.cpp uses to declare and + initialize the temporary boolean break_flag. + """ + return declare_temp('bool', 'break_flag') + \ + assign_x('break_flag', const_bool(False)) + +def lowered_return_simple(value = None): + """Create the statements that lower_jumps.cpp lowers a return + statement to, in situations where it does not need to clear the + execute flag. 
+ """ + if value: + result = assign_x('return_value', value) + else: + result = [] + return result + assign_x('return_flag', const_bool(True)) + +def lowered_return(value = None): + """Create the statements that lower_jumps.cpp lowers a return + statement to, in situations where it needs to clear the execute + flag. + """ + return lowered_return_simple(value) + \ + assign_x('execute_flag', const_bool(False)) + +def lowered_continue(): + """Create the statement that lower_jumps.cpp lowers a continue + statement to. + """ + return assign_x('execute_flag', const_bool(False)) + +def lowered_break_simple(): + """Create the statement that lower_jumps.cpp lowers a break + statement to, in situations where it does not need to clear the + execute flag. + """ + return assign_x('break_flag', const_bool(True)) + +def lowered_break(): + """Create the statement that lower_jumps.cpp lowers a break + statement to, in situations where it needs to clear the execute + flag. + """ + return lowered_break_simple() + assign_x('execute_flag', const_bool(False)) + +def if_execute_flag(statements): + """Wrap statements in an if test so that they will only execute if + execute_flag is True. + """ + check_sexp(statements) + return [['if', ['var_ref', 'execute_flag'], statements, []]] + +def if_not_return_flag(statements): + """Wrap statements in an if test so that they will only execute if + return_flag is False. + """ + check_sexp(statements) + return [['if', ['var_ref', 'return_flag'], [], statements]] + +def final_return(): + """Create the return statement that lower_jumps.cpp places at the + end of a function when lowering returns. + """ + return [['return', ['var_ref', 'return_value']]] + +def final_break(): + """Create the conditional break statement that lower_jumps.cpp + places at the end of a function when lowering breaks. 
+ """ + return [['if', ['var_ref', 'break_flag'], break_(), []]] + +def bash_quote(*args): + """Quote the arguments appropriately so that bash will understand + each argument as a single word. + """ + def quote_word(word): + for c in word: + if not (c.isalpha() or c.isdigit() or c in '@%_-+=:,./'): + break + else: + if not word: + return "''" + return word + return "'{0}'".format(word.replace("'", "'\"'\"'")) + return ' '.join(quote_word(word) for word in args) + +def create_test_case(doc_string, input_sexp, expected_sexp, test_name, + pull_out_jumps=False, lower_sub_return=False, + lower_main_return=False, lower_continue=False, + lower_break=False): + """Create a test case that verifies that do_lower_jumps transforms + the given code in the expected way. + """ + doc_lines = [line.strip() for line in doc_string.splitlines()] + doc_string = ''.join('# {0}\n'.format(line) for line in doc_lines if line != '') + check_sexp(input_sexp) + check_sexp(expected_sexp) + input_str = sexp_to_string(sort_decls(input_sexp)) + expected_output = sexp_to_string(sort_decls(expected_sexp)) + + optimization = ( + 'do_lower_jumps({0:d}, {1:d}, {2:d}, {3:d}, {4:d})'.format( + pull_out_jumps, lower_sub_return, lower_main_return, + lower_continue, lower_break)) + args = ['../../glsl_test', 'optpass', '--quiet', '--input-ir', optimization] + test_file = '{0}.opt_test'.format(test_name) + with open(test_file, 'w') as f: + f.write('#!/usr/bin/env bash\n#\n# This file was generated by create_test_cases.py.\n#\n') + f.write(doc_string) + f.write('{0} <<EOF\n'.format(bash_quote(*args))) + f.write('{0}\nEOF\n'.format(input_str)) + os.chmod(test_file, 0774) + expected_file = '{0}.opt_test.expected'.format(test_name) + with open(expected_file, 'w') as f: + f.write('{0}\n'.format(expected_output)) + +def test_lower_returns_main(): + doc_string = """Test that do_lower_jumps respects the lower_main_return + flag in deciding whether to lower returns in the main + function. 
+ """ + input_sexp = make_test_case('main', 'void', ( + complex_if('', return_()) + )) + expected_sexp = make_test_case('main', 'void', ( + declare_execute_flag() + + declare_return_flag() + + complex_if('', lowered_return()) + )) + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_returns_main_true', + lower_main_return=True) + create_test_case(doc_string, input_sexp, input_sexp, 'lower_returns_main_false', + lower_main_return=False) + +def test_lower_returns_sub(): + doc_string = """Test that do_lower_jumps respects the lower_sub_return flag + in deciding whether to lower returns in subroutines. + """ + input_sexp = make_test_case('sub', 'void', ( + complex_if('', return_()) + )) + expected_sexp = make_test_case('sub', 'void', ( + declare_execute_flag() + + declare_return_flag() + + complex_if('', lowered_return()) + )) + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_returns_sub_true', + lower_sub_return=True) + create_test_case(doc_string, input_sexp, input_sexp, 'lower_returns_sub_false', + lower_sub_return=False) + +def test_lower_returns_1(): + doc_string = """Test that a void return at the end of a function is + eliminated. + """ + input_sexp = make_test_case('main', 'void', ( + assign_x('a', const_float(1)) + + return_() + )) + expected_sexp = make_test_case('main', 'void', ( + assign_x('a', const_float(1)) + )) + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_returns_1', + lower_main_return=True) + +def test_lower_returns_2(): + doc_string = """Test that lowering is not performed on a non-void return at + the end of subroutine. + """ + input_sexp = make_test_case('sub', 'float', ( + assign_x('a', const_float(1)) + + return_(const_float(1)) + )) + create_test_case(doc_string, input_sexp, input_sexp, 'lower_returns_2', + lower_sub_return=True) + +def test_lower_returns_3(): + doc_string = """Test lowering of returns when there is one nested inside a + complex structure of ifs, and one at the end of a function. 
+ + In this case, the latter return needs to be lowered because it + will not be at the end of the function once the final return + is inserted. + """ + input_sexp = make_test_case('sub', 'float', ( + complex_if('', return_(const_float(1))) + + return_(const_float(2)) + )) + expected_sexp = make_test_case('sub', 'float', ( + declare_execute_flag() + + declare_return_value() + + declare_return_flag() + + complex_if('', lowered_return(const_float(1))) + + if_execute_flag(lowered_return(const_float(2))) + + final_return() + )) + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_returns_3', + lower_sub_return=True) + +def test_lower_returns_4(): + doc_string = """Test that returns are properly lowered when they occur in + both branches of an if-statement. + """ + input_sexp = make_test_case('sub', 'float', ( + simple_if('a', return_(const_float(1)), + return_(const_float(2))) + )) + expected_sexp = make_test_case('sub', 'float', ( + declare_execute_flag() + + declare_return_value() + + declare_return_flag() + + simple_if('a', lowered_return(const_float(1)), + lowered_return(const_float(2))) + + final_return() + )) + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_returns_4', + lower_sub_return=True) + +def test_lower_unified_returns(): + doc_string = """If both branches of an if statement end in a return, and + pull_out_jumps is True, then those returns should be lifted + outside the if and then properly lowered. + + Verify that this lowering occurs during the same pass as the + lowering of other returns by checking that extra temporary + variables aren't generated. 
+ """ + input_sexp = make_test_case('main', 'void', ( + complex_if('a', return_()) + + simple_if('b', simple_if('c', return_(), return_())) + )) + expected_sexp = make_test_case('main', 'void', ( + declare_execute_flag() + + declare_return_flag() + + complex_if('a', lowered_return()) + + if_execute_flag(simple_if('b', (simple_if('c', [], []) + + lowered_return()))) + )) + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_unified_returns', + lower_main_return=True, pull_out_jumps=True) + +def test_lower_pulled_out_jump(): + doc_string = """If one branch of an if ends in a jump, and control cannot + fall out the bottom of the other branch, and pull_out_jumps is + True, then the jump is lifted outside the if. + + Verify that this lowering occurs during the same pass as the + lowering of other jumps by checking that extra temporary + variables aren't generated. + """ + input_sexp = make_test_case('main', 'void', ( + complex_if('a', return_()) + + loop(simple_if('b', simple_if('c', break_(), continue_()), + return_())) + + assign_x('d', const_float(1)) + )) + # Note: optimization produces two other effects: the break + # gets lifted out of the if statements, and the code after the + # loop gets guarded so that it only executes if the return + # flag is clear. 
+ expected_sexp = make_test_case('main', 'void', ( + declare_execute_flag() + + declare_return_flag() + + complex_if('a', lowered_return()) + + if_execute_flag( + loop(simple_if('b', simple_if('c', [], continue_()), + lowered_return_simple()) + + break_()) + + if_not_return_flag(assign_x('d', const_float(1)))) + )) + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_pulled_out_jump', + lower_main_return=True, pull_out_jumps=True) + +def test_lower_breaks_1(): + doc_string = """If a loop contains an unconditional break at the bottom of + it, it should not be lowered.""" + input_sexp = make_test_case('main', 'void', ( + loop(assign_x('a', const_float(1)) + + break_()) + )) + expected_sexp = input_sexp + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_breaks_1', lower_break=True) + +def test_lower_breaks_2(): + doc_string = """If a loop contains a conditional break at the bottom of it, + it should not be lowered if it is in the then-clause. + """ + input_sexp = make_test_case('main', 'void', ( + loop(assign_x('a', const_float(1)) + + simple_if('b', break_())) + )) + expected_sexp = input_sexp + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_breaks_2', lower_break=True) + +def test_lower_breaks_3(): + doc_string = """If a loop contains a conditional break at the bottom of it, + it should not be lowered if it is in the then-clause, even if + there are statements preceding the break. + """ + input_sexp = make_test_case('main', 'void', ( + loop(assign_x('a', const_float(1)) + + simple_if('b', (assign_x('c', const_float(1)) + + break_()))) + )) + expected_sexp = input_sexp + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_breaks_3', lower_break=True) + +def test_lower_breaks_4(): + doc_string = """If a loop contains a conditional break at the bottom of it, + it should not be lowered if it is in the else-clause. 
+ """ + input_sexp = make_test_case('main', 'void', ( + loop(assign_x('a', const_float(1)) + + simple_if('b', [], break_())) + )) + expected_sexp = input_sexp + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_breaks_4', lower_break=True) + +def test_lower_breaks_5(): + doc_string = """If a loop contains a conditional break at the bottom of it, + it should not be lowered if it is in the else-clause, even if + there are statements preceding the break. + """ + input_sexp = make_test_case('main', 'void', ( + loop(assign_x('a', const_float(1)) + + simple_if('b', [], (assign_x('c', const_float(1)) + + break_()))) + )) + expected_sexp = input_sexp + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_breaks_5', lower_break=True) + +def test_lower_breaks_6(): + doc_string = """If a loop contains conditional breaks and continues, and + ends in an unconditional break, then the unconditional break + needs to be lowered, because it will no longer be at the end + of the loop after the final break is added. + """ + input_sexp = make_test_case('main', 'void', ( + loop(simple_if('a', (complex_if('b', continue_()) + + complex_if('c', break_()))) + + break_()) + )) + expected_sexp = make_test_case('main', 'void', ( + declare_break_flag() + + loop(declare_execute_flag() + + simple_if( + 'a', + (complex_if('b', lowered_continue()) + + if_execute_flag( + complex_if('c', lowered_break())))) + + if_execute_flag(lowered_break_simple()) + + final_break()) + )) + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_breaks_6', + lower_break=True, lower_continue=True) + +def test_lower_guarded_conditional_break(): + doc_string = """Normally a conditional break at the end of a loop isn't + lowered, however if the conditional break gets placed inside + an if(execute_flag) because of earlier lowering of continues, + then the break needs to be lowered. 
+ """ + input_sexp = make_test_case('main', 'void', ( + loop(complex_if('a', continue_()) + + simple_if('b', break_())) + )) + expected_sexp = make_test_case('main', 'void', ( + declare_break_flag() + + loop(declare_execute_flag() + + complex_if('a', lowered_continue()) + + if_execute_flag(simple_if('b', lowered_break())) + + final_break()) + )) + create_test_case(doc_string, input_sexp, expected_sexp, 'lower_guarded_conditional_break', + lower_break=True, lower_continue=True) + +def test_remove_continue_at_end_of_loop(): + doc_string = """Test that a redundant continue-statement at the end of a + loop is removed. + """ + input_sexp = make_test_case('main', 'void', ( + loop(assign_x('a', const_float(1)) + + continue_()) + )) + expected_sexp = make_test_case('main', 'void', ( + loop(assign_x('a', const_float(1))) + )) + create_test_case(doc_string, input_sexp, expected_sexp, 'remove_continue_at_end_of_loop') + +def test_lower_return_void_at_end_of_loop(): + doc_string = """Test that a return of void at the end of a loop is properly + lowered. + """ + input_sexp = make_test_case('main', 'void', ( + loop(assign_x('a', const_float(1)) + + return_()) + + assign_x('b', const_float(2)) + )) + expected_sexp = make_test_case('main', 'void', ( + declare_return_flag() + + loop(assign_x('a', const_float(1)) + + lowered_return_simple() + + break_()) + + if_not_return_flag(assign_x('b', const_float(2))) + )) + create_test_case(doc_string, input_sexp, input_sexp, 'return_void_at_end_of_loop_lower_nothing') + create_test_case(doc_string, input_sexp, expected_sexp, 'return_void_at_end_of_loop_lower_return', + lower_main_return=True) + create_test_case(doc_string, input_sexp, expected_sexp, 'return_void_at_end_of_loop_lower_return_and_break', + lower_main_return=True, lower_break=True) + +def test_lower_return_non_void_at_end_of_loop(): + doc_string = """Test that a non-void return at the end of a loop is + properly lowered. 
+ """ + input_sexp = make_test_case('sub', 'float', ( + loop(assign_x('a', const_float(1)) + + return_(const_float(2))) + + assign_x('b', const_float(3)) + + return_(const_float(4)) + )) + expected_sexp = make_test_case('sub', 'float', ( + declare_execute_flag() + + declare_return_value() + + declare_return_flag() + + loop(assign_x('a', const_float(1)) + + lowered_return_simple(const_float(2)) + + break_()) + + if_not_return_flag(assign_x('b', const_float(3)) + + lowered_return(const_float(4))) + + final_return() + )) + create_test_case(doc_string, input_sexp, input_sexp, 'return_non_void_at_end_of_loop_lower_nothing') + create_test_case(doc_string, input_sexp, expected_sexp, 'return_non_void_at_end_of_loop_lower_return', + lower_sub_return=True) + create_test_case(doc_string, input_sexp, expected_sexp, 'return_non_void_at_end_of_loop_lower_return_and_break', + lower_sub_return=True, lower_break=True) + +if __name__ == '__main__': + test_lower_returns_main() + test_lower_returns_sub() + test_lower_returns_1() + test_lower_returns_2() + test_lower_returns_3() + test_lower_returns_4() + test_lower_unified_returns() + test_lower_pulled_out_jump() + test_lower_breaks_1() + test_lower_breaks_2() + test_lower_breaks_3() + test_lower_breaks_4() + test_lower_breaks_5() + test_lower_breaks_6() + test_lower_guarded_conditional_break() + test_remove_continue_at_end_of_loop() + test_lower_return_void_at_end_of_loop() + test_lower_return_non_void_at_end_of_loop() diff --git a/src/compiler/glsl/tests/optimization-test b/src/compiler/glsl/tests/optimization-test new file mode 100755 index 0000000..26a51be --- /dev/null +++ b/src/compiler/glsl/tests/optimization-test @@ -0,0 +1,42 @@ +#!/usr/bin/env bash + +if [ ! 
-z "$srcdir" ]; then + compare_ir=`pwd`/tests/compare_ir +else + compare_ir=./compare_ir +fi + +total=0 +pass=0 + +echo "====== Generating tests ======" +for dir in tests/*/; do + if [ -e "${dir}create_test_cases.py" ]; then + cd $dir; $PYTHON2 create_test_cases.py; cd .. + fi + echo "$dir" +done + +echo "====== Testing optimization passes ======" +for test in `find . -iname '*.opt_test'`; do + echo -n "Testing $test..." + (cd `dirname "$test"`; ./`basename "$test"`) > "$test.out" 2>&1 + total=$((total+1)) + if $PYTHON2 $PYTHON_FLAGS $compare_ir "$test.expected" "$test.out" >/dev/null 2>&1; then + echo "PASS" + pass=$((pass+1)) + else + echo "FAIL" + $PYTHON2 $PYTHON_FLAGS $compare_ir "$test.expected" "$test.out" + fi +done + +echo "" +echo "$pass/$total tests returned correct results" +echo "" + +if [[ $pass == $total ]]; then + exit 0 +else + exit 1 +fi diff --git a/src/compiler/glsl/tests/sampler_types_test.cpp b/src/compiler/glsl/tests/sampler_types_test.cpp new file mode 100644 index 0000000..04dd65e --- /dev/null +++ b/src/compiler/glsl/tests/sampler_types_test.cpp @@ -0,0 +1,100 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include <gtest/gtest.h> +#include "main/compiler.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "ir.h" + +/** + * \file sampler_types_test.cpp + * + * Test that built-in sampler types have the right properties. + */ + +#define ARRAY EXPECT_TRUE(type->sampler_array); +#define NONARRAY EXPECT_FALSE(type->sampler_array); +#define SHADOW EXPECT_TRUE(type->sampler_shadow); +#define COLOR EXPECT_FALSE(type->sampler_shadow); + +#define T(TYPE, DIM, DATA_TYPE, ARR, SHAD, COMPS) \ +TEST(sampler_types, TYPE) \ +{ \ + const glsl_type *type = glsl_type::TYPE##_type; \ + EXPECT_EQ(GLSL_TYPE_SAMPLER, type->base_type); \ + EXPECT_EQ(DIM, type->sampler_dimensionality); \ + EXPECT_EQ(DATA_TYPE, type->sampler_type); \ + ARR; \ + SHAD; \ + EXPECT_EQ(COMPS, type->coordinate_components()); \ +} + +T( sampler1D, GLSL_SAMPLER_DIM_1D, GLSL_TYPE_FLOAT, NONARRAY, COLOR, 1) +T( sampler2D, GLSL_SAMPLER_DIM_2D, GLSL_TYPE_FLOAT, NONARRAY, COLOR, 2) +T( sampler3D, GLSL_SAMPLER_DIM_3D, GLSL_TYPE_FLOAT, NONARRAY, COLOR, 3) +T( samplerCube, GLSL_SAMPLER_DIM_CUBE, GLSL_TYPE_FLOAT, NONARRAY, COLOR, 3) +T( sampler1DArray, GLSL_SAMPLER_DIM_1D, GLSL_TYPE_FLOAT, ARRAY, COLOR, 2) +T( sampler2DArray, GLSL_SAMPLER_DIM_2D, GLSL_TYPE_FLOAT, ARRAY, COLOR, 3) +T( samplerCubeArray, GLSL_SAMPLER_DIM_CUBE, GLSL_TYPE_FLOAT, ARRAY, COLOR, 4) +T( sampler2DRect, GLSL_SAMPLER_DIM_RECT, GLSL_TYPE_FLOAT, NONARRAY, COLOR, 2) +T( samplerBuffer, GLSL_SAMPLER_DIM_BUF, GLSL_TYPE_FLOAT, NONARRAY, COLOR, 1) +T( 
sampler2DMS, GLSL_SAMPLER_DIM_MS, GLSL_TYPE_FLOAT, NONARRAY, COLOR, 2) +T( sampler2DMSArray, GLSL_SAMPLER_DIM_MS, GLSL_TYPE_FLOAT, ARRAY, COLOR, 3) +T(isampler1D, GLSL_SAMPLER_DIM_1D, GLSL_TYPE_INT, NONARRAY, COLOR, 1) +T(isampler2D, GLSL_SAMPLER_DIM_2D, GLSL_TYPE_INT, NONARRAY, COLOR, 2) +T(isampler3D, GLSL_SAMPLER_DIM_3D, GLSL_TYPE_INT, NONARRAY, COLOR, 3) +T(isamplerCube, GLSL_SAMPLER_DIM_CUBE, GLSL_TYPE_INT, NONARRAY, COLOR, 3) +T(isampler1DArray, GLSL_SAMPLER_DIM_1D, GLSL_TYPE_INT, ARRAY, COLOR, 2) +T(isampler2DArray, GLSL_SAMPLER_DIM_2D, GLSL_TYPE_INT, ARRAY, COLOR, 3) +T(isamplerCubeArray, GLSL_SAMPLER_DIM_CUBE, GLSL_TYPE_INT, ARRAY, COLOR, 4) +T(isampler2DRect, GLSL_SAMPLER_DIM_RECT, GLSL_TYPE_INT, NONARRAY, COLOR, 2) +T(isamplerBuffer, GLSL_SAMPLER_DIM_BUF, GLSL_TYPE_INT, NONARRAY, COLOR, 1) +T(isampler2DMS, GLSL_SAMPLER_DIM_MS, GLSL_TYPE_INT, NONARRAY, COLOR, 2) +T(isampler2DMSArray, GLSL_SAMPLER_DIM_MS, GLSL_TYPE_INT, ARRAY, COLOR, 3) +T(usampler1D, GLSL_SAMPLER_DIM_1D, GLSL_TYPE_UINT, NONARRAY, COLOR, 1) +T(usampler2D, GLSL_SAMPLER_DIM_2D, GLSL_TYPE_UINT, NONARRAY, COLOR, 2) +T(usampler3D, GLSL_SAMPLER_DIM_3D, GLSL_TYPE_UINT, NONARRAY, COLOR, 3) +T(usamplerCube, GLSL_SAMPLER_DIM_CUBE, GLSL_TYPE_UINT, NONARRAY, COLOR, 3) +T(usampler1DArray, GLSL_SAMPLER_DIM_1D, GLSL_TYPE_UINT, ARRAY, COLOR, 2) +T(usampler2DArray, GLSL_SAMPLER_DIM_2D, GLSL_TYPE_UINT, ARRAY, COLOR, 3) +T(usamplerCubeArray, GLSL_SAMPLER_DIM_CUBE, GLSL_TYPE_UINT, ARRAY, COLOR, 4) +T(usampler2DRect, GLSL_SAMPLER_DIM_RECT, GLSL_TYPE_UINT, NONARRAY, COLOR, 2) +T(usamplerBuffer, GLSL_SAMPLER_DIM_BUF, GLSL_TYPE_UINT, NONARRAY, COLOR, 1) +T(usampler2DMS, GLSL_SAMPLER_DIM_MS, GLSL_TYPE_UINT, NONARRAY, COLOR, 2) +T(usampler2DMSArray, GLSL_SAMPLER_DIM_MS, GLSL_TYPE_UINT, ARRAY, COLOR, 3) + +T(sampler1DShadow, GLSL_SAMPLER_DIM_1D, GLSL_TYPE_FLOAT, NONARRAY, SHADOW, 1) +T(sampler2DShadow, GLSL_SAMPLER_DIM_2D, GLSL_TYPE_FLOAT, NONARRAY, SHADOW, 2) +T(samplerCubeShadow, GLSL_SAMPLER_DIM_CUBE, 
GLSL_TYPE_FLOAT, NONARRAY, SHADOW, 3) + +T(sampler1DArrayShadow, + GLSL_SAMPLER_DIM_1D, GLSL_TYPE_FLOAT, ARRAY, SHADOW, 2) +T(sampler2DArrayShadow, + GLSL_SAMPLER_DIM_2D, GLSL_TYPE_FLOAT, ARRAY, SHADOW, 3) +T(samplerCubeArrayShadow, + GLSL_SAMPLER_DIM_CUBE, GLSL_TYPE_FLOAT, ARRAY, SHADOW, 4) +T(sampler2DRectShadow, + GLSL_SAMPLER_DIM_RECT, GLSL_TYPE_FLOAT, NONARRAY, SHADOW, 2) + +T(samplerExternalOES, + GLSL_SAMPLER_DIM_EXTERNAL, GLSL_TYPE_FLOAT, NONARRAY, COLOR, 2) diff --git a/src/compiler/glsl/tests/set_uniform_initializer_tests.cpp b/src/compiler/glsl/tests/set_uniform_initializer_tests.cpp new file mode 100644 index 0000000..0b1f66c --- /dev/null +++ b/src/compiler/glsl/tests/set_uniform_initializer_tests.cpp @@ -0,0 +1,594 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#include <gtest/gtest.h> +#include "main/compiler.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "util/ralloc.h" +#include "uniform_initializer_utils.h" + +namespace linker { +extern void +set_uniform_initializer(void *mem_ctx, gl_shader_program *prog, + const char *name, const glsl_type *type, + ir_constant *val, unsigned int boolean_true); +} + +class set_uniform_initializer : public ::testing::Test { +public: + virtual void SetUp(); + virtual void TearDown(); + + /** + * Index of the uniform to be tested. + * + * All of the \c set_uniform_initializer tests create several slots for + * uniforms. All but one of the slots are fake. This field holds the index + * of the slot for the uniform being tested. + */ + unsigned actual_index; + + /** + * Name of the uniform to be tested. + */ + const char *name; + + /** + * Shader program used in the test. + */ + struct gl_shader_program *prog; + + /** + * Ralloc memory context used for all temporary allocations. + */ + void *mem_ctx; +}; + +void +set_uniform_initializer::SetUp() +{ + this->mem_ctx = ralloc_context(NULL); + this->prog = rzalloc(NULL, struct gl_shader_program); + + /* Set default values used by the test cases. + */ + this->actual_index = 1; + this->name = "i"; +} + +void +set_uniform_initializer::TearDown() +{ + ralloc_free(this->mem_ctx); + this->mem_ctx = NULL; + + ralloc_free(this->prog); + this->prog = NULL; +} + +/** + * Create some uniform storage for a program. + * + * \param prog Program to get some storage + * \param num_storage Total number of storage slots + * \param index_to_set Storage slot that will actually get a value + * \param name Name for the actual storage slot + * \param type Type for the elements of the actual storage slot + * \param array_size Size for the array of the actual storage slot. This + * should be zero for non-arrays. 
+ */ +static unsigned +establish_uniform_storage(struct gl_shader_program *prog, unsigned num_storage, + unsigned index_to_set, const char *name, + const glsl_type *type, unsigned array_size) +{ + const unsigned elements = MAX2(1, array_size); + const unsigned data_components = elements * type->components(); + const unsigned total_components = MAX2(17, (data_components + + type->components())); + const unsigned red_zone_components = total_components - data_components; + + prog->UniformStorage = rzalloc_array(prog, struct gl_uniform_storage, + num_storage); + prog->NumUniformStorage = num_storage; + + prog->UniformStorage[index_to_set].name = (char *) name; + prog->UniformStorage[index_to_set].type = type; + prog->UniformStorage[index_to_set].array_elements = array_size; + prog->UniformStorage[index_to_set].initialized = false; + for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) { + prog->UniformStorage[index_to_set].opaque[sh].index = ~0; + prog->UniformStorage[index_to_set].opaque[sh].active = false; + } + prog->UniformStorage[index_to_set].num_driver_storage = 0; + prog->UniformStorage[index_to_set].driver_storage = NULL; + prog->UniformStorage[index_to_set].storage = + rzalloc_array(prog, union gl_constant_value, total_components); + + fill_storage_array_with_sentinels(prog->UniformStorage[index_to_set].storage, + data_components, + red_zone_components); + + for (unsigned i = 0; i < num_storage; i++) { + if (i == index_to_set) + continue; + + prog->UniformStorage[i].name = (char *) "invalid slot"; + prog->UniformStorage[i].type = glsl_type::void_type; + prog->UniformStorage[i].array_elements = 0; + prog->UniformStorage[i].initialized = false; + for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) { + prog->UniformStorage[i].opaque[sh].index = ~0; + prog->UniformStorage[i].opaque[sh].active = false; + } + prog->UniformStorage[i].num_driver_storage = 0; + prog->UniformStorage[i].driver_storage = NULL; + prog->UniformStorage[i].storage = NULL; + } + + return 
red_zone_components; +} + +/** + * Verify that the correct uniform is marked as having been initialized. + */ +static void +verify_initialization(struct gl_shader_program *prog, unsigned actual_index) +{ + for (unsigned i = 0; i < prog->NumUniformStorage; i++) { + if (i == actual_index) { + EXPECT_TRUE(prog->UniformStorage[actual_index].initialized); + } else { + EXPECT_FALSE(prog->UniformStorage[i].initialized); + } + } +} + +static void +non_array_test(void *mem_ctx, struct gl_shader_program *prog, + unsigned actual_index, const char *name, + enum glsl_base_type base_type, + unsigned columns, unsigned rows) +{ + const glsl_type *const type = + glsl_type::get_instance(base_type, rows, columns); + + unsigned red_zone_components = + establish_uniform_storage(prog, 3, actual_index, name, type, 0); + + ir_constant *val; + generate_data(mem_ctx, base_type, columns, rows, val); + + linker::set_uniform_initializer(mem_ctx, prog, name, type, val, 0xF00F); + + verify_initialization(prog, actual_index); + verify_data(prog->UniformStorage[actual_index].storage, 0, val, + red_zone_components, 0xF00F); +} + +TEST_F(set_uniform_initializer, int_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 1); +} + +TEST_F(set_uniform_initializer, ivec2_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 2); +} + +TEST_F(set_uniform_initializer, ivec3_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 3); +} + +TEST_F(set_uniform_initializer, ivec4_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 4); +} + +TEST_F(set_uniform_initializer, uint_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 1); +} + +TEST_F(set_uniform_initializer, uvec2_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 2); +} + +TEST_F(set_uniform_initializer, uvec3_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, 
GLSL_TYPE_UINT, 1, 3); +} + +TEST_F(set_uniform_initializer, uvec4_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 4); +} + +TEST_F(set_uniform_initializer, bool_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 1); +} + +TEST_F(set_uniform_initializer, bvec2_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 2); +} + +TEST_F(set_uniform_initializer, bvec3_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 3); +} + +TEST_F(set_uniform_initializer, bvec4_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 4); +} + +TEST_F(set_uniform_initializer, float_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 1); +} + +TEST_F(set_uniform_initializer, vec2_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 2); +} + +TEST_F(set_uniform_initializer, vec3_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 3); +} + +TEST_F(set_uniform_initializer, vec4_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 4); +} + +TEST_F(set_uniform_initializer, mat2x2_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 2); +} + +TEST_F(set_uniform_initializer, mat2x3_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 3); +} + +TEST_F(set_uniform_initializer, mat2x4_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 4); +} + +TEST_F(set_uniform_initializer, mat3x2_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 2); +} + +TEST_F(set_uniform_initializer, mat3x3_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 3); +} + +TEST_F(set_uniform_initializer, mat3x4_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 4); 
+} + +TEST_F(set_uniform_initializer, mat4x2_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 2); +} + +TEST_F(set_uniform_initializer, mat4x3_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 3); +} + +TEST_F(set_uniform_initializer, mat4x4_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 4); +} + +static void +array_test(void *mem_ctx, struct gl_shader_program *prog, + unsigned actual_index, const char *name, + enum glsl_base_type base_type, + unsigned columns, unsigned rows, unsigned array_size, + unsigned excess_data_size) +{ + const glsl_type *const element_type = + glsl_type::get_instance(base_type, rows, columns); + + const unsigned red_zone_components = + establish_uniform_storage(prog, 3, actual_index, name, element_type, + array_size); + + /* The constant value generated may have more array elements than the + * uniform that it initializes. In the real compiler and linker this can + * happen when a uniform array is compacted because some of the tail + * elements are not used. In this case, the type of the uniform will be + * modified, but the initializer will not. 
+ */ + ir_constant *val; + generate_array_data(mem_ctx, base_type, columns, rows, + array_size + excess_data_size, val); + + linker::set_uniform_initializer(mem_ctx, prog, name, element_type, val, + 0xF00F); + + verify_initialization(prog, actual_index); + verify_data(prog->UniformStorage[actual_index].storage, array_size, + val, red_zone_components, 0xF00F); +} + +TEST_F(set_uniform_initializer, int_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 1, 4, 0); +} + +TEST_F(set_uniform_initializer, ivec2_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 2, 4, 0); +} + +TEST_F(set_uniform_initializer, ivec3_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 3, 4, 0); +} + +TEST_F(set_uniform_initializer, ivec4_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 4, 4, 0); +} + +TEST_F(set_uniform_initializer, uint_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 1, 4, 0); +} + +TEST_F(set_uniform_initializer, uvec2_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 2, 4, 0); +} + +TEST_F(set_uniform_initializer, uvec3_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 3, 4, 0); +} + +TEST_F(set_uniform_initializer, uvec4_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 4, 4, 0); +} + +TEST_F(set_uniform_initializer, bool_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 1, 4, 0); +} + +TEST_F(set_uniform_initializer, bvec2_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 2, 4, 0); +} + +TEST_F(set_uniform_initializer, bvec3_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 3, 4, 0); +} + +TEST_F(set_uniform_initializer, bvec4_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, 
GLSL_TYPE_BOOL, 1, 4, 4, 0); +} + +TEST_F(set_uniform_initializer, float_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 1, 4, 0); +} + +TEST_F(set_uniform_initializer, vec2_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 2, 4, 0); +} + +TEST_F(set_uniform_initializer, vec3_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 3, 4, 0); +} + +TEST_F(set_uniform_initializer, vec4_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 4, 4, 0); +} + +TEST_F(set_uniform_initializer, mat2x2_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 2, 4, 0); +} + +TEST_F(set_uniform_initializer, mat2x3_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 3, 4, 0); +} + +TEST_F(set_uniform_initializer, mat2x4_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 4, 4, 0); +} + +TEST_F(set_uniform_initializer, mat3x2_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 2, 4, 0); +} + +TEST_F(set_uniform_initializer, mat3x3_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 3, 4, 0); +} + +TEST_F(set_uniform_initializer, mat3x4_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 4, 4, 0); +} + +TEST_F(set_uniform_initializer, mat4x2_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 2, 4, 0); +} + +TEST_F(set_uniform_initializer, mat4x3_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 3, 4, 0); +} + +TEST_F(set_uniform_initializer, mat4x4_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 4, 4, 0); +} + +TEST_F(set_uniform_initializer, int_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 
1, 4, 5); +} + +TEST_F(set_uniform_initializer, ivec2_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 2, 4, 5); +} + +TEST_F(set_uniform_initializer, ivec3_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 3, 4, 5); +} + +TEST_F(set_uniform_initializer, ivec4_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 4, 4, 5); +} + +TEST_F(set_uniform_initializer, uint_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 1, 4, 5); +} + +TEST_F(set_uniform_initializer, uvec2_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 2, 4, 5); +} + +TEST_F(set_uniform_initializer, uvec3_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 3, 4, 5); +} + +TEST_F(set_uniform_initializer, uvec4_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 4, 4, 5); +} + +TEST_F(set_uniform_initializer, bool_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 1, 4, 5); +} + +TEST_F(set_uniform_initializer, bvec2_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 2, 4, 5); +} + +TEST_F(set_uniform_initializer, bvec3_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 3, 4, 5); +} + +TEST_F(set_uniform_initializer, bvec4_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 4, 4, 5); +} + +TEST_F(set_uniform_initializer, float_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 1, 4, 5); +} + +TEST_F(set_uniform_initializer, vec2_array_uniform_excess_initializer) +{ + 
array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 2, 4, 5); +} + +TEST_F(set_uniform_initializer, vec3_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 3, 4, 5); +} + +TEST_F(set_uniform_initializer, vec4_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 4, 4, 5); +} + +TEST_F(set_uniform_initializer, mat2x2_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 2, 4, 5); +} + +TEST_F(set_uniform_initializer, mat2x3_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 3, 4, 5); +} + +TEST_F(set_uniform_initializer, mat2x4_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 4, 4, 5); +} + +TEST_F(set_uniform_initializer, mat3x2_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 2, 4, 5); +} + +TEST_F(set_uniform_initializer, mat3x3_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 3, 4, 5); +} + +TEST_F(set_uniform_initializer, mat3x4_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 4, 4, 5); +} + +TEST_F(set_uniform_initializer, mat4x2_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 2, 4, 5); +} + +TEST_F(set_uniform_initializer, mat4x3_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 3, 4, 5); +} + +TEST_F(set_uniform_initializer, mat4x4_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 4, 4, 5); +} diff --git a/src/compiler/glsl/tests/sexps.py b/src/compiler/glsl/tests/sexps.py new file mode 100644 index 0000000..a714af8 --- /dev/null +++ 
b/src/compiler/glsl/tests/sexps.py @@ -0,0 +1,103 @@ +# coding=utf-8 +# +# Copyright © 2011 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# This file contains helper functions for manipulating sexps in Python. +# +# We represent a sexp in Python using nested lists containing strings. +# So, for example, the sexp (constant float (1.000000)) is represented +# as ['constant', 'float', ['1.000000']]. + +import re + +def check_sexp(sexp): + """Verify that the argument is a proper sexp. + + That is, raise an exception if the argument is not a string or a + list, or if it contains anything that is not a string or a list at + any nesting level. 
+ """ + if isinstance(sexp, list): + for s in sexp: + check_sexp(s) + elif not isinstance(sexp, basestring): + raise Exception('Not a sexp: {0!r}'.format(sexp)) + +def parse_sexp(sexp): + """Convert a string, of the form that would be output by mesa, + into a sexp represented as nested lists containing strings. + """ + sexp_token_regexp = re.compile( + '[a-zA-Z_]+(@[0-9]+)?|[0-9]+(\\.[0-9]+)?|[^ \n]') + stack = [[]] + for match in sexp_token_regexp.finditer(sexp): + token = match.group(0) + if token == '(': + stack.append([]) + elif token == ')': + if len(stack) == 1: + raise Exception('Unmatched )') + sexp = stack.pop() + stack[-1].append(sexp) + else: + stack[-1].append(token) + if len(stack) != 1: + raise Exception('Unmatched (') + if len(stack[0]) != 1: + raise Exception('Multiple sexps') + return stack[0][0] + +def sexp_to_string(sexp): + """Convert a sexp, represented as nested lists containing strings, + into a single string of the form parseable by mesa. + """ + if isinstance(sexp, basestring): + return sexp + assert isinstance(sexp, list) + result = '' + for s in sexp: + sub_result = sexp_to_string(s) + if result == '': + result = sub_result + elif '\n' not in result and '\n' not in sub_result and \ + len(result) + len(sub_result) + 1 <= 70: + result += ' ' + sub_result + else: + result += '\n' + sub_result + return '({0})'.format(result.replace('\n', '\n ')) + +def sort_decls(sexp): + """Sort all toplevel variable declarations in sexp. + + This is used to work around the fact that + ir_reader::read_instructions reorders declarations. 
+ """ + assert isinstance(sexp, list) + decls = [] + other_code = [] + for s in sexp: + if isinstance(s, list) and len(s) >= 4 and s[0] == 'declare': + decls.append(s) + else: + other_code.append(s) + return sorted(decls) + other_code + diff --git a/src/compiler/glsl/tests/uniform_initializer_utils.cpp b/src/compiler/glsl/tests/uniform_initializer_utils.cpp new file mode 100644 index 0000000..5006387 --- /dev/null +++ b/src/compiler/glsl/tests/uniform_initializer_utils.cpp @@ -0,0 +1,255 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#include <gtest/gtest.h> +#include "main/mtypes.h" +#include "main/macros.h" +#include "util/ralloc.h" +#include "uniform_initializer_utils.h" +#include <stdio.h> + +void +fill_storage_array_with_sentinels(gl_constant_value *storage, + unsigned data_size, + unsigned red_zone_size) +{ + for (unsigned i = 0; i < data_size; i++) + storage[i].u = 0xDEADBEEF; + + for (unsigned i = 0; i < red_zone_size; i++) + storage[data_size + i].u = 0xBADDC0DE; +} + +/** + * Verify that markers past the end of the real uniform are unmodified + */ +static ::testing::AssertionResult +red_zone_is_intact(gl_constant_value *storage, + unsigned data_size, + unsigned red_zone_size) +{ + for (unsigned i = 0; i < red_zone_size; i++) { + const unsigned idx = data_size + i; + + if (storage[idx].u != 0xBADDC0DE) + return ::testing::AssertionFailure() + << "storage[" << idx << "].u = " << storage[idx].u + << ", exepected data values = " << data_size + << ", red-zone size = " << red_zone_size; + } + + return ::testing::AssertionSuccess(); +} + +static const int values[] = { + 2, 0, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53 +}; + +/** + * Generate a single data element. + * + * This is used by both \c generate_data and \c generate_array_data to create the + * data. + */ +static void +generate_data_element(void *mem_ctx, const glsl_type *type, + ir_constant *&val, unsigned data_index_base) +{ + /* Set the initial data values for the generated constant. 
+ */ + ir_constant_data data; + memset(&data, 0, sizeof(data)); + for (unsigned i = 0; i < type->components(); i++) { + const unsigned idx = (i + data_index_base) % ARRAY_SIZE(values); + switch (type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + data.i[i] = values[idx]; + break; + case GLSL_TYPE_FLOAT: + data.f[i] = float(values[idx]); + break; + case GLSL_TYPE_BOOL: + data.b[i] = bool(values[idx]); + break; + case GLSL_TYPE_DOUBLE: + data.d[i] = double(values[idx]); + break; + case GLSL_TYPE_ATOMIC_UINT: + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_ARRAY: + case GLSL_TYPE_VOID: + case GLSL_TYPE_ERROR: + case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_SUBROUTINE: + ASSERT_TRUE(false); + break; + } + } + + /* Generate and verify the constant. + */ + val = new(mem_ctx) ir_constant(type, &data); + + for (unsigned i = 0; i < type->components(); i++) { + switch (type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + ASSERT_EQ(data.i[i], val->value.i[i]); + break; + case GLSL_TYPE_FLOAT: + ASSERT_EQ(data.f[i], val->value.f[i]); + break; + case GLSL_TYPE_BOOL: + ASSERT_EQ(data.b[i], val->value.b[i]); + break; + case GLSL_TYPE_DOUBLE: + ASSERT_EQ(data.d[i], val->value.d[i]); + break; + case GLSL_TYPE_ATOMIC_UINT: + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_ARRAY: + case GLSL_TYPE_VOID: + case GLSL_TYPE_ERROR: + case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_SUBROUTINE: + ASSERT_TRUE(false); + break; + } + } +} + +void +generate_data(void *mem_ctx, enum glsl_base_type base_type, + unsigned columns, unsigned rows, + ir_constant *&val) +{ + /* Determine what the type of the generated constant should be. 
+ */ + const glsl_type *const type = + glsl_type::get_instance(base_type, rows, columns); + ASSERT_FALSE(type->is_error()); + + generate_data_element(mem_ctx, type, val, 0); +} + +void +generate_array_data(void *mem_ctx, enum glsl_base_type base_type, + unsigned columns, unsigned rows, unsigned array_size, + ir_constant *&val) +{ + /* Determine what the type of the generated constant should be. + */ + const glsl_type *const element_type = + glsl_type::get_instance(base_type, rows, columns); + ASSERT_FALSE(element_type->is_error()); + + const glsl_type *const array_type = + glsl_type::get_array_instance(element_type, array_size); + ASSERT_FALSE(array_type->is_error()); + + /* Set the initial data values for the generated constant. + */ + exec_list values_for_array; + for (unsigned i = 0; i < array_size; i++) { + ir_constant *element; + + generate_data_element(mem_ctx, element_type, element, i); + values_for_array.push_tail(element); + } + + val = new(mem_ctx) ir_constant(array_type, &values_for_array); +} + +/** + * Verify that the data stored for the uniform matches the initializer + * + * \param storage Backing storage for the uniform + * \param storage_array_size Array size of the backing storage. This must be + * less than or equal to the array size of the type + * of \c val. If \c val is not an array, this must + * be zero. + * \param val Value of the initializer for the unifrom. 
+ * \param red_zone + */ +void +verify_data(gl_constant_value *storage, unsigned storage_array_size, + ir_constant *val, unsigned red_zone_size, + unsigned int boolean_true) +{ + if (val->type->base_type == GLSL_TYPE_ARRAY) { + const glsl_type *const element_type = val->array_elements[0]->type; + + for (unsigned i = 0; i < storage_array_size; i++) { + verify_data(storage + (i * element_type->components()), 0, + val->array_elements[i], 0, boolean_true); + } + + const unsigned components = element_type->components(); + + if (red_zone_size > 0) { + EXPECT_TRUE(red_zone_is_intact(storage, + storage_array_size * components, + red_zone_size)); + } + } else { + ASSERT_EQ(0u, storage_array_size); + for (unsigned i = 0; i < val->type->components(); i++) { + switch (val->type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + EXPECT_EQ(val->value.i[i], storage[i].i); + break; + case GLSL_TYPE_FLOAT: + EXPECT_EQ(val->value.f[i], storage[i].f); + break; + case GLSL_TYPE_BOOL: + EXPECT_EQ(val->value.b[i] ? 
boolean_true : 0, storage[i].i); + break; + case GLSL_TYPE_DOUBLE: + EXPECT_EQ(val->value.d[i], *(double *)&storage[i*2].i); + break; + case GLSL_TYPE_ATOMIC_UINT: + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_ARRAY: + case GLSL_TYPE_VOID: + case GLSL_TYPE_ERROR: + case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_SUBROUTINE: + ASSERT_TRUE(false); + break; + } + } + + if (red_zone_size > 0) { + EXPECT_TRUE(red_zone_is_intact(storage, + val->type->components(), + red_zone_size)); + } + } +} diff --git a/src/compiler/glsl/tests/uniform_initializer_utils.h b/src/compiler/glsl/tests/uniform_initializer_utils.h new file mode 100644 index 0000000..b4d0c10 --- /dev/null +++ b/src/compiler/glsl/tests/uniform_initializer_utils.h @@ -0,0 +1,48 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#pragma once + +#include "program/prog_parameter.h" +#include "ir.h" +#include "ir_uniform.h" +/** + * Set the first \c data_size elements of \c storage to 0xDEADBEEF and the + * \c red_zone_size elements that follow them to 0xBADDC0DE. + */ +extern void +fill_storage_array_with_sentinels(gl_constant_value *storage, + unsigned data_size, + unsigned red_zone_size); +/** + * Create a constant of type glsl_type::get_instance(base_type, rows, columns) + * filled with generated test values and return it through \c val. + */ +extern void +generate_data(void *mem_ctx, enum glsl_base_type base_type, + unsigned columns, unsigned rows, + ir_constant *&val); +/** + * Create an array constant with \c array_size elements of type + * glsl_type::get_instance(base_type, rows, columns), each filled with + * generated test values, and return it through \c val. + */ +extern void +generate_array_data(void *mem_ctx, enum glsl_base_type base_type, + unsigned columns, unsigned rows, unsigned array_size, + ir_constant *&val); +/** + * Check that \c storage holds the data of the initializer \c val. + * + * \c storage_array_size must be zero when \c val is not an array. When + * \c red_zone_size is non-zero, the sentinel elements that follow the data + * are checked as well. \c boolean_true is the value expected in storage for + * a boolean component that is \c true. 
+ */ +extern void +verify_data(gl_constant_value *storage, unsigned storage_array_size, + ir_constant *val, unsigned red_zone_size, + unsigned int boolean_true); diff --git a/src/compiler/glsl/tests/varyings_test.cpp b/src/compiler/glsl/tests/varyings_test.cpp new file mode 100644 index 0000000..0c4e0a4 --- /dev/null +++ b/src/compiler/glsl/tests/varyings_test.cpp @@ -0,0 +1,349 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include <gtest/gtest.h> +#include "main/compiler.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "util/ralloc.h" +#include "ir.h" +#include "program/hash_table.h" + +/** + * \file varyings_test.cpp + * + * Test various aspects of linking shader stage inputs and outputs. + */ +/* Declarations of the linker functions exercised by the tests in this file. + * They are declared here directly rather than through a header. + */ +namespace linker { +bool +populate_consumer_input_sets(void *mem_ctx, exec_list *ir, + hash_table *consumer_inputs, + hash_table *consumer_interface_inputs, + ir_variable *consumer_inputs_with_locations[VARYING_SLOT_MAX]); + +ir_variable * +get_matching_input(void *mem_ctx, + const ir_variable *output_var, + hash_table *consumer_inputs, + hash_table *consumer_interface_inputs, + ir_variable *consumer_inputs_with_locations[VARYING_SLOT_MAX]); +} +/** + * Test fixture shared by all tests in this file. + * + * SetUp() creates a ralloc context and two string-keyed hash tables, and + * TearDown() releases them again. + */ +class link_varyings : public ::testing::Test { +public: + link_varyings(); + + virtual void SetUp(); + virtual void TearDown(); + /** + * Build the string "<interface name>.<field name>" for field number + * \c field of \c iface. The string is allocated from \c mem_ctx. + */ + char *interface_field_name(const glsl_type *iface, unsigned field = 0) + { + return ralloc_asprintf(mem_ctx, + "%s.%s", + iface->name, + iface->fields.structure[field].name); + } + /* mem_ctx and the two hash tables are created in SetUp() and released in + * TearDown(); ir is emptied in SetUp(). + */ + void *mem_ctx; + exec_list ir; + hash_table *consumer_inputs; + hash_table *consumer_interface_inputs; + /* simple_interface is the interface type built by the constructor. junk + * is the array passed to the linker functions as + * consumer_inputs_with_locations. + * + * NOTE(review): junk has VARYING_SLOT_TESS_MAX entries while the + * prototypes above are written with VARYING_SLOT_MAX - confirm that the + * two sizes agree. + */ + const glsl_type *simple_interface; + ir_variable *junk[VARYING_SLOT_TESS_MAX]; +}; +/** + * Create the interface type used by the interface block tests. It is + * requested from glsl_type::get_interface_instance with a single vec4 field + * named "v", GLSL_INTERFACE_PACKING_STD140 and the name "simple_interface". + */ +link_varyings::link_varyings() +{ + static const glsl_struct_field f[] = { + glsl_struct_field(glsl_type::vec(4), "v") + }; + + this->simple_interface = + glsl_type::get_interface_instance(f, + ARRAY_SIZE(f), + GLSL_INTERFACE_PACKING_STD140, + "simple_interface"); +} +/** + * Create a fresh ralloc context, empty the IR list and create the two + * string-keyed hash tables. + */ +void +link_varyings::SetUp() +{ + this->mem_ctx = ralloc_context(NULL); + this->ir.make_empty(); + + this->consumer_inputs + = hash_table_ctor(0, hash_table_string_hash, 
hash_table_string_compare); + + this->consumer_interface_inputs + = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare); +} +/** + * Free the ralloc context and destroy both hash tables, resetting each + * pointer to NULL afterwards. + */ +void +link_varyings::TearDown() +{ + ralloc_free(this->mem_ctx); + this->mem_ctx = NULL; + + hash_table_dtor(this->consumer_inputs); + this->consumer_inputs = NULL; + hash_table_dtor(this->consumer_interface_inputs); + this->consumer_interface_inputs = NULL; +} + +/** + * Hash table callback function that counts the elements in the table + * + * The first two arguments are ignored. \c closure must point to the + * unsigned int counter, which is incremented by one on every call. + * + * \sa num_elements + */ +static void +ht_count_callback(const void *, void *, void *closure) +{ + unsigned int *counter = (unsigned int *) closure; + + (*counter)++; +} + +/** + * Helper function to count the number of elements in a hash table. + * + * The count is obtained by running \c ht_count_callback over the table with + * hash_table_call_foreach. + */ +static unsigned +num_elements(hash_table *ht) +{ + unsigned int counter = 0; + + hash_table_call_foreach(ht, ht_count_callback, (void *) &counter); + + return counter; +} + +/** + * Helper function to determine whether a hash table is empty. + */ +static bool +is_empty(hash_table *ht) +{ + return num_elements(ht) == 0; +} +/** + * A single non-interface input named "a" must be the only entry of + * consumer_inputs, and consumer_interface_inputs must stay empty. + */ +TEST_F(link_varyings, single_simple_input) +{ + ir_variable *const v = + new(mem_ctx) ir_variable(glsl_type::vec(4), + "a", + ir_var_shader_in); + + + ir.push_tail(v); + + ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx, + &ir, + consumer_inputs, + consumer_interface_inputs, + junk)); + + EXPECT_EQ((void *) v, hash_table_find(consumer_inputs, "a")); + EXPECT_EQ(1u, num_elements(consumer_inputs)); + EXPECT_TRUE(is_empty(consumer_interface_inputs)); +} +/** + * An input with an explicit location must be stored in the location-indexed + * array at its slot, and both hash tables must stay empty. + */ +TEST_F(link_varyings, gl_ClipDistance) +{ + const glsl_type *const array_8_of_float = + glsl_type::get_array_instance(glsl_type::vec(1), 8); + + ir_variable *const clipdistance = + new(mem_ctx) ir_variable(array_8_of_float, + "gl_ClipDistance", + ir_var_shader_in); + + clipdistance->data.explicit_location = true; + clipdistance->data.location = VARYING_SLOT_CLIP_DIST0; + clipdistance->data.explicit_index = 0; + + ir.push_tail(clipdistance); + + 
ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx, + &ir, + consumer_inputs, + consumer_interface_inputs, + junk)); + + EXPECT_EQ(clipdistance, junk[VARYING_SLOT_CLIP_DIST0]); + EXPECT_TRUE(is_empty(consumer_inputs)); + EXPECT_TRUE(is_empty(consumer_interface_inputs)); +} +/** + * An input that belongs to an interface block must be the only entry of + * consumer_interface_inputs, stored under the name built by + * interface_field_name(), and consumer_inputs must stay empty. + */ +TEST_F(link_varyings, single_interface_input) +{ + ir_variable *const v = + new(mem_ctx) ir_variable(simple_interface->fields.structure[0].type, + simple_interface->fields.structure[0].name, + ir_var_shader_in); + + v->init_interface_type(simple_interface); + + ir.push_tail(v); + + ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx, + &ir, + consumer_inputs, + consumer_interface_inputs, + junk)); + char *const full_name = interface_field_name(simple_interface); + + EXPECT_EQ((void *) v, hash_table_find(consumer_interface_inputs, full_name)); + EXPECT_EQ(1u, num_elements(consumer_interface_inputs)); + EXPECT_TRUE(is_empty(consumer_inputs)); +} +/** + * With one non-interface input and one interface block input, each variable + * must be the only entry of its own hash table. + */ +TEST_F(link_varyings, one_interface_and_one_simple_input) +{ + ir_variable *const v = + new(mem_ctx) ir_variable(glsl_type::vec(4), + "a", + ir_var_shader_in); + + + ir.push_tail(v); + + ir_variable *const iface = + new(mem_ctx) ir_variable(simple_interface->fields.structure[0].type, + simple_interface->fields.structure[0].name, + ir_var_shader_in); + + iface->init_interface_type(simple_interface); + + ir.push_tail(iface); + + ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx, + &ir, + consumer_inputs, + consumer_interface_inputs, + junk)); + + char *const iface_field_name = interface_field_name(simple_interface); + + EXPECT_EQ((void *) iface, hash_table_find(consumer_interface_inputs, + iface_field_name)); + EXPECT_EQ(1u, num_elements(consumer_interface_inputs)); + + EXPECT_EQ((void *) v, hash_table_find(consumer_inputs, "a")); + EXPECT_EQ(1u, num_elements(consumer_inputs)); +} +/** + * A variable whose own type is the interface type (so that + * get_interface_type() returns simple_interface) must make + * populate_consumer_input_sets() return false. + */ +TEST_F(link_varyings, invalid_interface_input) +{ + ir_variable *const v = + new(mem_ctx) ir_variable(simple_interface, + "named_interface", + 
ir_var_shader_in); + + ASSERT_EQ(simple_interface, v->get_interface_type()); + + ir.push_tail(v); + + EXPECT_FALSE(linker::populate_consumer_input_sets(mem_ctx, + &ir, + consumer_inputs, + consumer_interface_inputs, + junk)); +} +/** + * A non-interface input whose name merely looks like an interface field name + * must not be matched by a variable that really is an interface block field. + */ +TEST_F(link_varyings, interface_field_doesnt_match_noninterface) +{ + char *const iface_field_name = interface_field_name(simple_interface); + + /* The input shader has a single non-interface input variable whose name is + * the string built by interface_field_name() (the "a.v" of these comments). + */ + ir_variable *const in_v = + new(mem_ctx) ir_variable(glsl_type::vec(4), + iface_field_name, + ir_var_shader_in); + + ir.push_tail(in_v); + + ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx, + &ir, + consumer_inputs, + consumer_interface_inputs, + junk)); + + /* Create an output variable, "v", that is part of an interface block named + * "a" (here: simple_interface). They should not match. + * + * NOTE(review): this variable is created with ir_var_shader_in although it + * plays the role of an output - confirm that this is intended. + */ + ir_variable *const out_v = + new(mem_ctx) ir_variable(simple_interface->fields.structure[0].type, + simple_interface->fields.structure[0].name, + ir_var_shader_in); + + out_v->init_interface_type(simple_interface); + + ir_variable *const match = + linker::get_matching_input(mem_ctx, + out_v, + consumer_inputs, + consumer_interface_inputs, + junk); + + EXPECT_EQ(NULL, match); +} +/** + * The reverse case: an input that is an interface block field must not be + * matched by a non-interface output whose name is the interface field name + * string. + */ +TEST_F(link_varyings, interface_field_doesnt_match_noninterface_vice_versa) +{ + char *const iface_field_name = interface_field_name(simple_interface); + + /* The input shader has a single variable, "v", that is part of an interface + * block named "a" (here: simple_interface). + */ + ir_variable *const in_v = + new(mem_ctx) ir_variable(simple_interface->fields.structure[0].type, + simple_interface->fields.structure[0].name, + ir_var_shader_in); + + in_v->init_interface_type(simple_interface); + + ir.push_tail(in_v); + + ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx, + &ir, + consumer_inputs, + consumer_interface_inputs, + junk)); + + /* Create an output variable "a.v". They should not match. 
+ */ + ir_variable *const out_v = + new(mem_ctx) ir_variable(glsl_type::vec(4), + iface_field_name, + ir_var_shader_out); + + ir_variable *const match = + linker::get_matching_input(mem_ctx, + out_v, + consumer_inputs, + consumer_interface_inputs, + junk); + + EXPECT_EQ(NULL, match); +} |