summary | refs | log | tree | commit | diff | stats
path: root/src/gallium
diff options
context:
space:
mode:
author: Jason Ekstrand <jason.ekstrand@intel.com>, 2015-08-25 17:12:03 -0700
committer: Jason Ekstrand <jason.ekstrand@intel.com>, 2015-08-25 18:41:21 -0700
commit: 9b387b5d3f4103c51079ea5298d33086af6da433 (patch)
tree: 4127f2284b6b4a5746bbc01bbfc6a97305057cb4 /src/gallium
parent: 5360edcb304e147341b934567f3bbf40e9d5a3b5 (diff)
parent: 1d2a844e7d55645ea3d24fb589bec03695b3d2b1 (diff)
download: external_mesa3d-9b387b5d3f4103c51079ea5298d33086af6da433.zip
external_mesa3d-9b387b5d3f4103c51079ea5298d33086af6da433.tar.gz
external_mesa3d-9b387b5d3f4103c51079ea5298d33086af6da433.tar.bz2
Merge remote-tracking branch 'mesa-public/master' into vulkan
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/auxiliary/nir/tgsi_to_nir.c58
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_text.c23
-rw-r--r--src/gallium/auxiliary/util/u_blitter.c54
-rw-r--r--src/gallium/auxiliary/util/u_blitter.h3
-rw-r--r--src/gallium/auxiliary/util/u_surface.c3
-rw-r--r--src/gallium/drivers/freedreno/a2xx/a2xx.xml.h4
-rw-r--r--src/gallium/drivers/freedreno/a3xx/a3xx.xml.h23
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_format.c11
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c2
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_texture.c3
-rw-r--r--src/gallium/drivers/freedreno/a4xx/a4xx.xml.h12
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_emit.h1
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_format.c10
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_gmem.c22
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_program.c7
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_texture.c4
-rw-r--r--src/gallium/drivers/freedreno/adreno_common.xml.h4
-rw-r--r--src/gallium/drivers/freedreno/adreno_pm4.xml.h4
-rw-r--r--src/gallium/drivers/freedreno/freedreno_context.c4
-rw-r--r--src/gallium/drivers/freedreno/freedreno_fence.c25
-rw-r--r--src/gallium/drivers/freedreno/freedreno_fence.h5
-rw-r--r--src/gallium/drivers/freedreno/freedreno_resource.c9
-rw-r--r--src/gallium/drivers/freedreno/freedreno_screen.c10
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c13
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c1
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_ra.c2
-rw-r--r--src/gallium/drivers/i915/i915_surface.c3
-rw-r--r--src/gallium/drivers/nouveau/Makefile.sources3
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir.h8
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp1
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp2
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp21
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp12
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp90
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp4
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h7
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp253
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp5
-rw-r--r--src/gallium/drivers/nouveau/codegen/unordered_set.h48
-rw-r--r--src/gallium/drivers/nouveau/nouveau_compiler.c4
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state.c10
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state_validate.c22
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_stateobj.h2
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_surface.c20
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_context.c25
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_context.h3
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_program.c17
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c7
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_state.c12
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c21
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_surface.c15
-rw-r--r--src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c3
-rw-r--r--src/gallium/drivers/r300/r300_blit.c3
-rw-r--r--src/gallium/drivers/r600/r600_blit.c3
-rw-r--r--src/gallium/drivers/r600/r600_isa.h2
-rw-r--r--src/gallium/drivers/r600/r600_pipe.h4
-rw-r--r--src/gallium/drivers/r600/r600_shader.c34
-rw-r--r--src/gallium/drivers/r600/r600_shader.h20
-rw-r--r--src/gallium/drivers/r600/r600_state_common.c42
-rw-r--r--src/gallium/drivers/radeon/radeon_uvd.c5
-rw-r--r--src/gallium/drivers/radeonsi/si_blit.c3
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c19
-rw-r--r--src/gallium/drivers/vc4/vc4_context.h1
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_algebraic.c33
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_copy_propagation.c22
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_vpm_writes.c18
-rw-r--r--src/gallium/drivers/vc4/vc4_program.c96
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.c38
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.h47
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu.h7
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu_disasm.c16
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu_emit.c63
-rw-r--r--src/gallium/drivers/vc4/vc4_register_allocate.c42
-rw-r--r--src/gallium/include/pipe/p_state.h1
-rw-r--r--src/gallium/state_trackers/clover/llvm/invocation.cpp33
-rw-r--r--src/gallium/state_trackers/nine/adapter9.c38
-rw-r--r--src/gallium/state_trackers/nine/basetexture9.c46
-rw-r--r--src/gallium/state_trackers/nine/basetexture9.h6
-rw-r--r--src/gallium/state_trackers/nine/cubetexture9.c27
-rw-r--r--src/gallium/state_trackers/nine/device9.c436
-rw-r--r--src/gallium/state_trackers/nine/device9.h13
-rw-r--r--src/gallium/state_trackers/nine/nine_ff.c381
-rw-r--r--src/gallium/state_trackers/nine/nine_ff.h81
-rw-r--r--src/gallium/state_trackers/nine/nine_pipe.c42
-rw-r--r--src/gallium/state_trackers/nine/nine_pipe.h67
-rw-r--r--src/gallium/state_trackers/nine/nine_shader.c307
-rw-r--r--src/gallium/state_trackers/nine/nine_shader.h49
-rw-r--r--src/gallium/state_trackers/nine/nine_state.c1096
-rw-r--r--src/gallium/state_trackers/nine/nine_state.h64
-rw-r--r--src/gallium/state_trackers/nine/pixelshader9.c42
-rw-r--r--src/gallium/state_trackers/nine/pixelshader9.h57
-rw-r--r--src/gallium/state_trackers/nine/resource9.c20
-rw-r--r--src/gallium/state_trackers/nine/stateblock9.c2
-rw-r--r--src/gallium/state_trackers/nine/surface9.c213
-rw-r--r--src/gallium/state_trackers/nine/surface9.h14
-rw-r--r--src/gallium/state_trackers/nine/swapchain9.c23
-rw-r--r--src/gallium/state_trackers/nine/texture9.c31
-rw-r--r--src/gallium/state_trackers/nine/vertexshader9.c33
-rw-r--r--src/gallium/state_trackers/nine/vertexshader9.h34
-rw-r--r--src/gallium/state_trackers/nine/volume9.c193
-rw-r--r--src/gallium/state_trackers/nine/volume9.h19
-rw-r--r--src/gallium/state_trackers/nine/volumetexture9.c18
-rw-r--r--src/gallium/targets/d3dadapter9/Makefile.am1
-rw-r--r--src/gallium/targets/d3dadapter9/description.c324
-rw-r--r--src/gallium/targets/d3dadapter9/drm.c76
-rw-r--r--src/gallium/targets/dri/Android.mk4
-rw-r--r--src/gallium/winsys/amdgpu/drm/Android.mk10
-rw-r--r--src/gallium/winsys/amdgpu/drm/Makefile.sources2
-rw-r--r--src/gallium/winsys/amdgpu/drm/amdgpu_cs.h4
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_cs.c8
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_cs.h2
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_winsys.c12
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_winsys.h1
-rw-r--r--src/gallium/winsys/sw/kms-dri/Makefile.am2
115 files changed, 3527 insertions, 1760 deletions
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 93dfb80..278d5e9 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -24,6 +24,7 @@
#include "util/ralloc.h"
#include "glsl/nir/nir.h"
+#include "glsl/nir/nir_control_flow.h"
#include "glsl/nir/nir_builder.h"
#include "glsl/list.h"
#include "glsl/shader_enums.h"
@@ -307,7 +308,7 @@ ttn_emit_immediate(struct ttn_compile *c)
for (i = 0; i < 4; i++)
load_const->value.u[i] = tgsi_imm->u[i].Uint;
- nir_instr_insert_after_cf_list(b->cf_node_list, &load_const->instr);
+ nir_builder_instr_insert(b, &load_const->instr);
}
static nir_src
@@ -363,7 +364,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
load->variables[0] = ttn_array_deref(c, load, var, offset, indirect);
nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
- nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
+ nir_builder_instr_insert(b, &load->instr);
src = nir_src_for_ssa(&load->dest.ssa);
@@ -414,7 +415,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
load->num_components = ncomp;
nir_ssa_dest_init(&load->instr, &load->dest, ncomp, NULL);
- nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
+ nir_builder_instr_insert(b, &load->instr);
src = nir_src_for_ssa(&load->dest.ssa);
break;
@@ -476,7 +477,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
srcn++;
}
nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
- nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
+ nir_builder_instr_insert(b, &load->instr);
src = nir_src_for_ssa(&load->dest.ssa);
break;
@@ -552,7 +553,7 @@ ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)
load->dest = nir_dest_for_reg(reg);
- nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
+ nir_builder_instr_insert(b, &load->instr);
} else {
assert(!tgsi_dst->Indirect);
dest.dest.reg.reg = c->temp_regs[index].reg;
@@ -667,7 +668,7 @@ ttn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
instr->src[i].src = nir_src_for_ssa(src[i]);
instr->dest = dest;
- nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
+ nir_builder_instr_insert(b, &instr->instr);
}
static void
@@ -683,7 +684,7 @@ ttn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
mov->src[0].src = nir_src_for_ssa(def);
for (unsigned i = def->num_components; i < 4; i++)
mov->src[0].swizzle[i] = def->num_components - 1;
- nir_instr_insert_after_cf_list(b->cf_node_list, &mov->instr);
+ nir_builder_instr_insert(b, &mov->instr);
}
static void
@@ -902,7 +903,7 @@ ttn_kill(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
{
nir_intrinsic_instr *discard =
nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard);
- nir_instr_insert_after_cf_list(b->cf_node_list, &discard->instr);
+ nir_builder_instr_insert(b, &discard->instr);
}
static void
@@ -912,7 +913,7 @@ ttn_kill_if(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
nir_intrinsic_instr *discard =
nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
discard->src[0] = nir_src_for_ssa(cmp);
- nir_instr_insert_after_cf_list(b->cf_node_list, &discard->instr);
+ nir_builder_instr_insert(b, &discard->instr);
}
static void
@@ -976,14 +977,14 @@ static void
ttn_cont(nir_builder *b)
{
nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_continue);
- nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
+ nir_builder_instr_insert(b, &instr->instr);
}
static void
ttn_brk(nir_builder *b)
{
nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_break);
- nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
+ nir_builder_instr_insert(b, &instr->instr);
}
static void
@@ -1279,7 +1280,7 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
assert(src_number == num_srcs);
nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL);
- nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
+ nir_builder_instr_insert(b, &instr->instr);
/* Resolve the writemask on the texture op. */
ttn_move_dest(b, dest, &instr->dest.ssa);
@@ -1318,10 +1319,10 @@ ttn_txq(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
txs->src[0].src_type = nir_tex_src_lod;
nir_ssa_dest_init(&txs->instr, &txs->dest, 3, NULL);
- nir_instr_insert_after_cf_list(b->cf_node_list, &txs->instr);
+ nir_builder_instr_insert(b, &txs->instr);
nir_ssa_dest_init(&qlv->instr, &qlv->dest, 1, NULL);
- nir_instr_insert_after_cf_list(b->cf_node_list, &qlv->instr);
+ nir_builder_instr_insert(b, &qlv->instr);
ttn_move_dest_masked(b, dest, &txs->dest.ssa, TGSI_WRITEMASK_XYZ);
ttn_move_dest_masked(b, dest, &qlv->dest.ssa, TGSI_WRITEMASK_W);
@@ -1730,7 +1731,7 @@ ttn_emit_instruction(struct ttn_compile *c)
store->variables[0] = ttn_array_deref(c, store, var, offset, indirect);
store->src[0] = nir_src_for_reg(dest.dest.reg.reg);
- nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);
+ nir_builder_instr_insert(b, &store->instr);
}
}
@@ -1759,11 +1760,26 @@ ttn_add_output_stores(struct ttn_compile *c)
store->const_index[0] = loc;
store->src[0].reg.reg = c->output_regs[loc].reg;
store->src[0].reg.base_offset = c->output_regs[loc].offset;
- nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);
+ nir_builder_instr_insert(b, &store->instr);
}
}
}
+/**
+ * Translate a TGSI processor type (TGSI_PROCESSOR_*) into the matching
+ * gl_shader_stage value (MESA_SHADER_*).
+ *
+ * tgsi_to_nir() calls this on scan.processor and hands the result to
+ * nir_shader_create().
+ *
+ * Only the six processor types listed in the switch are mapped; any other
+ * value reaches unreachable("invalid TGSI processor").
+ */
+static gl_shader_stage
+tgsi_processor_to_shader_stage(unsigned processor)
+{
+ switch (processor) {
+ case TGSI_PROCESSOR_FRAGMENT: return MESA_SHADER_FRAGMENT;
+ case TGSI_PROCESSOR_VERTEX: return MESA_SHADER_VERTEX;
+ case TGSI_PROCESSOR_GEOMETRY: return MESA_SHADER_GEOMETRY;
+ case TGSI_PROCESSOR_TESS_CTRL: return MESA_SHADER_TESS_CTRL;
+ case TGSI_PROCESSOR_TESS_EVAL: return MESA_SHADER_TESS_EVAL;
+ case TGSI_PROCESSOR_COMPUTE: return MESA_SHADER_COMPUTE;
+ default:
+ /* No fallback stage is returned for an unrecognised processor. */
+ unreachable("invalid TGSI processor");
+ };
+}
+
struct nir_shader *
tgsi_to_nir(const void *tgsi_tokens,
const nir_shader_compiler_options *options)
@@ -1775,7 +1791,12 @@ tgsi_to_nir(const void *tgsi_tokens,
int ret;
c = rzalloc(NULL, struct ttn_compile);
- s = nir_shader_create(NULL, options);
+
+ tgsi_scan_shader(tgsi_tokens, &scan);
+ c->scan = &scan;
+
+ s = nir_shader_create(NULL, tgsi_processor_to_shader_stage(scan.processor),
+ options);
nir_function *func = nir_function_create(s, "main");
nir_function_overload *overload = nir_function_overload_create(func);
@@ -1784,9 +1805,6 @@ tgsi_to_nir(const void *tgsi_tokens,
nir_builder_init(&c->build, impl);
nir_builder_insert_after_cf_list(&c->build, &impl->body);
- tgsi_scan_shader(tgsi_tokens, &scan);
- c->scan = &scan;
-
s->num_inputs = scan.file_max[TGSI_FILE_INPUT] + 1;
s->num_uniforms = scan.const_file_max[0] + 1;
s->num_outputs = scan.file_max[TGSI_FILE_OUTPUT] + 1;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index a6675c5..3e3ed5b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -259,7 +259,7 @@ struct translate_ctx
struct tgsi_token *tokens_end;
struct tgsi_header *header;
unsigned processor : 4;
- int implied_array_size : 5;
+ unsigned implied_array_size : 6;
unsigned num_immediates;
};
@@ -675,6 +675,9 @@ parse_register_dcl(
eat_opt_white( &cur );
if (cur[0] == '[') {
+ bool is_in = *file == TGSI_FILE_INPUT;
+ bool is_out = *file == TGSI_FILE_OUTPUT;
+
++cur;
ctx->cur = cur;
if (!parse_register_dcl_bracket( ctx, &brackets[1] ))
@@ -684,7 +687,11 @@ parse_register_dcl(
* input primitive. so we want to declare just
* the index relevant to the semantics which is in
* the second bracket */
- if (ctx->processor == TGSI_PROCESSOR_GEOMETRY && *file == TGSI_FILE_INPUT) {
+
+ /* tessellation has similar constraints to geometry shader */
+ if ((ctx->processor == TGSI_PROCESSOR_GEOMETRY && is_in) ||
+ (ctx->processor == TGSI_PROCESSOR_TESS_EVAL && is_in) ||
+ (ctx->processor == TGSI_PROCESSOR_TESS_CTRL && (is_in || is_out))) {
brackets[0] = brackets[1];
*num_brackets = 1;
} else {
@@ -740,6 +747,14 @@ parse_dst_operand(
dst->Dimension.Indirect = 0;
dst->Dimension.Dimension = 0;
dst->Dimension.Index = bracket[0].index;
+
+ if (bracket[0].ind_file != TGSI_FILE_NULL) {
+ dst->Dimension.Indirect = 1;
+ dst->DimIndirect.File = bracket[0].ind_file;
+ dst->DimIndirect.Index = bracket[0].ind_index;
+ dst->DimIndirect.Swizzle = bracket[0].ind_comp;
+ dst->DimIndirect.ArrayID = bracket[0].ind_array;
+ }
bracket[0] = bracket[1];
}
dst->Register.Index = bracket[0].index;
@@ -1623,6 +1638,10 @@ static boolean translate( struct translate_ctx *ctx )
if (!parse_header( ctx ))
return FALSE;
+ if (ctx->processor == TGSI_PROCESSOR_TESS_CTRL ||
+ ctx->processor == TGSI_PROCESSOR_TESS_EVAL)
+ ctx->implied_array_size = 32;
+
while (*ctx->cur != '\0') {
uint label_val = 0;
if (!eat_white( &ctx->cur )) {
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 85206ea..9bba07a 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -104,7 +104,7 @@ struct blitter_context_priv
void *fs_resolve_uint[PIPE_MAX_TEXTURE_TYPES][NUM_RESOLVE_FRAG_SHADERS][2];
/* Blend state. */
- void *blend[PIPE_MASK_RGBA+1]; /**< blend state with writemask */
+ void *blend[PIPE_MASK_RGBA+1][2]; /**< blend state with writemask */
void *blend_clear[GET_CLEAR_BLEND_STATE_IDX(PIPE_CLEAR_COLOR)+1];
/* Depth stencil alpha state. */
@@ -159,7 +159,7 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
struct pipe_rasterizer_state rs_state;
struct pipe_sampler_state sampler_state;
struct pipe_vertex_element velem[2];
- unsigned i;
+ unsigned i, j;
ctx = CALLOC_STRUCT(blitter_context_priv);
if (!ctx)
@@ -208,8 +208,20 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
memset(&blend, 0, sizeof(blend));
for (i = 0; i <= PIPE_MASK_RGBA; i++) {
- blend.rt[0].colormask = i;
- ctx->blend[i] = pipe->create_blend_state(pipe, &blend);
+ for (j = 0; j < 2; j++) {
+ memset(&blend.rt[0], 0, sizeof(blend.rt[0]));
+ blend.rt[0].colormask = i;
+ if (j) {
+ blend.rt[0].blend_enable = 1;
+ blend.rt[0].rgb_func = PIPE_BLEND_ADD;
+ blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
+ blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA;
+ blend.rt[0].alpha_func = PIPE_BLEND_ADD;
+ blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
+ blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA;
+ }
+ ctx->blend[i][j] = pipe->create_blend_state(pipe, &blend);
+ }
}
/* depth stencil alpha state objects */
@@ -409,9 +421,10 @@ void util_blitter_destroy(struct blitter_context *blitter)
struct pipe_context *pipe = blitter->pipe;
int i, j, f;
- for (i = 0; i <= PIPE_MASK_RGBA; i++) {
- pipe->delete_blend_state(pipe, ctx->blend[i]);
- }
+ for (i = 0; i <= PIPE_MASK_RGBA; i++)
+ for (j = 0; j < 2; j++)
+ pipe->delete_blend_state(pipe, ctx->blend[i][j]);
+
for (i = 0; i < Elements(ctx->blend_clear); i++) {
if (ctx->blend_clear[i])
pipe->delete_blend_state(pipe, ctx->blend_clear[i]);
@@ -1217,7 +1230,7 @@ static void *get_clear_blend_state(struct blitter_context_priv *ctx,
/* Return an existing blend state. */
if (!clear_buffers)
- return ctx->blend[0];
+ return ctx->blend[0][0];
index = GET_CLEAR_BLEND_STATE_IDX(clear_buffers);
@@ -1483,7 +1496,8 @@ void util_blitter_copy_texture(struct blitter_context *blitter,
/* Copy. */
util_blitter_blit_generic(blitter, dst_view, &dstbox,
src_view, srcbox, src->width0, src->height0,
- PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL);
+ PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL,
+ FALSE);
pipe_surface_reference(&dst_view, NULL);
pipe_sampler_view_reference(&src_view, NULL);
@@ -1496,7 +1510,8 @@ void util_blitter_blit_generic(struct blitter_context *blitter,
const struct pipe_box *srcbox,
unsigned src_width0, unsigned src_height0,
unsigned mask, unsigned filter,
- const struct pipe_scissor_state *scissor)
+ const struct pipe_scissor_state *scissor,
+ boolean alpha_blend)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
struct pipe_context *pipe = ctx->base.pipe;
@@ -1550,7 +1565,7 @@ void util_blitter_blit_generic(struct blitter_context *blitter,
fb_state.zsbuf = NULL;
if (blit_depth || blit_stencil) {
- pipe->bind_blend_state(pipe, ctx->blend[0]);
+ pipe->bind_blend_state(pipe, ctx->blend[0][0]);
if (blit_depth && blit_stencil) {
pipe->bind_depth_stencil_alpha_state(pipe,
@@ -1573,7 +1588,9 @@ void util_blitter_blit_generic(struct blitter_context *blitter,
}
} else {
- pipe->bind_blend_state(pipe, ctx->blend[mask & PIPE_MASK_RGBA]);
+ unsigned colormask = mask & PIPE_MASK_RGBA;
+
+ pipe->bind_blend_state(pipe, ctx->blend[colormask][alpha_blend]);
pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
ctx->bind_fs_state(pipe,
blitter_get_fs_texfetch_col(ctx, src->format, src_target,
@@ -1786,7 +1803,8 @@ util_blitter_blit(struct blitter_context *blitter,
util_blitter_blit_generic(blitter, dst_view, &info->dst.box,
src_view, &info->src.box, src->width0, src->height0,
info->mask, info->filter,
- info->scissor_enable ? &info->scissor : NULL);
+ info->scissor_enable ? &info->scissor : NULL,
+ info->alpha_blend);
pipe_surface_reference(&dst_view, NULL);
pipe_sampler_view_reference(&src_view, NULL);
@@ -1815,7 +1833,7 @@ void util_blitter_clear_render_target(struct blitter_context *blitter,
blitter_disable_render_cond(ctx);
/* bind states */
- pipe->bind_blend_state(pipe, ctx->blend[PIPE_MASK_RGBA]);
+ pipe->bind_blend_state(pipe, ctx->blend[PIPE_MASK_RGBA][0]);
pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
bind_fs_write_one_cbuf(ctx);
pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
@@ -1867,7 +1885,7 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter,
blitter_disable_render_cond(ctx);
/* bind states */
- pipe->bind_blend_state(pipe, ctx->blend[0]);
+ pipe->bind_blend_state(pipe, ctx->blend[0][0]);
if ((clear_flags & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) {
sr.ref_value[0] = stencil & 0xff;
pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_stencil);
@@ -1933,8 +1951,8 @@ void util_blitter_custom_depth_stencil(struct blitter_context *blitter,
blitter_disable_render_cond(ctx);
/* bind states */
- pipe->bind_blend_state(pipe, cbsurf ? ctx->blend[PIPE_MASK_RGBA] :
- ctx->blend[0]);
+ pipe->bind_blend_state(pipe, cbsurf ? ctx->blend[PIPE_MASK_RGBA][0] :
+ ctx->blend[0][0]);
pipe->bind_depth_stencil_alpha_state(pipe, dsa_stage);
if (cbsurf)
bind_fs_write_one_cbuf(ctx);
@@ -2187,7 +2205,7 @@ void util_blitter_custom_color(struct blitter_context *blitter,
/* bind states */
pipe->bind_blend_state(pipe, custom_blend ? custom_blend
- : ctx->blend[PIPE_MASK_RGBA]);
+ : ctx->blend[PIPE_MASK_RGBA][0]);
pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
bind_fs_write_one_cbuf(ctx);
pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
index 0cd173d..becdb02 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -246,7 +246,8 @@ void util_blitter_blit_generic(struct blitter_context *blitter,
const struct pipe_box *srcbox,
unsigned src_width0, unsigned src_height0,
unsigned mask, unsigned filter,
- const struct pipe_scissor_state *scissor);
+ const struct pipe_scissor_state *scissor,
+ boolean alpha_blend);
void util_blitter_blit(struct blitter_context *blitter,
const struct pipe_blit_info *info);
diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c
index 654b5bb..70ed911 100644
--- a/src/gallium/auxiliary/util/u_surface.c
+++ b/src/gallium/auxiliary/util/u_surface.c
@@ -676,6 +676,9 @@ util_try_blit_via_copy_region(struct pipe_context *ctx,
return FALSE;
}
+ if (blit->alpha_blend)
+ return FALSE;
+
ctx->resource_copy_region(ctx, blit->dst.resource, blit->dst.level,
blit->dst.box.x, blit->dst.box.y, blit->dst.box.z,
blit->src.resource, blit->src.level,
diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
index c4516ba..dd48956 100644
--- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
+++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63728 bytes, from 2015-08-05 18:07:28)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67120 bytes, from 2015-08-14 23:22:03)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63915 bytes, from 2015-08-24 16:56:28)
Copyright (C) 2013-2015 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
index 8e8cf6a..441bfec 100644
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63728 bytes, from 2015-08-05 18:07:28)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67120 bytes, from 2015-08-14 23:22:03)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63915 bytes, from 2015-08-24 16:56:28)
Copyright (C) 2013-2015 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
@@ -326,6 +326,13 @@ enum a3xx_tex_type {
A3XX_TEX_3D = 3,
};
+enum a3xx_tex_msaa {
+ A3XX_TPL1_MSAA1X = 0,
+ A3XX_TPL1_MSAA2X = 1,
+ A3XX_TPL1_MSAA4X = 2,
+ A3XX_TPL1_MSAA8X = 3,
+};
+
#define A3XX_INT0_RBBM_GPU_IDLE 0x00000001
#define A3XX_INT0_RBBM_AHB_ERROR 0x00000002
#define A3XX_INT0_RBBM_REG_TIMEOUT 0x00000004
@@ -2652,6 +2659,7 @@ static inline uint32_t A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES(uint32_t val)
#define REG_A3XX_VGT_IMMED_DATA 0x000021fd
#define REG_A3XX_TEX_SAMP_0 0x00000000
+#define A3XX_TEX_SAMP_0_CLAMPENABLE 0x00000001
#define A3XX_TEX_SAMP_0_MIPFILTER_LINEAR 0x00000002
#define A3XX_TEX_SAMP_0_XY_MAG__MASK 0x0000000c
#define A3XX_TEX_SAMP_0_XY_MAG__SHIFT 2
@@ -2695,6 +2703,7 @@ static inline uint32_t A3XX_TEX_SAMP_0_COMPARE_FUNC(enum adreno_compare_func val
{
return ((val) << A3XX_TEX_SAMP_0_COMPARE_FUNC__SHIFT) & A3XX_TEX_SAMP_0_COMPARE_FUNC__MASK;
}
+#define A3XX_TEX_SAMP_0_CUBEMAPSEAMLESSFILTOFF 0x01000000
#define A3XX_TEX_SAMP_0_UNNORM_COORDS 0x80000000
#define REG_A3XX_TEX_SAMP_1 0x00000001
@@ -2750,6 +2759,12 @@ static inline uint32_t A3XX_TEX_CONST_0_MIPLVLS(uint32_t val)
{
return ((val) << A3XX_TEX_CONST_0_MIPLVLS__SHIFT) & A3XX_TEX_CONST_0_MIPLVLS__MASK;
}
+#define A3XX_TEX_CONST_0_MSAATEX__MASK 0x00300000
+#define A3XX_TEX_CONST_0_MSAATEX__SHIFT 20
+static inline uint32_t A3XX_TEX_CONST_0_MSAATEX(enum a3xx_tex_msaa val)
+{
+ return ((val) << A3XX_TEX_CONST_0_MSAATEX__SHIFT) & A3XX_TEX_CONST_0_MSAATEX__MASK;
+}
#define A3XX_TEX_CONST_0_FMT__MASK 0x1fc00000
#define A3XX_TEX_CONST_0_FMT__SHIFT 22
static inline uint32_t A3XX_TEX_CONST_0_FMT(enum a3xx_tex_fmt val)
@@ -2785,7 +2800,7 @@ static inline uint32_t A3XX_TEX_CONST_1_FETCHSIZE(enum a3xx_tex_fetchsize val)
}
#define REG_A3XX_TEX_CONST_2 0x00000002
-#define A3XX_TEX_CONST_2_INDX__MASK 0x000000ff
+#define A3XX_TEX_CONST_2_INDX__MASK 0x000001ff
#define A3XX_TEX_CONST_2_INDX__SHIFT 0
static inline uint32_t A3XX_TEX_CONST_2_INDX(uint32_t val)
{
@@ -2805,7 +2820,7 @@ static inline uint32_t A3XX_TEX_CONST_2_SWAP(enum a3xx_color_swap val)
}
#define REG_A3XX_TEX_CONST_3 0x00000003
-#define A3XX_TEX_CONST_3_LAYERSZ1__MASK 0x00007fff
+#define A3XX_TEX_CONST_3_LAYERSZ1__MASK 0x0001ffff
#define A3XX_TEX_CONST_3_LAYERSZ1__SHIFT 0
static inline uint32_t A3XX_TEX_CONST_3_LAYERSZ1(uint32_t val)
{
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.c b/src/gallium/drivers/freedreno/a3xx/fd3_format.c
index ec87aa9..04cb9b9 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_format.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.c
@@ -262,6 +262,15 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = {
_T(ETC2_R11_SNORM, ETC2_R11_SNORM, NONE, WZYX),
_T(ETC2_RG11_UNORM, ETC2_RG11_UNORM, NONE, WZYX),
_T(ETC2_RG11_SNORM, ETC2_RG11_SNORM, NONE, WZYX),
+
+ _T(DXT1_RGB, DXT1, NONE, WZYX),
+ _T(DXT1_SRGB, DXT1, NONE, WZYX),
+ _T(DXT1_RGBA, DXT1, NONE, WZYX),
+ _T(DXT1_SRGBA, DXT1, NONE, WZYX),
+ _T(DXT3_RGBA, DXT3, NONE, WZYX),
+ _T(DXT3_SRGBA, DXT3, NONE, WZYX),
+ _T(DXT5_RGBA, DXT5, NONE, WZYX),
+ _T(DXT5_SRGBA, DXT5, NONE, WZYX),
};
enum a3xx_vtx_fmt
@@ -301,7 +310,7 @@ fd3_pipe2fetchsize(enum pipe_format format)
{
if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
format = PIPE_FORMAT_Z32_FLOAT;
- switch (util_format_get_blocksizebits(format)) {
+ switch (util_format_get_blocksizebits(format) / util_format_get_blockwidth(format)) {
case 8: return TFETCH_1_BYTE;
case 16: return TFETCH_2_BYTE;
case 32: return TFETCH_4_BYTE;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c
index 9c16804..583caaa 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c
@@ -73,7 +73,7 @@ fd3_rasterizer_state_create(struct pipe_context *pctx,
so->gras_su_poly_offset_scale =
A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(cso->offset_scale);
so->gras_su_poly_offset_offset =
- A3XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units);
+ A3XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units * 2.0f);
so->gras_su_mode_control =
A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width/2.0);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
index c30658d..2d6ecb2 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
@@ -115,6 +115,7 @@ fd3_sampler_state_create(struct pipe_context *pctx,
so->texsamp0 =
COND(!cso->normalized_coords, A3XX_TEX_SAMP_0_UNNORM_COORDS) |
+ COND(!cso->seamless_cube_map, A3XX_TEX_SAMP_0_CUBEMAPSEAMLESSFILTOFF) |
COND(miplinear, A3XX_TEX_SAMP_0_MIPFILTER_LINEAR) |
A3XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
A3XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
@@ -239,7 +240,7 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
A3XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
/* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */
so->texconst2 =
- A3XX_TEX_CONST_2_PITCH(rsc->slices[lvl].pitch * rsc->cpp);
+ A3XX_TEX_CONST_2_PITCH(util_format_get_nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
switch (prsc->target) {
case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index 563f70a..2e1d712 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63728 bytes, from 2015-08-05 18:07:28)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67120 bytes, from 2015-08-14 23:22:03)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63915 bytes, from 2015-08-24 16:56:28)
Copyright (C) 2013-2015 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
@@ -162,10 +162,13 @@ enum a4xx_tex_fmt {
TFMT4_8_UNORM = 4,
TFMT4_8_8_UNORM = 14,
TFMT4_8_8_8_8_UNORM = 28,
+ TFMT4_8_SNORM = 5,
TFMT4_8_8_SNORM = 15,
TFMT4_8_8_8_8_SNORM = 29,
+ TFMT4_8_UINT = 6,
TFMT4_8_8_UINT = 16,
TFMT4_8_8_8_8_UINT = 30,
+ TFMT4_8_SINT = 7,
TFMT4_8_8_SINT = 17,
TFMT4_8_8_8_8_SINT = 31,
TFMT4_16_UINT = 21,
@@ -430,7 +433,7 @@ static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val)
return ((val) << A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK;
}
#define A4XX_RB_MRT_BUF_INFO_COLOR_SRGB 0x00002000
-#define A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK 0x007fc000
+#define A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK 0xffffc000
#define A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT 14
static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val)
{
@@ -440,7 +443,7 @@ static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val)
static inline uint32_t REG_A4XX_RB_MRT_BASE(uint32_t i0) { return 0x000020a6 + 0x5*i0; }
static inline uint32_t REG_A4XX_RB_MRT_CONTROL3(uint32_t i0) { return 0x000020a7 + 0x5*i0; }
-#define A4XX_RB_MRT_CONTROL3_STRIDE__MASK 0x0001fff8
+#define A4XX_RB_MRT_CONTROL3_STRIDE__MASK 0x03fffff8
#define A4XX_RB_MRT_CONTROL3_STRIDE__SHIFT 3
static inline uint32_t A4XX_RB_MRT_CONTROL3_STRIDE(uint32_t val)
{
@@ -1460,6 +1463,7 @@ static inline uint32_t A4XX_SP_FS_MRT_REG_MRTFORMAT(enum a4xx_color_fmt val)
{
return ((val) << A4XX_SP_FS_MRT_REG_MRTFORMAT__SHIFT) & A4XX_SP_FS_MRT_REG_MRTFORMAT__MASK;
}
+#define A4XX_SP_FS_MRT_REG_COLOR_SRGB 0x00040000
#define REG_A4XX_SP_CS_CTRL_REG0 0x00002300
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
index ab7850e..3a1d4b6 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
@@ -56,6 +56,7 @@ struct fd4_emit {
uint32_t sprite_coord_enable; /* bitmask */
bool sprite_coord_mode;
bool rasterflat;
+ bool no_decode_srgb;
/* cached to avoid repeated lookups of same variants: */
struct ir3_shader_variant *vp, *fp;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
index 3e00454..6c9e217 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
@@ -79,9 +79,9 @@ struct fd4_format {
static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
/* 8-bit */
VT(R8_UNORM, 8_UNORM, R8_UNORM, WZYX),
- V_(R8_SNORM, 8_SNORM, NONE, WZYX),
- V_(R8_UINT, 8_UINT, NONE, WZYX),
- V_(R8_SINT, 8_SINT, NONE, WZYX),
+ VT(R8_SNORM, 8_SNORM, NONE, WZYX),
+ VT(R8_UINT, 8_UINT, NONE, WZYX),
+ VT(R8_SINT, 8_SINT, NONE, WZYX),
V_(R8_USCALED, 8_UINT, NONE, WZYX),
V_(R8_SSCALED, 8_UINT, NONE, WZYX),
@@ -115,8 +115,8 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
VT(R8G8_UNORM, 8_8_UNORM, R8G8_UNORM, WZYX),
VT(R8G8_SNORM, 8_8_SNORM, R8G8_SNORM, WZYX),
- VT(R8G8_UINT, 8_8_UINT, NONE, WZYX),
- VT(R8G8_SINT, 8_8_SINT, NONE, WZYX),
+ VT(R8G8_UINT, 8_8_UINT, R8G8_UINT, WZYX),
+ VT(R8G8_SINT, 8_8_SINT, R8G8_SINT, WZYX),
V_(R8G8_USCALED, 8_8_UINT, NONE, WZYX),
V_(R8G8_SSCALED, 8_8_SINT, NONE, WZYX),
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
index 81c37f7..3f8bbf3 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
@@ -46,7 +46,8 @@
static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
- struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w)
+ struct pipe_surface **bufs, uint32_t *bases,
+ uint32_t bin_w, bool decode_srgb)
{
enum a4xx_tile_mode tile_mode;
unsigned i;
@@ -60,6 +61,7 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
enum a4xx_color_fmt format = 0;
enum a3xx_color_swap swap = WZYX;
+ bool srgb = false;
struct fd_resource *rsc = NULL;
struct fd_resource_slice *slice = NULL;
uint32_t stride = 0;
@@ -68,10 +70,9 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
if ((i < nr_bufs) && bufs[i]) {
struct pipe_surface *psurf = bufs[i];
- enum pipe_format pformat = 0;
+ enum pipe_format pformat = psurf->format;
rsc = fd_resource(psurf->texture);
- pformat = psurf->format;
/* In case we're drawing to Z32F_S8, the "color" actually goes to
* the stencil
@@ -86,6 +87,11 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
format = fd4_pipe2color(pformat);
swap = fd4_pipe2swap(pformat);
+ if (decode_srgb)
+ srgb = util_format_is_srgb(pformat);
+ else
+ pformat = util_format_linear(pformat);
+
debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
offset = fd_resource_offset(rsc, psurf->u.tex.level,
@@ -108,7 +114,8 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
OUT_RING(ring, A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
- A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap));
+ A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
+ COND(srgb, A4XX_RB_MRT_BUF_INFO_COLOR_SRGB));
if (bin_w || (i >= nr_bufs) || !bufs[i]) {
OUT_RING(ring, base);
OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(stride));
@@ -282,7 +289,7 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t *bases,
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_surface *zsbufs[2];
- emit_mrt(ring, nr_bufs, bufs, bases, bin_w);
+ emit_mrt(ring, nr_bufs, bufs, bases, bin_w, false);
if (bufs[0] && (bufs[0]->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) {
/* The gmem_restore_tex logic will put the first buffer's stencil
@@ -315,6 +322,7 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
.key = {
.half_precision = fd_half_precision(pfb),
},
+ .no_decode_srgb = true,
};
unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0};
float x0, y0, x1, y1;
@@ -520,7 +528,7 @@ fd4_emit_sysmem_prep(struct fd_context *ctx)
OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
- emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0);
+ emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0, true);
/* setup scissor/offset for current tile: */
OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
@@ -677,7 +685,7 @@ fd4_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1));
OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2));
- emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w);
+ emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w, true);
/* setup scissor/offset for current tile: */
OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
index 1a6d014..a3d7123 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
@@ -450,10 +450,15 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
OUT_PKT0(ring, REG_A4XX_SP_FS_MRT_REG(0), 8);
for (i = 0; i < 8; i++) {
enum a4xx_color_fmt format = 0;
- if (i < nr)
+ bool srgb = false;
+ if (i < nr) {
format = fd4_emit_format(bufs[i]);
+ if (bufs[i] && !emit->no_decode_srgb)
+ srgb = util_format_is_srgb(bufs[i]->format);
+ }
OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(color_regid[i]) |
A4XX_SP_FS_MRT_REG_MRTFORMAT(format) |
+ COND(srgb, A4XX_SP_FS_MRT_REG_COLOR_SRGB) |
COND(emit->key.half_precision,
A4XX_SP_FS_MRT_REG_HALF_PRECISION));
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
index d2bc5fe..213b29c 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
@@ -187,9 +187,9 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
A4XX_TEX_CONST_3_LAYERSZ(rsc->layer_size);
break;
case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_CUBE_ARRAY: /* ?? not sure about _CUBE_ARRAY */
+ case PIPE_TEXTURE_CUBE_ARRAY:
so->texconst3 =
- A4XX_TEX_CONST_3_DEPTH(1) |
+ A4XX_TEX_CONST_3_DEPTH(prsc->array_size / 6) |
A4XX_TEX_CONST_3_LAYERSZ(rsc->layer_size);
break;
case PIPE_TEXTURE_3D:
diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h
index 00b6acb..29944b7 100644
--- a/src/gallium/drivers/freedreno/adreno_common.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_common.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63728 bytes, from 2015-08-05 18:07:28)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67120 bytes, from 2015-08-14 23:22:03)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63915 bytes, from 2015-08-24 16:56:28)
Copyright (C) 2013-2015 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
index 98a90e2..432dce3 100644
--- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63728 bytes, from 2015-08-05 18:07:28)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67120 bytes, from 2015-08-14 23:22:03)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63915 bytes, from 2015-08-24 16:56:28)
Copyright (C) 2013-2015 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c
index 8e6d431..0b6b9fb 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -131,11 +131,13 @@ static void
fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
unsigned flags)
{
+ struct fd_ringbuffer *ring = fd_context(pctx)->ring;
+
fd_context_render(pctx);
if (fence) {
fd_screen_fence_ref(pctx->screen, fence, NULL);
- *fence = fd_fence_create(pctx);
+ *fence = fd_fence_create(pctx, fd_ringbuffer_timestamp(ring));
}
}
diff --git a/src/gallium/drivers/freedreno/freedreno_fence.c b/src/gallium/drivers/freedreno/freedreno_fence.c
index 04a9fea..5125f09 100644
--- a/src/gallium/drivers/freedreno/freedreno_fence.c
+++ b/src/gallium/drivers/freedreno/freedreno_fence.c
@@ -50,35 +50,18 @@ fd_screen_fence_ref(struct pipe_screen *pscreen,
*ptr = pfence;
}
-/* TODO we need to spiff out libdrm_freedreno a bit to allow passing
- * the timeout.. and maybe a better way to check if fence has been
- * signaled. The current implementation is a bit lame for now to
- * avoid bumping libdrm version requirement.
- */
-
-boolean fd_screen_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *fence)
-{
- uint32_t timestamp = fd_ringbuffer_timestamp(fence->ctx->ring);
-
- /* TODO util helper for compare w/ rollover? */
- return timestamp >= fence->timestamp;
-}
-
boolean fd_screen_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
uint64_t timeout)
{
- if (!timeout)
- return fd_screen_fence_signalled(screen, fence);
-
- if (fd_pipe_wait(fence->screen->pipe, fence->timestamp))
+ if (fd_pipe_wait_timeout(fence->screen->pipe, fence->timestamp, timeout))
return false;
return true;
}
-struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx)
+struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx,
+ uint32_t timestamp)
{
struct pipe_fence_handle *fence;
struct fd_context *ctx = fd_context(pctx);
@@ -91,7 +74,7 @@ struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx)
fence->ctx = ctx;
fence->screen = ctx->screen;
- fence->timestamp = fd_ringbuffer_timestamp(ctx->ring);
+ fence->timestamp = timestamp;
return fence;
}
diff --git a/src/gallium/drivers/freedreno/freedreno_fence.h b/src/gallium/drivers/freedreno/freedreno_fence.h
index e36bcc4..06c314a 100644
--- a/src/gallium/drivers/freedreno/freedreno_fence.h
+++ b/src/gallium/drivers/freedreno/freedreno_fence.h
@@ -34,11 +34,10 @@
void fd_screen_fence_ref(struct pipe_screen *pscreen,
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *pfence);
-boolean fd_screen_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *pfence);
boolean fd_screen_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *pfence,
uint64_t timeout);
-struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx);
+struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx,
+ uint32_t timestamp);
#endif /* FREEDRENO_FENCE_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index 709ad4e..98de096 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -222,7 +222,7 @@ fd_resource_transfer_map(struct pipe_context *pctx,
ptrans->level = level;
ptrans->usage = usage;
ptrans->box = *box;
- ptrans->stride = slice->pitch * rsc->cpp;
+ ptrans->stride = util_format_get_nblocksx(format, slice->pitch) * rsc->cpp;
ptrans->layer_stride = slice->size0;
if (usage & PIPE_TRANSFER_READ)
@@ -375,9 +375,11 @@ setup_slices(struct fd_resource *rsc, uint32_t alignment)
for (level = 0; level <= prsc->last_level; level++) {
struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
+ uint32_t blocks;
slice->pitch = width = align(width, 32);
slice->offset = size;
+ blocks = util_format_get_nblocks(prsc->format, width, height);
/* 1d array and 2d array textures must all have the same layer size
* for each miplevel on a3xx. 3d textures can have different layer
* sizes for high levels, but the hw auto-sizer is buggy (or at least
@@ -387,9 +389,9 @@ setup_slices(struct fd_resource *rsc, uint32_t alignment)
if (prsc->target == PIPE_TEXTURE_3D && (
level == 1 ||
(level > 1 && rsc->slices[level - 1].size0 > 0xf000)))
- slice->size0 = align(slice->pitch * height * rsc->cpp, alignment);
+ slice->size0 = align(blocks * rsc->cpp, alignment);
else if (level == 0 || rsc->layer_first || alignment == 1)
- slice->size0 = align(slice->pitch * height * rsc->cpp, alignment);
+ slice->size0 = align(blocks * rsc->cpp, alignment);
else
slice->size0 = rsc->slices[level - 1].size0;
@@ -459,7 +461,6 @@ fd_resource_create(struct pipe_screen *pscreen,
if (is_a4xx(fd_screen(pscreen))) {
switch (tmpl->target) {
case PIPE_TEXTURE_3D:
- /* TODO 3D_ARRAY? */
rsc->layer_first = false;
break;
default:
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index b55f5b3..86e9a21 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -163,7 +163,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_MULTISAMPLE:
case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
- case PIPE_CAP_CUBE_MAP_ARRAY:
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
case PIPE_CAP_START_INSTANCE:
case PIPE_CAP_COMPUTE:
@@ -176,6 +175,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_INDEP_BLEND_ENABLE:
case PIPE_CAP_INDEP_BLEND_FUNC:
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
return is_a3xx(screen) || is_a4xx(screen);
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
@@ -191,8 +191,13 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return 16383;
case PIPE_CAP_DEPTH_CLIP_DISABLE:
+ case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return is_a3xx(screen);
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_CUBE_MAP_ARRAY:
+ return is_a4xx(screen);
+
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
return 256;
@@ -202,7 +207,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return is_ir3(screen) ? 130 : 120;
/* Unsupported features. */
- case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
@@ -230,8 +234,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
- case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
- case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 0;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 0ab3345..071901a 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1636,6 +1636,11 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
coord[i] = ir3_SHL_B(b, coord[i], 0, lod, 0);
}
+ /* the array coord for cube arrays needs 0.5 added to it */
+ if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array &&
+ opc != OPC_ISAML)
+ coord[3] = ir3_ADD_F(b, coord[3], 0, create_immed(b, fui(0.5)), 0);
+
/*
* lay out the first argument in the proper order:
* - actual coordinates first
@@ -1759,6 +1764,12 @@ emit_tex_txs(struct ir3_compile *ctx, nir_tex_instr *tex)
tex_info(tex, &flags, &coords);
+ /* Actually we want the number of dimensions, not coordinates. This
+ * distinction only matters for cubes.
+ */
+ if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
+ coords = 2;
+
dst = get_dst(ctx, &tex->dest, 4);
compile_assert(ctx, tex->num_srcs == 1);
@@ -2301,7 +2312,7 @@ emit_instructions(struct ir3_compile *ctx)
ctx->ir = ir3_create(ctx->compiler, ninputs, noutputs);
/* Create inputs in first block: */
- ctx->block = get_block(ctx, fxn->start_block);
+ ctx->block = get_block(ctx, nir_start_block(fxn));
ctx->in_block = ctx->block;
list_addtail(&ctx->block->node, &ctx->ir->block_list);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c
index dc9e462..bed7b7b 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c
@@ -29,6 +29,7 @@
#include "ir3_nir.h"
#include "glsl/nir/nir_builder.h"
+#include "glsl/nir/nir_control_flow.h"
/* Based on nir_opt_peephole_select, and hacked up to more aggressively
* flatten anything that can be flattened
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
index eaf3b3c..8801839 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
@@ -189,7 +189,7 @@ ir3_ra_alloc_reg_set(void *memctx)
}
/* allocate the reg-set.. */
- set->regs = ra_alloc_reg_set(set, ra_reg_count);
+ set->regs = ra_alloc_reg_set(set, ra_reg_count, true);
set->ra_reg_to_gpr = ralloc_array(set, uint16_t, ra_reg_count);
set->gpr_to_ra_reg = ralloc_array(set, uint16_t *, total_class_count);
diff --git a/src/gallium/drivers/i915/i915_surface.c b/src/gallium/drivers/i915/i915_surface.c
index 24e0156..b2a639c 100644
--- a/src/gallium/drivers/i915/i915_surface.c
+++ b/src/gallium/drivers/i915/i915_surface.c
@@ -120,7 +120,8 @@ i915_surface_copy_render(struct pipe_context *pipe,
util_blitter_blit_generic(i915->blitter, dst_view, &dstbox,
src_view, src_box, src_width0, src_height0,
- PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL);
+ PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL,
+ FALSE);
return;
fallback:
diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources
index 3fae3bc..9346ea3 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -121,7 +121,8 @@ NV50_CODEGEN_SOURCES := \
codegen/nv50_ir_target_nv50.cpp \
codegen/nv50_ir_target_nv50.h \
codegen/nv50_ir_util.cpp \
- codegen/nv50_ir_util.h
+ codegen/nv50_ir_util.h \
+ codegen/unordered_set.h
NVC0_CODEGEN_SOURCES := \
codegen/nv50_ir_emit_gk110.cpp \
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index 3ddaeaf..ba1b085 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -29,8 +29,8 @@
#include <deque>
#include <list>
#include <vector>
-#include <tr1/unordered_set>
+#include "codegen/unordered_set.h"
#include "codegen/nv50_ir_util.h"
#include "codegen/nv50_ir_graph.h"
@@ -585,10 +585,10 @@ public:
static inline Value *get(Iterator&);
- std::tr1::unordered_set<ValueRef *> uses;
+ unordered_set<ValueRef *> uses;
std::list<ValueDef *> defs;
- typedef std::tr1::unordered_set<ValueRef *>::iterator UseIterator;
- typedef std::tr1::unordered_set<ValueRef *>::const_iterator UseCIterator;
+ typedef unordered_set<ValueRef *>::iterator UseIterator;
+ typedef unordered_set<ValueRef *>::const_iterator UseCIterator;
typedef std::list<ValueDef *>::iterator DefIterator;
typedef std::list<ValueDef *>::const_iterator DefCIterator;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index f06056f..8f15429 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -933,6 +933,7 @@ CodeEmitterGK110::emitCVT(const Instruction *i)
code[0] |= typeSizeofLog2(dType) << 10;
code[0] |= typeSizeofLog2(i->sType) << 12;
+ code[1] |= i->subOp << 12;
if (isSignedIntType(dType))
code[0] |= 0x4000;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index ef5c87d..6e22788 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -818,6 +818,7 @@ CodeEmitterGM107::emitI2F()
emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
emitCC (0x2f);
emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
+ emitField(0x29, 2, insn->subOp);
emitRND (0x27, rnd, -1);
emitField(0x0d, 1, isSignedType(insn->sType));
emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
@@ -850,6 +851,7 @@ CodeEmitterGM107::emitI2I()
emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
emitCC (0x2f);
emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
+ emitField(0x29, 2, insn->subOp);
emitField(0x0d, 1, isSignedType(insn->sType));
emitField(0x0c, 1, isSignedType(insn->dType));
emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index f607f3b..6bf5219 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -1020,6 +1020,10 @@ CodeEmitterNVC0::emitCVT(Instruction *i)
code[0] |= util_logbase2(typeSizeof(dType)) << 20;
code[0] |= util_logbase2(typeSizeof(i->sType)) << 23;
+ // for 8/16 source types, the byte/word is in subOp. word 1 is
+ // represented as 2.
+ code[1] |= i->subOp << 0x17;
+
if (sat)
code[0] |= 0x20;
if (abs)
@@ -2614,11 +2618,12 @@ private:
int imul; // integer MUL to MUL delay 3
} res;
struct ScoreData {
- int r[64];
+ int r[256];
int p[8];
int c;
} rd, wr;
int base;
+ int regs;
void rebase(const int base)
{
@@ -2627,7 +2632,7 @@ private:
return;
this->base = 0;
- for (int i = 0; i < 64; ++i) {
+ for (int i = 0; i < regs; ++i) {
rd.r[i] += delta;
wr.r[i] += delta;
}
@@ -2646,16 +2651,17 @@ private:
res.imul += delta;
res.tex += delta;
}
- void wipe()
+ void wipe(int regs)
{
memset(&rd, 0, sizeof(rd));
memset(&wr, 0, sizeof(wr));
memset(&res, 0, sizeof(res));
+ this->regs = regs;
}
int getLatest(const ScoreData& d) const
{
int max = 0;
- for (int i = 0; i < 64; ++i)
+ for (int i = 0; i < regs; ++i)
if (d.r[i] > max)
max = d.r[i];
for (int i = 0; i < 8; ++i)
@@ -2690,7 +2696,7 @@ private:
}
void setMax(const RegScores *that)
{
- for (int i = 0; i < 64; ++i) {
+ for (int i = 0; i < regs; ++i) {
rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
}
@@ -2711,7 +2717,7 @@ private:
}
void print(int cycle)
{
- for (int i = 0; i < 64; ++i) {
+ for (int i = 0; i < regs; ++i) {
if (rd.r[i] > cycle)
INFO("rd $r%i @ %i\n", i, rd.r[i]);
if (wr.r[i] > cycle)
@@ -2806,9 +2812,10 @@ SchedDataCalculator::getCycles(const Instruction *insn, int origDelay) const
bool
SchedDataCalculator::visit(Function *func)
{
+ int regs = targ->getFileSize(FILE_GPR) + 1;
scoreBoards.resize(func->cfg.getSize());
for (size_t i = 0; i < scoreBoards.size(); ++i)
- scoreBoards[i].wipe();
+ scoreBoards[i].wipe(regs);
return true;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 4847a0f..f153674 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -2990,9 +2990,15 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
case TGSI_OPCODE_UBFE:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
- src1 = fetchSrc(1, c);
- src2 = fetchSrc(2, c);
- mkOp3(OP_INSBF, TYPE_U32, src1, src2, mkImm(0x808), src1);
+ if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE &&
+ tgsi.getSrc(2).getFile() == TGSI_FILE_IMMEDIATE) {
+ src1 = loadImm(NULL, tgsi.getSrc(2).getValueU32(c, info) << 8 |
+ tgsi.getSrc(1).getValueU32(c, info));
+ } else {
+ src1 = fetchSrc(1, c);
+ src2 = fetchSrc(2, c);
+ mkOp3(OP_INSBF, TYPE_U32, src1, src2, mkImm(0x808), src1);
+ }
mkOp2(OP_EXTBF, dstTy, dst0[c], src0, src1);
}
break;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
index 1f3fce2..420cc4e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
@@ -193,100 +193,16 @@ GM107LoweringPass::visit(Instruction *i)
checkPredicate(i);
switch (i->op) {
- case OP_TEX:
- case OP_TXB:
- case OP_TXL:
- case OP_TXF:
- case OP_TXG:
- return handleTEX(i->asTex());
- case OP_TXD:
- return handleTXD(i->asTex());
- case OP_TXLQ:
- return handleTXLQ(i->asTex());
- case OP_TXQ:
- return handleTXQ(i->asTex());
- case OP_EX2:
- bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0));
- i->setSrc(0, i->getDef(0));
- break;
- case OP_POW:
- return handlePOW(i);
- case OP_DIV:
- return handleDIV(i);
- case OP_MOD:
- return handleMOD(i);
- case OP_SQRT:
- return handleSQRT(i);
- case OP_EXPORT:
- return handleEXPORT(i);
case OP_PFETCH:
return handlePFETCH(i);
- case OP_EMIT:
- case OP_RESTART:
- return handleOUT(i);
- case OP_RDSV:
- return handleRDSV(i);
- case OP_WRSV:
- return handleWRSV(i);
- case OP_LOAD:
- if (i->src(0).getFile() == FILE_SHADER_INPUT) {
- if (prog->getType() == Program::TYPE_COMPUTE) {
- i->getSrc(0)->reg.file = FILE_MEMORY_CONST;
- i->getSrc(0)->reg.fileIndex = 0;
- } else
- if (prog->getType() == Program::TYPE_GEOMETRY &&
- i->src(0).isIndirect(0)) {
- // XXX: this assumes vec4 units
- Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
- i->getIndirect(0, 0), bld.mkImm(4));
- i->setIndirect(0, 0, ptr);
- i->op = OP_VFETCH;
- } else {
- i->op = OP_VFETCH;
- assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP
- }
- } else if (i->src(0).getFile() == FILE_MEMORY_CONST) {
- if (i->src(0).isIndirect(1)) {
- Value *ptr;
- if (i->src(0).isIndirect(0))
- ptr = bld.mkOp3v(OP_INSBF, TYPE_U32, bld.getSSA(),
- i->getIndirect(0, 1), bld.mkImm(0x1010),
- i->getIndirect(0, 0));
- else
- ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
- i->getIndirect(0, 1), bld.mkImm(16));
- i->setIndirect(0, 1, NULL);
- i->setIndirect(0, 0, ptr);
- i->subOp = NV50_IR_SUBOP_LDC_IS;
- }
- }
- break;
- case OP_ATOM:
- {
- const bool cctl = i->src(0).getFile() == FILE_MEMORY_GLOBAL;
- handleATOM(i);
- handleCasExch(i, cctl);
- }
- break;
- case OP_SULDB:
- case OP_SULDP:
- case OP_SUSTB:
- case OP_SUSTP:
- case OP_SUREDB:
- case OP_SUREDP:
- handleSurfaceOpNVE4(i->asTex());
- break;
case OP_DFDX:
case OP_DFDY:
- handleDFDX(i);
- break;
+ return handleDFDX(i);
case OP_POPCNT:
- handlePOPCNT(i);
- break;
+ return handlePOPCNT(i);
default:
- break;
+ return NVC0LoweringPass::visit(i);
}
- return true;
}
} // namespace nv50_ir
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index c3c302d..b1f4065 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -224,7 +224,7 @@ NVC0LegalizePostRA::findFirstUses(
const Instruction *texi,
const Instruction *insn,
std::list<TexUse> &uses,
- std::tr1::unordered_set<const Instruction *>& visited)
+ unordered_set<const Instruction *>& visited)
{
for (int d = 0; insn->defExists(d); ++d) {
Value *v = insn->getDef(d);
@@ -323,7 +323,7 @@ NVC0LegalizePostRA::insertTextureBarriers(Function *fn)
if (!uses)
return false;
for (size_t i = 0; i < texes.size(); ++i) {
- std::tr1::unordered_set<const Instruction *> visited;
+ unordered_set<const Instruction *> visited;
findFirstUses(texes[i], texes[i], uses[i], visited);
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 260e101..2ce52e5 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -20,8 +20,6 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <tr1/unordered_set>
-
#include "codegen/nv50_ir.h"
#include "codegen/nv50_ir_build_util.h"
@@ -73,7 +71,7 @@ private:
inline bool insnDominatedBy(const Instruction *, const Instruction *) const;
void findFirstUses(const Instruction *tex, const Instruction *def,
std::list<TexUse>&,
- std::tr1::unordered_set<const Instruction *>&);
+ unordered_set<const Instruction *>&);
void findOverwritingDefs(const Instruction *tex, Instruction *insn,
const BasicBlock *term,
std::list<TexUse>&);
@@ -111,10 +109,11 @@ protected:
void checkPredicate(Instruction *);
+ virtual bool visit(Instruction *);
+
private:
virtual bool visit(Function *);
virtual bool visit(BasicBlock *);
- virtual bool visit(Instruction *);
void readTessCoord(LValue *dst, int c);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index cea96dc..b01ef41 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1023,27 +1023,53 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case OP_AND:
{
- CmpInstruction *cmp = i->getSrc(t)->getInsn()->asCmp();
- if (!cmp || cmp->op == OP_SLCT || cmp->getDef(0)->refCount() > 1)
- return;
- if (!prog->getTarget()->isOpSupported(cmp->op, TYPE_F32))
- return;
- if (imm0.reg.data.f32 != 1.0)
- return;
- if (i->getSrc(t)->getInsn()->dType != TYPE_U32)
- return;
+ Instruction *src = i->getSrc(t)->getInsn();
+ ImmediateValue imm1;
+ if (imm0.reg.data.u32 == 0) {
+ i->op = OP_MOV;
+ i->setSrc(0, new_ImmediateValue(prog, 0u));
+ i->src(0).mod = Modifier(0);
+ i->setSrc(1, NULL);
+ } else if (imm0.reg.data.u32 == ~0U) {
+ i->op = i->src(t).mod.getOp();
+ if (t) {
+ i->setSrc(0, i->getSrc(t));
+ i->src(0).mod = i->src(t).mod;
+ }
+ i->setSrc(1, NULL);
+ } else if (src->asCmp()) {
+ CmpInstruction *cmp = src->asCmp();
+ if (!cmp || cmp->op == OP_SLCT || cmp->getDef(0)->refCount() > 1)
+ return;
+ if (!prog->getTarget()->isOpSupported(cmp->op, TYPE_F32))
+ return;
+ if (imm0.reg.data.f32 != 1.0)
+ return;
+ if (cmp->dType != TYPE_U32)
+ return;
- i->getSrc(t)->getInsn()->dType = TYPE_F32;
- if (i->src(t).mod != Modifier(0)) {
- assert(i->src(t).mod == Modifier(NV50_IR_MOD_NOT));
- i->src(t).mod = Modifier(0);
- cmp->setCond = inverseCondCode(cmp->setCond);
- }
- i->op = OP_MOV;
- i->setSrc(s, NULL);
- if (t) {
- i->setSrc(0, i->getSrc(t));
- i->setSrc(t, NULL);
+ cmp->dType = TYPE_F32;
+ if (i->src(t).mod != Modifier(0)) {
+ assert(i->src(t).mod == Modifier(NV50_IR_MOD_NOT));
+ i->src(t).mod = Modifier(0);
+ cmp->setCond = inverseCondCode(cmp->setCond);
+ }
+ i->op = OP_MOV;
+ i->setSrc(s, NULL);
+ if (t) {
+ i->setSrc(0, i->getSrc(t));
+ i->setSrc(t, NULL);
+ }
+ } else if (prog->getTarget()->isOpSupported(OP_EXTBF, TYPE_U32) &&
+ src->op == OP_SHR &&
+ src->src(1).getImmediate(imm1) &&
+ i->src(t).mod == Modifier(0) &&
+ util_is_power_of_two(imm0.reg.data.u32 + 1)) {
+ // low byte = offset, high byte = width
+ uint32_t ext = (util_last_bit(imm0.reg.data.u32) << 8) | imm1.reg.data.u32;
+ i->op = OP_EXTBF;
+ i->setSrc(0, src->getSrc(0));
+ i->setSrc(1, new_ImmediateValue(prog, ext));
}
}
break;
@@ -1106,6 +1132,84 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
i->op = OP_MOV;
break;
}
+ case OP_CVT: {
+ Storage res;
+
+ // TODO: handle 64-bit values properly
+ if (typeSizeof(i->dType) == 8 || typeSizeof(i->sType) == 8)
+ return;
+
+ // TODO: handle single byte/word extractions
+ if (i->subOp)
+ return;
+
+ bld.setPosition(i, true); /* make sure bld is init'ed */
+
+#define CASE(type, dst, fmin, fmax, imin, imax, umin, umax) \
+ case type: \
+ switch (i->sType) { \
+ case TYPE_F32: \
+ res.data.dst = util_iround(i->saturate ? \
+ CLAMP(imm0.reg.data.f32, fmin, fmax) : \
+ imm0.reg.data.f32); \
+ break; \
+ case TYPE_S32: \
+ res.data.dst = i->saturate ? \
+ CLAMP(imm0.reg.data.s32, imin, imax) : \
+ imm0.reg.data.s32; \
+ break; \
+ case TYPE_U32: \
+ res.data.dst = i->saturate ? \
+ CLAMP(imm0.reg.data.u32, umin, umax) : \
+ imm0.reg.data.u32; \
+ break; \
+ case TYPE_S16: \
+ res.data.dst = i->saturate ? \
+ CLAMP(imm0.reg.data.s16, imin, imax) : \
+ imm0.reg.data.s16; \
+ break; \
+ case TYPE_U16: \
+ res.data.dst = i->saturate ? \
+ CLAMP(imm0.reg.data.u16, umin, umax) : \
+ imm0.reg.data.u16; \
+ break; \
+ default: return; \
+ } \
+ i->setSrc(0, bld.mkImm(res.data.dst)); \
+ break
+
+ switch(i->dType) {
+ CASE(TYPE_U16, u16, 0, UINT16_MAX, 0, UINT16_MAX, 0, UINT16_MAX);
+ CASE(TYPE_S16, s16, INT16_MIN, INT16_MAX, INT16_MIN, INT16_MAX, 0, INT16_MAX);
+ CASE(TYPE_U32, u32, 0, UINT32_MAX, 0, INT32_MAX, 0, UINT32_MAX);
+ CASE(TYPE_S32, s32, INT32_MIN, INT32_MAX, INT32_MIN, INT32_MAX, 0, INT32_MAX);
+ case TYPE_F32:
+ switch (i->sType) {
+ case TYPE_F32:
+ res.data.f32 = i->saturate ?
+ CLAMP(imm0.reg.data.f32, 0.0f, 1.0f) :
+ imm0.reg.data.f32;
+ break;
+ case TYPE_U16: res.data.f32 = (float) imm0.reg.data.u16; break;
+ case TYPE_U32: res.data.f32 = (float) imm0.reg.data.u32; break;
+ case TYPE_S16: res.data.f32 = (float) imm0.reg.data.s16; break;
+ case TYPE_S32: res.data.f32 = (float) imm0.reg.data.s32; break;
+ default:
+ return;
+ }
+ i->setSrc(0, bld.mkImm(res.data.f32));
+ break;
+ default:
+ return;
+ }
+#undef CASE
+
+ i->setType(i->dType); /* Remove i->sType, which we don't need anymore */
+ i->op = OP_MOV;
+ i->saturate = 0;
+ i->src(0).mod = Modifier(0); /* Clear the already applied modifier */
+ break;
+ }
default:
return;
}
@@ -1212,7 +1316,8 @@ private:
void handleRCP(Instruction *);
void handleSLCT(Instruction *);
void handleLOGOP(Instruction *);
- void handleCVT(Instruction *);
+ void handleCVT_NEG(Instruction *);
+ void handleCVT_EXTBF(Instruction *);
void handleSUCLAMP(Instruction *);
BuildUtil bld;
@@ -1463,12 +1568,12 @@ AlgebraicOpt::handleLOGOP(Instruction *logop)
// nv50:
// F2I(NEG(I2F(ABS(SET))))
void
-AlgebraicOpt::handleCVT(Instruction *cvt)
+AlgebraicOpt::handleCVT_NEG(Instruction *cvt)
{
+ Instruction *insn = cvt->getSrc(0)->getInsn();
if (cvt->sType != TYPE_F32 ||
cvt->dType != TYPE_S32 || cvt->src(0).mod != Modifier(0))
return;
- Instruction *insn = cvt->getSrc(0)->getInsn();
if (!insn || insn->op != OP_NEG || insn->dType != TYPE_F32)
return;
if (insn->src(0).mod != Modifier(0))
@@ -1498,6 +1603,104 @@ AlgebraicOpt::handleCVT(Instruction *cvt)
delete_Instruction(prog, cvt);
}
+// Some shaders extract packed bytes out of words and convert them to
+// e.g. float. The Fermi+ CVT instruction can extract those directly, as can
+// nv50 for word sizes.
+//
+// Recognized source patterns:
+//   CVT(EXTBF(x, byte/word))
+//   CVT(AND(bytemask, x))
+//   CVT(AND(bytemask, SHR(x, 8/16/24)))
+//   CVT(SHR(x, 16/24))
+//
+// On a match the CVT is rewritten in place: its source becomes x, its source
+// type becomes the matching 8- or 16-bit type, and subOp is set to the byte
+// offset of the field inside x. If no pattern matches, the source operand is
+// left as it was.
+void
+AlgebraicOpt::handleCVT_EXTBF(Instruction *cvt)
+{
+   Instruction *insn = cvt->getSrc(0)->getInsn();
+   ImmediateValue imm;
+   Value *arg = NULL;
+   unsigned width = 0, offset = 0;
+   // Only 32-bit integer sources that are produced by an instruction qualify.
+   if ((cvt->sType != TYPE_U32 && cvt->sType != TYPE_S32) || !insn)
+      return;
+   if (insn->op == OP_EXTBF && insn->src(1).getImmediate(imm)) {
+      // EXTBF immediate: bits 8..15 hold the field width, bits 0..7 the
+      // bit offset.
+      // NOTE(review): the signedness of the narrowed type below is taken from
+      // cvt->sType, not from the EXTBF's own type - confirm they always agree.
+      width = (imm.reg.data.u32 >> 8) & 0xff;
+      offset = imm.reg.data.u32 & 0xff;
+      arg = insn->getSrc(0);
+
+      // Only byte or word fields aligned to their own size can be selected.
+      if (width != 8 && width != 16)
+         return;
+      if (width == 8 && offset & 0x7)
+         return;
+      if (width == 16 && offset & 0xf)
+         return;
+   } else if (insn->op == OP_AND) {
+      // Find which operand is the immediate mask; the other one is the data.
+      int s;
+      if (insn->src(0).getImmediate(imm))
+         s = 0;
+      else if (insn->src(1).getImmediate(imm))
+         s = 1;
+      else
+         return;
+
+      if (imm.reg.data.u32 == 0xff)
+         width = 8;
+      else if (imm.reg.data.u32 == 0xffff)
+         width = 16;
+      else
+         return;
+
+      arg = insn->getSrc(!s);
+      Instruction *shift = arg->getInsn();
+      offset = 0;
+      // Fold a preceding right shift by a width-aligned immediate into the
+      // field offset.
+      if (shift && shift->op == OP_SHR &&
+          shift->sType == cvt->sType &&
+          shift->src(1).getImmediate(imm) &&
+          ((width == 8 && (imm.reg.data.u32 & 0x7) == 0) ||
+           (width == 16 && (imm.reg.data.u32 & 0xf) == 0))) {
+         arg = shift->getSrc(0);
+         offset = imm.reg.data.u32;
+      }
+
+      // The AND cleared every bit above the field, so the value reaching the
+      // CVT is never negative. Treat the source as unsigned from here on:
+      // keeping S32 would select S8/S16 below and sign-extend any field whose
+      // top bit is set, changing the converted result.
+      cvt->sType = TYPE_U32;
+   } else if (insn->op == OP_SHR &&
+              insn->sType == cvt->sType &&
+              insn->src(1).getImmediate(imm)) {
+      // A bare shift only isolates a field when it leaves exactly the top
+      // byte (shift by 24) or the top word (shift by 16).
+      arg = insn->getSrc(0);
+      if (imm.reg.data.u32 == 24) {
+         width = 8;
+         offset = 24;
+      } else if (imm.reg.data.u32 == 16) {
+         width = 16;
+         offset = 16;
+      } else {
+         return;
+      }
+   }
+
+   // No pattern matched.
+   if (!arg)
+      return;
+
+   // Irrespective of what came earlier, we can undo a shift on the argument
+   // by adjusting the offset. The shift amount must be width-aligned and must
+   // not exceed the current offset, so the subtraction cannot wrap.
+   Instruction *shift = arg->getInsn();
+   if (shift && shift->op == OP_SHL &&
+       shift->src(1).getImmediate(imm) &&
+       ((width == 8 && (imm.reg.data.u32 & 0x7) == 0) ||
+        (width == 16 && (imm.reg.data.u32 & 0xf) == 0)) &&
+       imm.reg.data.u32 <= offset) {
+      arg = shift->getSrc(0);
+      offset -= imm.reg.data.u32;
+   }
+
+   // The unpackSnorm lowering still leaves a few shifts behind, but it's too
+   // annoying to detect them.
+
+   // Narrow the source type, keeping the (possibly adjusted) signedness.
+   if (width == 8) {
+      cvt->sType = cvt->sType == TYPE_U32 ? TYPE_U8 : TYPE_S8;
+   } else {
+      assert(width == 16);
+      cvt->sType = cvt->sType == TYPE_U32 ? TYPE_U16 : TYPE_S16;
+   }
+   cvt->setSrc(0, arg);
+   // offset is in bits; subOp receives it in bytes.
+   cvt->subOp = offset >> 3;
+}
+
// SUCLAMP dst, (ADD b imm), k, 0 -> SUCLAMP dst, b, k, imm (if imm fits s6)
void
AlgebraicOpt::handleSUCLAMP(Instruction *insn)
@@ -1568,7 +1771,9 @@ AlgebraicOpt::visit(BasicBlock *bb)
handleLOGOP(i);
break;
case OP_CVT:
- handleCVT(i);
+ handleCVT_NEG(i);
+ if (prog->getTarget()->isOpSupported(OP_EXTBF, TYPE_U32))
+ handleCVT_EXTBF(i);
break;
case OP_SUCLAMP:
handleSUCLAMP(i);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 78bc97f..0cd21cf 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -25,7 +25,6 @@
#include <stack>
#include <limits>
-#include <tr1/unordered_set>
namespace nv50_ir {
@@ -1551,7 +1550,7 @@ SpillCodeInserter::run(const std::list<ValuePair>& lst)
// Keep track of which instructions to delete later. Deleting them
// inside the loop is unsafe since a single instruction may have
// multiple destinations that all need to be spilled (like OP_SPLIT).
- std::tr1::unordered_set<Instruction *> to_del;
+ unordered_set<Instruction *> to_del;
for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end();
++d) {
@@ -1593,7 +1592,7 @@ SpillCodeInserter::run(const std::list<ValuePair>& lst)
}
}
- for (std::tr1::unordered_set<Instruction *>::const_iterator it = to_del.begin();
+ for (unordered_set<Instruction *>::const_iterator it = to_del.begin();
it != to_del.end(); ++it)
delete_Instruction(func->getProgram(), *it);
}
diff --git a/src/gallium/drivers/nouveau/codegen/unordered_set.h b/src/gallium/drivers/nouveau/codegen/unordered_set.h
new file mode 100644
index 0000000..8ef6d46
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/unordered_set.h
@@ -0,0 +1,48 @@
+#ifndef __NV50_UNORDERED_SET_H__ // Makes nv50_ir::unordered_set available whether the toolchain has the C++11 or only the TR1 container.
+#define __NV50_UNORDERED_SET_H__
+
+#if (__cplusplus >= 201103L) || defined(ANDROID) // C++11 toolchains and every Android build include <unordered_set>
+#include <unordered_set>
+#else // pre-C++11, non-Android: use the TR1 header
+#include <tr1/unordered_set>
+#endif
+
+namespace nv50_ir {
+
+#if __cplusplus >= 201103L // C++11: import the standard container
+using std::unordered_set;
+#elif !defined(ANDROID) // pre-C++11, non-Android: import the TR1 container
+using std::tr1::unordered_set;
+#else // Android release before lollipop
+using std::isfinite; // NOTE(review): not related to the set itself; presumably a workaround for this Android toolchain - confirm
+typedef std::tr1::unordered_set<void *> voidptr_unordered_set; // the single void* instantiation the wrapper below derives from
+
+template <typename V>
+class unordered_set : public voidptr_unordered_set { // wrapper over the void* set whose iterators cast elements back to V
+ public:
+ typedef voidptr_unordered_set _base;
+ typedef _base::iterator _biterator;
+ typedef _base::const_iterator const_biterator;
+
+ class iterator : public _biterator { // base iterator with a dereference that yields V
+ public:
+ iterator(const _biterator & i) : _biterator(i) {}
+ V operator*() const { return reinterpret_cast<V>(*_biterator(*this)); } // dereference a copy of the base iterator, then cast the void* to V
+ };
+ class const_iterator : public const_biterator { // const counterpart; constructible from iterator or from the base const_iterator
+ public:
+ const_iterator(const iterator & i) : const_biterator(i) {}
+ const_iterator(const const_biterator & i) : const_biterator(i) {}
+ const V operator*() const { return reinterpret_cast<const V>(*const_biterator(*this)); }
+ };
+
+ iterator begin() { return _base::begin(); } // these four redeclare begin/end so they return the casting iterator types
+ iterator end() { return _base::end(); }
+ const_iterator begin() const { return _base::begin(); }
+ const_iterator end() const { return _base::end(); }
+};
+#endif
+
+} // namespace nv50_ir
+
+#endif // __NV50_UNORDERED_SET_H__
diff --git a/src/gallium/drivers/nouveau/nouveau_compiler.c b/src/gallium/drivers/nouveau/nouveau_compiler.c
index 8660498..495450b 100644
--- a/src/gallium/drivers/nouveau/nouveau_compiler.c
+++ b/src/gallium/drivers/nouveau/nouveau_compiler.c
@@ -190,6 +190,10 @@ main(int argc, char *argv[])
type = PIPE_SHADER_GEOMETRY;
else if (!strncmp(text, "COMP", 4))
type = PIPE_SHADER_COMPUTE;
+ else if (!strncmp(text, "TESS_CTRL", 9))
+ type = PIPE_SHADER_TESS_CTRL;
+ else if (!strncmp(text, "TESS_EVAL", 9))
+ type = PIPE_SHADER_TESS_EVAL;
else {
_debug_printf("Unrecognized TGSI header\n");
return 1;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index 9505a0b..410e631 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -117,7 +117,6 @@ nv50_blend_state_create(struct pipe_context *pipe,
struct nv50_blend_stateobj *so = CALLOC_STRUCT(nv50_blend_stateobj);
int i;
bool emit_common_func = cso->rt[0].blend_enable;
- uint32_t ms;
if (nv50_context(pipe)->screen->tesla->oclass >= NVA3_3D_CLASS) {
SB_BEGIN_3D(so, BLEND_INDEPENDENT, 1);
@@ -189,15 +188,6 @@ nv50_blend_state_create(struct pipe_context *pipe,
SB_DATA (so, nv50_colormask(cso->rt[0].colormask));
}
- ms = 0;
- if (cso->alpha_to_coverage)
- ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE;
- if (cso->alpha_to_one)
- ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE;
-
- SB_BEGIN_3D(so, MULTISAMPLE_CTRL, 1);
- SB_DATA (so, ms);
-
assert(so->size <= (sizeof(so->state) / sizeof(so->state[0])));
return so;
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
index 985603d..b304a17 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -1,4 +1,6 @@
+#include "util/u_format.h"
+
#include "nv50/nv50_context.h"
#include "nv50/nv50_defs.xml.h"
@@ -314,6 +316,25 @@ nv50_validate_derived_2(struct nv50_context *nv50)
}
static void
+nv50_validate_derived_3(struct nv50_context *nv50) /* Emits MULTISAMPLE_CTRL derived from the bound blend state and colour buffer 0. */
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct pipe_framebuffer_state *fb = &nv50->framebuffer;
+ uint32_t ms = 0; /* stays 0 (neither alpha flag set) unless the block below sets bits */
+
+ if ((!fb->nr_cbufs || !fb->cbufs[0] ||
+ !util_format_is_pure_integer(fb->cbufs[0]->format)) && nv50->blend) { /* skipped when cbuf 0 has a pure-integer format or no blend state is bound */
+ if (nv50->blend->pipe.alpha_to_coverage)
+ ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE;
+ if (nv50->blend->pipe.alpha_to_one)
+ ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE;
+ }
+
+ BEGIN_NV04(push, NV50_3D(MULTISAMPLE_CTRL), 1);
+ PUSH_DATA (push, ms); /* written on every call, including when ms is 0 */
+}
+
+static void
nv50_validate_clip(struct nv50_context *nv50)
{
struct nouveau_pushbuf *push = nv50->base.pushbuf;
@@ -474,6 +495,7 @@ static struct state_validate {
{ nv50_validate_derived_rs, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
{ nv50_validate_derived_2, NV50_NEW_ZSA | NV50_NEW_FRAMEBUFFER },
+ { nv50_validate_derived_3, NV50_NEW_BLEND | NV50_NEW_FRAMEBUFFER },
{ nv50_validate_clip, NV50_NEW_CLIP | NV50_NEW_RASTERIZER |
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
{ nv50_constbufs_validate, NV50_NEW_CONSTBUF },
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h b/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h
index cf75d1e..4b1d00c 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h
@@ -19,7 +19,7 @@
struct nv50_blend_stateobj {
struct pipe_blend_state pipe;
int size;
- uint32_t state[84]; // TODO: allocate less if !independent_blend_enable
+ uint32_t state[82]; // TODO: allocate less if !independent_blend_enable
};
struct nv50_rasterizer_stateobj {
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index b1ae016..64348b3 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -68,6 +68,10 @@ nv50_2d_format(enum pipe_format format, bool dst, bool dst_src_equal)
return NV50_SURFACE_FORMAT_R16_UNORM;
case 4:
return NV50_SURFACE_FORMAT_BGRA8_UNORM;
+ case 8:
+ return NV50_SURFACE_FORMAT_RGBA16_FLOAT;
+ case 16:
+ return NV50_SURFACE_FORMAT_RGBA32_FLOAT;
default:
return 0;
}
@@ -1003,6 +1007,8 @@ nv50_blitctx_prepare_state(struct nv50_blitctx *blit)
/* zsa state */
BEGIN_NV04(push, NV50_3D(DEPTH_TEST_ENABLE), 1);
PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(DEPTH_BOUNDS_EN), 1);
+ PUSH_DATA (push, 0);
BEGIN_NV04(push, NV50_3D(STENCIL_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV50_3D(ALPHA_TEST_ENABLE), 1);
@@ -1387,18 +1393,24 @@ nv50_blit_eng2d(struct nv50_context *nv50, const struct pipe_blit_info *info)
PUSH_DATA (push, info->dst.box.z + i);
} else {
const unsigned z = info->dst.box.z + i;
+ const uint64_t address = dst->base.address +
+ dst->level[info->dst.level].offset +
+ z * dst->layer_stride;
BEGIN_NV04(push, NV50_2D(DST_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, dst->base.address + z * dst->layer_stride);
- PUSH_DATA (push, dst->base.address + z * dst->layer_stride);
+ PUSH_DATAh(push, address);
+ PUSH_DATA (push, address);
}
if (src->layout_3d) {
/* not possible because of depth tiling */
assert(0);
} else {
const unsigned z = info->src.box.z + i;
+ const uint64_t address = src->base.address +
+ src->level[info->src.level].offset +
+ z * src->layer_stride;
BEGIN_NV04(push, NV50_2D(SRC_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, src->base.address + z * src->layer_stride);
- PUSH_DATA (push, src->base.address + z * src->layer_stride);
+ PUSH_DATAh(push, address);
+ PUSH_DATA (push, address);
}
BEGIN_NV04(push, NV50_2D(BLIT_SRC_Y_INT), 1); /* trigger */
PUSH_DATA (push, srcy >> 32);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index 84f8db6..7a15a11 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -132,6 +132,9 @@ nvc0_context_unreference_resources(struct nvc0_context *nvc0)
pipe_resource_reference(res, NULL);
}
util_dynarray_fini(&nvc0->global_residents);
+
+ if (nvc0->tcp_empty)
+ nvc0->base.pipe.delete_tcs_state(&nvc0->base.pipe, nvc0->tcp_empty);
}
static void
@@ -306,13 +309,6 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
pipe->memory_barrier = nvc0_memory_barrier;
pipe->get_sample_position = nvc0_context_get_sample_position;
- if (!screen->cur_ctx) {
- nvc0->state = screen->save_state;
- screen->cur_ctx = nvc0;
- nouveau_pushbuf_bufctx(screen->base.pushbuf, nvc0->bufctx);
- }
- screen->base.pushbuf->kick_notify = nvc0_default_kick_notify;
-
nvc0_init_query_functions(nvc0);
nvc0_init_surface_functions(nvc0);
nvc0_init_state_functions(nvc0);
@@ -326,6 +322,21 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
/* shader builtin library is per-screen, but we need a context for m2mf */
nvc0_program_library_upload(nvc0);
+ nvc0_program_init_tcp_empty(nvc0);
+ if (!nvc0->tcp_empty)
+ goto out_err;
+ /* set the empty tctl prog on next draw in case one is never set */
+ nvc0->dirty |= NVC0_NEW_TCTLPROG;
+
+ /* now that there are no more opportunities for errors, set the current
+ * context if there isn't already one.
+ */
+ if (!screen->cur_ctx) {
+ nvc0->state = screen->save_state;
+ screen->cur_ctx = nvc0;
+ nouveau_pushbuf_bufctx(screen->base.pushbuf, nvc0->bufctx);
+ }
+ screen->base.pushbuf->kick_notify = nvc0_default_kick_notify;
/* add permanently resident buffers to bufctxts */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index f449942..df1a891 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -128,6 +128,8 @@ struct nvc0_context {
struct nvc0_program *fragprog;
struct nvc0_program *compprog;
+ struct nvc0_program *tcp_empty;
+
struct nvc0_constbuf constbuf[6][NVC0_MAX_PIPE_CONSTBUFS];
uint16_t constbuf_dirty[6];
uint16_t constbuf_valid[6];
@@ -227,6 +229,7 @@ void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
void nvc0_program_library_upload(struct nvc0_context *);
uint32_t nvc0_program_symbol_offset(const struct nvc0_program *,
uint32_t label);
+void nvc0_program_init_tcp_empty(struct nvc0_context *);
/* nvc0_query.c */
void nvc0_init_query_functions(struct nvc0_context *);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 507a250..12f1bb7 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -22,6 +22,8 @@
#include "pipe/p_defines.h"
+#include "tgsi/tgsi_ureg.h"
+
#include "nvc0/nvc0_context.h"
#include "codegen/nv50_ir_driver.h"
@@ -799,3 +801,18 @@ nvc0_program_symbol_offset(const struct nvc0_program *prog, uint32_t label)
return prog->code_base + base + syms[i].offset;
return prog->code_base; /* no symbols or symbol not found */
}
+
+void
+nvc0_program_init_tcp_empty(struct nvc0_context *nvc0) /* Builds a tess-ctrl program consisting only of END and stores it in nvc0->tcp_empty. */
+{
+ struct ureg_program *ureg;
+
+ ureg = ureg_create(TGSI_PROCESSOR_TESS_CTRL);
+ if (!ureg)
+ return; /* nvc0->tcp_empty is not assigned on this path */
+
+ ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT, 1); /* set the TCS_VERTICES_OUT property to 1 */
+ ureg_END(ureg);
+
+ nvc0->tcp_empty = ureg_create_shader_and_destroy(ureg, &nvc0->base.pipe); /* presumably also releases ureg, going by the helper's name - confirm */
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
index 8aa127a..8f8ac2d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -148,8 +148,13 @@ nvc0_tctlprog_validate(struct nvc0_context *nvc0)
BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1);
PUSH_DATA (push, tp->num_gprs);
} else {
- BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);
+ tp = nvc0->tcp_empty;
+ /* not a whole lot we can do to handle this failure */
+ if (!nvc0_program_validate(nvc0, tp))
+ assert(!"unable to validate empty tcp");
+ BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2);
PUSH_DATA (push, 0x20);
+ PUSH_DATA (push, tp->code_base);
}
nvc0_program_update_context_state(nvc0, tp, 1);
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index 2a33857..ee29912 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -90,7 +90,6 @@ nvc0_blend_state_create(struct pipe_context *pipe,
struct nvc0_blend_stateobj *so = CALLOC_STRUCT(nvc0_blend_stateobj);
int i;
int r; /* reference */
- uint32_t ms;
uint8_t blend_en = 0;
bool indep_masks = false;
bool indep_funcs = false;
@@ -176,15 +175,6 @@ nvc0_blend_state_create(struct pipe_context *pipe,
}
}
- ms = 0;
- if (cso->alpha_to_coverage)
- ms |= NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE;
- if (cso->alpha_to_one)
- ms |= NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE;
-
- SB_BEGIN_3D(so, MULTISAMPLE_CTRL, 1);
- SB_DATA (so, ms);
-
assert(so->size <= (sizeof(so->state) / sizeof(so->state[0])));
return so;
}
@@ -234,7 +224,7 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe,
SB_IMMED_3D(so, MULTISAMPLE_ENABLE, cso->multisample);
SB_IMMED_3D(so, LINE_SMOOTH_ENABLE, cso->line_smooth);
- if (cso->line_smooth)
+ if (cso->line_smooth || cso->multisample)
SB_BEGIN_3D(so, LINE_WIDTH_SMOOTH, 1);
else
SB_BEGIN_3D(so, LINE_WIDTH_ALIASED, 1);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index ce1119c..47bd66d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -1,4 +1,5 @@
+#include "util/u_format.h"
#include "util/u_math.h"
#include "nvc0/nvc0_context.h"
@@ -555,6 +556,25 @@ nvc0_validate_derived_2(struct nvc0_context *nvc0)
}
static void
+nvc0_validate_derived_3(struct nvc0_context *nvc0) /* Emits MULTISAMPLE_CTRL derived from the bound blend state and colour buffer 0. */
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
+ uint32_t ms = 0; /* stays 0 (neither alpha flag set) unless the block below sets bits */
+
+ if ((!fb->nr_cbufs || !fb->cbufs[0] ||
+ !util_format_is_pure_integer(fb->cbufs[0]->format)) && nvc0->blend) { /* skipped when cbuf 0 has a pure-integer format or no blend state is bound */
+ if (nvc0->blend->pipe.alpha_to_coverage)
+ ms |= NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE;
+ if (nvc0->blend->pipe.alpha_to_one)
+ ms |= NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE;
+ }
+
+ BEGIN_NVC0(push, NVC0_3D(MULTISAMPLE_CTRL), 1);
+ PUSH_DATA (push, ms); /* written on every call, including when ms is 0 */
+}
+
+static void
nvc0_validate_tess_state(struct nvc0_context *nvc0)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
@@ -628,6 +648,7 @@ static struct state_validate {
{ nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA |
NVC0_NEW_RASTERIZER },
{ nvc0_validate_derived_2, NVC0_NEW_ZSA | NVC0_NEW_FRAMEBUFFER },
+ { nvc0_validate_derived_3, NVC0_NEW_BLEND | NVC0_NEW_FRAMEBUFFER },
{ nvc0_validate_clip, NVC0_NEW_CLIP | NVC0_NEW_RASTERIZER |
NVC0_NEW_VERTPROG |
NVC0_NEW_TEVLPROG |
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h
index 18fcc12..8bc33c6 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h
@@ -17,7 +17,7 @@
struct nvc0_blend_stateobj {
struct pipe_blend_state pipe;
int size;
- uint32_t state[72];
+ uint32_t state[70];
};
struct nvc0_rasterizer_stateobj {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index 51a6f93..dbdf292 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -887,6 +887,7 @@ nvc0_blitctx_prepare_state(struct nvc0_blitctx *blit)
/* zsa state */
IMMED_NVC0(push, NVC0_3D(DEPTH_TEST_ENABLE), 0);
+ IMMED_NVC0(push, NVC0_3D(DEPTH_BOUNDS_EN), 0);
IMMED_NVC0(push, NVC0_3D(STENCIL_ENABLE), 0);
IMMED_NVC0(push, NVC0_3D(ALPHA_TEST_ENABLE), 0);
@@ -1336,18 +1337,24 @@ nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
PUSH_DATA (push, info->dst.box.z + i);
} else {
const unsigned z = info->dst.box.z + i;
+ const uint64_t address = dst->base.address +
+ dst->level[info->dst.level].offset +
+ z * dst->layer_stride;
BEGIN_NVC0(push, NVC0_2D(DST_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, dst->base.address + z * dst->layer_stride);
- PUSH_DATA (push, dst->base.address + z * dst->layer_stride);
+ PUSH_DATAh(push, address);
+ PUSH_DATA (push, address);
}
if (src->layout_3d) {
/* not possible because of depth tiling */
assert(0);
} else {
const unsigned z = info->src.box.z + i;
+ const uint64_t address = src->base.address +
+ src->level[info->src.level].offset +
+ z * src->layer_stride;
BEGIN_NVC0(push, NVC0_2D(SRC_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, src->base.address + z * src->layer_stride);
- PUSH_DATA (push, src->base.address + z * src->layer_stride);
+ PUSH_DATAh(push, address);
+ PUSH_DATA (push, address);
}
BEGIN_NVC0(push, NVC0_2D(BLIT_SRC_Y_INT), 1); /* trigger */
PUSH_DATA (push, srcy >> 32);
diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c
index 14f93fb..e8f4087 100644
--- a/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c
+++ b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c
@@ -693,7 +693,8 @@ void rc_init_regalloc_state(struct rc_regalloc_state *s)
};
/* Allocate the main ra data structure */
- s->regs = ra_alloc_reg_set(NULL, R500_PFS_NUM_TEMP_REGS * RC_MASK_XYZW);
+ s->regs = ra_alloc_reg_set(NULL, R500_PFS_NUM_TEMP_REGS * RC_MASK_XYZW,
+ true);
/* Create the register classes */
for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index 6ea8f24..b8cc316 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -667,7 +667,8 @@ static void r300_resource_copy_region(struct pipe_context *pipe,
r300_blitter_begin(r300, R300_COPY);
util_blitter_blit_generic(r300->blitter, dst_view, &dstbox,
src_view, src_box, src_width0, src_height0,
- PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL);
+ PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL,
+ FALSE);
r300_blitter_end(r300);
pipe_surface_reference(&dst_view, NULL);
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index b0002c3..22a0950 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -732,7 +732,8 @@ void r600_resource_copy_region(struct pipe_context *ctx,
r600_blitter_begin(ctx, R600_COPY_TEXTURE);
util_blitter_blit_generic(rctx->blitter, dst_view, &dstbox,
src_view, src_box, src_width0, src_height0,
- PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL);
+ PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL,
+ FALSE);
r600_blitter_end(ctx);
pipe_surface_reference(&dst_view, NULL);
diff --git a/src/gallium/drivers/r600/r600_isa.h b/src/gallium/drivers/r600/r600_isa.h
index 381f06d..fdbe1c0 100644
--- a/src/gallium/drivers/r600/r600_isa.h
+++ b/src/gallium/drivers/r600/r600_isa.h
@@ -262,7 +262,7 @@ static const struct alu_op_info alu_op_table[] = {
{"PRED_SETNE_PUSH_INT", 2, { 0x4D, 0x4D },{ AF_VS, AF_VS, AF_VS, AF_VS}, AF_PRED_PUSH | AF_CC_NE | AF_INT_CMP },
{"PRED_SETLT_PUSH_INT", 2, { 0x4E, 0x4E },{ AF_VS, AF_VS, AF_VS, AF_VS}, AF_PRED_PUSH | AF_CC_LT | AF_INT_CMP },
{"PRED_SETLE_PUSH_INT", 2, { 0x4F, 0x4F },{ AF_VS, AF_VS, AF_VS, AF_VS}, AF_PRED_PUSH | AF_CC_LE | AF_INT_CMP },
- {"FLT_TO_INT", 1, { 0x6B, 0x50 },{ AF_S, AF_S, AF_VS, AF_VS}, AF_INT_DST | AF_CVT },
+ {"FLT_TO_INT", 1, { 0x6B, 0x50 },{ AF_S, AF_S, AF_V, AF_V}, AF_INT_DST | AF_CVT },
{"BFREV_INT", 1, { -1, 0x51 },{ 0, 0, AF_VS, AF_VS}, AF_INT_DST },
{"ADDC_UINT", 2, { -1, 0x52 },{ 0, 0, AF_VS, AF_VS}, AF_UINT_DST },
{"SUBB_UINT", 2, { -1, 0x53 },{ 0, 0, AF_VS, AF_VS}, AF_UINT_DST },
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 9b66105..384ba80 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -90,7 +90,7 @@
struct r600_context;
struct r600_bytecode;
-struct r600_shader_key;
+union r600_shader_key;
/* This is an atom containing GPU commands that never change.
* This is supposed to be copied directly into the CS. */
@@ -643,7 +643,7 @@ void r600_resource_copy_region(struct pipe_context *ctx,
/* r600_shader.c */
int r600_pipe_shader_create(struct pipe_context *ctx,
struct r600_pipe_shader *shader,
- struct r600_shader_key key);
+ union r600_shader_key key);
void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader);
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 8d1f95a..4c4b600 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -62,7 +62,7 @@ The compiler must issue the source argument to slots z, y, and x
static int r600_shader_from_tgsi(struct r600_context *rctx,
struct r600_pipe_shader *pipeshader,
- struct r600_shader_key key);
+ union r600_shader_key key);
static void r600_add_gpr_array(struct r600_shader *ps, int start_gpr,
@@ -133,7 +133,7 @@ static int store_shader(struct pipe_context *ctx,
int r600_pipe_shader_create(struct pipe_context *ctx,
struct r600_pipe_shader *shader,
- struct r600_shader_key key)
+ union r600_shader_key key)
{
struct r600_context *rctx = (struct r600_context *)ctx;
struct r600_pipe_shader_selector *sel = shader->selector;
@@ -141,7 +141,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
bool dump = r600_can_dump_shader(&rctx->screen->b, sel->tokens);
unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB);
unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
- unsigned export_shader = key.vs_as_es;
+ unsigned export_shader = key.vs.as_es;
shader->shader.bc.isa = rctx->isa;
@@ -1802,7 +1802,7 @@ static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind)
static int r600_shader_from_tgsi(struct r600_context *rctx,
struct r600_pipe_shader *pipeshader,
- struct r600_shader_key key)
+ union r600_shader_key key)
{
struct r600_screen *rscreen = rctx->screen;
struct r600_shader *shader = &pipeshader->shader;
@@ -1816,7 +1816,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
unsigned opcode;
int i, j, k, r = 0;
int next_param_base = 0, next_clip_base;
- int max_color_exports = MAX2(key.nr_cbufs, 1);
+ int max_color_exports = MAX2(key.ps.nr_cbufs, 1);
/* Declarations used by llvm code */
bool use_llvm = false;
bool indirect_gprs;
@@ -1830,8 +1830,8 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
ctx.shader = shader;
ctx.native_integers = true;
- shader->vs_as_gs_a = key.vs_as_gs_a;
- shader->vs_as_es = key.vs_as_es;
+ shader->vs_as_gs_a = key.vs.as_gs_a;
+ shader->vs_as_es = key.vs.as_es;
r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family,
rscreen->has_compressed_msaa_texturing);
@@ -1844,9 +1844,9 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
shader->processor_type = ctx.type;
ctx.bc->type = shader->processor_type;
- ring_outputs = key.vs_as_es || (ctx.type == TGSI_PROCESSOR_GEOMETRY);
+ ring_outputs = key.vs.as_es || (ctx.type == TGSI_PROCESSOR_GEOMETRY);
- if (key.vs_as_es) {
+ if (key.vs.as_es) {
ctx.gs_for_vs = &rctx->gs_shader->current->shader;
} else {
ctx.gs_for_vs = NULL;
@@ -1866,7 +1866,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
shader->nr_ps_color_exports = 0;
shader->nr_ps_max_color_exports = 0;
- shader->two_side = key.color_two_side;
+ shader->two_side = key.ps.color_two_side;
/* register allocations */
/* Values [0,127] correspond to GPR[0..127].
@@ -1970,7 +1970,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
shader->fs_write_all = FALSE;
if (shader->vs_as_gs_a)
- vs_add_primid_output(&ctx, key.vs_prim_id_out);
+ vs_add_primid_output(&ctx, key.vs.prim_id_out);
while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
tgsi_parse_token(&ctx.parse);
@@ -2091,7 +2091,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
radeon_llvm_ctx.chip_class = ctx.bc->chip_class;
radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN);
radeon_llvm_ctx.stream_outputs = &so;
- radeon_llvm_ctx.alpha_to_one = key.alpha_to_one;
+ radeon_llvm_ctx.alpha_to_one = key.ps.alpha_to_one;
radeon_llvm_ctx.has_compressed_msaa_texturing =
ctx.bc->has_compressed_msaa_texturing;
mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
@@ -2270,7 +2270,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
convert_edgeflag_to_int(&ctx);
if (ring_outputs) {
- if (key.vs_as_es)
+ if (key.vs.as_es)
emit_gs_ring_writes(&ctx, FALSE);
} else {
/* Export output */
@@ -2386,7 +2386,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
j--;
continue;
}
- output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
+ output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3;
output[j].array_base = shader->output[i].sid;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
shader->nr_ps_color_exports++;
@@ -2399,7 +2399,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
output[j].swizzle_x = 0;
output[j].swizzle_y = 1;
output[j].swizzle_z = 2;
- output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
+ output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3;
output[j].burst_count = 1;
output[j].array_base = k;
output[j].op = CF_OP_EXPORT;
@@ -6151,10 +6151,10 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx)
r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]);
if (r)
return r;
- r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[1], &ctx->src[2]);
+ r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[1], &ctx->src[2]);
if (r)
return r;
- r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[2], &ctx->src[1]);
+ r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[2], &ctx->src[1]);
if (r)
return r;
tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index 5d05c81..927bac5 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -95,13 +95,17 @@ struct r600_shader {
struct r600_shader_array * arrays;
};
-struct r600_shader_key {
- unsigned color_two_side:1;
- unsigned alpha_to_one:1;
- unsigned nr_cbufs:4;
- unsigned vs_as_es:1;
- unsigned vs_as_gs_a:1;
- unsigned vs_prim_id_out:8;
+union r600_shader_key {
+ struct {
+ unsigned nr_cbufs:4;
+ unsigned color_two_side:1;
+ unsigned alpha_to_one:1;
+ } ps;
+ struct {
+ unsigned prim_id_out:8;
+ unsigned as_es:1; /* export shader */
+ unsigned as_gs_a:1;
+ } vs;
};
struct r600_shader_array {
@@ -122,7 +126,7 @@ struct r600_pipe_shader {
unsigned flatshade;
unsigned pa_cl_vs_out_cntl;
unsigned nr_ps_color_outputs;
- struct r600_shader_key key;
+ union r600_shader_key key;
unsigned db_shader_control;
unsigned ps_depth_export;
unsigned enabled_stream_buffers_mask;
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index aa4a8d0..a05dd83 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -702,29 +702,39 @@ void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom)
}
/* Compute the key for the hw shader variant */
-static inline struct r600_shader_key r600_shader_selector_key(struct pipe_context * ctx,
+static inline union r600_shader_key r600_shader_selector_key(struct pipe_context * ctx,
struct r600_pipe_shader_selector * sel)
{
struct r600_context *rctx = (struct r600_context *)ctx;
- struct r600_shader_key key;
+ union r600_shader_key key;
memset(&key, 0, sizeof(key));
- if (sel->type == PIPE_SHADER_FRAGMENT) {
- key.color_two_side = rctx->rasterizer && rctx->rasterizer->two_side;
- key.alpha_to_one = rctx->alpha_to_one &&
- rctx->rasterizer && rctx->rasterizer->multisample_enable &&
- !rctx->framebuffer.cb0_is_integer;
- key.nr_cbufs = rctx->framebuffer.state.nr_cbufs;
- /* Dual-source blending only makes sense with nr_cbufs == 1. */
- if (key.nr_cbufs == 1 && rctx->dual_src_blend)
- key.nr_cbufs = 2;
- } else if (sel->type == PIPE_SHADER_VERTEX) {
- key.vs_as_es = (rctx->gs_shader != NULL);
+ switch (sel->type) {
+ case PIPE_SHADER_VERTEX: {
+ key.vs.as_es = (rctx->gs_shader != NULL);
if (rctx->ps_shader->current->shader.gs_prim_id_input && !rctx->gs_shader) {
- key.vs_as_gs_a = true;
- key.vs_prim_id_out = rctx->ps_shader->current->shader.input[rctx->ps_shader->current->shader.ps_prim_id_input].spi_sid;
+ key.vs.as_gs_a = true;
+ key.vs.prim_id_out = rctx->ps_shader->current->shader.input[rctx->ps_shader->current->shader.ps_prim_id_input].spi_sid;
}
+ break;
+ }
+ case PIPE_SHADER_GEOMETRY:
+ break;
+ case PIPE_SHADER_FRAGMENT: {
+ key.ps.color_two_side = rctx->rasterizer && rctx->rasterizer->two_side;
+ key.ps.alpha_to_one = rctx->alpha_to_one &&
+ rctx->rasterizer && rctx->rasterizer->multisample_enable &&
+ !rctx->framebuffer.cb0_is_integer;
+ key.ps.nr_cbufs = rctx->framebuffer.state.nr_cbufs;
+ /* Dual-source blending only makes sense with nr_cbufs == 1. */
+ if (key.ps.nr_cbufs == 1 && rctx->dual_src_blend)
+ key.ps.nr_cbufs = 2;
+ break;
}
+ default:
+ assert(0);
+ }
+
return key;
}
@@ -734,7 +744,7 @@ static int r600_shader_select(struct pipe_context *ctx,
struct r600_pipe_shader_selector* sel,
bool *dirty)
{
- struct r600_shader_key key;
+ union r600_shader_key key;
struct r600_pipe_shader * shader = NULL;
int r;
diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c
index 16ee541..81f3f45 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -209,8 +209,6 @@ static uint32_t profile2stream_type(struct ruvd_decoder *dec, unsigned family)
static unsigned calc_ctx_size(struct ruvd_decoder *dec)
{
- unsigned width_in_mb, height_in_mb, ctx_size;
-
unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
@@ -223,8 +221,7 @@ static unsigned calc_ctx_size(struct ruvd_decoder *dec)
width = align (width, 16);
height = align (height, 16);
- ctx_size = ((width + 255) / 16)*((height + 255) / 16) * 16 * max_references + 52 * 1024;
- return ctx_size;
+ return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024;
}
/* calculate size of reference picture buffer */
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 48972bd..b7450b6 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -586,7 +586,8 @@ void si_resource_copy_region(struct pipe_context *ctx,
si_blitter_begin(ctx, SI_COPY);
util_blitter_blit_generic(sctx->blitter, dst_view, &dstbox,
src_view, src_box, src_width0, src_height0,
- PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL);
+ PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL,
+ FALSE);
si_blitter_end(ctx);
pipe_surface_reference(&dst_view, NULL);
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 4288e9b..fa6c15a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2277,7 +2277,7 @@ static void tex_fetch_args(
unsigned sampler_index;
unsigned num_deriv_channels = 0;
bool has_offset = HAVE_LLVM >= 0x0305 ? inst->Texture.NumOffsets > 0 : false;
- LLVMValueRef res_ptr, samp_ptr;
+ LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
sampler_index = emit_data->inst->Src[sampler_src].Register.Index;
@@ -2293,9 +2293,19 @@ static void tex_fetch_args(
samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER);
samp_ptr = build_indexed_load_const(si_shader_ctx, samp_ptr, ind_index);
+
+ if (target == TGSI_TEXTURE_2D_MSAA ||
+ target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
+ ind_index = LLVMBuildAdd(gallivm->builder, ind_index,
+ lp_build_const_int32(gallivm,
+ SI_FMASK_TEX_OFFSET), "");
+ fmask_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
+ fmask_ptr = build_indexed_load_const(si_shader_ctx, fmask_ptr, ind_index);
+ }
} else {
res_ptr = si_shader_ctx->resources[sampler_index];
samp_ptr = si_shader_ctx->samplers[sampler_index];
+ fmask_ptr = si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + sampler_index];
}
if (target == TGSI_TEXTURE_BUFFER) {
@@ -2493,7 +2503,7 @@ static void tex_fetch_args(
txf_emit_data.dst_type = LLVMVectorType(
LLVMInt32TypeInContext(gallivm->context), 4);
txf_emit_data.args[0] = lp_build_gather_values(gallivm, txf_address, txf_count);
- txf_emit_data.args[1] = si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + sampler_index];
+ txf_emit_data.args[1] = fmask_ptr;
txf_emit_data.args[2] = lp_build_const_int32(gallivm, inst.Texture.Texture);
txf_emit_data.arg_count = 3;
@@ -2524,8 +2534,7 @@ static void tex_fetch_args(
* resource descriptor is 0 (invalid),
*/
LLVMValueRef fmask_desc =
- LLVMBuildBitCast(gallivm->builder,
- si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + sampler_index],
+ LLVMBuildBitCast(gallivm->builder, fmask_ptr,
LLVMVectorType(uint_bld->elem_type, 8), "");
LLVMValueRef fmask_word1 =
@@ -3973,7 +3982,7 @@ static void si_dump_key(unsigned shader, union si_shader_key *key)
fprintf(stderr, " es_enabled_outputs = 0x%"PRIx64"\n",
key->vs.es_enabled_outputs);
fprintf(stderr, " as_es = %u\n", key->vs.as_es);
- fprintf(stderr, " as_es = %u\n", key->vs.as_ls);
+ fprintf(stderr, " as_ls = %u\n", key->vs.as_ls);
break;
case PIPE_SHADER_TESS_CTRL:
diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index 654c46f..3a63af8 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -270,6 +270,7 @@ struct vc4_context {
struct ra_regs *regs;
unsigned int reg_class_any;
+ unsigned int reg_class_a_or_b_or_acc;
unsigned int reg_class_r4_or_a;
unsigned int reg_class_a;
diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
index 7978ea1..5b43583 100644
--- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c
+++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
@@ -143,15 +143,6 @@ qir_opt_algebraic(struct vc4_compile *c)
case QOP_SEL_X_Y_ZC:
case QOP_SEL_X_Y_NS:
case QOP_SEL_X_Y_NC:
- if (qir_reg_equals(inst->src[0], inst->src[1])) {
- /* Turn "dst = (sf == x) ? a : a)" into
- * "dst = a"
- */
- replace_with_mov(c, inst, inst->src[1]);
- progress = true;
- break;
- }
-
if (is_zero(c, inst->src[1])) {
/* Replace references to a 0 uniform value
* with the SEL_X_0 equivalent.
@@ -207,6 +198,7 @@ qir_opt_algebraic(struct vc4_compile *c)
/* FADD(a, FSUB(0, b)) -> FSUB(a, b) */
if (inst->src[1].file == QFILE_TEMP &&
+ c->defs[inst->src[1].index] &&
c->defs[inst->src[1].index]->op == QOP_FSUB) {
struct qinst *fsub = c->defs[inst->src[1].index];
if (is_zero(c, fsub->src[0])) {
@@ -221,6 +213,7 @@ qir_opt_algebraic(struct vc4_compile *c)
/* FADD(FSUB(0, b), a) -> FSUB(a, b) */
if (inst->src[0].file == QFILE_TEMP &&
+ c->defs[inst->src[0].index] &&
c->defs[inst->src[0].index]->op == QOP_FSUB) {
struct qinst *fsub = c->defs[inst->src[0].index];
if (is_zero(c, fsub->src[0])) {
@@ -236,18 +229,20 @@ qir_opt_algebraic(struct vc4_compile *c)
break;
case QOP_FMUL:
- if (replace_x_0_with_0(c, inst, 0) ||
- replace_x_0_with_0(c, inst, 1) ||
- fmul_replace_one(c, inst, 0) ||
- fmul_replace_one(c, inst, 1)) {
+ if (!inst->dst.pack &&
+ (replace_x_0_with_0(c, inst, 0) ||
+ replace_x_0_with_0(c, inst, 1) ||
+ fmul_replace_one(c, inst, 0) ||
+ fmul_replace_one(c, inst, 1))) {
progress = true;
break;
}
break;
case QOP_MUL24:
- if (replace_x_0_with_0(c, inst, 0) ||
- replace_x_0_with_0(c, inst, 1)) {
+ if (!inst->dst.pack &&
+ (replace_x_0_with_0(c, inst, 0) ||
+ replace_x_0_with_0(c, inst, 1))) {
progress = true;
break;
}
@@ -280,6 +275,14 @@ qir_opt_algebraic(struct vc4_compile *c)
}
break;
+ case QOP_RCP:
+ if (is_1f(c, inst->src[0])) {
+ replace_with_mov(c, inst, inst->src[0]);
+ progress = true;
+ break;
+ }
+ break;
+
default:
break;
}
diff --git a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
index a755de9..fd2539a 100644
--- a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
+++ b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
@@ -39,21 +39,27 @@ qir_opt_copy_propagation(struct vc4_compile *c)
{
bool progress = false;
bool debug = false;
- struct qreg *movs = calloc(c->num_temps, sizeof(struct qreg));
list_for_each_entry(struct qinst, inst, &c->instructions, link) {
for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
int index = inst->src[i].index;
if (inst->src[i].file == QFILE_TEMP &&
- (movs[index].file == QFILE_TEMP ||
- movs[index].file == QFILE_UNIF)) {
+ c->defs[index] &&
+ c->defs[index]->op == QOP_MOV &&
+ (c->defs[index]->src[0].file == QFILE_TEMP ||
+ c->defs[index]->src[0].file == QFILE_UNIF)) {
+ /* If it has a pack, it shouldn't be an SSA
+ * def.
+ */
+ assert(!c->defs[index]->dst.pack);
+
if (debug) {
fprintf(stderr, "Copy propagate: ");
qir_dump_inst(c, inst);
fprintf(stderr, "\n");
}
- inst->src[i] = movs[index];
+ inst->src[i] = c->defs[index]->src[0];
if (debug) {
fprintf(stderr, "to: ");
@@ -64,14 +70,6 @@ qir_opt_copy_propagation(struct vc4_compile *c)
progress = true;
}
}
-
- if (inst->op == QOP_MOV &&
- inst->dst.file == QFILE_TEMP &&
- inst->src[0].file != QFILE_VPM) {
- movs[inst->dst.index] = inst->src[0];
- }
}
-
- free(movs);
return progress;
}
diff --git a/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c
index e04f028..f2cdf8f 100644
--- a/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c
+++ b/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c
@@ -68,7 +68,7 @@ qir_opt_vpm_writes(struct vc4_compile *c)
continue;
struct qinst *inst = c->defs[temp];
- if (qir_is_multi_instruction(inst))
+ if (!inst || qir_is_multi_instruction(inst))
continue;
if (qir_depends_on_flags(inst) || inst->sf)
@@ -79,22 +79,6 @@ qir_opt_vpm_writes(struct vc4_compile *c)
continue;
}
- /* A QOP_TEX_RESULT destination is r4, so we can't move
- * accesses to it past another QOP_TEX_RESULT which would
- * update it.
- */
- int src;
- for (src = 0; src < qir_get_op_nsrc(inst->op); src++) {
- if (inst->src[src].file == QFILE_TEMP) {
- if (c->defs[inst->src[src].index]->op ==
- QOP_TEX_RESULT) {
- break;
- }
- }
- }
- if (src != qir_get_op_nsrc(inst->op))
- continue;
-
/* Move the generating instruction to the end of the program
* to maintain the order of the VPM writes.
*/
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 13c4721..e002983 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -818,6 +818,72 @@ declare_uniform_range(struct vc4_compile *c, uint32_t start, uint32_t size)
c->ubo_ranges[array_id].used = false;
}
+static bool
+ntq_src_is_only_ssa_def_user(nir_src *src)
+{
+ if (!src->is_ssa)
+ return false;
+
+ if (!list_empty(&src->ssa->if_uses))
+ return false;
+
+ return (src->ssa->uses.next == &src->use_link &&
+ src->ssa->uses.next->next == &src->ssa->uses);
+}
+
+/**
+ * In general, emits a nir_pack_unorm_4x8 as a series of MOVs with the pack
+ * bit set.
+ *
+ * However, as an optimization, it tries to find the instructions generating
+ * the sources to be packed and just emit the pack flag there, if possible.
+ */
+static void
+ntq_emit_pack_unorm_4x8(struct vc4_compile *c, nir_alu_instr *instr)
+{
+ struct qreg result = qir_get_temp(c);
+ struct nir_alu_instr *vec4 = NULL;
+
+ /* If packing from a vec4 op (as expected), identify it so that we can
+ * peek back at what generated its sources.
+ */
+ if (instr->src[0].src.is_ssa &&
+ instr->src[0].src.ssa->parent_instr->type == nir_instr_type_alu &&
+ nir_instr_as_alu(instr->src[0].src.ssa->parent_instr)->op ==
+ nir_op_vec4) {
+ vec4 = nir_instr_as_alu(instr->src[0].src.ssa->parent_instr);
+ }
+
+ for (int i = 0; i < 4; i++) {
+ int swiz = instr->src[0].swizzle[i];
+ struct qreg src;
+ if (vec4) {
+ src = ntq_get_src(c, vec4->src[swiz].src,
+ vec4->src[swiz].swizzle[0]);
+ } else {
+ src = ntq_get_src(c, instr->src[0].src, swiz);
+ }
+
+ if (vec4 &&
+ ntq_src_is_only_ssa_def_user(&vec4->src[swiz].src) &&
+ src.file == QFILE_TEMP &&
+ c->defs[src.index] &&
+ qir_is_mul(c->defs[src.index]) &&
+ !c->defs[src.index]->dst.pack) {
+ struct qinst *rewrite = c->defs[src.index];
+ c->defs[src.index] = NULL;
+ rewrite->dst = result;
+ rewrite->dst.pack = QPU_PACK_MUL_8A + i;
+ continue;
+ }
+
+ qir_PACK_8_F(c, result, src, i);
+ }
+
+ struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
+ *dest = result;
+}
+
static void
ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
{
@@ -839,17 +905,7 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
}
if (instr->op == nir_op_pack_unorm_4x8) {
- struct qreg result;
- for (int i = 0; i < 4; i++) {
- struct qreg src = ntq_get_src(c, instr->src[0].src,
- instr->src[0].swizzle[i]);
- if (i == 0)
- result = qir_PACK_8888_F(c, src);
- else
- result = qir_PACK_8_F(c, result, src, i);
- }
- struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
- *dest = result;
+ ntq_emit_pack_unorm_4x8(c, instr);
return;
}
@@ -1130,20 +1186,24 @@ emit_frag_end(struct vc4_compile *c)
static void
emit_scaled_viewport_write(struct vc4_compile *c, struct qreg rcp_w)
{
- struct qreg xyi[2];
+ struct qreg packed = qir_get_temp(c);
for (int i = 0; i < 2; i++) {
struct qreg scale =
qir_uniform(c, QUNIFORM_VIEWPORT_X_SCALE + i, 0);
- xyi[i] = qir_FTOI(c, qir_FMUL(c,
- qir_FMUL(c,
- c->outputs[c->output_position_index + i],
- scale),
- rcp_w));
+ struct qreg packed_chan = packed;
+ packed_chan.pack = QPU_PACK_A_16A + i;
+
+ qir_FTOI_dest(c, packed_chan,
+ qir_FMUL(c,
+ qir_FMUL(c,
+ c->outputs[c->output_position_index + i],
+ scale),
+ rcp_w));
}
- qir_VPM_WRITE(c, qir_PACK_SCALED(c, xyi[0], xyi[1]));
+ qir_VPM_WRITE(c, packed);
}
static void
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 254140a..9d93071 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -71,12 +71,11 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_RSQ] = { "rsq", 1, 1, false, true },
[QOP_EXP2] = { "exp2", 1, 2, false, true },
[QOP_LOG2] = { "log2", 1, 2, false, true },
- [QOP_PACK_8888_F] = { "pack_8888_f", 1, 1, false, true },
- [QOP_PACK_8A_F] = { "pack_8a_f", 1, 2, false, true },
- [QOP_PACK_8B_F] = { "pack_8b_f", 1, 2, false, true },
- [QOP_PACK_8C_F] = { "pack_8c_f", 1, 2, false, true },
- [QOP_PACK_8D_F] = { "pack_8d_f", 1, 2, false, true },
- [QOP_PACK_SCALED] = { "pack_scaled", 1, 2, false, true },
+ [QOP_PACK_8888_F] = { "pack_8888_f", 1, 1 },
+ [QOP_PACK_8A_F] = { "pack_8a_f", 1, 1 },
+ [QOP_PACK_8B_F] = { "pack_8b_f", 1, 1 },
+ [QOP_PACK_8C_F] = { "pack_8c_f", 1, 1 },
+ [QOP_PACK_8D_F] = { "pack_8d_f", 1, 1 },
[QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true },
[QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true },
[QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true },
@@ -169,6 +168,18 @@ qir_is_multi_instruction(struct qinst *inst)
}
bool
+qir_is_mul(struct qinst *inst)
+{
+ switch (inst->op) {
+ case QOP_FMUL:
+ case QOP_MUL24:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool
qir_is_tex(struct qinst *inst)
{
return inst->op >= QOP_TEX_S && inst->op <= QOP_TEX_DIRECT;
@@ -273,6 +284,14 @@ qir_dump_inst(struct vc4_compile *c, struct qinst *inst)
inst->sf ? ".sf" : "");
qir_print_reg(c, inst->dst, true);
+ if (inst->dst.pack) {
+ if (inst->dst.pack) {
+ if (qir_is_mul(inst))
+ vc4_qpu_disasm_pack_mul(stderr, inst->dst.pack);
+ else
+ vc4_qpu_disasm_pack_a(stderr, inst->dst.pack);
+ }
+ }
for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
fprintf(stderr, ", ");
qir_print_reg(c, inst->src[i], false);
@@ -348,7 +367,7 @@ qir_emit(struct vc4_compile *c, struct qinst *inst)
if (inst->dst.file == QFILE_TEMP)
c->defs[inst->dst.index] = inst;
- list_addtail(&inst->link, &c->instructions);
+ qir_emit_nodef(c, inst);
}
bool
@@ -389,8 +408,11 @@ qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst)
struct qreg
qir_follow_movs(struct vc4_compile *c, struct qreg reg)
{
- while (reg.file == QFILE_TEMP && c->defs[reg.index]->op == QOP_MOV)
+ while (reg.file == QFILE_TEMP &&
+ c->defs[reg.index] &&
+ c->defs[reg.index]->op == QOP_MOV) {
reg = c->defs[reg.index]->src[0];
+ }
return reg;
}
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index cade795..a2b21fa 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -58,6 +58,7 @@ enum qfile {
struct qreg {
enum qfile file;
uint32_t index;
+ int pack;
};
enum qop {
@@ -104,7 +105,6 @@ enum qop {
QOP_LOG2,
QOP_VW_SETUP,
QOP_VR_SETUP,
- QOP_PACK_SCALED,
QOP_PACK_8888_F,
QOP_PACK_8A_F,
QOP_PACK_8B_F,
@@ -444,13 +444,20 @@ struct qreg qir_uniform(struct vc4_compile *c,
enum quniform_contents contents,
uint32_t data);
void qir_reorder_uniforms(struct vc4_compile *c);
+
void qir_emit(struct vc4_compile *c, struct qinst *inst);
+static inline void qir_emit_nodef(struct vc4_compile *c, struct qinst *inst)
+{
+ list_addtail(&inst->link, &c->instructions);
+}
+
struct qreg qir_get_temp(struct vc4_compile *c);
int qir_get_op_nsrc(enum qop qop);
bool qir_reg_equals(struct qreg a, struct qreg b);
bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);
bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst);
bool qir_is_multi_instruction(struct qinst *inst);
+bool qir_is_mul(struct qinst *inst);
bool qir_is_tex(struct qinst *inst);
bool qir_depends_on_flags(struct qinst *inst);
bool qir_writes_r4(struct qinst *inst);
@@ -509,6 +516,12 @@ qir_##name(struct vc4_compile *c, struct qreg a) \
struct qreg t = qir_get_temp(c); \
qir_emit(c, qir_inst(QOP_##name, t, a, c->undef)); \
return t; \
+} \
+static inline void \
+qir_##name##_dest(struct vc4_compile *c, struct qreg dest, \
+ struct qreg a) \
+{ \
+ qir_emit_nodef(c, qir_inst(QOP_##name, dest, a, c->undef)); \
}
#define QIR_ALU2(name) \
@@ -518,6 +531,12 @@ qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \
struct qreg t = qir_get_temp(c); \
qir_emit(c, qir_inst(QOP_##name, t, a, b)); \
return t; \
+} \
+static inline void \
+qir_##name##_dest(struct vc4_compile *c, struct qreg dest, \
+ struct qreg a, struct qreg b) \
+{ \
+ qir_emit_nodef(c, qir_inst(QOP_##name, dest, a, b)); \
}
#define QIR_NODST_1(name) \
@@ -534,6 +553,14 @@ qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \
qir_emit(c, qir_inst(QOP_##name, c->undef, a, b)); \
}
+#define QIR_PACK(name) \
+static inline struct qreg \
+qir_##name(struct vc4_compile *c, struct qreg dest, struct qreg a) \
+{ \
+ qir_emit_nodef(c, qir_inst(QOP_##name, dest, a, c->undef)); \
+ return dest; \
+}
+
QIR_ALU1(MOV)
QIR_ALU2(FADD)
QIR_ALU2(FSUB)
@@ -570,12 +597,11 @@ QIR_ALU1(RCP)
QIR_ALU1(RSQ)
QIR_ALU1(EXP2)
QIR_ALU1(LOG2)
-QIR_ALU2(PACK_SCALED)
QIR_ALU1(PACK_8888_F)
-QIR_ALU2(PACK_8A_F)
-QIR_ALU2(PACK_8B_F)
-QIR_ALU2(PACK_8C_F)
-QIR_ALU2(PACK_8D_F)
+QIR_PACK(PACK_8A_F)
+QIR_PACK(PACK_8B_F)
+QIR_PACK(PACK_8C_F)
+QIR_PACK(PACK_8D_F)
QIR_ALU1(VARY_ADD_C)
QIR_NODST_2(TEX_S)
QIR_NODST_2(TEX_T)
@@ -627,11 +653,12 @@ qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i)
}
static inline struct qreg
-qir_PACK_8_F(struct vc4_compile *c, struct qreg rest, struct qreg val, int chan)
+qir_PACK_8_F(struct vc4_compile *c, struct qreg dest, struct qreg val, int chan)
{
- struct qreg t = qir_get_temp(c);
- qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, t, rest, val));
- return t;
+ qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, dest, val, c->undef));
+ if (dest.file == QFILE_TEMP)
+ c->defs[dest.index] = NULL;
+ return dest;
}
static inline struct qreg
diff --git a/src/gallium/drivers/vc4/vc4_qpu.h b/src/gallium/drivers/vc4/vc4_qpu.h
index fbb90ba..0719d28 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.h
+++ b/src/gallium/drivers/vc4/vc4_qpu.h
@@ -24,6 +24,7 @@
#ifndef VC4_QPU_H
#define VC4_QPU_H
+#include <stdio.h>
#include <stdint.h>
#include "util/u_math.h"
@@ -206,6 +207,12 @@ void
vc4_qpu_disasm(const uint64_t *instructions, int num_instructions);
void
+vc4_qpu_disasm_pack_mul(FILE *out, uint32_t pack);
+
+void
+vc4_qpu_disasm_pack_a(FILE *out, uint32_t pack);
+
+void
vc4_qpu_validate(uint64_t *insts, uint32_t num_inst);
#endif /* VC4_QPU_H */
diff --git a/src/gallium/drivers/vc4/vc4_qpu_disasm.c b/src/gallium/drivers/vc4/vc4_qpu_disasm.c
index 00aeb30..0879787 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_disasm.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_disasm.c
@@ -245,6 +245,18 @@ get_special_write_desc(int reg, bool is_a)
return special_write[reg];
}
+void
+vc4_qpu_disasm_pack_mul(FILE *out, uint32_t pack)
+{
+ fprintf(out, ".%s", DESC(qpu_pack_mul, pack));
+}
+
+void
+vc4_qpu_disasm_pack_a(FILE *out, uint32_t pack)
+{
+ fprintf(out, "%s", DESC(qpu_pack_a, pack));
+}
+
static void
print_alu_dst(uint64_t inst, bool is_mul)
{
@@ -263,9 +275,9 @@ print_alu_dst(uint64_t inst, bool is_mul)
fprintf(stderr, "%s%d?", file, waddr);
if (is_mul && (inst & QPU_PM)) {
- fprintf(stderr, ".%s", DESC(qpu_pack_mul, pack));
+ vc4_qpu_disasm_pack_mul(stderr, pack);
} else if (is_a && !(inst & QPU_PM)) {
- fprintf(stderr, "%s", DESC(qpu_pack_a, pack));
+ vc4_qpu_disasm_pack_a(stderr, pack);
}
}
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index f324056..adf3a8b 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -179,10 +179,9 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
static const struct {
uint32_t op;
- bool is_mul;
} translate[] = {
-#define A(name) [QOP_##name] = {QPU_A_##name, false}
-#define M(name) [QOP_##name] = {QPU_M_##name, true}
+#define A(name) [QOP_##name] = {QPU_A_##name}
+#define M(name) [QOP_##name] = {QPU_M_##name}
A(FADD),
A(FSUB),
A(FMIN),
@@ -336,28 +335,12 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QOP_PACK_8B_F:
case QOP_PACK_8C_F:
case QOP_PACK_8D_F:
- /* If dst doesn't happen to already contain src[0],
- * then we have to move it in.
- */
- if (qinst->src[0].file != QFILE_NULL &&
- (src[0].mux != dst.mux || src[0].addr != dst.addr)) {
- /* Don't overwrite src1 while setting up
- * the dst!
- */
- if (dst.mux == src[1].mux &&
- dst.addr == src[1].addr) {
- queue(c, qpu_m_MOV(qpu_rb(31), src[1]));
- src[1] = qpu_rb(31);
- }
-
- queue(c, qpu_m_MOV(dst, src[0]));
- }
-
- queue(c, qpu_m_MOV(dst, src[1]));
- *last_inst(c) |= QPU_PM;
- *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8A +
- qinst->op - QOP_PACK_8A_F,
- QPU_PACK);
+ queue(c,
+ qpu_m_MOV(dst, src[0]) |
+ QPU_PM |
+ QPU_SET_FIELD(QPU_PACK_MUL_8A +
+ qinst->op - QOP_PACK_8A_F,
+ QPU_PACK));
break;
case QOP_FRAG_X:
@@ -419,24 +402,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
queue(c, qpu_a_FADD(dst, src[0], qpu_r5()));
break;
- case QOP_PACK_SCALED: {
- uint64_t a = (qpu_a_MOV(dst, src[0]) |
- QPU_SET_FIELD(QPU_PACK_A_16A,
- QPU_PACK));
- uint64_t b = (qpu_a_MOV(dst, src[1]) |
- QPU_SET_FIELD(QPU_PACK_A_16B,
- QPU_PACK));
-
- if (dst.mux == src[1].mux && dst.addr == src[1].addr) {
- queue(c, b);
- queue(c, a);
- } else {
- queue(c, a);
- queue(c, b);
- }
- break;
- }
-
case QOP_TEX_S:
case QOP_TEX_T:
case QOP_TEX_R:
@@ -529,14 +494,24 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
fixup_raddr_conflict(c, dst, &src[0], &src[1]);
- if (translate[qinst->op].is_mul) {
+ if (qir_is_mul(qinst)) {
queue(c, qpu_m_alu2(translate[qinst->op].op,
dst,
src[0], src[1]));
+ if (qinst->dst.pack) {
+ *last_inst(c) |= QPU_PM;
+ *last_inst(c) |= QPU_SET_FIELD(qinst->dst.pack,
+ QPU_PACK);
+ }
} else {
queue(c, qpu_a_alu2(translate[qinst->op].op,
dst,
src[0], src[1]));
+ if (qinst->dst.pack) {
+ assert(dst.mux == QPU_MUX_A);
+ *last_inst(c) |= QPU_SET_FIELD(qinst->dst.pack,
+ QPU_PACK);
+ }
}
break;
diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c
index a29db1f..3ced50f 100644
--- a/src/gallium/drivers/vc4/vc4_register_allocate.c
+++ b/src/gallium/drivers/vc4/vc4_register_allocate.c
@@ -113,9 +113,10 @@ vc4_alloc_reg_set(struct vc4_context *vc4)
if (vc4->regs)
return;
- vc4->regs = ra_alloc_reg_set(vc4, ARRAY_SIZE(vc4_regs));
+ vc4->regs = ra_alloc_reg_set(vc4, ARRAY_SIZE(vc4_regs), true);
vc4->reg_class_any = ra_alloc_reg_class(vc4->regs);
+ vc4->reg_class_a_or_b_or_acc = ra_alloc_reg_class(vc4->regs);
vc4->reg_class_r4_or_a = ra_alloc_reg_class(vc4->regs);
vc4->reg_class_a = ra_alloc_reg_class(vc4->regs);
for (uint32_t i = 0; i < ARRAY_SIZE(vc4_regs); i++) {
@@ -130,10 +131,12 @@ vc4_alloc_reg_set(struct vc4_context *vc4)
*/
if (vc4_regs[i].mux == QPU_MUX_R4) {
ra_class_add_reg(vc4->regs, vc4->reg_class_r4_or_a, i);
+ ra_class_add_reg(vc4->regs, vc4->reg_class_any, i);
continue;
}
ra_class_add_reg(vc4->regs, vc4->reg_class_any, i);
+ ra_class_add_reg(vc4->regs, vc4->reg_class_a_or_b_or_acc, i);
}
for (uint32_t i = AB_INDEX; i < AB_INDEX + 64; i += 2) {
@@ -177,7 +180,8 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
uint8_t class_bits[c->num_temps];
struct qpu_reg *temp_registers = calloc(c->num_temps,
sizeof(*temp_registers));
- memset(def, 0, sizeof(def));
+ for (int i = 0; i < ARRAY_SIZE(def); i++)
+ def[i] = ~0;
memset(use, 0, sizeof(use));
/* If things aren't ever written (undefined values), just read from
@@ -196,7 +200,7 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
uint32_t ip = 0;
list_for_each_entry(struct qinst, inst, &c->instructions, link) {
if (inst->dst.file == QFILE_TEMP) {
- def[inst->dst.index] = ip;
+ def[inst->dst.index] = MIN2(ip, def[inst->dst.index]);
use[inst->dst.index] = ip;
}
@@ -267,17 +271,33 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2);
break;
- case QOP_PACK_SCALED:
- /* The pack flags require an A-file dst register. */
- class_bits[inst->dst.index] &= CLASS_BIT_A;
- break;
-
default:
break;
}
+ if (inst->dst.pack && !qir_is_mul(inst)) {
+ /* The non-MUL pack flags require an A-file dst
+ * register.
+ */
+ class_bits[inst->dst.index] &= CLASS_BIT_A;
+ }
+
if (qir_src_needs_a_file(inst)) {
- class_bits[inst->src[0].index] &= CLASS_BIT_A;
+ switch (inst->op) {
+ case QOP_UNPACK_8A_F:
+ case QOP_UNPACK_8B_F:
+ case QOP_UNPACK_8C_F:
+ case QOP_UNPACK_8D_F:
+ /* Special case: these can be done as R4
+ * unpacks, as well.
+ */
+ class_bits[inst->src[0].index] &= (CLASS_BIT_A |
+ CLASS_BIT_R4);
+ break;
+ default:
+ class_bits[inst->src[0].index] &= CLASS_BIT_A;
+ break;
+ }
}
ip++;
}
@@ -287,9 +307,11 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
switch (class_bits[i]) {
case CLASS_BIT_A | CLASS_BIT_B_OR_ACC | CLASS_BIT_R4:
- case CLASS_BIT_A | CLASS_BIT_B_OR_ACC:
ra_set_node_class(g, node, vc4->reg_class_any);
break;
+ case CLASS_BIT_A | CLASS_BIT_B_OR_ACC:
+ ra_set_node_class(g, node, vc4->reg_class_a_or_b_or_acc);
+ break;
case CLASS_BIT_A | CLASS_BIT_R4:
ra_set_node_class(g, node, vc4->reg_class_r4_or_a);
break;
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index 1e493f4..266ebba 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -663,6 +663,7 @@ struct pipe_blit_info
boolean render_condition_enable; /**< whether the blit should honor the
current render condition */
+ boolean alpha_blend; /* dst.rgb = src.rgb * src.a + dst.rgb * (1 - src.a) */
};
diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index 63c3f8e..7c23a27 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -346,6 +346,15 @@ namespace {
// Kernel metadata
+ struct kernel_arg_md {
+ llvm::StringRef type_name;
+ llvm::StringRef access_qual;
+ kernel_arg_md(llvm::StringRef type_name_, llvm::StringRef access_qual_):
+ type_name(type_name_), access_qual(access_qual_) {}
+ };
+
+#if HAVE_LLVM >= 0x0306
+
const llvm::MDNode *
get_kernel_metadata(const llvm::Function *kernel_func) {
auto mod = kernel_func->getParent();
@@ -356,12 +365,8 @@ namespace {
const llvm::MDNode *kernel_node = nullptr;
for (unsigned i = 0; i < kernels_node->getNumOperands(); ++i) {
-#if HAVE_LLVM >= 0x0306
auto func = llvm::mdconst::dyn_extract<llvm::Function>(
-#else
- auto func = llvm::dyn_cast<llvm::Function>(
-#endif
- kernels_node->getOperand(i)->getOperand(0));
+ kernels_node->getOperand(i)->getOperand(0));
if (func == kernel_func) {
kernel_node = kernels_node->getOperand(i);
break;
@@ -387,13 +392,6 @@ namespace {
return node;
}
- struct kernel_arg_md {
- llvm::StringRef type_name;
- llvm::StringRef access_qual;
- kernel_arg_md(llvm::StringRef type_name_, llvm::StringRef access_qual_):
- type_name(type_name_), access_qual(access_qual_) {}
- };
-
std::vector<kernel_arg_md>
get_kernel_arg_md(const llvm::Function *kernel_func) {
auto num_args = kernel_func->getArgumentList().size();
@@ -415,6 +413,17 @@ namespace {
return res;
}
+#else
+
+ std::vector<kernel_arg_md>
+ get_kernel_arg_md(const llvm::Function *kernel_func) {
+ return std::vector<kernel_arg_md>(
+ kernel_func->getArgumentList().size(),
+ kernel_arg_md("", ""));
+ }
+
+#endif // HAVE_LLVM >= 0x0306
+
std::vector<module::argument>
get_kernel_args(const llvm::Module *mod, const std::string &kernel_name,
const clang::LangAS::Map &address_spaces) {
diff --git a/src/gallium/state_trackers/nine/adapter9.c b/src/gallium/state_trackers/nine/adapter9.c
index c5ffcb1..69e0fa2 100644
--- a/src/gallium/state_trackers/nine/adapter9.c
+++ b/src/gallium/state_trackers/nine/adapter9.c
@@ -545,7 +545,7 @@ NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This,
/*D3DDEVCAPS_RTPATCHES |*/
/*D3DDEVCAPS_RTPATCHHANDLEZERO |*/
/*D3DDEVCAPS_SEPARATETEXTUREMEMORIES |*/
- /*D3DDEVCAPS_TEXTURENONLOCALVIDMEM |*/
+ D3DDEVCAPS_TEXTURENONLOCALVIDMEM |
/* D3DDEVCAPS_TEXTURESYSTEMMEMORY |*/
D3DDEVCAPS_TEXTUREVIDEOMEMORY |
D3DDEVCAPS_TLVERTEXSYSTEMMEMORY |
@@ -561,32 +561,32 @@ NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This,
D3DPMISCCAPS_TSSARGTEMP |
D3DPMISCCAPS_BLENDOP |
D3DPIPECAP(INDEP_BLEND_ENABLE, D3DPMISCCAPS_INDEPENDENTWRITEMASKS) |
- /*D3DPMISCCAPS_PERSTAGECONSTANT |*/
+ /*D3DPMISCCAPS_PERSTAGECONSTANT |*/ /* TODO */
/*D3DPMISCCAPS_POSTBLENDSRGBCONVERT |*/ /* TODO */
D3DPMISCCAPS_FOGANDSPECULARALPHA |
D3DPIPECAP(BLEND_EQUATION_SEPARATE, D3DPMISCCAPS_SEPARATEALPHABLEND) |
D3DPIPECAP(MIXED_COLORBUFFER_FORMATS, D3DPMISCCAPS_MRTINDEPENDENTBITDEPTHS) |
D3DPMISCCAPS_MRTPOSTPIXELSHADERBLENDING |
- /*D3DPMISCCAPS_FOGVERTEXCLAMPED*/0;
+ D3DPMISCCAPS_FOGVERTEXCLAMPED;
if (!screen->get_param(screen, PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION))
pCaps->PrimitiveMiscCaps |= D3DPMISCCAPS_CLIPTLVERTS;
pCaps->RasterCaps =
D3DPIPECAP(ANISOTROPIC_FILTER, D3DPRASTERCAPS_ANISOTROPY) |
- /*D3DPRASTERCAPS_COLORPERSPECTIVE |*/
+ D3DPRASTERCAPS_COLORPERSPECTIVE |
D3DPRASTERCAPS_DITHER |
D3DPRASTERCAPS_DEPTHBIAS |
- /*D3DPRASTERCAPS_FOGRANGE |*/
- /*D3DPRASTERCAPS_FOGTABLE |*/
- /*D3DPRASTERCAPS_FOGVERTEX |*/
+ D3DPRASTERCAPS_FOGRANGE |
+ D3DPRASTERCAPS_FOGTABLE |
+ D3DPRASTERCAPS_FOGVERTEX |
D3DPRASTERCAPS_MIPMAPLODBIAS |
D3DPRASTERCAPS_MULTISAMPLE_TOGGLE |
D3DPRASTERCAPS_SCISSORTEST |
D3DPRASTERCAPS_SLOPESCALEDEPTHBIAS |
/*D3DPRASTERCAPS_WBUFFER |*/
- /*D3DPRASTERCAPS_WFOG |*/
+ D3DPRASTERCAPS_WFOG |
/*D3DPRASTERCAPS_ZBUFFERLESSHSR |*/
- /*D3DPRASTERCAPS_ZFOG |*/
+ D3DPRASTERCAPS_ZFOG |
D3DPRASTERCAPS_ZTEST;
pCaps->ZCmpCaps = D3DPCMPCAPS_NEVER |
@@ -697,15 +697,12 @@ NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This,
pCaps->MaxAnisotropy =
(DWORD)screen->get_paramf(screen, PIPE_CAPF_MAX_TEXTURE_ANISOTROPY);
- pCaps->MaxVertexW = 1.0f; /* XXX */
- pCaps->GuardBandLeft = screen->get_paramf(screen,
- PIPE_CAPF_GUARD_BAND_LEFT);
- pCaps->GuardBandTop = screen->get_paramf(screen,
- PIPE_CAPF_GUARD_BAND_TOP);
- pCaps->GuardBandRight = screen->get_paramf(screen,
- PIPE_CAPF_GUARD_BAND_RIGHT);
- pCaps->GuardBandBottom = screen->get_paramf(screen,
- PIPE_CAPF_GUARD_BAND_BOTTOM);
+ /* Values for GeForce 9600 GT */
+ pCaps->MaxVertexW = 1e10f;
+ pCaps->GuardBandLeft = -1e9f;
+ pCaps->GuardBandTop = -1e9f;
+ pCaps->GuardBandRight = 1e9f;
+ pCaps->GuardBandBottom = 1e9f;
pCaps->ExtentsAdjust = 0.0f;
pCaps->StencilCaps =
@@ -724,8 +721,6 @@ NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This,
/*D3DFVFCAPS_DONOTSTRIPELEMENTS |*/
D3DFVFCAPS_PSIZE;
- /* XXX: Some of these are probably not in SM2.0 so cap them when I figure
- * them out. For now leave them all enabled. */
pCaps->TextureOpCaps = D3DTEXOPCAPS_DISABLE |
D3DTEXOPCAPS_SELECTARG1 |
D3DTEXOPCAPS_SELECTARG2 |
@@ -796,7 +791,8 @@ NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This,
pCaps->MaxVertexShaderConst = NINE_MAX_CONST_F;
pCaps->PixelShaderVersion = D3DPS_VERSION(3,0);
- pCaps->PixelShader1xMaxValue = 8.0f; /* XXX: wine */
+ /* Value for GeForce 9600 GT */
+ pCaps->PixelShader1xMaxValue = 65504.f;
pCaps->DevCaps2 = D3DDEVCAPS2_STREAMOFFSET |
D3DDEVCAPS2_VERTEXELEMENTSCANSHARESTREAMOFFSET |
diff --git a/src/gallium/state_trackers/nine/basetexture9.c b/src/gallium/state_trackers/nine/basetexture9.c
index 17a8f44..d13138b 100644
--- a/src/gallium/state_trackers/nine/basetexture9.c
+++ b/src/gallium/state_trackers/nine/basetexture9.c
@@ -57,7 +57,8 @@ NineBaseTexture9_ctor( struct NineBaseTexture9 *This,
user_assert(!(Usage & (D3DUSAGE_RENDERTARGET | D3DUSAGE_DEPTHSTENCIL)) ||
Pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL);
user_assert(!(Usage & D3DUSAGE_DYNAMIC) ||
- Pool != D3DPOOL_MANAGED, D3DERR_INVALIDCALL);
+ !(Pool == D3DPOOL_MANAGED ||
+ Pool == D3DPOOL_SCRATCH), D3DERR_INVALIDCALL);
hr = NineResource9_ctor(&This->base, pParams, initResource, alloc, Type, Pool, Usage);
if (FAILED(hr))
@@ -85,6 +86,9 @@ NineBaseTexture9_ctor( struct NineBaseTexture9 *This,
util_format_has_depth(util_format_description(This->base.info.format));
list_inithead(&This->list);
+ list_inithead(&This->list2);
+ if (Pool == D3DPOOL_MANAGED)
+ list_add(&This->list2, &This->base.base.device->managed_textures);
return D3D_OK;
}
@@ -98,7 +102,9 @@ NineBaseTexture9_dtor( struct NineBaseTexture9 *This )
pipe_sampler_view_reference(&This->view[1], NULL);
if (This->list.prev != NULL && This->list.next != NULL)
- list_del(&This->list),
+ list_del(&This->list);
+ if (This->list2.prev != NULL && This->list2.next != NULL)
+ list_del(&This->list2);
NineResource9_dtor(&This->base);
}
@@ -153,6 +159,8 @@ NineBaseTexture9_SetAutoGenFilterType( struct NineBaseTexture9 *This,
user_assert(FilterType != D3DTEXF_NONE, D3DERR_INVALIDCALL);
This->mipfilter = FilterType;
+ This->dirty_mip = TRUE;
+ NineBaseTexture9_GenerateMipSubLevels(This);
return D3D_OK;
}
@@ -310,14 +318,12 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This )
tex->dirty_box.width, tex->dirty_box.height, tex->dirty_box.depth);
if (tex->dirty_box.width) {
- for (l = 0; l <= last_level; ++l) {
+ for (l = min_level_dirty; l <= last_level; ++l) {
u_box_minify_2d(&box, &tex->dirty_box, l);
- NineVolume9_AddDirtyRegion(tex->volumes[l], &tex->dirty_box);
+ NineVolume9_UploadSelf(tex->volumes[l], &box);
}
memset(&tex->dirty_box, 0, sizeof(tex->dirty_box));
}
- for (l = min_level_dirty; l <= last_level; ++l)
- NineVolume9_UploadSelf(tex->volumes[l]);
} else {
assert(!"invalid texture type");
}
@@ -361,8 +367,7 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This )
box.width = u_minify(This->base.info.width0, l);
box.height = u_minify(This->base.info.height0, l);
box.depth = u_minify(This->base.info.depth0, l);
- NineVolume9_AddDirtyRegion(tex->volumes[l], &box);
- NineVolume9_UploadSelf(tex->volumes[l]);
+ NineVolume9_UploadSelf(tex->volumes[l], &box);
}
} else {
assert(!"invalid texture type");
@@ -381,8 +386,7 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This )
void WINAPI
NineBaseTexture9_GenerateMipSubLevels( struct NineBaseTexture9 *This )
{
- struct pipe_resource *resource = This->base.resource;
-
+ struct pipe_resource *resource;
unsigned base_level = 0;
unsigned last_level = This->base.info.last_level - This->managed.lod;
unsigned first_layer = 0;
@@ -405,6 +409,8 @@ NineBaseTexture9_GenerateMipSubLevels( struct NineBaseTexture9 *This )
last_layer = util_max_layer(This->view[0]->texture, base_level);
+ resource = This->base.resource;
+
util_gen_mipmap(This->pipe, resource,
resource->format, base_level, last_level,
first_layer, last_layer, filter);
@@ -530,6 +536,11 @@ NineBaseTexture9_UpdateSamplerView( struct NineBaseTexture9 *This,
swizzle[2] = PIPE_SWIZZLE_RED;
swizzle[3] = PIPE_SWIZZLE_RED;
}
+ } else if (resource->format == PIPE_FORMAT_RGTC2_UNORM) {
+ swizzle[0] = PIPE_SWIZZLE_GREEN;
+ swizzle[1] = PIPE_SWIZZLE_RED;
+ swizzle[2] = PIPE_SWIZZLE_ONE;
+ swizzle[3] = PIPE_SWIZZLE_ONE;
} else if (resource->format != PIPE_FORMAT_A8_UNORM &&
resource->format != PIPE_FORMAT_RGTC1_UNORM) {
/* exceptions:
@@ -578,6 +589,21 @@ NineBaseTexture9_PreLoad( struct NineBaseTexture9 *This )
NineBaseTexture9_UploadSelf(This);
}
+void
+NineBaseTexture9_UnLoad( struct NineBaseTexture9 *This )
+{
+ if (This->base.pool != D3DPOOL_MANAGED ||
+ This->managed.lod_resident == -1)
+ return;
+
+ pipe_resource_reference(&This->base.resource, NULL);
+ This->managed.lod_resident = -1;
+ This->managed.dirty = TRUE;
+
+ /* If the texture is bound, we have to re-upload it */
+ BASETEX_REGISTER_UPDATE(This);
+}
+
#ifdef DEBUG
void
NineBaseTexture9_Dump( struct NineBaseTexture9 *This )
diff --git a/src/gallium/state_trackers/nine/basetexture9.h b/src/gallium/state_trackers/nine/basetexture9.h
index 9d6fb0c..b19a621 100644
--- a/src/gallium/state_trackers/nine/basetexture9.h
+++ b/src/gallium/state_trackers/nine/basetexture9.h
@@ -30,7 +30,8 @@
struct NineBaseTexture9
{
struct NineResource9 base;
- struct list_head list;
+ struct list_head list; /* for update_textures */
+ struct list_head list2; /* for managed_textures */
/* g3d */
struct pipe_context *pipe;
@@ -94,6 +95,9 @@ NineBaseTexture9_GenerateMipSubLevels( struct NineBaseTexture9 *This );
void WINAPI
NineBaseTexture9_PreLoad( struct NineBaseTexture9 *This );
+void
+NineBaseTexture9_UnLoad( struct NineBaseTexture9 *This );
+
/* For D3DPOOL_MANAGED only (after SetLOD change): */
HRESULT
NineBaseTexture9_CreatePipeResource( struct NineBaseTexture9 *This,
diff --git a/src/gallium/state_trackers/nine/cubetexture9.c b/src/gallium/state_trackers/nine/cubetexture9.c
index edea1f2..abba263 100644
--- a/src/gallium/state_trackers/nine/cubetexture9.c
+++ b/src/gallium/state_trackers/nine/cubetexture9.c
@@ -43,7 +43,7 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This,
struct pipe_screen *screen = pParams->device->screen;
enum pipe_format pf;
unsigned i, l, f, offset, face_size = 0;
- unsigned *level_offsets;
+ unsigned *level_offsets = NULL;
D3DSURFACE_DESC sfdesc;
void *p;
HRESULT hr;
@@ -70,6 +70,13 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This,
if (Format == D3DFMT_ATI1 || Format == D3DFMT_ATI2)
return D3DERR_INVALIDCALL;
+ if (compressed_format(Format)) {
+ const unsigned w = util_format_get_blockwidth(pf);
+ const unsigned h = util_format_get_blockheight(pf);
+
+ user_assert(!(EdgeLength % w) && !(EdgeLength % h), D3DERR_INVALIDCALL);
+ }
+
info->screen = pParams->device->screen;
info->target = PIPE_TEXTURE_CUBE;
info->format = pf;
@@ -106,7 +113,7 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This,
face_size = nine_format_get_size_and_offsets(pf, level_offsets,
EdgeLength, EdgeLength,
info->last_level);
- This->managed_buffer = MALLOC(6 * face_size);
+ This->managed_buffer = align_malloc(6 * face_size, 32);
if (!This->managed_buffer)
return E_OUTOFMEMORY;
}
@@ -150,8 +157,12 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This,
}
}
- for (i = 0; i < 6; ++i) /* width = 0 means empty, depth stays 1 */
+ for (i = 0; i < 6; ++i) {
+ /* Textures start initially dirty */
+ This->dirty_rect[i].width = EdgeLength;
+ This->dirty_rect[i].height = EdgeLength;
This->dirty_rect[i].depth = 1;
+ }
return D3D_OK;
}
@@ -259,13 +270,17 @@ NineCubeTexture9_AddDirtyRect( struct NineCubeTexture9 *This,
user_assert(FaceType < 6, D3DERR_INVALIDCALL);
if (This->base.base.pool != D3DPOOL_MANAGED) {
- if (This->base.base.usage & D3DUSAGE_AUTOGENMIPMAP)
+ if (This->base.base.usage & D3DUSAGE_AUTOGENMIPMAP) {
This->base.dirty_mip = TRUE;
+ BASETEX_REGISTER_UPDATE(&This->base);
+ }
return D3D_OK;
}
- This->base.managed.dirty = TRUE;
- BASETEX_REGISTER_UPDATE(&This->base);
+ if (This->base.base.pool == D3DPOOL_MANAGED) {
+ This->base.managed.dirty = TRUE;
+ BASETEX_REGISTER_UPDATE(&This->base);
+ }
if (!pDirtyRect) {
u_box_origin_2d(This->base.base.info.width0,
diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c
index 55948cb..99197a4 100644
--- a/src/gallium/state_trackers/nine/device9.c
+++ b/src/gallium/state_trackers/nine/device9.c
@@ -119,48 +119,6 @@ NineDevice9_SetDefaultState( struct NineDevice9 *This, boolean is_reset )
This, (IDirect3DSurface9 *)This->swapchains[0]->zsbuf);
}
-void
-NineDevice9_RestoreNonCSOState( struct NineDevice9 *This, unsigned mask )
-{
- struct pipe_context *pipe = This->pipe;
-
- DBG("This=%p mask=%u\n", This, mask);
-
- if (mask & 0x1) {
- struct pipe_constant_buffer cb;
- cb.buffer_offset = 0;
-
- if (This->prefer_user_constbuf) {
- cb.buffer = NULL;
- cb.user_buffer = This->state.vs_const_f;
- } else {
- cb.buffer = This->constbuf_vs;
- cb.user_buffer = NULL;
- }
- cb.buffer_size = This->vs_const_size;
- pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &cb);
-
- if (This->prefer_user_constbuf) {
- cb.user_buffer = This->state.ps_const_f;
- } else {
- cb.buffer = This->constbuf_ps;
- }
- cb.buffer_size = This->ps_const_size;
- pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &cb);
- }
-
- if (mask & 0x2) {
- struct pipe_poly_stipple stipple;
- memset(&stipple, ~0, sizeof(stipple));
- pipe->set_polygon_stipple(pipe, &stipple);
- }
-
- This->state.changed.group = NINE_STATE_ALL;
- This->state.changed.vtxbuf = (1ULL << This->caps.MaxStreams) - 1;
- This->state.changed.ucp = (1 << PIPE_MAX_CLIP_PLANES) - 1;
- This->state.changed.texture = NINE_PS_SAMPLERS_MASK | NINE_VS_SAMPLERS_MASK;
-}
-
#define GET_PCAP(n) pScreen->get_param(pScreen, PIPE_CAP_##n)
HRESULT
NineDevice9_ctor( struct NineDevice9 *This,
@@ -186,6 +144,7 @@ NineDevice9_ctor( struct NineDevice9 *This,
if (FAILED(hr)) { return hr; }
list_inithead(&This->update_textures);
+ list_inithead(&This->managed_textures);
This->screen = pScreen;
This->caps = *pCaps;
@@ -341,16 +300,19 @@ NineDevice9_ctor( struct NineDevice9 *This,
This->state.vs_const_f = CALLOC(This->vs_const_size, 1);
This->state.ps_const_f = CALLOC(This->ps_const_size, 1);
This->state.vs_lconstf_temp = CALLOC(This->vs_const_size,1);
+ This->state.ps_lconstf_temp = CALLOC(This->ps_const_size,1);
if (!This->state.vs_const_f || !This->state.ps_const_f ||
- !This->state.vs_lconstf_temp)
+ !This->state.vs_lconstf_temp || !This->state.ps_lconstf_temp)
return E_OUTOFMEMORY;
if (strstr(pScreen->get_name(pScreen), "AMD") ||
strstr(pScreen->get_name(pScreen), "ATI")) {
- This->prefer_user_constbuf = TRUE;
This->driver_bugs.buggy_barycentrics = TRUE;
}
+ /* Disable NV path for now, needs some fixes */
+ This->prefer_user_constbuf = TRUE;
+
tmpl.target = PIPE_BUFFER;
tmpl.format = PIPE_FORMAT_R8_UNORM;
tmpl.height0 = 1;
@@ -376,6 +338,8 @@ NineDevice9_ctor( struct NineDevice9 *This,
{
struct pipe_resource tmplt;
struct pipe_sampler_view templ;
+ struct pipe_sampler_state samp;
+ memset(&samp, 0, sizeof(samp));
tmplt.target = PIPE_TEXTURE_2D;
tmplt.width0 = 1;
@@ -404,22 +368,39 @@ NineDevice9_ctor( struct NineDevice9 *This,
templ.swizzle_a = PIPE_SWIZZLE_ONE;
templ.target = This->dummy_texture->target;
- This->dummy_sampler = This->pipe->create_sampler_view(This->pipe, This->dummy_texture, &templ);
- if (!This->dummy_sampler)
+ This->dummy_sampler_view = This->pipe->create_sampler_view(This->pipe, This->dummy_texture, &templ);
+ if (!This->dummy_sampler_view)
return D3DERR_DRIVERINTERNALERROR;
+
+ samp.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+ samp.max_lod = 15.0f;
+ samp.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ samp.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ samp.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ samp.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+ samp.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+ samp.compare_mode = PIPE_TEX_COMPARE_NONE;
+ samp.compare_func = PIPE_FUNC_LEQUAL;
+ samp.normalized_coords = 1;
+ samp.seamless_cube_map = 1;
+ This->dummy_sampler_state = samp;
}
/* Allocate upload helper for drivers that suck (from st pov ;). */
- {
- unsigned bind = 0;
- This->driver_caps.user_vbufs = GET_PCAP(USER_VERTEX_BUFFERS);
- This->driver_caps.user_ibufs = GET_PCAP(USER_INDEX_BUFFERS);
+ This->driver_caps.user_vbufs = GET_PCAP(USER_VERTEX_BUFFERS);
+ This->driver_caps.user_ibufs = GET_PCAP(USER_INDEX_BUFFERS);
+ This->driver_caps.user_cbufs = GET_PCAP(USER_CONSTANT_BUFFERS);
+
+ if (!This->driver_caps.user_vbufs)
+ This->vertex_uploader = u_upload_create(This->pipe, 65536, 4, PIPE_BIND_VERTEX_BUFFER);
+ if (!This->driver_caps.user_ibufs)
+ This->index_uploader = u_upload_create(This->pipe, 128 * 1024, 4, PIPE_BIND_INDEX_BUFFER);
+ if (!This->driver_caps.user_cbufs) {
+ unsigned alignment = GET_PCAP(CONSTANT_BUFFER_OFFSET_ALIGNMENT);
- if (!This->driver_caps.user_vbufs) bind |= PIPE_BIND_VERTEX_BUFFER;
- if (!This->driver_caps.user_ibufs) bind |= PIPE_BIND_INDEX_BUFFER;
- if (bind)
- This->upload = u_upload_create(This->pipe, 1 << 20, 4, bind);
+ This->constbuf_uploader = u_upload_create(This->pipe, This->vs_const_size,
+ alignment, PIPE_BIND_CONSTANT_BUFFER);
}
This->driver_caps.window_space_position_support = GET_PCAP(TGSI_VS_WINDOW_SPACE_POSITION);
@@ -429,10 +410,15 @@ NineDevice9_ctor( struct NineDevice9 *This,
nine_ff_init(This); /* initialize fixed function code */
NineDevice9_SetDefaultState(This, FALSE);
- NineDevice9_RestoreNonCSOState(This, ~0);
+
+ {
+ struct pipe_poly_stipple stipple;
+ memset(&stipple, ~0, sizeof(stipple));
+ This->pipe->set_polygon_stipple(This->pipe, &stipple);
+ }
This->update = &This->state;
- nine_update_state(This, ~0);
+ nine_update_state(This);
ID3DPresentGroup_Release(This->present);
@@ -452,12 +438,16 @@ NineDevice9_dtor( struct NineDevice9 *This )
nine_ff_fini(This);
nine_state_clear(&This->state, TRUE);
- if (This->upload)
- u_upload_destroy(This->upload);
+ if (This->vertex_uploader)
+ u_upload_destroy(This->vertex_uploader);
+ if (This->index_uploader)
+ u_upload_destroy(This->index_uploader);
+ if (This->constbuf_uploader)
+ u_upload_destroy(This->constbuf_uploader);
nine_bind(&This->record, NULL);
- pipe_sampler_view_reference(&This->dummy_sampler, NULL);
+ pipe_sampler_view_reference(&This->dummy_sampler_view, NULL);
pipe_resource_reference(&This->dummy_texture, NULL);
pipe_resource_reference(&This->constbuf_vs, NULL);
pipe_resource_reference(&This->constbuf_ps, NULL);
@@ -465,6 +455,7 @@ NineDevice9_dtor( struct NineDevice9 *This )
FREE(This->state.vs_const_f);
FREE(This->state.ps_const_f);
FREE(This->state.vs_lconstf_temp);
+ FREE(This->state.ps_lconstf_temp);
if (This->swapchains) {
for (i = 0; i < This->nswapchains; ++i)
@@ -547,10 +538,14 @@ NineDevice9_GetAvailableTextureMem( struct NineDevice9 *This )
HRESULT WINAPI
NineDevice9_EvictManagedResources( struct NineDevice9 *This )
{
- /* We don't really need to do anything here, but might want to free up
- * the GPU virtual address space by killing pipe_resources.
- */
- STUB(D3D_OK);
+ struct NineBaseTexture9 *tex;
+
+ DBG("This=%p\n", This);
+ LIST_FOR_EACH_ENTRY(tex, &This->managed_textures, list2) {
+ NineBaseTexture9_UnLoad(tex);
+ }
+
+ return D3D_OK;
}
HRESULT WINAPI
@@ -599,11 +594,11 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This,
UINT YHotSpot,
IDirect3DSurface9 *pCursorBitmap )
{
- /* TODO: hardware cursor */
struct NineSurface9 *surf = NineSurface9(pCursorBitmap);
struct pipe_context *pipe = This->pipe;
struct pipe_box box;
struct pipe_transfer *transfer;
+ BOOL hw_cursor;
void *ptr;
DBG_FLAG(DBG_SWAPCHAIN, "This=%p XHotSpot=%u YHotSpot=%u "
@@ -611,8 +606,15 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This,
user_assert(pCursorBitmap, D3DERR_INVALIDCALL);
- This->cursor.w = MIN2(surf->desc.Width, This->cursor.image->width0);
- This->cursor.h = MIN2(surf->desc.Height, This->cursor.image->height0);
+ if (This->swapchains[0]->params.Windowed) {
+ This->cursor.w = MIN2(surf->desc.Width, 32);
+ This->cursor.h = MIN2(surf->desc.Height, 32);
+ hw_cursor = 1; /* always use hw cursor for windowed mode */
+ } else {
+ This->cursor.w = MIN2(surf->desc.Width, This->cursor.image->width0);
+ This->cursor.h = MIN2(surf->desc.Height, This->cursor.image->height0);
+ hw_cursor = This->cursor.w == 32 && This->cursor.h == 32;
+ }
u_box_origin_2d(This->cursor.w, This->cursor.h, &box);
@@ -643,16 +645,21 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This,
lock.pBits, lock.Pitch,
This->cursor.w, This->cursor.h);
- if (!This->cursor.software &&
- This->cursor.w == 32 && This->cursor.h == 32)
- ID3DPresent_SetCursor(This->swapchains[0]->present,
- lock.pBits, &This->cursor.hotspot,
- This->cursor.visible);
+ if (hw_cursor)
+ hw_cursor = ID3DPresent_SetCursor(This->swapchains[0]->present,
+ lock.pBits,
+ &This->cursor.hotspot,
+ This->cursor.visible) == D3D_OK;
NineSurface9_UnlockRect(surf);
}
pipe->transfer_unmap(pipe, transfer);
+ /* hide cursor if we emulate it */
+ if (!hw_cursor)
+ ID3DPresent_SetCursor(This->swapchains[0]->present, NULL, NULL, FALSE);
+ This->cursor.software = !hw_cursor;
+
return D3D_OK;
}
@@ -670,7 +677,7 @@ NineDevice9_SetCursorPosition( struct NineDevice9 *This,
This->cursor.pos.y = Y;
if (!This->cursor.software)
- ID3DPresent_SetCursorPos(swap->present, &This->cursor.pos);
+ This->cursor.software = ID3DPresent_SetCursorPos(swap->present, &This->cursor.pos) != D3D_OK;
}
BOOL WINAPI
@@ -683,7 +690,7 @@ NineDevice9_ShowCursor( struct NineDevice9 *This,
This->cursor.visible = bShow && (This->cursor.hotspot.x != -1);
if (!This->cursor.software)
- ID3DPresent_SetCursor(This->swapchains[0]->present, NULL, NULL, bShow);
+ This->cursor.software = ID3DPresent_SetCursor(This->swapchains[0]->present, NULL, NULL, bShow) != D3D_OK;
return old;
}
@@ -752,8 +759,8 @@ NineDevice9_Reset( struct NineDevice9 *This,
for (i = 0; i < This->nswapchains; ++i) {
D3DPRESENT_PARAMETERS *params = &pPresentationParameters[i];
hr = NineSwapChain9_Resize(This->swapchains[i], params, NULL);
- if (FAILED(hr))
- return (hr == D3DERR_OUTOFVIDEOMEMORY) ? hr : D3DERR_DEVICELOST;
+ if (hr != D3D_OK)
+ return hr;
}
nine_pipe_context_clear(This);
@@ -1108,6 +1115,13 @@ create_zs_or_rt_surface(struct NineDevice9 *This,
default: break;
}
+ if (compressed_format(Format)) {
+ const unsigned w = util_format_get_blockwidth(templ.format);
+ const unsigned h = util_format_get_blockheight(templ.format);
+
+ user_assert(!(Width % w) && !(Height % h), D3DERR_INVALIDCALL);
+ }
+
if (Pool == D3DPOOL_DEFAULT && Format != D3DFMT_NULL) {
/* resource_create doesn't return an error code, so check format here */
user_assert(templ.format != PIPE_FORMAT_NONE, D3DERR_INVALIDCALL);
@@ -1173,6 +1187,8 @@ NineDevice9_UpdateSurface( struct NineDevice9 *This,
{
struct NineSurface9 *dst = NineSurface9(pDestinationSurface);
struct NineSurface9 *src = NineSurface9(pSourceSurface);
+ int copy_width, copy_height;
+ RECT destRect;
DBG("This=%p pSourceSurface=%p pDestinationSurface=%p "
"pSourceRect=%p pDestPoint=%p\n", This,
@@ -1184,13 +1200,75 @@ NineDevice9_UpdateSurface( struct NineDevice9 *This,
if (pDestPoint)
DBG("pDestPoint = (%u,%u)\n", pDestPoint->x, pDestPoint->y);
+ user_assert(dst && src, D3DERR_INVALIDCALL);
+
user_assert(dst->base.pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL);
user_assert(src->base.pool == D3DPOOL_SYSTEMMEM, D3DERR_INVALIDCALL);
user_assert(dst->desc.MultiSampleType == D3DMULTISAMPLE_NONE, D3DERR_INVALIDCALL);
user_assert(src->desc.MultiSampleType == D3DMULTISAMPLE_NONE, D3DERR_INVALIDCALL);
- return NineSurface9_CopySurface(dst, src, pDestPoint, pSourceRect);
+ user_assert(!src->lock_count, D3DERR_INVALIDCALL);
+ user_assert(!dst->lock_count, D3DERR_INVALIDCALL);
+
+ user_assert(dst->desc.Format == src->desc.Format, D3DERR_INVALIDCALL);
+ user_assert(!depth_stencil_format(dst->desc.Format), D3DERR_INVALIDCALL);
+
+ if (pSourceRect) {
+ copy_width = pSourceRect->right - pSourceRect->left;
+ copy_height = pSourceRect->bottom - pSourceRect->top;
+
+ user_assert(pSourceRect->left >= 0 &&
+ copy_width > 0 &&
+ pSourceRect->right <= src->desc.Width &&
+ pSourceRect->top >= 0 &&
+ copy_height > 0 &&
+ pSourceRect->bottom <= src->desc.Height,
+ D3DERR_INVALIDCALL);
+ } else {
+ copy_width = src->desc.Width;
+ copy_height = src->desc.Height;
+ }
+
+ destRect.right = copy_width;
+ destRect.bottom = copy_height;
+
+ if (pDestPoint) {
+ user_assert(pDestPoint->x >= 0 && pDestPoint->y >= 0,
+ D3DERR_INVALIDCALL);
+ destRect.right += pDestPoint->x;
+ destRect.bottom += pDestPoint->y;
+ }
+
+ user_assert(destRect.right <= dst->desc.Width &&
+ destRect.bottom <= dst->desc.Height,
+ D3DERR_INVALIDCALL);
+
+ if (compressed_format(dst->desc.Format)) {
+ const unsigned w = util_format_get_blockwidth(dst->base.info.format);
+ const unsigned h = util_format_get_blockheight(dst->base.info.format);
+
+ if (pDestPoint) {
+ user_assert(!(pDestPoint->x % w) && !(pDestPoint->y % h),
+ D3DERR_INVALIDCALL);
+ }
+
+ if (pSourceRect) {
+ user_assert(!(pSourceRect->left % w) && !(pSourceRect->top % h),
+ D3DERR_INVALIDCALL);
+ }
+ if (!(copy_width == src->desc.Width &&
+ copy_width == dst->desc.Width &&
+ copy_height == src->desc.Height &&
+ copy_height == dst->desc.Height)) {
+ user_assert(!(copy_width % w) && !(copy_height % h),
+ D3DERR_INVALIDCALL);
+ }
+ }
+
+ NineSurface9_CopyMemToDefault(dst, src, pDestPoint, pSourceRect);
+
+ return D3D_OK;
}
HRESULT WINAPI
@@ -1202,6 +1280,7 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This,
struct NineBaseTexture9 *srcb = NineBaseTexture9(pSourceTexture);
unsigned l, m;
unsigned last_level = dstb->base.info.last_level;
+ RECT rect;
DBG("This=%p pSourceTexture=%p pDestinationTexture=%p\n", This,
pSourceTexture, pDestinationTexture);
@@ -1227,10 +1306,6 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This,
user_assert(dstb->base.type == srcb->base.type, D3DERR_INVALIDCALL);
- /* TODO: We can restrict the update to the dirty portions of the source.
- * Yes, this seems silly, but it's what MSDN says ...
- */
-
/* Find src level that matches dst level 0: */
user_assert(srcb->base.info.width0 >= dstb->base.info.width0 &&
srcb->base.info.height0 >= dstb->base.info.height0 &&
@@ -1254,9 +1329,25 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This,
struct NineTexture9 *dst = NineTexture9(dstb);
struct NineTexture9 *src = NineTexture9(srcb);
- for (l = 0; l <= last_level; ++l, ++m)
- NineSurface9_CopySurface(dst->surfaces[l],
- src->surfaces[m], NULL, NULL);
+ if (src->dirty_rect.width == 0)
+ return D3D_OK;
+
+ pipe_box_to_rect(&rect, &src->dirty_rect);
+ for (l = 0; l < m; ++l)
+ rect_minify_inclusive(&rect);
+
+ for (l = 0; l <= last_level; ++l, ++m) {
+ fit_rect_format_inclusive(dst->base.base.info.format,
+ &rect,
+ dst->surfaces[l]->desc.Width,
+ dst->surfaces[l]->desc.Height);
+ NineSurface9_CopyMemToDefault(dst->surfaces[l],
+ src->surfaces[m],
+ (POINT *)&rect,
+ &rect);
+ rect_minify_inclusive(&rect);
+ }
+ u_box_origin_2d(0, 0, &src->dirty_rect);
} else
if (dstb->base.type == D3DRTYPE_CUBETEXTURE) {
struct NineCubeTexture9 *dst = NineCubeTexture9(dstb);
@@ -1265,10 +1356,25 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This,
/* GPUs usually have them stored as arrays of mip-mapped 2D textures. */
for (z = 0; z < 6; ++z) {
+ if (src->dirty_rect[z].width == 0)
+ continue;
+
+ pipe_box_to_rect(&rect, &src->dirty_rect[z]);
+ for (l = 0; l < m; ++l)
+ rect_minify_inclusive(&rect);
+
for (l = 0; l <= last_level; ++l, ++m) {
- NineSurface9_CopySurface(dst->surfaces[l * 6 + z],
- src->surfaces[m * 6 + z], NULL, NULL);
+ fit_rect_format_inclusive(dst->base.base.info.format,
+ &rect,
+ dst->surfaces[l * 6 + z]->desc.Width,
+ dst->surfaces[l * 6 + z]->desc.Height);
+ NineSurface9_CopyMemToDefault(dst->surfaces[l * 6 + z],
+ src->surfaces[m * 6 + z],
+ (POINT *)&rect,
+ &rect);
+ rect_minify_inclusive(&rect);
}
+ u_box_origin_2d(0, 0, &src->dirty_rect[z]);
m -= l;
}
} else
@@ -1276,9 +1382,12 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This,
struct NineVolumeTexture9 *dst = NineVolumeTexture9(dstb);
struct NineVolumeTexture9 *src = NineVolumeTexture9(srcb);
+ if (src->dirty_box.width == 0)
+ return D3D_OK;
for (l = 0; l <= last_level; ++l, ++m)
- NineVolume9_CopyVolume(dst->volumes[l],
- src->volumes[m], 0, 0, 0, NULL);
+ NineVolume9_CopyMemToDefault(dst->volumes[l],
+ src->volumes[m], 0, 0, 0, NULL);
+ u_box_3d(0, 0, 0, 0, 0, 0, &src->dirty_box);
} else{
assert(!"invalid texture type");
}
@@ -1308,7 +1417,12 @@ NineDevice9_GetRenderTargetData( struct NineDevice9 *This,
user_assert(dst->desc.MultiSampleType < 2, D3DERR_INVALIDCALL);
user_assert(src->desc.MultiSampleType < 2, D3DERR_INVALIDCALL);
- return NineSurface9_CopySurface(dst, src, NULL, NULL);
+ user_assert(src->desc.Width == dst->desc.Width, D3DERR_INVALIDCALL);
+ user_assert(src->desc.Height == dst->desc.Height, D3DERR_INVALIDCALL);
+
+ NineSurface9_CopyDefaultToMem(dst, src);
+
+ return D3D_OK;
}
HRESULT WINAPI
@@ -1448,6 +1562,7 @@ NineDevice9_StretchRect( struct NineDevice9 *This,
blit.filter = Filter == D3DTEXF_LINEAR ?
PIPE_TEX_FILTER_LINEAR : PIPE_TEX_FILTER_NEAREST;
blit.scissor_enable = FALSE;
+ blit.alpha_blend = FALSE;
/* If both of a src and dst dimension are negative, flip them. */
if (blit.dst.box.width < 0 && blit.src.box.width < 0) {
@@ -1464,8 +1579,12 @@ NineDevice9_StretchRect( struct NineDevice9 *This,
user_assert(!scaled || dst != src, D3DERR_INVALIDCALL);
user_assert(!scaled ||
- !NineSurface9_IsOffscreenPlain(dst) ||
+ !NineSurface9_IsOffscreenPlain(dst), D3DERR_INVALIDCALL);
+ user_assert(!NineSurface9_IsOffscreenPlain(dst) ||
NineSurface9_IsOffscreenPlain(src), D3DERR_INVALIDCALL);
+ user_assert(NineSurface9_IsOffscreenPlain(dst) ||
+ dst->desc.Usage & (D3DUSAGE_RENDERTARGET | D3DUSAGE_DEPTHSTENCIL),
+ D3DERR_INVALIDCALL);
user_assert(!scaled ||
(!util_format_is_compressed(dst->base.info.format) &&
!util_format_is_compressed(src->base.info.format)),
@@ -1561,11 +1680,8 @@ NineDevice9_ColorFill( struct NineDevice9 *This,
}
d3dcolor_to_pipe_color_union(&rgba, color);
- fallback =
- !This->screen->is_format_supported(This->screen, surf->base.info.format,
- surf->base.info.target,
- surf->base.info.nr_samples,
- PIPE_BIND_RENDER_TARGET);
+ fallback = !(surf->base.info.bind & PIPE_BIND_RENDER_TARGET);
+
if (!fallback) {
psurf = NineSurface9_GetSurface(surf, 0);
if (!psurf)
@@ -1774,7 +1890,7 @@ NineDevice9_Clear( struct NineDevice9 *This,
return D3D_OK;
d3dcolor_to_pipe_color_union(&rgba, Color);
- nine_update_state(This, NINE_STATE_FB);
+ nine_update_state_framebuffer(This);
rect.x1 = This->state.viewport.X;
rect.y1 = This->state.viewport.Y;
@@ -2012,8 +2128,10 @@ NineDevice9_SetLight( struct NineDevice9 *This,
return E_OUTOFMEMORY;
state->ff.num_lights = N;
- for (; n < Index; ++n)
+ for (; n < Index; ++n) {
+ memset(&state->ff.light[n], 0, sizeof(D3DLIGHT9));
state->ff.light[n].Type = (D3DLIGHTTYPE)NINED3DLIGHT_INVALID;
+ }
}
state->ff.light[Index] = *pLight;
@@ -2508,6 +2626,7 @@ NineDevice9_SetTextureStageState( struct NineDevice9 *This,
DWORD Value )
{
struct nine_state *state = This->update;
+ int bumpmap_index = -1;
DBG("Stage=%u Type=%u Value=%08x\n", Stage, Type, Value);
nine_dump_D3DTSS_value(DBG_FF, Type, Value);
@@ -2516,6 +2635,36 @@ NineDevice9_SetTextureStageState( struct NineDevice9 *This,
user_assert(Type < Elements(state->ff.tex_stage[0]), D3DERR_INVALIDCALL);
state->ff.tex_stage[Stage][Type] = Value;
+ switch (Type) {
+ case D3DTSS_BUMPENVMAT00:
+ bumpmap_index = 4 * Stage;
+ break;
+ case D3DTSS_BUMPENVMAT10:
+ bumpmap_index = 4 * Stage + 1;
+ break;
+ case D3DTSS_BUMPENVMAT01:
+ bumpmap_index = 4 * Stage + 2;
+ break;
+ case D3DTSS_BUMPENVMAT11:
+ bumpmap_index = 4 * Stage + 3;
+ break;
+ case D3DTSS_BUMPENVLSCALE:
+ bumpmap_index = 4 * 8 + 2 * Stage;
+ break;
+ case D3DTSS_BUMPENVLOFFSET:
+ bumpmap_index = 4 * 8 + 2 * Stage + 1;
+ break;
+ case D3DTSS_TEXTURETRANSFORMFLAGS:
+ state->changed.group |= NINE_STATE_PS1X_SHADER;
+ break;
+ default:
+ break;
+ }
+
+ if (bumpmap_index >= 0) {
+ state->bumpmap_vars[bumpmap_index] = Value;
+ state->changed.group |= NINE_STATE_PS_CONST;
+ }
state->changed.group |= NINE_STATE_FF_PSSTAGES;
state->ff.changed.tex_stage[Stage][Type / 32] |= 1 << (Type % 32);
@@ -2560,12 +2709,11 @@ NineDevice9_SetSamplerState( struct NineDevice9 *This,
if (Sampler >= D3DDMAPSAMPLER)
Sampler = Sampler - D3DDMAPSAMPLER + NINE_MAX_SAMPLERS_PS;
- state->samp[Sampler][Type] = Value;
- state->changed.group |= NINE_STATE_SAMPLER;
- state->changed.sampler[Sampler] |= 1 << Type;
-
- if (Type == D3DSAMP_SRGBTEXTURE)
- state->changed.srgb = TRUE;
+ if (state->samp[Sampler][Type] != Value || unlikely(This->is_recording)) {
+ state->samp[Sampler][Type] = Value;
+ state->changed.group |= NINE_STATE_SAMPLER;
+ state->changed.sampler[Sampler] |= 1 << Type;
+ }
return D3D_OK;
}
@@ -2724,7 +2872,7 @@ NineDevice9_DrawPrimitive( struct NineDevice9 *This,
DBG("iface %p, PrimitiveType %u, StartVertex %u, PrimitiveCount %u\n",
This, PrimitiveType, StartVertex, PrimitiveCount);
- nine_update_state(This, ~0);
+ nine_update_state(This);
init_draw_info(&info, This, PrimitiveType, PrimitiveCount);
info.indexed = FALSE;
@@ -2757,7 +2905,7 @@ NineDevice9_DrawIndexedPrimitive( struct NineDevice9 *This,
user_assert(This->state.idxbuf, D3DERR_INVALIDCALL);
user_assert(This->state.vdecl, D3DERR_INVALIDCALL);
- nine_update_state(This, ~0);
+ nine_update_state(This);
init_draw_info(&info, This, PrimitiveType, PrimitiveCount);
info.indexed = TRUE;
@@ -2789,7 +2937,7 @@ NineDevice9_DrawPrimitiveUP( struct NineDevice9 *This,
user_assert(pVertexStreamZeroData && VertexStreamZeroStride,
D3DERR_INVALIDCALL);
- nine_update_state(This, ~0);
+ nine_update_state(This);
init_draw_info(&info, This, PrimitiveType, PrimitiveCount);
info.indexed = FALSE;
@@ -2803,13 +2951,16 @@ NineDevice9_DrawPrimitiveUP( struct NineDevice9 *This,
vtxbuf.buffer = NULL;
vtxbuf.user_buffer = pVertexStreamZeroData;
- if (!This->driver_caps.user_vbufs)
- u_upload_data(This->upload,
+ if (!This->driver_caps.user_vbufs) {
+ u_upload_data(This->vertex_uploader,
0,
(info.max_index + 1) * VertexStreamZeroStride, /* XXX */
vtxbuf.user_buffer,
&vtxbuf.buffer_offset,
&vtxbuf.buffer);
+ u_upload_unmap(This->vertex_uploader);
+ vtxbuf.user_buffer = NULL;
+ }
This->pipe->set_vertex_buffers(This->pipe, 0, 1, &vtxbuf);
@@ -2851,7 +3002,7 @@ NineDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This,
user_assert(IndexDataFormat == D3DFMT_INDEX16 ||
IndexDataFormat == D3DFMT_INDEX32, D3DERR_INVALIDCALL);
- nine_update_state(This, ~0);
+ nine_update_state(This);
init_draw_info(&info, This, PrimitiveType, PrimitiveCount);
info.indexed = TRUE;
@@ -2872,23 +3023,28 @@ NineDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This,
if (!This->driver_caps.user_vbufs) {
const unsigned base = info.min_index * VertexStreamZeroStride;
- u_upload_data(This->upload,
+ u_upload_data(This->vertex_uploader,
base,
(info.max_index -
info.min_index + 1) * VertexStreamZeroStride, /* XXX */
(const uint8_t *)vbuf.user_buffer + base,
&vbuf.buffer_offset,
&vbuf.buffer);
+ u_upload_unmap(This->vertex_uploader);
/* Won't be used: */
vbuf.buffer_offset -= base;
+ vbuf.user_buffer = NULL;
}
- if (!This->driver_caps.user_ibufs)
- u_upload_data(This->upload,
+ if (!This->driver_caps.user_ibufs) {
+ u_upload_data(This->index_uploader,
0,
info.count * ibuf.index_size,
ibuf.user_buffer,
&ibuf.offset,
&ibuf.buffer);
+ u_upload_unmap(This->index_uploader);
+ ibuf.user_buffer = NULL;
+ }
This->pipe->set_vertex_buffers(This->pipe, 0, 1, &vbuf);
This->pipe->set_index_buffer(This->pipe, &ibuf);
@@ -2935,7 +3091,7 @@ NineDevice9_ProcessVertices( struct NineDevice9 *This,
if (!screen->get_param(screen, PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS))
STUB(D3DERR_INVALIDCALL);
- nine_update_state(This, ~0);
+ nine_update_state(This);
/* TODO: Create shader with stream output. */
STUB(D3DERR_INVALIDCALL);
@@ -3105,6 +3261,13 @@ NineDevice9_SetVertexShader( struct NineDevice9 *This,
DBG("This=%p pShader=%p\n", This, pShader);
+ if (!This->is_recording && state->vs == (struct NineVertexShader9*)pShader)
+ return D3D_OK;
+
+ /* ff -> non-ff: commit back non-ff constants */
+ if (!state->vs && pShader)
+ state->commit |= NINE_STATE_COMMIT_CONST_VS;
+
nine_bind(&state->vs, pShader);
state->changed.group |= NINE_STATE_VS;
@@ -3139,6 +3302,12 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This,
return D3D_OK;
user_assert(pConstantData, D3DERR_INVALIDCALL);
+ if (!This->is_recording) {
+ if (!memcmp(&state->vs_const_f[StartRegister * 4], pConstantData,
+ Vector4fCount * 4 * sizeof(state->vs_const_f[0])))
+ return D3D_OK;
+ }
+
memcpy(&state->vs_const_f[StartRegister * 4],
pConstantData,
Vector4fCount * 4 * sizeof(state->vs_const_f[0]));
@@ -3188,6 +3357,11 @@ NineDevice9_SetVertexShaderConstantI( struct NineDevice9 *This,
user_assert(pConstantData, D3DERR_INVALIDCALL);
if (This->driver_caps.vs_integer) {
+ if (!This->is_recording) {
+ if (!memcmp(&state->vs_const_i[StartRegister][0], pConstantData,
+ Vector4iCount * sizeof(state->vs_const_i[0])))
+ return D3D_OK;
+ }
memcpy(&state->vs_const_i[StartRegister][0],
pConstantData,
Vector4iCount * sizeof(state->vs_const_i[0]));
@@ -3252,6 +3426,16 @@ NineDevice9_SetVertexShaderConstantB( struct NineDevice9 *This,
user_assert(StartRegister + BoolCount <= NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
user_assert(pConstantData, D3DERR_INVALIDCALL);
+ if (!This->is_recording) {
+ bool noChange = true;
+ for (i = 0; i < BoolCount; i++) {
+ if (!!state->vs_const_b[StartRegister + i] != !!pConstantData[i])
+ noChange = false;
+ }
+ if (noChange)
+ return D3D_OK;
+ }
+
for (i = 0; i < BoolCount; i++)
state->vs_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0;
@@ -3433,6 +3617,13 @@ NineDevice9_SetPixelShader( struct NineDevice9 *This,
DBG("This=%p pShader=%p\n", This, pShader);
+ if (!This->is_recording && state->ps == (struct NinePixelShader9*)pShader)
+ return D3D_OK;
+
+ /* ff -> non-ff: commit back non-ff constants */
+ if (!state->ps && pShader)
+ state->commit |= NINE_STATE_COMMIT_CONST_PS;
+
nine_bind(&state->ps, pShader);
state->changed.group |= NINE_STATE_PS;
@@ -3473,6 +3664,12 @@ NineDevice9_SetPixelShaderConstantF( struct NineDevice9 *This,
return D3D_OK;
user_assert(pConstantData, D3DERR_INVALIDCALL);
+ if (!This->is_recording) {
+ if (!memcmp(&state->ps_const_f[StartRegister * 4], pConstantData,
+ Vector4fCount * 4 * sizeof(state->ps_const_f[0])))
+ return D3D_OK;
+ }
+
memcpy(&state->ps_const_f[StartRegister * 4],
pConstantData,
Vector4fCount * 4 * sizeof(state->ps_const_f[0]));
@@ -3522,6 +3719,11 @@ NineDevice9_SetPixelShaderConstantI( struct NineDevice9 *This,
user_assert(pConstantData, D3DERR_INVALIDCALL);
if (This->driver_caps.ps_integer) {
+ if (!This->is_recording) {
+ if (!memcmp(&state->ps_const_i[StartRegister][0], pConstantData,
+ Vector4iCount * sizeof(state->ps_const_i[0])))
+ return D3D_OK;
+ }
memcpy(&state->ps_const_i[StartRegister][0],
pConstantData,
Vector4iCount * sizeof(state->ps_const_i[0]));
@@ -3585,6 +3787,16 @@ NineDevice9_SetPixelShaderConstantB( struct NineDevice9 *This,
user_assert(StartRegister + BoolCount <= NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
user_assert(pConstantData, D3DERR_INVALIDCALL);
+ if (!This->is_recording) {
+ bool noChange = true;
+ for (i = 0; i < BoolCount; i++) {
+ if (!!state->ps_const_b[StartRegister + i] != !!pConstantData[i])
+ noChange = false;
+ }
+ if (noChange)
+ return D3D_OK;
+ }
+
for (i = 0; i < BoolCount; i++)
state->ps_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0;
diff --git a/src/gallium/state_trackers/nine/device9.h b/src/gallium/state_trackers/nine/device9.h
index 7460745..98d9c4d 100644
--- a/src/gallium/state_trackers/nine/device9.h
+++ b/src/gallium/state_trackers/nine/device9.h
@@ -69,6 +69,7 @@ struct NineDevice9
struct nine_state state; /* device state */
struct list_head update_textures;
+ struct list_head managed_textures;
boolean is_recording;
boolean in_scene;
@@ -83,7 +84,8 @@ struct NineDevice9
uint16_t max_ps_const_f;
struct pipe_resource *dummy_texture;
- struct pipe_sampler_view *dummy_sampler;
+ struct pipe_sampler_view *dummy_sampler_view;
+ struct pipe_sampler_state dummy_sampler_state;
struct gen_mipmap_state *gen_mipmap;
@@ -113,6 +115,7 @@ struct NineDevice9
struct {
boolean user_vbufs;
boolean user_ibufs;
+ boolean user_cbufs;
boolean window_space_position_support;
boolean vs_integer;
boolean ps_integer;
@@ -122,7 +125,9 @@ struct NineDevice9
boolean buggy_barycentrics;
} driver_bugs;
- struct u_upload_mgr *upload;
+ struct u_upload_mgr *vertex_uploader;
+ struct u_upload_mgr *index_uploader;
+ struct u_upload_mgr *constbuf_uploader;
struct nine_range_pool range_pool;
@@ -180,10 +185,6 @@ NineDevice9_GetCSO( struct NineDevice9 *This );
const D3DCAPS9 *
NineDevice9_GetCaps( struct NineDevice9 *This );
-/* Mask: 0x1 = constant buffers, 0x2 = stipple */
-void
-NineDevice9_RestoreNonCSOState( struct NineDevice9 *This, unsigned mask );
-
/*** Direct3D public ***/
HRESULT WINAPI
diff --git a/src/gallium/state_trackers/nine/nine_ff.c b/src/gallium/state_trackers/nine/nine_ff.c
index 8a53f0d..fe8933b 100644
--- a/src/gallium/state_trackers/nine/nine_ff.c
+++ b/src/gallium/state_trackers/nine/nine_ff.c
@@ -22,6 +22,7 @@
#include "tgsi/tgsi_dump.h"
#include "util/u_box.h"
#include "util/u_hash_table.h"
+#include "util/u_upload_mgr.h"
#define NINE_TGSI_LAZY_DEVS 1
@@ -30,13 +31,6 @@
#define NINE_FF_NUM_VS_CONST 256
#define NINE_FF_NUM_PS_CONST 24
-#define NINED3DTSS_TCI_DISABLE 0
-#define NINED3DTSS_TCI_PASSTHRU 1
-#define NINED3DTSS_TCI_CAMERASPACENORMAL 2
-#define NINED3DTSS_TCI_CAMERASPACEPOSITION 3
-#define NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR 4
-#define NINED3DTSS_TCI_SPHEREMAP 5
-
struct fvec4
{
float x, y, z, w;
@@ -63,16 +57,20 @@ struct nine_ff_vs_key
uint32_t fog_range : 1;
uint32_t color0in_one : 1;
uint32_t color1in_one : 1;
- uint32_t pad1 : 8;
- uint32_t tc_gen : 24; /* 8 * 3 bits */
- uint32_t pad2 : 8;
- uint32_t tc_idx : 24;
+ uint32_t fog : 1;
+ uint32_t pad1 : 7;
+ uint32_t tc_dim_input: 16; /* 8 * 2 bits */
+ uint32_t pad2 : 16;
+ uint32_t tc_dim_output: 24; /* 8 * 3 bits */
uint32_t pad3 : 8;
- uint32_t tc_dim : 24; /* 8 * 3 bits */
+ uint32_t tc_gen : 24; /* 8 * 3 bits */
uint32_t pad4 : 8;
+ uint32_t tc_idx : 24;
+ uint32_t pad5 : 8;
+ uint32_t passthrough;
};
- uint64_t value64[2]; /* don't forget to resize VertexShader9.ff_key */
- uint32_t value32[4];
+ uint64_t value64[3]; /* don't forget to resize VertexShader9.ff_key */
+ uint32_t value32[6];
};
};
@@ -106,15 +104,18 @@ struct nine_ff_ps_key
uint32_t alphaarg2 : 3;
uint32_t resultarg : 1; /* CURRENT:0 or TEMP:1 */
uint32_t textarget : 2; /* 1D/2D/3D/CUBE */
- uint32_t projected : 1;
+ uint32_t pad : 1;
/* that's 32 bit exactly */
} ts[8];
- uint32_t fog : 1; /* for vFog with programmable VS */
+ uint32_t projected : 16;
+ uint32_t fog : 1; /* for vFog coming from VS */
uint32_t fog_mode : 2;
- uint32_t specular : 1; /* 9 32-bit words with this */
+ uint32_t specular : 1;
+ uint32_t pad1 : 12; /* 9 32-bit words with this */
uint8_t colorarg_b4[3];
uint8_t colorarg_b5[3];
uint8_t alphaarg_b4[3]; /* 11 32-bit words plus a byte */
+ uint8_t pad2[3];
};
uint64_t value64[6]; /* don't forget to resize PixelShader9.ff_key */
uint32_t value32[12];
@@ -222,7 +223,6 @@ static void nine_ureg_tgsi_dump(struct ureg_program *ureg, boolean override)
* CONST[28].x___ RS.FogEnd
* CONST[28]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
* CONST[28].__z_ RS.FogDensity
- * CONST[29] RS.FogColor
* CONST[30].x___ TWEENFACTOR
*
@@ -334,16 +334,15 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
{
const struct nine_ff_vs_key *key = vs->key;
struct ureg_program *ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
- struct ureg_dst oPos, oCol[2], oTex[8], oPsz, oFog;
- struct ureg_dst rCol[2]; /* oCol if no fog, TEMP otherwise */
+ struct ureg_dst oPos, oCol[2], oPsz, oFog;
struct ureg_dst rVtx, rNrm;
struct ureg_dst r[8];
struct ureg_dst AR;
- struct ureg_dst tmp, tmp_x, tmp_z;
+ struct ureg_dst tmp, tmp_x, tmp_y, tmp_z;
unsigned i, c;
unsigned label[32], l = 0;
unsigned num_r = 8;
- boolean need_rNrm = key->lighting || key->pointscale;
+ boolean need_rNrm = key->lighting || key->pointscale || key->passthrough & (1 << NINE_DECLUSAGE_NORMAL);
boolean need_rVtx = key->lighting || key->fog_mode;
const unsigned texcoord_sn = get_texcoord_sn(device->screen);
@@ -406,9 +405,9 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
if (key->vertexpointsize)
vs->aPsz = build_vs_add_input(vs, NINE_DECLUSAGE_PSIZE);
- if (key->vertexblend_indexed)
+ if (key->vertexblend_indexed || key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES))
vs->aInd = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDINDICES);
- if (key->vertexblend)
+ if (key->vertexblend || key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT))
vs->aWgt = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDWEIGHT);
if (key->vertextween) {
vs->aVtx1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(POSITION,1));
@@ -420,19 +419,16 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
oPos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); /* HPOS */
oCol[0] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0));
oCol[1] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1));
+ if (key->fog || key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
+ oFog = ureg_DECL_output(ureg, TGSI_SEMANTIC_FOG, 0);
+ oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X);
+ }
if (key->vertexpointsize || key->pointscale) {
oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0,
TGSI_WRITEMASK_X, 0, 1);
oPsz = ureg_writemask(oPsz, TGSI_WRITEMASK_X);
}
- if (key->fog_mode) {
- /* We apply fog to the vertex colors, oFog is for programmable shaders only ?
- */
- oFog = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_FOG, 0,
- TGSI_WRITEMASK_X, 0, 1);
- oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X);
- }
/* Declare TEMPs:
*/
@@ -440,18 +436,11 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
r[i] = ureg_DECL_local_temporary(ureg);
tmp = r[0];
tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
+ tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
if (key->lighting || key->vertexblend)
AR = ureg_DECL_address(ureg);
- if (key->fog_mode) {
- rCol[0] = r[2];
- rCol[1] = r[3];
- } else {
- rCol[0] = oCol[0];
- rCol[1] = oCol[1];
- }
-
rVtx = ureg_writemask(r[1], TGSI_WRITEMASK_XYZ);
rNrm = ureg_writemask(r[2], TGSI_WRITEMASK_XYZ);
@@ -560,8 +549,6 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
ureg_CLAMP(ureg, oPsz, vs->aPsz, _XXXX(cPsz1), _YYYY(cPsz1));
#endif
} else if (key->pointscale) {
- struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
- struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27);
@@ -582,72 +569,85 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
#endif
}
- /* Texture coordinate generation:
- * XXX: D3DTTFF_PROJECTED, transform matrix
- */
for (i = 0; i < 8; ++i) {
- struct ureg_dst dst[5];
- struct ureg_src src;
- unsigned c;
+ struct ureg_dst oTex, input_coord, transformed, t;
+ unsigned c, writemask;
const unsigned tci = (key->tc_gen >> (i * 3)) & 0x7;
const unsigned idx = (key->tc_idx >> (i * 3)) & 0x7;
- const unsigned dim = (key->tc_dim >> (i * 3)) & 0x7;
+ unsigned dim_input = 1 + ((key->tc_dim_input >> (i * 2)) & 0x3);
+ const unsigned dim_output = (key->tc_dim_output >> (i * 3)) & 0x7;
+ /* No texture output of index s */
if (tci == NINED3DTSS_TCI_DISABLE)
continue;
- oTex[i] = ureg_DECL_output(ureg, texcoord_sn, i);
-
- if (tci == NINED3DTSS_TCI_PASSTHRU)
- vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx));
-
- if (!dim) {
- dst[c = 4] = oTex[i];
- } else {
- dst[4] = r[5];
- src = ureg_src(dst[4]);
- for (c = 0; c < (dim - 1); ++c)
- dst[c] = ureg_writemask(tmp, (1 << dim) - 1);
- dst[c] = ureg_writemask(oTex[i], (1 << dim) - 1);
- }
+ oTex = ureg_DECL_output(ureg, texcoord_sn, i);
+ input_coord = r[5];
+ transformed = r[6];
+ /* Get the coordinate */
switch (tci) {
case NINED3DTSS_TCI_PASSTHRU:
- ureg_MOV(ureg, dst[4], vs->aTex[idx]);
+ /* NINED3DTSS_TCI_PASSTHRU => Use texcoord coming from index idx *
+ * Else the idx is used only to determine wrapping mode. */
+ vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx));
+ ureg_MOV(ureg, input_coord, vs->aTex[idx]);
break;
case NINED3DTSS_TCI_CAMERASPACENORMAL:
- assert(dim <= 3);
- ureg_MOV(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_XYZ), ureg_src(rNrm));
- ureg_MOV(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
+ ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(rNrm));
+ ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
+ dim_input = 4;
break;
case NINED3DTSS_TCI_CAMERASPACEPOSITION:
- ureg_MOV(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_XYZ), ureg_src(rVtx));
- ureg_MOV(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
+ ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(rVtx));
+ ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
+ dim_input = 4;
break;
case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
tmp.WriteMask = TGSI_WRITEMASK_XYZ;
ureg_DP3(ureg, tmp_x, ureg_src(rVtx), ureg_src(rNrm));
ureg_MUL(ureg, tmp, ureg_src(rNrm), _X(tmp));
ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
- ureg_SUB(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_XYZ), ureg_src(rVtx), ureg_src(tmp));
- ureg_MOV(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
+ ureg_SUB(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(rVtx), ureg_src(tmp));
+ ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
+ dim_input = 4;
tmp.WriteMask = TGSI_WRITEMASK_XYZW;
break;
case NINED3DTSS_TCI_SPHEREMAP:
assert(!"TODO");
break;
default:
+ assert(0);
break;
}
- if (!dim)
- continue;
- dst[c].WriteMask = ~dst[c].WriteMask;
- if (dst[c].WriteMask)
- ureg_MOV(ureg, dst[c], src); /* store untransformed components */
- dst[c].WriteMask = ~dst[c].WriteMask;
- if (dim > 0) ureg_MUL(ureg, dst[0], _XXXX(src), _CONST(128 + i * 4));
- if (dim > 1) ureg_MAD(ureg, dst[1], _YYYY(src), _CONST(129 + i * 4), ureg_src(tmp));
- if (dim > 2) ureg_MAD(ureg, dst[2], _ZZZZ(src), _CONST(130 + i * 4), ureg_src(tmp));
- if (dim > 3) ureg_MAD(ureg, dst[3], _WWWW(src), _CONST(131 + i * 4), ureg_src(tmp));
+
+ /* Apply the transformation */
+ /* dim_output == 0 => do not transform the components.
+ * XYZRHW also disables transformation */
+ if (!dim_output || key->position_t) {
+ transformed = input_coord;
+ writemask = TGSI_WRITEMASK_XYZW;
+ } else {
+ for (c = 0; c < dim_output; c++) {
+ t = ureg_writemask(transformed, 1 << c);
+ switch (dim_input) {
+ /* dim_input = 1 2 3: -> we add trailing 1 to input*/
+ case 1: ureg_MAD(ureg, t, _X(input_coord), _XXXX(_CONST(128 + i * 4 + c)), _YYYY(_CONST(128 + i * 4 + c)));
+ break;
+ case 2: ureg_DP2(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
+ ureg_ADD(ureg, t, ureg_src(transformed), _ZZZZ(_CONST(128 + i * 4 + c)));
+ break;
+ case 3: ureg_DP3(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
+ ureg_ADD(ureg, t, ureg_src(transformed), _WWWW(_CONST(128 + i * 4 + c)));
+ break;
+ case 4: ureg_DP4(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); break;
+ default:
+ assert(0);
+ }
+ }
+ writemask = (1 << dim_output) - 1;
+ }
+
+ ureg_MOV(ureg, ureg_writemask(oTex, writemask), ureg_src(transformed));
}
/* === Lighting:
@@ -692,8 +692,6 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
* specular += light.specular * atten * powFact;
*/
if (key->lighting) {
- struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
-
struct ureg_dst rAtt = ureg_writemask(r[1], TGSI_WRITEMASK_W);
struct ureg_dst rHit = ureg_writemask(r[3], TGSI_WRITEMASK_XYZ);
struct ureg_dst rMid = ureg_writemask(r[4], TGSI_WRITEMASK_XYZ);
@@ -851,22 +849,22 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE);
ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W ), vs->mtlA, vs->mtlE);
}
- ureg_MAD(ureg, rCol[0], ureg_src(rD), vs->mtlD, ureg_src(tmp));
- ureg_MUL(ureg, rCol[1], ureg_src(rS), vs->mtlS);
+ ureg_MAD(ureg, oCol[0], ureg_src(rD), vs->mtlD, ureg_src(tmp));
+ ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS);
} else
/* COLOR */
if (key->darkness) {
if (key->mtl_emissive == 0 && key->mtl_ambient == 0) {
- ureg_MAD(ureg, rCol[0], vs->mtlD, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), _CONST(19));
+ ureg_MAD(ureg, oCol[0], vs->mtlD, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), _CONST(19));
} else {
- ureg_MAD(ureg, ureg_writemask(rCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE);
+ ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE);
ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), vs->mtlA, vs->mtlE);
- ureg_ADD(ureg, ureg_writemask(rCol[0], TGSI_WRITEMASK_W), vs->mtlD, _W(tmp));
+ ureg_ADD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD, _W(tmp));
}
- ureg_MUL(ureg, rCol[1], ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), vs->mtlS);
+ ureg_MUL(ureg, oCol[1], ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), vs->mtlS);
} else {
- ureg_MOV(ureg, rCol[0], vs->aCol[0]);
- ureg_MOV(ureg, rCol[1], vs->aCol[1]);
+ ureg_MOV(ureg, oCol[0], vs->aCol[0]);
+ ureg_MOV(ureg, oCol[1], vs->aCol[1]);
}
/* === Process fog.
@@ -874,10 +872,6 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
* exp(x) = ex2(log2(e) * x)
*/
if (key->fog_mode) {
- /* Fog doesn't affect alpha, TODO: combine with light code output */
- ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), _W(rCol[0]));
- ureg_MOV(ureg, ureg_writemask(oCol[1], TGSI_WRITEMASK_W), _W(rCol[1]));
-
if (key->position_t) {
ureg_MOV(ureg, ureg_saturate(tmp_x), ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W));
} else
@@ -905,10 +899,58 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28)));
}
ureg_MOV(ureg, oFog, _X(tmp));
- ureg_LRP(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), _X(tmp), ureg_src(rCol[0]), _CONST(29));
- ureg_LRP(ureg, ureg_writemask(oCol[1], TGSI_WRITEMASK_XYZ), _X(tmp), ureg_src(rCol[1]), _CONST(29));
+ } else if (key->fog && !(key->passthrough & (1 << NINE_DECLUSAGE_FOG))) {
+ ureg_MOV(ureg, oFog, ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W));
}
+ if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) {
+ struct ureg_src input;
+ struct ureg_dst output;
+ input = vs->aWgt;
+ output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 18);
+ ureg_MOV(ureg, output, input);
+ }
+ if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) {
+ struct ureg_src input;
+ struct ureg_dst output;
+ input = vs->aInd;
+ output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 19);
+ ureg_MOV(ureg, output, input);
+ }
+ if (key->passthrough & (1 << NINE_DECLUSAGE_NORMAL)) {
+ struct ureg_src input;
+ struct ureg_dst output;
+ input = vs->aNrm;
+ output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 20);
+ ureg_MOV(ureg, output, input);
+ }
+ if (key->passthrough & (1 << NINE_DECLUSAGE_TANGENT)) {
+ struct ureg_src input;
+ struct ureg_dst output;
+ input = build_vs_add_input(vs, NINE_DECLUSAGE_TANGENT);
+ output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 21);
+ ureg_MOV(ureg, output, input);
+ }
+ if (key->passthrough & (1 << NINE_DECLUSAGE_BINORMAL)) {
+ struct ureg_src input;
+ struct ureg_dst output;
+ input = build_vs_add_input(vs, NINE_DECLUSAGE_BINORMAL);
+ output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 22);
+ ureg_MOV(ureg, output, input);
+ }
+ if (key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
+ struct ureg_src input;
+ struct ureg_dst output;
+ input = build_vs_add_input(vs, NINE_DECLUSAGE_FOG);
+ input = ureg_scalar(input, TGSI_SWIZZLE_X);
+ output = oFog;
+ ureg_MOV(ureg, output, input);
+ }
+ if (key->passthrough & (1 << NINE_DECLUSAGE_DEPTH)) {
+ (void) 0; /* TODO: replace z of position output ? */
+ }
+
+
if (key->position_t && device->driver_caps.window_space_position_support)
ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
@@ -1270,10 +1312,18 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
if (key->ts[s].colorop == D3DTOP_BUMPENVMAP ||
key->ts[s].colorop == D3DTOP_BUMPENVMAPLUMINANCE) {
}
- if (key->ts[s].projected)
- ureg_TXP(ureg, ps.rTex, target, ps.vT[s], ps.s[s]);
- else
+ if (key->projected & (3 << (s *2))) {
+ unsigned dim = 1 + ((key->projected >> (2 * s)) & 3);
+ if (dim == 4)
+ ureg_TXP(ureg, ps.rTex, target, ps.vT[s], ps.s[s]);
+ else {
+ ureg_RCP(ureg, ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X), ureg_scalar(ps.vT[s], dim-1));
+ ureg_MUL(ureg, ps.rTmp, _XXXX(ps.rTmpSrc), ps.vT[s]);
+ ureg_TEX(ureg, ps.rTex, target, ps.rTmpSrc, ps.s[s]);
+ }
+ } else {
ureg_TEX(ureg, ps.rTex, target, ps.vT[s], ps.s[s]);
+ }
}
if (s == 0 &&
@@ -1316,6 +1366,10 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
colorarg[2] != alphaarg[2])
dst.WriteMask = TGSI_WRITEMASK_XYZ;
+ /* Special DOTPRODUCT behaviour (see wine tests) */
+ if (key->ts[s].colorop == D3DTOP_DOTPRODUCT3)
+ dst.WriteMask = TGSI_WRITEMASK_XYZW;
+
if (used_c & 0x1) arg[0] = ps_get_ts_arg(&ps, colorarg[0]);
if (used_c & 0x2) arg[1] = ps_get_ts_arg(&ps, colorarg[1]);
if (used_c & 0x4) arg[2] = ps_get_ts_arg(&ps, colorarg[2]);
@@ -1406,12 +1460,18 @@ nine_ff_get_vs(struct NineDevice9 *device)
else if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) {
s = usage / NINE_DECLUSAGE_COUNT;
if (s < 8)
- input_texture_coord[s] = 1;
+ input_texture_coord[s] = nine_decltype_get_dim(state->vdecl->decls[i].Type);
else
DBG("FF given texture coordinate >= 8. Ignoring\n");
- }
+ } else if (usage < NINE_DECLUSAGE_NONE)
+ key.passthrough |= 1 << usage;
}
}
+ /* ff vs + ps 3.0: some elements are passed to the ps (wine test).
+ * We do restrict to indices 0 */
+ key.passthrough &= ~((1 << NINE_DECLUSAGE_POSITION) | (1 << NINE_DECLUSAGE_PSIZE) |
+ (1 << NINE_DECLUSAGE_TEXCOORD) | (1 << NINE_DECLUSAGE_POSITIONT) |
+ (1 << NINE_DECLUSAGE_TESSFACTOR) | (1 << NINE_DECLUSAGE_SAMPLE));
if (!key.vertexpointsize)
key.pointscale = !!state->rs[D3DRS_POINTSCALEENABLE];
@@ -1427,6 +1487,7 @@ nine_ff_get_vs(struct NineDevice9 *device)
key.mtl_specular = state->rs[D3DRS_SPECULARMATERIALSOURCE];
key.mtl_emissive = state->rs[D3DRS_EMISSIVEMATERIALSOURCE];
}
+ key.fog = !!state->rs[D3DRS_FOGENABLE];
key.fog_mode = state->rs[D3DRS_FOGENABLE] ? state->rs[D3DRS_FOGVERTEXMODE] : 0;
if (key.fog_mode)
key.fog_range = !key.position_t && state->rs[D3DRS_RANGEFOGENABLE];
@@ -1448,7 +1509,7 @@ nine_ff_get_vs(struct NineDevice9 *device)
for (s = 0; s < 8; ++s) {
unsigned gen = (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1;
- unsigned dim = MIN2(state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7, 4);
+ unsigned dim;
if (key.position_t && gen > NINED3DTSS_TCI_PASSTHRU)
gen = NINED3DTSS_TCI_PASSTHRU;
@@ -1458,7 +1519,14 @@ nine_ff_get_vs(struct NineDevice9 *device)
key.tc_gen |= gen << (s * 3);
key.tc_idx |= (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] & 7) << (s * 3);
- key.tc_dim |= dim << (s * 3);
+ key.tc_dim_input |= ((input_texture_coord[s]-1) & 0x3) << (s * 2);
+
+ dim = state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7;
+ if (dim > 4)
+ dim = input_texture_coord[s];
+ if (dim == 1) /* NV behaviour */
+ dim = 0;
+ key.tc_dim_output |= dim << (s * 3);
}
vs = util_hash_table_get(device->ff.ht_vs, &key);
@@ -1473,6 +1541,7 @@ nine_ff_get_vs(struct NineDevice9 *device)
memcpy(&vs->ff_key, &key, sizeof(vs->ff_key));
err = util_hash_table_set(device->ff.ht_vs, &vs->ff_key, vs);
+ (void)err;
assert(err == PIPE_OK);
device->ff.num_vs++;
NineUnknown_ConvertRefToBind(NineUnknown(vs));
@@ -1543,8 +1612,6 @@ nine_ff_get_ps(struct NineDevice9 *device)
}
key.ts[s].resultarg = state->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP;
- key.ts[s].projected = !!(state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & D3DTTFF_PROJECTED);
-
if (state->texture[s]) {
switch (state->texture[s]->base.type) {
case D3DRTYPE_TEXTURE: key.ts[s].textarget = 1; break;
@@ -1558,10 +1625,14 @@ nine_ff_get_ps(struct NineDevice9 *device)
key.ts[s].textarget = 1;
}
}
+
+ key.projected = nine_ff_get_projected_key(state);
+
for (; s < 8; ++s)
key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
if (state->rs[D3DRS_FOGENABLE])
key.fog_mode = state->rs[D3DRS_FOGTABLEMODE];
+ key.fog = !!state->rs[D3DRS_FOGENABLE];
ps = util_hash_table_get(device->ff.ht_ps, &key);
if (ps)
@@ -1573,6 +1644,7 @@ nine_ff_get_ps(struct NineDevice9 *device)
memcpy(&ps->ff_key, &key, sizeof(ps->ff_key));
err = util_hash_table_set(device->ff.ht_ps, &ps->ff_key, ps);
+ (void)err;
assert(err == PIPE_OK);
device->ff.num_ps++;
NineUnknown_ConvertRefToBind(NineUnknown(ps));
@@ -1689,7 +1761,6 @@ nine_ff_load_point_and_fog_params(struct NineDevice9 *device)
if (isinf(dst[28].y))
dst[28].y = 0.0f;
dst[28].z = asfloat(state->rs[D3DRS_FOGDENSITY]);
- d3dcolor_to_rgba(&dst[29].x, state->rs[D3DRS_FOGCOLOR]);
}
static void
@@ -1703,7 +1774,7 @@ nine_ff_load_tex_matrices(struct NineDevice9 *device)
return;
for (s = 0; s < 8; ++s) {
if (IS_D3DTS_DIRTY(state, TEXTURE0 + s))
- M[32 + s] = *nine_state_access_transform(state, D3DTS_TEXTURE0 + s, FALSE);
+ nine_d3d_matrix_transpose(&M[32 + s], nine_state_access_transform(state, D3DTS_TEXTURE0 + s, FALSE));
}
}
@@ -1762,28 +1833,22 @@ nine_ff_load_viewport_info(struct NineDevice9 *device)
void
nine_ff_update(struct NineDevice9 *device)
{
- struct pipe_context *pipe = device->pipe;
struct nine_state *state = &device->state;
+ struct pipe_constant_buffer cb;
DBG("vs=%p ps=%p\n", device->state.vs, device->state.ps);
/* NOTE: the only reference belongs to the hash table */
- if (!device->state.vs)
+ if (!device->state.vs) {
device->ff.vs = nine_ff_get_vs(device);
- if (!device->state.ps)
+ device->state.changed.group |= NINE_STATE_VS;
+ }
+ if (!device->state.ps) {
device->ff.ps = nine_ff_get_ps(device);
+ device->state.changed.group |= NINE_STATE_PS;
+ }
if (!device->state.vs) {
- if (device->state.ff.clobber.vs_const) {
- device->state.ff.clobber.vs_const = FALSE;
- device->state.changed.group |=
- NINE_STATE_FF_VSTRANSF |
- NINE_STATE_FF_MATERIAL |
- NINE_STATE_FF_LIGHTING |
- NINE_STATE_FF_OTHER;
- device->state.ff.changed.transform[0] |= 0xff000c;
- device->state.ff.changed.transform[8] |= 0xff;
- }
nine_ff_load_vs_transforms(device);
nine_ff_load_tex_matrices(device);
nine_ff_load_lights(device);
@@ -1792,57 +1857,45 @@ nine_ff_update(struct NineDevice9 *device)
memset(state->ff.changed.transform, 0, sizeof(state->ff.changed.transform));
- device->state.changed.group |= NINE_STATE_VS;
- device->state.changed.group |= NINE_STATE_VS_CONST;
-
- if (device->prefer_user_constbuf) {
- struct pipe_context *pipe = device->pipe;
- struct pipe_constant_buffer cb;
- cb.buffer_offset = 0;
- cb.buffer = NULL;
- cb.user_buffer = device->ff.vs_const;
- cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float);
- pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &cb);
- } else {
- struct pipe_box box;
- u_box_1d(0, NINE_FF_NUM_VS_CONST * 4 * sizeof(float), &box);
- pipe->transfer_inline_write(pipe, device->constbuf_vs, 0,
- 0, &box,
- device->ff.vs_const, 0, 0);
- nine_ranges_insert(&device->state.changed.vs_const_f, 0, NINE_FF_NUM_VS_CONST,
- &device->range_pool);
+ cb.buffer_offset = 0;
+ cb.buffer = NULL;
+ cb.user_buffer = device->ff.vs_const;
+ cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float);
+
+ if (!device->driver_caps.user_cbufs) {
+ u_upload_data(device->constbuf_uploader,
+ 0,
+ cb.buffer_size,
+ cb.user_buffer,
+ &cb.buffer_offset,
+ &cb.buffer);
+ u_upload_unmap(device->constbuf_uploader);
+ cb.user_buffer = NULL;
}
+ state->pipe.cb_vs_ff = cb;
+ state->commit |= NINE_STATE_COMMIT_CONST_VS;
}
if (!device->state.ps) {
- if (device->state.ff.clobber.ps_const) {
- device->state.ff.clobber.ps_const = FALSE;
- device->state.changed.group |=
- NINE_STATE_FF_PSSTAGES |
- NINE_STATE_FF_OTHER;
- }
nine_ff_load_ps_params(device);
- device->state.changed.group |= NINE_STATE_PS;
- device->state.changed.group |= NINE_STATE_PS_CONST;
-
- if (device->prefer_user_constbuf) {
- struct pipe_context *pipe = device->pipe;
- struct pipe_constant_buffer cb;
- cb.buffer_offset = 0;
- cb.buffer = NULL;
- cb.user_buffer = device->ff.ps_const;
- cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float);
- pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &cb);
- } else {
- struct pipe_box box;
- u_box_1d(0, NINE_FF_NUM_PS_CONST * 4 * sizeof(float), &box);
- pipe->transfer_inline_write(pipe, device->constbuf_ps, 0,
- 0, &box,
- device->ff.ps_const, 0, 0);
- nine_ranges_insert(&device->state.changed.ps_const_f, 0, NINE_FF_NUM_PS_CONST,
- &device->range_pool);
+ cb.buffer_offset = 0;
+ cb.buffer = NULL;
+ cb.user_buffer = device->ff.ps_const;
+ cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float);
+
+ if (!device->driver_caps.user_cbufs) {
+ u_upload_data(device->constbuf_uploader,
+ 0,
+ cb.buffer_size,
+ cb.user_buffer,
+ &cb.buffer_offset,
+ &cb.buffer);
+ u_upload_unmap(device->constbuf_uploader);
+ cb.user_buffer = NULL;
}
+ state->pipe.cb_ps_ff = cb;
+ state->commit |= NINE_STATE_COMMIT_CONST_PS;
}
device->state.changed.group &= ~NINE_STATE_FF;
diff --git a/src/gallium/state_trackers/nine/nine_ff.h b/src/gallium/state_trackers/nine/nine_ff.h
index 7cefa65..9c33c76 100644
--- a/src/gallium/state_trackers/nine/nine_ff.h
+++ b/src/gallium/state_trackers/nine/nine_ff.h
@@ -3,6 +3,7 @@
#define _NINE_FF_H_
#include "device9.h"
+#include "vertexdeclaration9.h"
boolean nine_ff_init(struct NineDevice9 *);
void nine_ff_fini(struct NineDevice9 *);
@@ -29,4 +30,84 @@ nine_d3d_matrix_inverse_3x3(D3DMATRIX *, const D3DMATRIX *);
void
nine_d3d_matrix_transpose(D3DMATRIX *, const D3DMATRIX *);
+#define NINED3DTSS_TCI_DISABLE 0
+#define NINED3DTSS_TCI_PASSTHRU 1
+#define NINED3DTSS_TCI_CAMERASPACENORMAL 2
+#define NINED3DTSS_TCI_CAMERASPACEPOSITION 3
+#define NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR 4
+#define NINED3DTSS_TCI_SPHEREMAP 5
+
+/* Component count reported for a D3DDECLTYPE_* vertex element type.
+ * Note that D3DDECLTYPE_D3DCOLOR is reported as 1.
+ * Unknown types trip the assert and yield 0. */
+static inline unsigned
+nine_decltype_get_dim(BYTE type)
+{
+    unsigned dim = 0;
+
+    switch (type) {
+    case D3DDECLTYPE_FLOAT1:
+    case D3DDECLTYPE_D3DCOLOR:
+        dim = 1;
+        break;
+    case D3DDECLTYPE_FLOAT2:
+    case D3DDECLTYPE_SHORT2:
+    case D3DDECLTYPE_SHORT2N:
+    case D3DDECLTYPE_USHORT2N:
+    case D3DDECLTYPE_FLOAT16_2:
+        dim = 2;
+        break;
+    case D3DDECLTYPE_FLOAT3:
+    case D3DDECLTYPE_UDEC3:
+    case D3DDECLTYPE_DEC3N:
+        dim = 3;
+        break;
+    case D3DDECLTYPE_FLOAT4:
+    case D3DDECLTYPE_UBYTE4:
+    case D3DDECLTYPE_SHORT4:
+    case D3DDECLTYPE_UBYTE4N:
+    case D3DDECLTYPE_SHORT4N:
+    case D3DDECLTYPE_USHORT4N:
+    case D3DDECLTYPE_FLOAT16_4:
+        dim = 4;
+        break;
+    default:
+        assert(!"Implementation error !");
+        break;
+    }
+    return dim;
+}
+
+/* Build the texture projection key for the 8 fixed-function texture stages.
+ *
+ * The key holds 2 bits per stage (stage s at bits 2*s and 2*s+1):
+ *   0       no projection for the stage
+ *   dim-1   projection enabled, dim being the coordinate count (2..4)
+ *
+ * When no vertex shader is bound the coordinate count comes from
+ * D3DTSS_TEXTURETRANSFORMFLAGS, falling back to the dimension of the
+ * matching TEXCOORD vertex element. With a vertex shader it is always 4. */
+static inline uint16_t
+nine_ff_get_projected_key(struct nine_state *state)
+{
+    unsigned s, i;
+    uint16_t projected = 0;
+    char input_texture_coord[8];
+    memset(&input_texture_coord, 0, sizeof(input_texture_coord));
+
+    /* Record the dimension of each TEXCOORD[0..7] vertex element, 0 if absent */
+    if (state->vdecl) {
+        for (i = 0; i < state->vdecl->nelems; i++) {
+            uint16_t usage = state->vdecl->usage_map[i];
+            if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) {
+                s = usage / NINE_DECLUSAGE_COUNT;
+                if (s < 8)
+                    input_texture_coord[s] = nine_decltype_get_dim(state->vdecl->decls[i].Type);
+            }
+        }
+    }
+
+    for (s = 0; s < 8; ++s) {
+        unsigned gen = (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1;
+        unsigned dim = state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7;
+        unsigned proj = !!(state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & D3DTTFF_PROJECTED);
+
+        if (!state->vs) {
+            if (dim > 4)
+                dim = input_texture_coord[s];
+
+            if (!dim && gen == NINED3DTSS_TCI_PASSTHRU)
+                dim = input_texture_coord[s];
+            else if (!dim)
+                dim = 4;
+
+            /* dim == 1: NV behaviour.
+             * dim == 0: passthrough stage with no TEXCOORD input. There is
+             * nothing to project, and (dim-1) below would underflow and set
+             * the key bits of this stage and of every following stage. */
+            if (dim <= 1)
+                proj = 0;
+            if (dim > input_texture_coord[s] && gen == NINED3DTSS_TCI_PASSTHRU)
+                proj = 0;
+        } else {
+            dim = 4;
+        }
+        if (proj)
+            projected |= (dim-1) << (2 * s);
+    }
+    return projected;
+}
+
#endif /* _NINE_FF_H_ */
diff --git a/src/gallium/state_trackers/nine/nine_pipe.c b/src/gallium/state_trackers/nine/nine_pipe.c
index 4cf37b9..2be30f7 100644
--- a/src/gallium/state_trackers/nine/nine_pipe.c
+++ b/src/gallium/state_trackers/nine/nine_pipe.c
@@ -27,7 +27,8 @@
#include "cso_cache/cso_context.h"
void
-nine_convert_dsa_state(struct cso_context *ctx, const DWORD *rs)
+nine_convert_dsa_state(struct pipe_depth_stencil_alpha_state *dsa_state,
+ const DWORD *rs)
{
struct pipe_depth_stencil_alpha_state dsa;
@@ -65,16 +66,15 @@ nine_convert_dsa_state(struct cso_context *ctx, const DWORD *rs)
dsa.alpha.ref_value = (float)rs[D3DRS_ALPHAREF] / 255.0f;
}
- cso_set_depth_stencil_alpha(ctx, &dsa);
+ *dsa_state = dsa;
}
-/* TODO: Keep a static copy in device so we don't have to memset every time ? */
void
-nine_convert_rasterizer_state(struct cso_context *ctx, const DWORD *rs)
+nine_convert_rasterizer_state(struct pipe_rasterizer_state *rast_state, const DWORD *rs)
{
struct pipe_rasterizer_state rast;
- memset(&rast, 0, sizeof(rast)); /* memcmp safety */
+ memset(&rast, 0, sizeof(rast));
rast.flatshade = rs[D3DRS_SHADEMODE] == D3DSHADE_FLAT;
/* rast.light_twoside = 0; */
@@ -92,7 +92,7 @@ nine_convert_rasterizer_state(struct cso_context *ctx, const DWORD *rs)
/* rast.poly_stipple_enable = 0; */
/* rast.point_smooth = 0; */
rast.sprite_coord_mode = PIPE_SPRITE_COORD_UPPER_LEFT;
- rast.point_quad_rasterization = !!rs[D3DRS_POINTSPRITEENABLE];
+ rast.point_quad_rasterization = 1;
rast.point_size_per_vertex = rs[NINED3DRS_VSPOINTSIZE];
rast.multisample = !!rs[D3DRS_MULTISAMPLEANTIALIAS];
rast.line_smooth = !!rs[D3DRS_ANTIALIASEDLINEENABLE];
@@ -110,12 +110,28 @@ nine_convert_rasterizer_state(struct cso_context *ctx, const DWORD *rs)
/* rast.line_stipple_pattern = 0; */
rast.sprite_coord_enable = rs[D3DRS_POINTSPRITEENABLE] ? 0xff : 0x00;
rast.line_width = 1.0f;
- rast.point_size = rs[NINED3DRS_VSPOINTSIZE] ? 1.0f : asfloat(rs[D3DRS_POINTSIZE]); /* XXX: D3DRS_POINTSIZE_MIN/MAX */
- rast.offset_units = asfloat(rs[D3DRS_DEPTHBIAS]) * asfloat(rs[NINED3DRS_ZBIASSCALE]);
+ if (rs[NINED3DRS_VSPOINTSIZE]) {
+ rast.point_size = 1.0f;
+ } else {
+ rast.point_size = CLAMP(asfloat(rs[D3DRS_POINTSIZE]),
+ asfloat(rs[D3DRS_POINTSIZE_MIN]),
+ asfloat(rs[D3DRS_POINTSIZE_MAX]));
+ }
+ /* offset_units has the ogl/d3d11 meaning.
+ * d3d9: offset = scale * dz + bias
+ * ogl/d3d11: offset = scale * dz + r * bias
+ * with r being implementation-dependent and supposed to be
+ * the smallest value the depth buffer format can hold.
+ * In practice on current and past hw it seems to be 2^-23
+ * for all formats except float formats where it varies depending
+ * on the content.
+ * For now use 1 << 23, but in the future perhaps add a way in gallium
+ * to get r for the format or get the gallium behaviour */
+ rast.offset_units = asfloat(rs[D3DRS_DEPTHBIAS]) * (float)(1 << 23);
rast.offset_scale = asfloat(rs[D3DRS_SLOPESCALEDEPTHBIAS]);
/* rast.offset_clamp = 0.0f; */
- cso_set_rasterizer(ctx, &rast);
+ *rast_state = rast;
}
static inline void
@@ -137,7 +153,7 @@ nine_convert_blend_state_fixup(struct pipe_blend_state *blend, const DWORD *rs)
}
void
-nine_convert_blend_state(struct cso_context *ctx, const DWORD *rs)
+nine_convert_blend_state(struct pipe_blend_state *blend_state, const DWORD *rs)
{
struct pipe_blend_state blend;
@@ -181,7 +197,7 @@ nine_convert_blend_state(struct cso_context *ctx, const DWORD *rs)
/* blend.force_srgb = !!rs[D3DRS_SRGBWRITEENABLE]; */
- cso_set_blend(ctx, &blend);
+ *blend_state = blend;
}
void
@@ -239,8 +255,8 @@ nine_pipe_context_clear(struct NineDevice9 *This)
cso_set_samplers(cso, PIPE_SHADER_VERTEX, 0, NULL);
cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, 0, NULL);
- pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 0, NULL);
- pipe->set_sampler_views(pipe, PIPE_SHADER_VERTEX, 0, 0, NULL);
+ cso_set_sampler_views(cso, PIPE_SHADER_VERTEX, 0, NULL);
+ cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, 0, NULL);
pipe->set_vertex_buffers(pipe, 0, This->caps.MaxStreams, NULL);
pipe->set_index_buffer(pipe, NULL);
diff --git a/src/gallium/state_trackers/nine/nine_pipe.h b/src/gallium/state_trackers/nine/nine_pipe.h
index 43a7737..8611786 100644
--- a/src/gallium/state_trackers/nine/nine_pipe.h
+++ b/src/gallium/state_trackers/nine/nine_pipe.h
@@ -27,6 +27,7 @@
#include "pipe/p_format.h"
#include "pipe/p_screen.h"
#include "pipe/p_state.h" /* pipe_box */
+#include "util/macros.h"
#include "util/u_rect.h"
#include "util/u_format.h"
#include "nine_helpers.h"
@@ -36,9 +37,9 @@ struct cso_context;
extern const enum pipe_format nine_d3d9_to_pipe_format_map[120];
extern const D3DFORMAT nine_pipe_to_d3d9_format_map[PIPE_FORMAT_COUNT];
-void nine_convert_dsa_state(struct cso_context *, const DWORD *);
-void nine_convert_rasterizer_state(struct cso_context *, const DWORD *);
-void nine_convert_blend_state(struct cso_context *, const DWORD *);
+void nine_convert_dsa_state(struct pipe_depth_stencil_alpha_state *, const DWORD *);
+void nine_convert_rasterizer_state(struct pipe_rasterizer_state *, const DWORD *);
+void nine_convert_blend_state(struct pipe_blend_state *, const DWORD *);
void nine_convert_sampler_state(struct cso_context *, int idx, const DWORD *);
void nine_pipe_context_clear(struct NineDevice9 *);
@@ -81,6 +82,49 @@ rect_to_pipe_box(struct pipe_box *dst, const RECT *src)
dst->depth = 1;
}
+/* Convert the x/y extent of a pipe_box into a RECT whose right/bottom
+ * edges are exclusive (origin + size). The z/depth of the box is ignored. */
+static inline void
+pipe_box_to_rect(RECT *dst, const struct pipe_box *src)
+{
+    /* near edges */
+    dst->left = src->x;
+    dst->top = src->y;
+    /* far edges */
+    dst->right = src->x + src->width;
+    dst->bottom = src->y + src->height;
+}
+
+/* Halve a rect in place, rounding outward so the result still covers
+ * everything the original rect covered:
+ * left/top are divided by 2 rounding down, right/bottom rounding up.
+ * (The near edges previously used ">> 2", i.e. a division by 4, which did
+ * not match the division by 2 applied to the far edges.) */
+static inline void
+rect_minify_inclusive(RECT *rect)
+{
+    rect->left = rect->left >> 1;
+    rect->top = rect->top >> 1;
+    rect->right = DIV_ROUND_UP(rect->right, 2);
+    rect->bottom = DIV_ROUND_UP(rect->bottom, 2);
+}
+
+/* Grow rect so that it starts and ends on block boundaries of a compressed
+ * format, then clamp its right/bottom edges to width/height.
+ * Uncompressed formats are only clamped.
+ *
+ * Preconditions:
+ *   0 <= rect->left < rect->right
+ *   0 <= rect->top < rect->bottom
+ */
+static inline void
+fit_rect_format_inclusive(enum pipe_format format, RECT *rect, int width, int height)
+{
+    const unsigned w = util_format_get_blockwidth(format);
+    const unsigned h = util_format_get_blockheight(format);
+
+    if (util_format_is_compressed(format)) {
+        /* round the near edges down to a block boundary */
+        rect->left -= rect->left % w;
+        rect->top -= rect->top % h;
+
+        /* round the far edges up to a block boundary */
+        if ((rect->right % w) != 0)
+            rect->right = rect->right - (rect->right % w) + w;
+        if ((rect->bottom % h) != 0)
+            rect->bottom = rect->bottom - (rect->bottom % h) + h;
+    }
+
+    rect->right = MIN2(rect->right, width);
+    rect->bottom = MIN2(rect->bottom, height);
+}
+
static inline boolean
rect_to_pipe_box_clamp(struct pipe_box *dst, const RECT *src)
{
@@ -164,6 +208,23 @@ pipe_to_d3d9_format(enum pipe_format format)
return nine_pipe_to_d3d9_format_map[format];
}
+/* TRUE for the DXT1..DXT5 formats only.
+ * ATI1 and ATI2 are not officially compressed in d3d9, so they are not
+ * reported here. */
+static inline boolean
+compressed_format( D3DFORMAT fmt )
+{
+    if (fmt == D3DFMT_DXT1 ||
+        fmt == D3DFMT_DXT2 ||
+        fmt == D3DFMT_DXT3 ||
+        fmt == D3DFMT_DXT4 ||
+        fmt == D3DFMT_DXT5)
+        return TRUE;
+
+    return FALSE;
+}
+
static inline boolean
depth_stencil_format( D3DFORMAT fmt )
{
diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c
index 754f5af..28f2787 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -89,6 +89,15 @@ static inline const char *d3dsio_to_string(unsigned opcode);
#define NINE_SWIZZLE4(x,y,z,w) \
TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w
+#define NINE_CONSTANT_SRC(index) \
+ ureg_src_register(TGSI_FILE_CONSTANT, index)
+
+#define NINE_APPLY_SWIZZLE(src, s) \
+ ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))
+
+#define NINE_CONSTANT_SRC_SWIZZLE(index, s) \
+ NINE_APPLY_SWIZZLE(NINE_CONSTANT_SRC(index), s)
+
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
@@ -444,6 +453,9 @@ struct shader_translator
BYTE minor;
} version;
unsigned processor; /* TGSI_PROCESSOR_VERTEX/FRAMGENT */
+ unsigned num_constf_allowed;
+ unsigned num_consti_allowed;
+ unsigned num_constb_allowed;
boolean native_integers;
boolean inline_subroutines;
@@ -505,7 +517,6 @@ struct shader_translator
#define IS_VS (tx->processor == TGSI_PROCESSOR_VERTEX)
#define IS_PS (tx->processor == TGSI_PROCESSOR_FRAGMENT)
-#define NINE_MAX_CONST_F_SHADER (tx->processor == TGSI_PROCESSOR_VERTEX ? NINE_MAX_CONST_F : NINE_MAX_CONST_F_PS3)
#define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}
@@ -528,7 +539,7 @@ static boolean
tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
{
INT i;
- if (index < 0 || index >= NINE_MAX_CONST_F_SHADER) {
+ if (index < 0 || index >= tx->num_constf_allowed) {
tx->failure = TRUE;
return FALSE;
}
@@ -543,7 +554,7 @@ tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
static boolean
tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
{
- if (index < 0 || index >= NINE_MAX_CONST_I) {
+ if (index < 0 || index >= tx->num_consti_allowed) {
tx->failure = TRUE;
return FALSE;
}
@@ -554,7 +565,7 @@ tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
static boolean
tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
{
- if (index < 0 || index >= NINE_MAX_CONST_B) {
+ if (index < 0 || index >= tx->num_constb_allowed) {
tx->failure = TRUE;
return FALSE;
}
@@ -568,9 +579,7 @@ tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
{
unsigned n;
- FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_F_SHADER)
- if (IS_VS && index >= NINE_MAX_CONST_F_SHADER)
- WARN("lconstf index %i too high, indirect access won't work\n", index);
+ FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
for (n = 0; n < tx->num_lconstf; ++n)
if (tx->lconstf[n].idx == index)
@@ -592,7 +601,7 @@ tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
static void
tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
{
- FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_I)
+ FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
tx->lconsti[index].idx = index;
tx->lconsti[index].reg = tx->native_integers ?
ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
@@ -601,7 +610,7 @@ tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
static void
tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
{
- FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_B)
+ FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
tx->lconstb[index].idx = index;
tx->lconstb[index].reg = tx->native_integers ?
ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
@@ -672,6 +681,54 @@ tx_pred_alloc(struct shader_translator *tx, INT idx)
tx->regs.p = ureg_DECL_predicate(tx->ureg);
}
+/* NOTE: It is not clear for which ps1.1-ps1.3 instructions the texture
+ * projection has to be applied. It does not apply to texkill.
+ * The documentation is very imprecise here: it says the projection is done
+ * before rasterization, thus in the vs, which seems wrong since ps
+ * instructions are affected differently.
+ * For now the projection is only applied for the ps TEX instruction and
+ * TEXBEM. Other instructions may need it as well.
+ *
+ * Writes src to dst, divided by the component selected by the projection
+ * key of stage idx (2 bits per stage in tx->info->projected). */
+static inline void
+apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
+                      struct ureg_src src, INT idx)
+{
+    const unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
+    struct ureg_dst rcp;
+
+    if (dim == 1) {
+        /* key bits are 0: no projection, plain copy */
+        ureg_MOV(tx->ureg, dst, src);
+        return;
+    }
+
+    /* dst = src * (1 / src[dim-1]) */
+    rcp = tx_scratch_scalar(tx);
+    ureg_RCP(tx->ureg, rcp, ureg_scalar(src, dim-1));
+    ureg_MUL(tx->ureg, dst, tx_src_scalar(rcp), src);
+}
+
+/* Emit a texture fetch for a ps1.x stage, honouring the projection key of
+ * stage idx:
+ *  - no projection (dim == 1), or a projection that makes no sense for the
+ *    texture dimensions (dim <= target): plain TEX
+ *  - dim == 4: TXP
+ *  - otherwise: project the coordinates manually, then TEX */
+static inline void
+TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
+                         unsigned target, struct ureg_src src0,
+                         struct ureg_src src1, INT idx)
+{
+    const unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
+    struct ureg_dst coord;
+
+    if (dim == 1 || dim <= target) {
+        ureg_TEX(tx->ureg, dst, target, src0, src1);
+        return;
+    }
+
+    if (dim == 4) {
+        ureg_TXP(tx->ureg, dst, target, src0, src1);
+        return;
+    }
+
+    coord = tx_scratch(tx);
+    apply_ps1x_projection(tx, coord, src0, idx);
+    ureg_TEX(tx->ureg, dst, target, ureg_src(coord), src1);
+}
+
static inline void
tx_texcoord_alloc(struct shader_translator *tx, INT idx)
{
@@ -1086,9 +1143,18 @@ _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
assert(param->idx >= 0 && param->idx < 4);
assert(!param->rel);
tx->info->rt_mask |= 1 << param->idx;
- if (ureg_dst_is_undef(tx->regs.oCol[param->idx]))
- tx->regs.oCol[param->idx] =
- ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
+ if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
+ /* ps < 3: oCol[0] will have fog blending afterward
+ * vs < 3: oD1.w (D3DPMISCCAPS_FOGANDSPECULARALPHA) set to 0 even if set */
+ if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
+ tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
+ } else if (IS_VS && tx->version.major < 3 && param->idx == 1) {
+ tx->regs.oCol[1] = ureg_DECL_temporary(tx->ureg);
+ } else {
+ tx->regs.oCol[param->idx] =
+ ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
+ }
+ }
dst = tx->regs.oCol[param->idx];
if (IS_VS && tx->version.major < 3)
dst = ureg_saturate(dst);
@@ -1824,7 +1890,7 @@ sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
sem->Index = 0;
break;
default:
- assert(!"Invalid DECLUSAGE.");
+ unreachable(!"Invalid DECLUSAGE.");
break;
}
}
@@ -2135,12 +2201,79 @@ DECL_SPECIAL(TEXKILL)
DECL_SPECIAL(TEXBEM)
{
- STUB(D3DERR_INVALIDCALL);
-}
+ struct ureg_program *ureg = tx->ureg;
+ struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+ struct ureg_dst tmp, tmp2, texcoord;
+ struct ureg_src sample, m00, m01, m10, m11;
+ struct ureg_src bumpenvlscale, bumpenvloffset;
+ const int m = tx->insn.dst[0].idx;
+ const int n = tx->insn.src[0].idx;
-DECL_SPECIAL(TEXBEML)
-{
- STUB(D3DERR_INVALIDCALL);
+ assert(tx->version.major == 1);
+
+ sample = ureg_DECL_sampler(ureg, m);
+ tx->info->sampler_mask |= 1 << m;
+
+ tx_texcoord_alloc(tx, m);
+
+ tmp = tx_scratch(tx);
+ tmp2 = tx_scratch(tx);
+ texcoord = tx_scratch(tx);
+ /*
+ * Bump-env-matrix:
+ * 00 is X
+ * 01 is Y
+ * 10 is Z
+ * 11 is W
+ */
+ nine_info_mark_const_f_used(tx->info, 8 + 8 + m/2);
+ m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
+ m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
+ m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
+ m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
+
+ /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
+ if (m % 2 == 0) {
+ bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, X);
+ bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Y);
+ } else {
+ bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Z);
+ bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, W);
+ }
+
+ apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
+
+ /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
+ ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
+ NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
+ /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
+ ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
+ NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
+ NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
+
+ /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
+ ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
+ NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
+ /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
+ ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
+ NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
+ NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
+
+ /* Now the texture coordinates are in tmp.xy */
+
+ if (tx->insn.opcode == D3DSIO_TEXBEM) {
+ ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
+ } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
+ /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
+ ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
+ ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Z),
+ bumpenvlscale, bumpenvloffset);
+ ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
+ }
+
+ tx->info->bumpenvmat_needed = 1;
+
+ return D3D_OK;
}
DECL_SPECIAL(TEXREG2AR)
@@ -2421,7 +2554,43 @@ DECL_SPECIAL(TEXDEPTH)
DECL_SPECIAL(BEM)
{
-    STUB(D3DERR_INVALIDCALL);
+    /* bem: apply the 2x2 bump environment matrix of the destination stage:
+     *   dst.r = src0.r + BUMPENVMAT00 * src1.r + BUMPENVMAT10 * src1.g
+     *   dst.g = src0.g + BUMPENVMAT01 * src1.r + BUMPENVMAT11 * src1.g
+     * Only dst.xy is written. */
+    struct ureg_program *ureg = tx->ureg;
+    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+    struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
+    struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
+    struct ureg_src m00, m01, m10, m11;
+    const int m = tx->insn.dst[0].idx;
+    /* The intermediate result needs a real scratch register: tmp was
+     * previously left uninitialized and then used as MAD destination/source. */
+    struct ureg_dst tmp = tx_scratch(tx);
+    /*
+     * Bump-env-matrix:
+     * 00 is X
+     * 01 is Y
+     * 10 is Z
+     * 11 is W
+     */
+    nine_info_mark_const_f_used(tx->info, 8 + m);
+    m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
+    m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
+    m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
+    m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
+    /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */
+    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
+             NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
+    /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
+    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
+             NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
+
+    /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
+    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
+             NINE_APPLY_SWIZZLE(src1, X), src0);
+    /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
+    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
+             NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
+    ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
+
+    /* flag that the shader reads the bump-env matrix constants */
+    tx->info->bumpenvmat_needed = 1;
+
+    return D3D_OK;
}
DECL_SPECIAL(TEXLD)
@@ -2482,7 +2651,7 @@ DECL_SPECIAL(TEX)
src[1] = ureg_DECL_sampler(ureg, s);
tx->info->sampler_mask |= 1 << s;
- ureg_TEX(ureg, dst, t, src[0], src[1]);
+ TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
return D3D_OK;
}
@@ -2616,7 +2785,7 @@ struct sm1_op_info inst_table[] =
_OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
_OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
_OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
- _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEML)),
+ _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
_OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
_OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
_OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
@@ -3023,6 +3192,8 @@ tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
info->lconstf.data = NULL;
info->lconstf.ranges = NULL;
+ info->bumpenvmat_needed = 0;
+
for (i = 0; i < Elements(tx->regs.rL); ++i) {
tx->regs.rL[i] = ureg_dst_undef();
}
@@ -3074,6 +3245,57 @@ tgsi_processor_from_type(unsigned shader_type)
}
}
+/* Append the final colour write of a ps < 3.0 shader.
+ * Declares colour output 0 and writes src_col to it; when fog is enabled
+ * the rgb channels are blended with the fog colour (constant 32) using a
+ * fog factor computed from constant 33 and the fragment depth (table fog),
+ * or taken from the FOG input when fog_mode is D3DFOG_NONE. Alpha is
+ * passed through unchanged. */
+static void
+shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
+{
+    struct ureg_program *ureg = tx->ureg;
+    struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
+    struct ureg_src depth, fog_color;
+    struct ureg_dst fog_factor;
+
+    if (!tx->info->fog_enable) {
+        ureg_MOV(ureg, oCol0, src_col);
+        return;
+    }
+
+    /* the fragment depth is only needed by the table fog modes */
+    if (tx->info->fog_mode != D3DFOG_NONE)
+        depth = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
+                                               TGSI_INTERPOLATE_LINEAR),
+                            TGSI_SWIZZLE_Z);
+
+    nine_info_mark_const_f_used(tx->info, 33);
+    fog_color = NINE_CONSTANT_SRC(32);
+    fog_factor = tx_scratch_scalar(tx);
+
+    switch (tx->info->fog_mode) {
+    case D3DFOG_LINEAR: {
+        /* factor = saturate((end - depth) * coeff) */
+        struct ureg_src fog_end = NINE_CONSTANT_SRC_SWIZZLE(33, X);
+        struct ureg_src fog_coeff = NINE_CONSTANT_SRC_SWIZZLE(33, Y);
+        ureg_SUB(ureg, fog_factor, fog_end, depth);
+        ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
+        break;
+    }
+    case D3DFOG_EXP:
+    case D3DFOG_EXP2: {
+        /* factor = 2^(-1.442695 * x), with x = depth * density (EXP)
+         * or x = (depth * density)^2 (EXP2) */
+        struct ureg_src fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
+        ureg_MUL(ureg, fog_factor, depth, fog_density);
+        if (tx->info->fog_mode == D3DFOG_EXP2)
+            ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
+        ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
+        ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
+        break;
+    }
+    default: {
+        /* no table fog: use the fog value coming from the previous stage */
+        struct ureg_src fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0,
+                                                                TGSI_INTERPOLATE_PERSPECTIVE),
+                                             TGSI_SWIZZLE_X);
+        ureg_MOV(ureg, fog_factor, fog_vs);
+        break;
+    }
+    }
+
+    /* rgb: blend between shader colour and fog colour; alpha: untouched */
+    ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ),
+             tx_src_scalar(fog_factor), src_col, fog_color);
+    ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
+}
+
#define GET_CAP(n) device->screen->get_param( \
device->screen, PIPE_CAP_##n)
#define GET_SHADER_CAP(n) device->screen->get_shader_param( \
@@ -3123,6 +3345,24 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
tx->texcoord_sn = tx->want_texcoord ?
TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
+ if (IS_VS) {
+ tx->num_constf_allowed = NINE_MAX_CONST_F;
+ } else if (tx->version.major < 2) {/* IS_PS v1 */
+ tx->num_constf_allowed = 8;
+ } else if (tx->version.major == 2) {/* IS_PS v2 */
+ tx->num_constf_allowed = 32;
+ } else {/* IS_PS v3 */
+ tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
+ }
+
+ if (tx->version.major < 2) {
+ tx->num_consti_allowed = 0;
+ tx->num_constb_allowed = 0;
+ } else {
+ tx->num_consti_allowed = NINE_MAX_CONST_I;
+ tx->num_constb_allowed = NINE_MAX_CONST_B;
+ }
+
/* VS must always write position. Declare it here to make it the 1st output.
* (Some drivers like nv50 are buggy and rely on that.)
*/
@@ -3145,10 +3385,26 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
goto out;
}
- if (IS_PS && (tx->version.major < 2) && tx->num_temp) {
- ureg_MOV(tx->ureg, ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 0),
- ureg_src(tx->regs.r[0]));
- info->rt_mask |= 0x1;
+ if (IS_PS && tx->version.major < 3) {
+ if (tx->version.major < 2) {
+ assert(tx->num_temp); /* there must be color output */
+ info->rt_mask |= 0x1;
+ shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
+ } else {
+ shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
+ }
+ }
+
+ if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
+ tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
+ ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
+ }
+
+ /* vs < 3: oD1.w (D3DPMISCCAPS_FOGANDSPECULARALPHA) set to 0 even if set */
+ if (IS_VS && tx->version.major < 3 && !ureg_dst_is_undef(tx->regs.oCol[1])) {
+ struct ureg_dst dst = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 1);
+ ureg_MOV(tx->ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oCol[1]));
+ ureg_MOV(tx->ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 0.0f));
}
if (info->position_t)
@@ -3233,6 +3489,7 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
info->const_int_slots > 0 ?
max_const_f + info->const_int_slots :
info->const_float_slots;
+
info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
for (s = 0; s < slot_max; s++)
diff --git a/src/gallium/state_trackers/nine/nine_shader.h b/src/gallium/state_trackers/nine/nine_shader.h
index ec256c1..41577ac 100644
--- a/src/gallium/state_trackers/nine/nine_shader.h
+++ b/src/gallium/state_trackers/nine/nine_shader.h
@@ -59,6 +59,10 @@ struct nine_shader_info
uint16_t sampler_mask_shadow; /* in, which samplers use depth compare */
uint8_t rt_mask; /* out, which render targets are being written */
+ uint8_t fog_enable;
+ uint8_t fog_mode;
+ uint16_t projected; /* ps 1.1 to 1.3 */
+
unsigned const_i_base; /* in vec4 (16 byte) units */
unsigned const_b_base; /* in vec4 (16 byte) units */
unsigned const_used_size;
@@ -68,6 +72,7 @@ struct nine_shader_info
unsigned const_bool_slots;
struct nine_lconstf lconstf; /* out, NOTE: members to be free'd by user */
+ uint8_t bumpenvmat_needed;
};
static inline void
@@ -137,4 +142,48 @@ nine_shader_variants_free(struct nine_shader_variant *list)
}
}
+/* Node of a singly linked list of shader variants looked up by a 64-bit key. */
+struct nine_shader_variant64
+{
+ /* next node of the list, NULL on the last node */
+ struct nine_shader_variant64 *next;
+ /* opaque object stored for this key; returned as-is by the lookup */
+ void *cso;
+ /* key the lookup compares against */
+ uint64_t key;
+};
+
+/* Return the cso of the first node whose key equals key, or NULL when no
+ * node matches. list must point to a valid head node. */
+static inline void *
+nine_shader_variant_get64(struct nine_shader_variant64 *list, uint64_t key)
+{
+    do {
+        if (list->key == key)
+            return list->cso;
+        list = list->next;
+    } while (list);
+
+    return NULL;
+}
+
+/* Append a new (key, cso) node at the end of the list.
+ * Returns FALSE when the node cannot be allocated (list left unchanged),
+ * TRUE otherwise. Debug builds assert that the nodes walked over do not
+ * already hold key. */
+static inline boolean
+nine_shader_variant_add64(struct nine_shader_variant64 *list,
+                          uint64_t key, void *cso)
+{
+    struct nine_shader_variant64 *tail;
+    struct nine_shader_variant64 *node;
+
+    /* walk to the last node */
+    for (tail = list; tail->next; tail = tail->next)
+        assert(tail->key != key);
+
+    node = MALLOC_STRUCT(nine_shader_variant64);
+    if (!node)
+        return FALSE;
+
+    node->next = NULL;
+    node->key = key;
+    node->cso = cso;
+    tail->next = node;
+    return TRUE;
+}
+
+/* Free every node that follows the head node. The head itself is not
+ * freed and ends up with next == NULL. */
+static inline void
+nine_shader_variants_free64(struct nine_shader_variant64 *list)
+{
+    struct nine_shader_variant64 *node = list->next;
+
+    list->next = NULL;
+    while (node) {
+        struct nine_shader_variant64 *following = node->next;
+        FREE(node);
+        node = following;
+    }
+}
+
#endif /* _NINE_SHADER_H_ */
diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c
index 6c83585..558d07a 100644
--- a/src/gallium/state_trackers/nine/nine_state.c
+++ b/src/gallium/state_trackers/nine/nine_state.c
@@ -33,352 +33,36 @@
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "cso_cache/cso_context.h"
+#include "util/u_upload_mgr.h"
#include "util/u_math.h"
#define DBG_CHANNEL DBG_DEVICE
-static uint32_t
-update_framebuffer(struct NineDevice9 *device)
-{
- struct pipe_context *pipe = device->pipe;
- struct nine_state *state = &device->state;
- struct pipe_framebuffer_state *fb = &device->state.fb;
- unsigned i;
- struct NineSurface9 *rt0 = state->rt[0];
- unsigned w = rt0->desc.Width;
- unsigned h = rt0->desc.Height;
- D3DMULTISAMPLE_TYPE nr_samples = rt0->desc.MultiSampleType;
- unsigned mask = state->ps ? state->ps->rt_mask : 1;
- const int sRGB = state->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0;
-
- DBG("\n");
-
- state->rt_mask = 0x0;
- fb->nr_cbufs = 0;
-
- /* all render targets must have the same size and the depth buffer must be
- * bigger. Multisample has to match, according to spec. But some apps do
- * things wrong there, and no error is returned. The behaviour they get
- * apparently is that depth buffer is disabled if it doesn't match.
- * Surely the same for render targets. */
-
- /* Special case: D3DFMT_NULL is used to bound no real render target,
- * but render to depth buffer. We have to not take into account the render
- * target info. TODO: know what should happen when there are several render targers
- * and the first one is D3DFMT_NULL */
- if (rt0->desc.Format == D3DFMT_NULL && state->ds) {
- w = state->ds->desc.Width;
- h = state->ds->desc.Height;
- nr_samples = state->ds->desc.MultiSampleType;
- }
-
- for (i = 0; i < device->caps.NumSimultaneousRTs; ++i) {
- struct NineSurface9 *rt = state->rt[i];
-
- if (rt && rt->desc.Format != D3DFMT_NULL && (mask & (1 << i)) &&
- rt->desc.Width == w && rt->desc.Height == h &&
- rt->desc.MultiSampleType == nr_samples) {
- fb->cbufs[i] = NineSurface9_GetSurface(rt, sRGB);
- state->rt_mask |= 1 << i;
- fb->nr_cbufs = i + 1;
-
- if (unlikely(rt->desc.Usage & D3DUSAGE_AUTOGENMIPMAP)) {
- assert(rt->texture == D3DRTYPE_TEXTURE ||
- rt->texture == D3DRTYPE_CUBETEXTURE);
- NineBaseTexture9(rt->base.base.container)->dirty_mip = TRUE;
- }
- } else {
- /* Color outputs must match RT slot,
- * drivers will have to handle NULL entries for GL, too.
- */
- fb->cbufs[i] = NULL;
- }
- }
-
- if (state->ds && state->ds->desc.Width >= w &&
- state->ds->desc.Height >= h &&
- state->ds->desc.MultiSampleType == nr_samples) {
- fb->zsbuf = NineSurface9_GetSurface(state->ds, 0);
- } else {
- fb->zsbuf = NULL;
- }
-
- fb->width = w;
- fb->height = h;
-
- pipe->set_framebuffer_state(pipe, fb); /* XXX: cso ? */
-
- if (fb->zsbuf) {
- DWORD scale;
- switch (fb->zsbuf->format) {
- case PIPE_FORMAT_Z32_FLOAT:
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- scale = fui(1.0f);
- break;
- case PIPE_FORMAT_Z16_UNORM:
- scale = fui((float)(1 << 16));
- break;
- default:
- scale = fui((float)(1 << 24));
- break;
- }
- if (state->rs[NINED3DRS_ZBIASSCALE] != scale) {
- state->rs[NINED3DRS_ZBIASSCALE] = scale;
- state->changed.group |= NINE_STATE_RASTERIZER;
- }
- }
-
- return state->changed.group;
-}
-
-static void
-update_viewport(struct NineDevice9 *device)
-{
- struct pipe_context *pipe = device->pipe;
- const D3DVIEWPORT9 *vport = &device->state.viewport;
- struct pipe_viewport_state pvport;
-
- /* D3D coordinates are:
- * -1 .. +1 for X,Y and
- * 0 .. +1 for Z (we use pipe_rasterizer_state.clip_halfz)
- */
- pvport.scale[0] = (float)vport->Width * 0.5f;
- pvport.scale[1] = (float)vport->Height * -0.5f;
- pvport.scale[2] = vport->MaxZ - vport->MinZ;
- pvport.translate[0] = (float)vport->Width * 0.5f + (float)vport->X;
- pvport.translate[1] = (float)vport->Height * 0.5f + (float)vport->Y;
- pvport.translate[2] = vport->MinZ;
-
- /* We found R600 and SI cards have some imprecision
- * on the barycentric coordinates used for interpolation.
- * Some shaders rely on having something precise.
- * We found that the proprietary driver has the imprecision issue,
- * except when the render target width and height are powers of two.
- * It is using some sort of workaround for these cases
- * which covers likely all the cases the applications rely
- * on something precise.
- * We haven't found the workaround, but it seems like it's better
- * for applications if the imprecision is biased towards infinity
- * instead of -infinity (which is what measured). So shift slightly
- * the viewport: not enough to change rasterization result (in particular
- * for multisampling), but enough to make the imprecision biased
- * towards infinity. We do this shift only if render target width and
- * height are powers of two.
- * Solves 'red shadows' bug on UE3 games.
- */
- if (device->driver_bugs.buggy_barycentrics &&
- ((vport->Width & (vport->Width-1)) == 0) &&
- ((vport->Height & (vport->Height-1)) == 0)) {
- pvport.translate[0] -= 1.0f / 128.0f;
- pvport.translate[1] -= 1.0f / 128.0f;
- }
-
- pipe->set_viewport_states(pipe, 0, 1, &pvport);
-}
-
-static inline void
-update_scissor(struct NineDevice9 *device)
-{
- struct pipe_context *pipe = device->pipe;
-
- pipe->set_scissor_states(pipe, 0, 1, &device->state.scissor);
-}
+/* State preparation only */
static inline void
-update_blend(struct NineDevice9 *device)
+prepare_blend(struct NineDevice9 *device)
{
- nine_convert_blend_state(device->cso, device->state.rs);
+ nine_convert_blend_state(&device->state.pipe.blend, device->state.rs);
+ device->state.commit |= NINE_STATE_COMMIT_BLEND;
}
static inline void
-update_dsa(struct NineDevice9 *device)
+prepare_dsa(struct NineDevice9 *device)
{
- nine_convert_dsa_state(device->cso, device->state.rs);
+ nine_convert_dsa_state(&device->state.pipe.dsa, device->state.rs);
+ device->state.commit |= NINE_STATE_COMMIT_DSA;
}
static inline void
-update_rasterizer(struct NineDevice9 *device)
+prepare_rasterizer(struct NineDevice9 *device)
{
- nine_convert_rasterizer_state(device->cso, device->state.rs);
+ nine_convert_rasterizer_state(&device->state.pipe.rast, device->state.rs);
+ device->state.commit |= NINE_STATE_COMMIT_RASTERIZER;
}
-/* Loop through VS inputs and pick the vertex elements with the declared
- * usage from the vertex declaration, then insert the instance divisor from
- * the stream source frequency setting.
- */
static void
-update_vertex_elements(struct NineDevice9 *device)
-{
- struct nine_state *state = &device->state;
- const struct NineVertexDeclaration9 *vdecl = device->state.vdecl;
- const struct NineVertexShader9 *vs;
- unsigned n, b, i;
- int index;
- char vdecl_index_map[16]; /* vs->num_inputs <= 16 */
- char used_streams[device->caps.MaxStreams];
- int dummy_vbo_stream = -1;
- BOOL need_dummy_vbo = FALSE;
- struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];
-
- state->stream_usage_mask = 0;
- memset(vdecl_index_map, -1, 16);
- memset(used_streams, 0, device->caps.MaxStreams);
- vs = device->state.vs ? device->state.vs : device->ff.vs;
-
- if (vdecl) {
- for (n = 0; n < vs->num_inputs; ++n) {
- DBG("looking up input %u (usage %u) from vdecl(%p)\n",
- n, vs->input_map[n].ndecl, vdecl);
-
- for (i = 0; i < vdecl->nelems; i++) {
- if (vdecl->usage_map[i] == vs->input_map[n].ndecl) {
- vdecl_index_map[n] = i;
- used_streams[vdecl->elems[i].vertex_buffer_index] = 1;
- break;
- }
- }
- if (vdecl_index_map[n] < 0)
- need_dummy_vbo = TRUE;
- }
- } else {
- /* No vertex declaration. Likely will never happen in practice,
- * but we need not crash on this */
- need_dummy_vbo = TRUE;
- }
-
- if (need_dummy_vbo) {
- for (i = 0; i < device->caps.MaxStreams; i++ ) {
- if (!used_streams[i]) {
- dummy_vbo_stream = i;
- break;
- }
- }
- }
- /* there are less vertex shader inputs than stream slots,
- * so if we need a slot for the dummy vbo, we should have found one */
- assert (!need_dummy_vbo || dummy_vbo_stream != -1);
-
- for (n = 0; n < vs->num_inputs; ++n) {
- index = vdecl_index_map[n];
- if (index >= 0) {
- ve[n] = vdecl->elems[index];
- b = ve[n].vertex_buffer_index;
- state->stream_usage_mask |= 1 << b;
- /* XXX wine just uses 1 here: */
- if (state->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA)
- ve[n].instance_divisor = state->stream_freq[b] & 0x7FFFFF;
- } else {
- /* if the vertex declaration is incomplete compared to what the
- * vertex shader needs, we bind a dummy vbo with 0 0 0 0.
- * This is not precised by the spec, but is the behaviour
- * tested on win */
- ve[n].vertex_buffer_index = dummy_vbo_stream;
- ve[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- ve[n].src_offset = 0;
- ve[n].instance_divisor = 0;
- }
- }
-
- if (state->dummy_vbo_bound_at != dummy_vbo_stream) {
- if (state->dummy_vbo_bound_at >= 0)
- state->changed.vtxbuf |= 1 << state->dummy_vbo_bound_at;
- if (dummy_vbo_stream >= 0) {
- state->changed.vtxbuf |= 1 << dummy_vbo_stream;
- state->vbo_bound_done = FALSE;
- }
- state->dummy_vbo_bound_at = dummy_vbo_stream;
- }
-
- cso_set_vertex_elements(device->cso, vs->num_inputs, ve);
-
- state->changed.stream_freq = 0;
-}
-
-static inline uint32_t
-update_shader_variant_keys(struct NineDevice9 *device)
-{
- struct nine_state *state = &device->state;
- uint32_t mask = 0;
- uint32_t vs_key = state->samplers_shadow;
- uint32_t ps_key = state->samplers_shadow;
-
- vs_key = (vs_key & NINE_VS_SAMPLERS_MASK) >> NINE_SAMPLER_VS(0);
- ps_key = (ps_key & NINE_PS_SAMPLERS_MASK) >> NINE_SAMPLER_PS(0);
-
- if (state->vs) vs_key &= state->vs->sampler_mask;
- if (state->ps) {
- if (unlikely(state->ps->byte_code.version < 0x20)) {
- /* no depth textures, but variable targets */
- uint32_t m = state->ps->sampler_mask;
- ps_key = 0;
- while (m) {
- int s = ffs(m) - 1;
- m &= ~(1 << s);
- ps_key |= (state->texture[s] ? state->texture[s]->pstype : 1) << (s * 2);
- }
- } else {
- ps_key &= state->ps->sampler_mask;
- }
- }
-
- if (state->vs && state->vs_key != vs_key) {
- state->vs_key = vs_key;
- mask |= NINE_STATE_VS;
- }
- if (state->ps && state->ps_key != ps_key) {
- state->ps_key = ps_key;
- mask |= NINE_STATE_PS;
- }
- return mask;
-}
-
-static inline uint32_t
-update_vs(struct NineDevice9 *device)
-{
- struct nine_state *state = &device->state;
- struct NineVertexShader9 *vs = state->vs;
- uint32_t changed_group = 0;
-
- /* likely because we dislike FF */
- if (likely(vs)) {
- state->cso.vs = NineVertexShader9_GetVariant(vs, state->vs_key);
- } else {
- vs = device->ff.vs;
- state->cso.vs = vs->variant.cso;
- }
- device->pipe->bind_vs_state(device->pipe, state->cso.vs);
-
- if (state->rs[NINED3DRS_VSPOINTSIZE] != vs->point_size) {
- state->rs[NINED3DRS_VSPOINTSIZE] = vs->point_size;
- changed_group |= NINE_STATE_RASTERIZER;
- }
-
- if ((state->bound_samplers_mask_vs & vs->sampler_mask) != vs->sampler_mask)
- /* Bound dummy sampler. */
- changed_group |= NINE_STATE_SAMPLER;
- return changed_group;
-}
-
-static inline uint32_t
-update_ps(struct NineDevice9 *device)
-{
- struct nine_state *state = &device->state;
- struct NinePixelShader9 *ps = state->ps;
- uint32_t changed_group = 0;
-
- if (likely(ps)) {
- state->cso.ps = NinePixelShader9_GetVariant(ps, state->ps_key);
- } else {
- ps = device->ff.ps;
- state->cso.ps = ps->variant.cso;
- }
- device->pipe->bind_fs_state(device->pipe, state->cso.ps);
-
- if ((state->bound_samplers_mask_ps & ps->sampler_mask) != ps->sampler_mask)
- /* Bound dummy sampler. */
- changed_group |= NINE_STATE_SAMPLER;
- return changed_group;
-}
+prepare_ps_constants_userbuf(struct NineDevice9 *device);
#define DO_UPLOAD_CONST_F(buf,p,c,d) \
do { \
@@ -391,7 +75,7 @@ update_ps(struct NineDevice9 *device)
/* OK, this is a bit ugly ... */
static void
-update_constants(struct NineDevice9 *device, unsigned shader_type)
+upload_constants(struct NineDevice9 *device, unsigned shader_type)
{
struct pipe_context *pipe = device->pipe;
struct pipe_resource *buf;
@@ -438,10 +122,17 @@ update_constants(struct NineDevice9 *device, unsigned shader_type)
lconstf_ranges = device->state.vs->lconstf.ranges;
lconstf_data = device->state.vs->lconstf.data;
- device->state.ff.clobber.vs_const = TRUE;
device->state.changed.group &= ~NINE_STATE_VS_CONST;
} else {
DBG("PS\n");
+ /* features only implemented on the userbuf path */
+ if (device->state.ps->bumpenvmat_needed || (
+ device->state.ps->byte_code.version < 0x30 &&
+ device->state.rs[D3DRS_FOGENABLE])) {
+ device->prefer_user_constbuf = TRUE;
+ prepare_ps_constants_userbuf(device);
+ return;
+ }
buf = device->constbuf_ps;
const_f = device->state.ps_const_f;
@@ -464,7 +155,6 @@ update_constants(struct NineDevice9 *device, unsigned shader_type)
lconstf_ranges = NULL;
lconstf_data = NULL;
- device->state.ff.clobber.ps_const = TRUE;
device->state.changed.group &= ~NINE_STATE_PS_CONST;
}
@@ -524,10 +214,9 @@ update_constants(struct NineDevice9 *device, unsigned shader_type)
}
static void
-update_vs_constants_userbuf(struct NineDevice9 *device)
+prepare_vs_constants_userbuf(struct NineDevice9 *device)
{
struct nine_state *state = &device->state;
- struct pipe_context *pipe = device->pipe;
struct pipe_constant_buffer cb;
cb.buffer = NULL;
cb.buffer_offset = 0;
@@ -567,7 +256,18 @@ update_vs_constants_userbuf(struct NineDevice9 *device)
cb.user_buffer = dst;
}
- pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &cb);
+ if (!device->driver_caps.user_cbufs) {
+ u_upload_data(device->constbuf_uploader,
+ 0,
+ cb.buffer_size,
+ cb.user_buffer,
+ &cb.buffer_offset,
+ &cb.buffer);
+ u_upload_unmap(device->constbuf_uploader);
+ cb.user_buffer = NULL;
+ }
+
+ state->pipe.cb_vs = cb;
if (device->state.changed.vs_const_f) {
struct nine_range *r = device->state.changed.vs_const_f;
@@ -578,22 +278,19 @@ update_vs_constants_userbuf(struct NineDevice9 *device)
device->state.changed.vs_const_f = NULL;
}
state->changed.group &= ~NINE_STATE_VS_CONST;
+ state->commit |= NINE_STATE_COMMIT_CONST_VS;
}
static void
-update_ps_constants_userbuf(struct NineDevice9 *device)
+prepare_ps_constants_userbuf(struct NineDevice9 *device)
{
struct nine_state *state = &device->state;
- struct pipe_context *pipe = device->pipe;
struct pipe_constant_buffer cb;
cb.buffer = NULL;
cb.buffer_offset = 0;
cb.buffer_size = device->state.ps->const_used_size;
cb.user_buffer = device->state.ps_const_f;
- if (!cb.buffer_size)
- return;
-
if (state->changed.ps_const_i) {
int *idst = (int *)&state->ps_const_f[4 * device->max_ps_const_f];
memcpy(idst, state->ps_const_i, sizeof(state->ps_const_i));
@@ -606,7 +303,47 @@ update_ps_constants_userbuf(struct NineDevice9 *device)
state->changed.ps_const_b = 0;
}
- pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &cb);
+ /* Upload special constants needed to implement PS1.x instructions like TEXBEM,TEXBEML and BEM */
+ if (device->state.ps->bumpenvmat_needed) {
+ memcpy(device->state.ps_lconstf_temp, cb.user_buffer, cb.buffer_size);
+ memcpy(&device->state.ps_lconstf_temp[4 * 8], &device->state.bumpmap_vars, sizeof(device->state.bumpmap_vars));
+
+ cb.user_buffer = device->state.ps_lconstf_temp;
+ }
+
+ if (state->ps->byte_code.version < 0x30 &&
+ state->rs[D3DRS_FOGENABLE]) {
+ float *dst = &state->ps_lconstf_temp[4 * 32];
+ if (cb.user_buffer != state->ps_lconstf_temp) {
+ memcpy(state->ps_lconstf_temp, cb.user_buffer, cb.buffer_size);
+ cb.user_buffer = state->ps_lconstf_temp;
+ }
+
+ d3dcolor_to_rgba(dst, state->rs[D3DRS_FOGCOLOR]);
+ if (state->rs[D3DRS_FOGTABLEMODE] == D3DFOG_LINEAR) {
+ dst[4] = asfloat(state->rs[D3DRS_FOGEND]);
+ dst[5] = 1.0f / (asfloat(state->rs[D3DRS_FOGEND]) - asfloat(state->rs[D3DRS_FOGSTART]));
+ } else if (state->rs[D3DRS_FOGTABLEMODE] != D3DFOG_NONE) {
+ dst[4] = asfloat(state->rs[D3DRS_FOGDENSITY]);
+ }
+ cb.buffer_size = 4 * 4 * 34;
+ }
+
+ if (!cb.buffer_size)
+ return;
+
+ if (!device->driver_caps.user_cbufs) {
+ u_upload_data(device->constbuf_uploader,
+ 0,
+ cb.buffer_size,
+ cb.user_buffer,
+ &cb.buffer_offset,
+ &cb.buffer);
+ u_upload_unmap(device->constbuf_uploader);
+ cb.user_buffer = NULL;
+ }
+
+ state->pipe.cb_ps = cb;
if (device->state.changed.ps_const_f) {
struct nine_range *r = device->state.changed.ps_const_f;
@@ -617,6 +354,286 @@ update_ps_constants_userbuf(struct NineDevice9 *device)
device->state.changed.ps_const_f = NULL;
}
state->changed.group &= ~NINE_STATE_PS_CONST;
+ state->commit |= NINE_STATE_COMMIT_CONST_PS;
+}
+/* Choose the vertex shader cso to bind and report derived dirty groups.
+ * shader_changed: non-zero forces re-selection even when the key is unchanged.
+ * When a vertex shader is set, NineVertexShader9_UpdateKey() is called and
+ * its result is treated as "key changed"; if neither it nor shader_changed
+ * is set, nothing else happens and 0 is returned.
+ * Otherwise state->cso.vs is taken from NineVertexShader9_GetVariant(), or
+ * from device->ff.vs->ff_cso when no shader is set, and
+ * NINE_STATE_COMMIT_VS is added to state->commit.
+ * Returns a mask that may contain NINE_STATE_RASTERIZER (point size flag
+ * changed) and/or NINE_STATE_SAMPLER (shader samples a slot not in
+ * bound_samplers_mask_vs). */
+static inline uint32_t
+prepare_vs(struct NineDevice9 *device, uint8_t shader_changed)
+{
+ struct nine_state *state = &device->state;
+ struct NineVertexShader9 *vs = state->vs;
+ uint32_t changed_group = 0;
+ int has_key_changed = 0;
+
+ if (likely(vs))
+ has_key_changed = NineVertexShader9_UpdateKey(vs, state);
+
+ if (!shader_changed && !has_key_changed)
+ return 0;
+
+ /* likely because we dislike FF */
+ if (likely(vs)) {
+ state->cso.vs = NineVertexShader9_GetVariant(vs);
+ } else {
+ vs = device->ff.vs;
+ state->cso.vs = vs->ff_cso;
+ }
+
+ if (state->rs[NINED3DRS_VSPOINTSIZE] != vs->point_size) {
+ state->rs[NINED3DRS_VSPOINTSIZE] = vs->point_size;
+ changed_group |= NINE_STATE_RASTERIZER;
+ }
+
+ if ((state->bound_samplers_mask_vs & vs->sampler_mask) != vs->sampler_mask)
+ /* Bound dummy sampler. */
+ changed_group |= NINE_STATE_SAMPLER;
+
+ state->commit |= NINE_STATE_COMMIT_VS;
+ return changed_group;
+}
+/* Choose the pixel shader cso to bind and report derived dirty groups.
+ * shader_changed: non-zero forces re-selection even when the key is unchanged.
+ * When a pixel shader is set, NinePixelShader9_UpdateKey() is called and its
+ * result is treated as "key changed"; if neither it nor shader_changed is
+ * set, nothing else happens and 0 is returned.
+ * Otherwise state->cso.ps is taken from NinePixelShader9_GetVariant(), or
+ * from device->ff.ps->ff_cso when no shader is set, and
+ * NINE_STATE_COMMIT_PS is added to state->commit.
+ * Returns NINE_STATE_SAMPLER when the shader samples a slot that is not in
+ * bound_samplers_mask_ps, 0 otherwise. */
+static inline uint32_t
+prepare_ps(struct NineDevice9 *device, uint8_t shader_changed)
+{
+ struct nine_state *state = &device->state;
+ struct NinePixelShader9 *ps = state->ps;
+ uint32_t changed_group = 0;
+ int has_key_changed = 0;
+
+ if (likely(ps))
+ has_key_changed = NinePixelShader9_UpdateKey(ps, state);
+
+ if (!shader_changed && !has_key_changed)
+ return 0;
+
+ if (likely(ps)) {
+ state->cso.ps = NinePixelShader9_GetVariant(ps);
+ } else {
+ ps = device->ff.ps;
+ state->cso.ps = ps->ff_cso;
+ }
+
+ if ((state->bound_samplers_mask_ps & ps->sampler_mask) != ps->sampler_mask)
+ /* Bound dummy sampler. */
+ changed_group |= NINE_STATE_SAMPLER;
+
+ state->commit |= NINE_STATE_COMMIT_PS;
+ return changed_group;
+}
+
+/* State preparation incremental */
+
+/* State preparation + State commit */
+/* Rebuild device->state.fb from the bound render targets and depth/stencil
+ * surface, then pass it to pipe->set_framebuffer_state().
+ * The reference size and sample count come from rt[0] (dereferenced
+ * unconditionally), or from the depth buffer when rt[0] is D3DFMT_NULL.
+ * A render target slot gets a NULL cbuf when it is empty, is D3DFMT_NULL, is
+ * not in the pixel shader's rt_mask, or does not match the reference
+ * size/sample count; state->rt_mask records the slots that were bound.
+ * Returns state->changed.group. */
+static uint32_t
+update_framebuffer(struct NineDevice9 *device)
+{
+ struct pipe_context *pipe = device->pipe;
+ struct nine_state *state = &device->state;
+ struct pipe_framebuffer_state *fb = &device->state.fb;
+ unsigned i;
+ struct NineSurface9 *rt0 = state->rt[0];
+ unsigned w = rt0->desc.Width;
+ unsigned h = rt0->desc.Height;
+ D3DMULTISAMPLE_TYPE nr_samples = rt0->desc.MultiSampleType;
+ unsigned mask = state->ps ? state->ps->rt_mask : 1;
+ const int sRGB = state->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0;
+
+ DBG("\n");
+
+ state->rt_mask = 0x0;
+ fb->nr_cbufs = 0;
+
+ /* all render targets must have the same size and the depth buffer must be
+ * bigger. Multisample has to match, according to spec. But some apps do
+ * things wrong there, and no error is returned. The behaviour they get
+ * apparently is that depth buffer is disabled if it doesn't match.
+ * Surely the same for render targets. */
+
+ /* Special case: D3DFMT_NULL is used to bind no real render target,
+ * but render to depth buffer. We have to not take into account the render
+ * target info. TODO: know what should happen when there are several render targets
+ * and the first one is D3DFMT_NULL */
+ if (rt0->desc.Format == D3DFMT_NULL && state->ds) {
+ w = state->ds->desc.Width;
+ h = state->ds->desc.Height;
+ nr_samples = state->ds->desc.MultiSampleType;
+ }
+
+ for (i = 0; i < device->caps.NumSimultaneousRTs; ++i) {
+ struct NineSurface9 *rt = state->rt[i];
+
+ if (rt && rt->desc.Format != D3DFMT_NULL && (mask & (1 << i)) &&
+ rt->desc.Width == w && rt->desc.Height == h &&
+ rt->desc.MultiSampleType == nr_samples) {
+ fb->cbufs[i] = NineSurface9_GetSurface(rt, sRGB);
+ state->rt_mask |= 1 << i;
+ fb->nr_cbufs = i + 1;
+
+ if (unlikely(rt->desc.Usage & D3DUSAGE_AUTOGENMIPMAP)) {
+ assert(rt->texture == D3DRTYPE_TEXTURE ||
+ rt->texture == D3DRTYPE_CUBETEXTURE);
+ NineBaseTexture9(rt->base.base.container)->dirty_mip = TRUE;
+ }
+ } else {
+ /* Color outputs must match RT slot,
+ * drivers will have to handle NULL entries for GL, too.
+ */
+ fb->cbufs[i] = NULL;
+ }
+ }
+
+ if (state->ds && state->ds->desc.Width >= w &&
+ state->ds->desc.Height >= h &&
+ state->ds->desc.MultiSampleType == nr_samples) {
+ fb->zsbuf = NineSurface9_GetSurface(state->ds, 0);
+ } else {
+ fb->zsbuf = NULL;
+ }
+
+ fb->width = w;
+ fb->height = h;
+
+ pipe->set_framebuffer_state(pipe, fb); /* XXX: cso ? */
+
+ return state->changed.group;
+}
+/* Convert the D3D viewport in device->state.viewport into a
+ * pipe_viewport_state (scale and translate vectors, with the Y scale
+ * negated) and set it as viewport 0 through pipe->set_viewport_states().
+ * When device->driver_bugs.buggy_barycentrics is set and both viewport
+ * dimensions pass the power-of-two bit test, X and Y translation are
+ * shifted by -1/128. */
+static void
+update_viewport(struct NineDevice9 *device)
+{
+ struct pipe_context *pipe = device->pipe;
+ const D3DVIEWPORT9 *vport = &device->state.viewport;
+ struct pipe_viewport_state pvport;
+
+ /* D3D coordinates are:
+ * -1 .. +1 for X,Y and
+ * 0 .. +1 for Z (we use pipe_rasterizer_state.clip_halfz)
+ */
+ pvport.scale[0] = (float)vport->Width * 0.5f;
+ pvport.scale[1] = (float)vport->Height * -0.5f;
+ pvport.scale[2] = vport->MaxZ - vport->MinZ;
+ pvport.translate[0] = (float)vport->Width * 0.5f + (float)vport->X;
+ pvport.translate[1] = (float)vport->Height * 0.5f + (float)vport->Y;
+ pvport.translate[2] = vport->MinZ;
+
+ /* We found R600 and SI cards have some imprecision
+ * on the barycentric coordinates used for interpolation.
+ * Some shaders rely on having something precise.
+ * We found that the proprietary driver has the imprecision issue,
+ * except when the render target width and height are powers of two.
+ * It is using some sort of workaround for these cases
+ * which covers likely all the cases the applications rely
+ * on something precise.
+ * We haven't found the workaround, but it seems like it's better
+ * for applications if the imprecision is biased towards infinity
+ * instead of -infinity (which is what we measured). So shift slightly
+ * the viewport: not enough to change rasterization result (in particular
+ * for multisampling), but enough to make the imprecision biased
+ * towards infinity. We do this shift only if render target width and
+ * height are powers of two.
+ * Solves 'red shadows' bug on UE3 games.
+ */
+ if (device->driver_bugs.buggy_barycentrics &&
+ ((vport->Width & (vport->Width-1)) == 0) &&
+ ((vport->Height & (vport->Height-1)) == 0)) {
+ pvport.translate[0] -= 1.0f / 128.0f;
+ pvport.translate[1] -= 1.0f / 128.0f;
+ }
+
+ pipe->set_viewport_states(pipe, 0, 1, &pvport);
+}
+
+/* Loop through VS inputs and pick the vertex elements with the declared
+ * usage from the vertex declaration, then insert the instance divisor from
+ * the stream source frequency setting.
+ * Inputs with no matching declaration element are pointed at a dummy vertex
+ * buffer placed on the first stream no matched element uses. The resulting
+ * elements are set through cso_set_vertex_elements(), and
+ * state->stream_usage_mask, state->dummy_vbo_bound_at and
+ * state->changed.vtxbuf / stream_freq are updated accordingly.
+ */
+static void
+update_vertex_elements(struct NineDevice9 *device)
+{
+ struct nine_state *state = &device->state;
+ const struct NineVertexDeclaration9 *vdecl = device->state.vdecl;
+ const struct NineVertexShader9 *vs;
+ unsigned n, b, i;
+ int index;
+ char vdecl_index_map[16]; /* vs->num_inputs <= 16 */
+ char used_streams[device->caps.MaxStreams];
+ int dummy_vbo_stream = -1;
+ BOOL need_dummy_vbo = FALSE;
+ struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];
+
+ state->stream_usage_mask = 0;
+ memset(vdecl_index_map, -1, 16);
+ memset(used_streams, 0, device->caps.MaxStreams);
+ vs = device->state.vs ? device->state.vs : device->ff.vs;
+
+ if (vdecl) {
+ for (n = 0; n < vs->num_inputs; ++n) {
+ DBG("looking up input %u (usage %u) from vdecl(%p)\n",
+ n, vs->input_map[n].ndecl, vdecl);
+
+ for (i = 0; i < vdecl->nelems; i++) {
+ if (vdecl->usage_map[i] == vs->input_map[n].ndecl) {
+ vdecl_index_map[n] = i;
+ used_streams[vdecl->elems[i].vertex_buffer_index] = 1;
+ break;
+ }
+ }
+ if (vdecl_index_map[n] < 0)
+ need_dummy_vbo = TRUE;
+ }
+ } else {
+ /* No vertex declaration. Likely will never happen in practice,
+ * but we need not crash on this */
+ need_dummy_vbo = TRUE;
+ }
+
+ if (need_dummy_vbo) {
+ for (i = 0; i < device->caps.MaxStreams; i++ ) {
+ if (!used_streams[i]) {
+ dummy_vbo_stream = i;
+ break;
+ }
+ }
+ }
+ /* there are fewer vertex shader inputs than stream slots,
+ * so if we need a slot for the dummy vbo, we should have found one */
+ assert (!need_dummy_vbo || dummy_vbo_stream != -1);
+
+ for (n = 0; n < vs->num_inputs; ++n) {
+ index = vdecl_index_map[n];
+ if (index >= 0) {
+ ve[n] = vdecl->elems[index];
+ b = ve[n].vertex_buffer_index;
+ state->stream_usage_mask |= 1 << b;
+ /* XXX wine just uses 1 here: */
+ if (state->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA)
+ ve[n].instance_divisor = state->stream_freq[b] & 0x7FFFFF;
+ } else {
+ /* if the vertex declaration is incomplete compared to what the
+ * vertex shader needs, we bind a dummy vbo with 0 0 0 0.
+ * This is not specified by the spec, but is the behaviour
+ * tested on win */
+ ve[n].vertex_buffer_index = dummy_vbo_stream;
+ ve[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ ve[n].src_offset = 0;
+ ve[n].instance_divisor = 0;
+ }
+ }
+
+ if (state->dummy_vbo_bound_at != dummy_vbo_stream) {
+ if (state->dummy_vbo_bound_at >= 0)
+ state->changed.vtxbuf |= 1 << state->dummy_vbo_bound_at;
+ if (dummy_vbo_stream >= 0) {
+ state->changed.vtxbuf |= 1 << dummy_vbo_stream;
+ state->vbo_bound_done = FALSE;
+ }
+ state->dummy_vbo_bound_at = dummy_vbo_stream;
+ }
+
+ cso_set_vertex_elements(device->cso, vs->num_inputs, ve);
+
+ state->changed.stream_freq = 0;
}
static void
@@ -627,7 +644,6 @@ update_vertex_buffers(struct NineDevice9 *device)
struct pipe_vertex_buffer dummy_vtxbuf;
uint32_t mask = state->changed.vtxbuf;
unsigned i;
- unsigned start;
DBG("mask=%x\n", mask);
@@ -656,27 +672,6 @@ update_vertex_buffers(struct NineDevice9 *device)
state->changed.vtxbuf = 0;
}
-static inline void
-update_index_buffer(struct NineDevice9 *device)
-{
- struct pipe_context *pipe = device->pipe;
- if (device->state.idxbuf)
- pipe->set_index_buffer(pipe, &device->state.idxbuf->buffer);
- else
- pipe->set_index_buffer(pipe, NULL);
-}
-
-/* TODO: only go through dirty textures */
-static void
-validate_textures(struct NineDevice9 *device)
-{
- struct NineBaseTexture9 *tex, *ptr;
- LIST_FOR_EACH_ENTRY_SAFE(tex, ptr, &device->update_textures, list) {
- list_delinit(&tex->list);
- NineBaseTexture9_Validate(tex);
- }
-}
-
static inline boolean
update_sampler_derived(struct nine_state *state, unsigned s)
{
@@ -706,20 +701,16 @@ update_sampler_derived(struct nine_state *state, unsigned s)
static void
update_textures_and_samplers(struct NineDevice9 *device)
{
- struct pipe_context *pipe = device->pipe;
struct nine_state *state = &device->state;
struct pipe_sampler_view *view[NINE_MAX_SAMPLERS];
- struct pipe_sampler_state samp;
unsigned num_textures;
unsigned i;
- boolean commit_views;
boolean commit_samplers;
uint16_t sampler_mask = state->ps ? state->ps->sampler_mask :
device->ff.ps->sampler_mask;
/* TODO: Can we reduce iterations here ? */
- commit_views = FALSE;
commit_samplers = FALSE;
state->bound_samplers_mask_ps = 0;
for (num_textures = 0, i = 0; i < NINE_MAX_SAMPLERS_PS; ++i) {
@@ -749,26 +740,12 @@ update_textures_and_samplers(struct NineDevice9 *device)
* unbind dummy sampler directly when they are not needed
* anymore, but they're going to be removed as long as texture
* or sampler states are changed. */
- view[i] = device->dummy_sampler;
+ view[i] = device->dummy_sampler_view;
num_textures = i + 1;
- memset(&samp, 0, sizeof(samp));
- samp.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
- samp.max_lod = 15.0f;
- samp.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
- samp.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
- samp.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
- samp.min_img_filter = PIPE_TEX_FILTER_NEAREST;
- samp.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
- samp.compare_mode = PIPE_TEX_COMPARE_NONE;
- samp.compare_func = PIPE_FUNC_LEQUAL;
- samp.normalized_coords = 1;
- samp.seamless_cube_map = 1;
-
cso_single_sampler(device->cso, PIPE_SHADER_FRAGMENT,
- s - NINE_SAMPLER_PS(0), &samp);
+ s - NINE_SAMPLER_PS(0), &device->dummy_sampler_state);
- commit_views = TRUE;
commit_samplers = TRUE;
state->changed.sampler[s] = ~0;
}
@@ -776,16 +753,11 @@ update_textures_and_samplers(struct NineDevice9 *device)
state->bound_samplers_mask_ps |= (1 << s);
}
- commit_views |= (state->changed.texture & NINE_PS_SAMPLERS_MASK) != 0;
- commit_views |= state->changed.srgb;
- if (commit_views)
- pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0,
- num_textures, view);
+ cso_set_sampler_views(device->cso, PIPE_SHADER_FRAGMENT, num_textures, view);
if (commit_samplers)
cso_single_sampler_done(device->cso, PIPE_SHADER_FRAGMENT);
- commit_views = FALSE;
commit_samplers = FALSE;
sampler_mask = state->vs ? state->vs->sampler_mask : 0;
state->bound_samplers_mask_vs = 0;
@@ -816,76 +788,170 @@ update_textures_and_samplers(struct NineDevice9 *device)
* unbind dummy sampler directly when they are not needed
* anymore, but they're going to be removed as long as texture
* or sampler states are changed. */
- view[i] = device->dummy_sampler;
+ view[i] = device->dummy_sampler_view;
num_textures = i + 1;
- memset(&samp, 0, sizeof(samp));
- samp.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
- samp.max_lod = 15.0f;
- samp.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
- samp.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
- samp.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
- samp.min_img_filter = PIPE_TEX_FILTER_NEAREST;
- samp.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
- samp.compare_mode = PIPE_TEX_COMPARE_NONE;
- samp.compare_func = PIPE_FUNC_LEQUAL;
- samp.normalized_coords = 1;
- samp.seamless_cube_map = 1;
-
cso_single_sampler(device->cso, PIPE_SHADER_VERTEX,
- s - NINE_SAMPLER_VS(0), &samp);
+ s - NINE_SAMPLER_VS(0), &device->dummy_sampler_state);
- commit_views = TRUE;
commit_samplers = TRUE;
state->changed.sampler[s] = ~0;
}
state->bound_samplers_mask_vs |= (1 << s);
}
- commit_views |= (state->changed.texture & NINE_VS_SAMPLERS_MASK) != 0;
- commit_views |= state->changed.srgb;
- if (commit_views)
- pipe->set_sampler_views(pipe, PIPE_SHADER_VERTEX, 0,
- num_textures, view);
+
+ cso_set_sampler_views(device->cso, PIPE_SHADER_VERTEX, num_textures, view);
if (commit_samplers)
cso_single_sampler_done(device->cso, PIPE_SHADER_VERTEX);
- state->changed.srgb = FALSE;
state->changed.texture = 0;
}
+/* State commit only */
+/* Pass the blend state held in device->state.pipe.blend to cso_set_blend(). */
+static inline void
+commit_blend(struct NineDevice9 *device)
+{
+ cso_set_blend(device->cso, &device->state.pipe.blend);
+}
+/* Pass the depth/stencil/alpha state held in device->state.pipe.dsa to
+ * cso_set_depth_stencil_alpha(). */
+static inline void
+commit_dsa(struct NineDevice9 *device)
+{
+ cso_set_depth_stencil_alpha(device->cso, &device->state.pipe.dsa);
+}
+/* Set device->state.scissor as scissor 0 directly on the pipe context
+ * (the cso context is not involved here). */
+static inline void
+commit_scissor(struct NineDevice9 *device)
+{
+ struct pipe_context *pipe = device->pipe;
+
+ pipe->set_scissor_states(pipe, 0, 1, &device->state.scissor);
+}
+/* Pass the rasterizer state held in device->state.pipe.rast to
+ * cso_set_rasterizer(). */
+static inline void
+commit_rasterizer(struct NineDevice9 *device)
+{
+ cso_set_rasterizer(device->cso, &device->state.pipe.rast);
+}
-#define NINE_STATE_FREQ_GROUP_0 \
- (NINE_STATE_FB | \
- NINE_STATE_VIEWPORT | \
- NINE_STATE_SCISSOR | \
- NINE_STATE_BLEND | \
- NINE_STATE_DSA | \
- NINE_STATE_RASTERIZER | \
- NINE_STATE_VS | \
- NINE_STATE_PS | \
- NINE_STATE_BLEND_COLOR | \
- NINE_STATE_STENCIL_REF | \
+static inline void
+commit_index_buffer(struct NineDevice9 *device)
+{
+ struct pipe_context *pipe = device->pipe;
+ if (device->state.idxbuf) /* an index buffer is set: hand its pipe buffer to the driver */
+ pipe->set_index_buffer(pipe, &device->state.idxbuf->buffer);
+ else /* none set: pass NULL instead */
+ pipe->set_index_buffer(pipe, NULL);
+}
+/* Set vertex shader constant buffer slot 0 on the pipe context:
+ * state.pipe.cb_vs_ff when no vertex shader is set, state.pipe.cb_vs
+ * otherwise. */
+static inline void
+commit_vs_constants(struct NineDevice9 *device)
+{
+ struct pipe_context *pipe = device->pipe;
+
+ if (unlikely(!device->state.vs))
+ pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs_ff);
+ else
+ pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs);
+}
+/* Set fragment shader constant buffer slot 0 on the pipe context:
+ * state.pipe.cb_ps_ff when no pixel shader is set, state.pipe.cb_ps
+ * otherwise. */
+static inline void
+commit_ps_constants(struct NineDevice9 *device)
+{
+ struct pipe_context *pipe = device->pipe;
+
+ if (unlikely(!device->state.ps))
+ pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &device->state.pipe.cb_ps_ff);
+ else
+ pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &device->state.pipe.cb_ps);
+}
+/* Bind the vertex shader cso recorded in state->cso.vs (written by
+ * prepare_vs()) on the pipe context. */
+static inline void
+commit_vs(struct NineDevice9 *device)
+{
+ struct nine_state *state = &device->state;
+
+ device->pipe->bind_vs_state(device->pipe, state->cso.vs);
+}
+
+/* Bind the pixel shader cso recorded in state->cso.ps (written by
+ * prepare_ps()) as the fragment shader on the pipe context. */
+static inline void
+commit_ps(struct NineDevice9 *device)
+{
+ struct nine_state *state = &device->state;
+
+ device->pipe->bind_fs_state(device->pipe, state->cso.ps);
+}
+/* State Update */
+/* Dirty groups for which nine_update_state() calls prepare_vs(). */
+#define NINE_STATE_SHADER_CHANGE_VS \
+ (NINE_STATE_VS | \
+ NINE_STATE_TEXTURE | \
+ NINE_STATE_FOG_SHADER)
+/* Dirty groups for which nine_update_state() calls prepare_ps(). */
+#define NINE_STATE_SHADER_CHANGE_PS \
+ (NINE_STATE_PS | \
+ NINE_STATE_TEXTURE | \
+ NINE_STATE_FOG_SHADER | \
+ NINE_STATE_PS1X_SHADER)
+/* Rasterizer, texture, sampler and shader-constant dirty bits as one mask.
+ * NOTE(review): presumably the groups expected to change most often; the
+ * place where this mask is tested is not visible here - confirm. */
+#define NINE_STATE_FREQUENT \
+ (NINE_STATE_RASTERIZER | \
+ NINE_STATE_TEXTURE | \
+ NINE_STATE_SAMPLER | \
+ NINE_STATE_VS_CONST | \
+ NINE_STATE_PS_CONST)
+/* Tested by nine_update_state() (together with NINE_STATE_VS) before the
+ * framebuffer, blend, DSA, viewport and vertex element handling. */
+#define NINE_STATE_COMMON \
+ (NINE_STATE_FB | \
+ NINE_STATE_BLEND | \
+ NINE_STATE_DSA | \
+ NINE_STATE_VIEWPORT | \
+ NINE_STATE_VDECL | \
+ NINE_STATE_IDXBUF)
+/* Scissor, blend color, stencil ref and sample mask dirty bits as one mask.
+ * NOTE(review): presumably the least frequently changing groups; the place
+ * where this mask is tested is not visible here - confirm. */
+#define NINE_STATE_RARE \
+ (NINE_STATE_SCISSOR | \
+ NINE_STATE_BLEND_COLOR | \
+ NINE_STATE_STENCIL_REF | \
NINE_STATE_SAMPLE_MASK)
-#define NINE_STATE_FREQ_GROUP_1 ~NINE_STATE_FREQ_GROUP_0
-#define NINE_STATE_SHADER_VARIANT_GROUP \
- (NINE_STATE_TEXTURE | \
- NINE_STATE_VS | \
- NINE_STATE_PS)
+/* Run NineBaseTexture9_Validate() on every texture queued on
+ * device->update_textures, unlinking each entry from the list first (the
+ * _SAFE iterator is used because entries are removed while iterating).
+ * TODO: only go through dirty textures */
+static void
+validate_textures(struct NineDevice9 *device)
+{
+ struct NineBaseTexture9 *tex, *ptr;
+ LIST_FOR_EACH_ENTRY_SAFE(tex, ptr, &device->update_textures, list) {
+ list_delinit(&tex->list);
+ NineBaseTexture9_Validate(tex);
+ }
+}
+/* Framebuffer-only state update: validate queued textures, then call
+ * update_framebuffer() if NINE_STATE_FB is dirty (its return value is
+ * ignored). NINE_STATE_FB is cleared from state->changed.group afterwards
+ * in either case. */
+void
+nine_update_state_framebuffer(struct NineDevice9 *device)
+{
+ struct nine_state *state = &device->state;
+
+ validate_textures(device);
+
+ if (state->changed.group & NINE_STATE_FB)
+ update_framebuffer(device);
+
+ state->changed.group &= ~NINE_STATE_FB;
+}
boolean
-nine_update_state(struct NineDevice9 *device, uint32_t mask)
+nine_update_state(struct NineDevice9 *device)
{
struct pipe_context *pipe = device->pipe;
struct nine_state *state = &device->state;
uint32_t group;
- DBG("changed state groups: %x | %x\n",
- state->changed.group & NINE_STATE_FREQ_GROUP_0,
- state->changed.group & NINE_STATE_FREQ_GROUP_1);
+ DBG("changed state groups: %x\n", state->changed.group);
/* NOTE: We may want to use the cso cache for everything, or let
* NineDevice9.RestoreNonCSOState actually set the states, then we wouldn't
@@ -896,35 +962,79 @@ nine_update_state(struct NineDevice9 *device, uint32_t mask)
validate_textures(device); /* may clobber state */
/* ff_update may change VS/PS dirty bits */
- if ((mask & NINE_STATE_FF) && unlikely(!state->vs || !state->ps))
+ if (unlikely(!state->vs || !state->ps))
nine_ff_update(device);
- group = state->changed.group & mask;
+ group = state->changed.group;
- if (group & NINE_STATE_SHADER_VARIANT_GROUP)
- group |= update_shader_variant_keys(device);
+ if (group & (NINE_STATE_SHADER_CHANGE_VS | NINE_STATE_SHADER_CHANGE_PS)) {
+ if (group & NINE_STATE_SHADER_CHANGE_VS)
+ group |= prepare_vs(device, (group & NINE_STATE_VS) != 0); /* may set NINE_STATE_RASTERIZER and NINE_STATE_SAMPLER*/
+ if (group & NINE_STATE_SHADER_CHANGE_PS)
+ group |= prepare_ps(device, (group & NINE_STATE_PS) != 0);
+ }
- if (group & NINE_STATE_FREQ_GROUP_0) {
+ if (group & (NINE_STATE_COMMON | NINE_STATE_VS)) {
if (group & NINE_STATE_FB)
- group = update_framebuffer(device) & mask;
+ group |= update_framebuffer(device); /* may set NINE_STATE_RASTERIZER */
+ if (group & NINE_STATE_BLEND)
+ prepare_blend(device);
+ if (group & NINE_STATE_DSA)
+ prepare_dsa(device);
if (group & NINE_STATE_VIEWPORT)
update_viewport(device);
- if (group & NINE_STATE_SCISSOR)
- update_scissor(device);
-
- if (group & NINE_STATE_DSA)
- update_dsa(device);
- if (group & NINE_STATE_BLEND)
- update_blend(device);
-
- if (group & NINE_STATE_VS)
- group |= update_vs(device);
+ if ((group & (NINE_STATE_VDECL | NINE_STATE_VS)) ||
+ state->changed.stream_freq & ~1)
+ update_vertex_elements(device);
+ if (group & NINE_STATE_IDXBUF)
+ commit_index_buffer(device);
+ }
+ if (likely(group & (NINE_STATE_FREQUENT | NINE_STATE_VS | NINE_STATE_PS))) {
if (group & NINE_STATE_RASTERIZER)
- update_rasterizer(device);
+ prepare_rasterizer(device);
+ if (group & (NINE_STATE_TEXTURE | NINE_STATE_SAMPLER))
+ update_textures_and_samplers(device);
+ if (device->prefer_user_constbuf) {
+ if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS)) && state->vs)
+ prepare_vs_constants_userbuf(device);
+ if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && state->ps)
+ prepare_ps_constants_userbuf(device);
+ } else {
+ if ((group & NINE_STATE_VS_CONST) && state->vs)
+ upload_constants(device, PIPE_SHADER_VERTEX);
+ if ((group & NINE_STATE_PS_CONST) && state->ps)
+ upload_constants(device, PIPE_SHADER_FRAGMENT);
+ }
+ }
- if (group & NINE_STATE_PS)
- group |= update_ps(device);
+ if (state->changed.vtxbuf)
+ update_vertex_buffers(device);
+
+ if (state->commit & NINE_STATE_COMMIT_BLEND)
+ commit_blend(device);
+ if (state->commit & NINE_STATE_COMMIT_DSA)
+ commit_dsa(device);
+ if (state->commit & NINE_STATE_COMMIT_RASTERIZER)
+ commit_rasterizer(device);
+ if (state->commit & NINE_STATE_COMMIT_CONST_VS)
+ commit_vs_constants(device);
+ if (state->commit & NINE_STATE_COMMIT_CONST_PS)
+ commit_ps_constants(device);
+ if (state->commit & NINE_STATE_COMMIT_VS)
+ commit_vs(device);
+ if (state->commit & NINE_STATE_COMMIT_PS)
+ commit_ps(device);
+
+ state->commit = 0;
+
+ if (unlikely(state->changed.ucp)) {
+ pipe->set_clip_state(pipe, &state->clip);
+ state->changed.ucp = 0;
+ }
+ if (unlikely(group & NINE_STATE_RARE)) {
+ if (group & NINE_STATE_SCISSOR)
+ commit_scissor(device);
if (group & NINE_STATE_BLEND_COLOR) {
struct pipe_blend_color color;
d3dcolor_to_rgba(&color.color[0], state->rs[D3DRS_BLENDFACTOR]);
@@ -941,38 +1051,7 @@ nine_update_state(struct NineDevice9 *device, uint32_t mask)
}
}
- if (state->changed.ucp) {
- pipe->set_clip_state(pipe, &state->clip);
- state->changed.ucp = 0;
- }
-
- if (group & (NINE_STATE_FREQ_GROUP_1 | NINE_STATE_VS)) {
- if (group & (NINE_STATE_TEXTURE | NINE_STATE_SAMPLER))
- update_textures_and_samplers(device);
-
- if (group & NINE_STATE_IDXBUF)
- update_index_buffer(device);
-
- if ((group & (NINE_STATE_VDECL | NINE_STATE_VS)) ||
- state->changed.stream_freq & ~1)
- update_vertex_elements(device);
-
- if (device->prefer_user_constbuf) {
- if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS)) && state->vs)
- update_vs_constants_userbuf(device);
- if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && state->ps)
- update_ps_constants_userbuf(device);
- } else {
- if ((group & NINE_STATE_VS_CONST) && state->vs)
- update_constants(device, PIPE_SHADER_VERTEX);
- if ((group & NINE_STATE_PS_CONST) && state->ps)
- update_constants(device, PIPE_SHADER_FRAGMENT);
- }
- }
- if (state->changed.vtxbuf)
- update_vertex_buffers(device);
-
- device->state.changed.group &= ~mask |
+ device->state.changed.group &=
(NINE_STATE_FF | NINE_STATE_VS_CONST | NINE_STATE_PS_CONST);
DBG("finished\n");
@@ -980,6 +1059,7 @@ nine_update_state(struct NineDevice9 *device, uint32_t mask)
return TRUE;
}
+/* State defaults */
static const DWORD nine_render_state_defaults[NINED3DRS_LAST + 1] =
{
@@ -1134,6 +1214,18 @@ static const DWORD nine_samp_state_defaults[NINED3DSAMP_LAST + 1] =
[NINED3DSAMP_MINLOD] = 0,
[NINED3DSAMP_SHADOW] = 0
};
+
+void nine_state_restore_non_cso(struct NineDevice9 *device)
+{
+ struct nine_state *state = &device->state;
+
+ state->changed.group = NINE_STATE_ALL;
+ state->changed.vtxbuf = (1ULL << device->caps.MaxStreams) - 1;
+ state->changed.ucp = (1 << PIPE_MAX_CLIP_PLANES) - 1;
+ state->changed.texture = NINE_PS_SAMPLERS_MASK | NINE_VS_SAMPLERS_MASK;
+ state->commit |= NINE_STATE_COMMIT_CONST_VS | NINE_STATE_COMMIT_CONST_PS;
+}
+
void
nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps,
boolean is_reset)
@@ -1152,6 +1244,7 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps,
}
state->ff.tex_stage[0][D3DTSS_COLOROP] = D3DTOP_MODULATE;
state->ff.tex_stage[0][D3DTSS_ALPHAOP] = D3DTOP_SELECTARG1;
+ memset(&state->bumpmap_vars, 0, sizeof(state->bumpmap_vars));
for (s = 0; s < Elements(state->samp); ++s) {
memcpy(&state->samp[s], nine_samp_state_defaults,
@@ -1170,6 +1263,9 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps,
/* Set changed flags to initialize driver.
*/
state->changed.group = NINE_STATE_ALL;
+ state->changed.vtxbuf = (1ULL << device->caps.MaxStreams) - 1;
+ state->changed.ucp = (1 << PIPE_MAX_CLIP_PLANES) - 1;
+ state->changed.texture = NINE_PS_SAMPLERS_MASK | NINE_VS_SAMPLERS_MASK;
state->ff.changed.transform[0] = ~0;
state->ff.changed.transform[D3DTS_WORLD / 32] |= 1 << (D3DTS_WORLD % 32);
@@ -1186,6 +1282,23 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps,
state->dummy_vbo_bound_at = -1;
state->vbo_bound_done = FALSE;
}
+
+ if (!device->prefer_user_constbuf) {
+ /* fill cb_vs and cb_ps for the non user constbuf path */
+ struct pipe_constant_buffer cb;
+
+ cb.buffer_offset = 0;
+ cb.buffer_size = device->vs_const_size;
+ cb.buffer = device->constbuf_vs;
+ cb.user_buffer = NULL;
+ state->pipe.cb_vs = cb;
+
+ cb.buffer_size = device->ps_const_size;
+ cb.buffer = device->constbuf_ps;
+ state->pipe.cb_ps = cb;
+
+ state->commit |= NINE_STATE_COMMIT_CONST_VS | NINE_STATE_COMMIT_CONST_PS;
+ }
}
void
@@ -1353,15 +1466,15 @@ const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] =
[D3DRS_ZFUNC] = NINE_STATE_DSA,
[D3DRS_ALPHAREF] = NINE_STATE_DSA,
[D3DRS_ALPHAFUNC] = NINE_STATE_DSA,
- [D3DRS_DITHERENABLE] = NINE_STATE_RASTERIZER,
+ [D3DRS_DITHERENABLE] = NINE_STATE_BLEND,
[D3DRS_ALPHABLENDENABLE] = NINE_STATE_BLEND,
- [D3DRS_FOGENABLE] = NINE_STATE_FF_OTHER,
+ [D3DRS_FOGENABLE] = NINE_STATE_FF_OTHER | NINE_STATE_FOG_SHADER | NINE_STATE_PS_CONST,
[D3DRS_SPECULARENABLE] = NINE_STATE_FF_LIGHTING,
- [D3DRS_FOGCOLOR] = NINE_STATE_FF_OTHER,
- [D3DRS_FOGTABLEMODE] = NINE_STATE_FF_OTHER,
- [D3DRS_FOGSTART] = NINE_STATE_FF_OTHER,
- [D3DRS_FOGEND] = NINE_STATE_FF_OTHER,
- [D3DRS_FOGDENSITY] = NINE_STATE_FF_OTHER,
+ [D3DRS_FOGCOLOR] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST,
+ [D3DRS_FOGTABLEMODE] = NINE_STATE_FF_OTHER | NINE_STATE_FOG_SHADER | NINE_STATE_PS_CONST,
+ [D3DRS_FOGSTART] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST,
+ [D3DRS_FOGEND] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST,
+ [D3DRS_FOGDENSITY] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST,
[D3DRS_RANGEFOGENABLE] = NINE_STATE_FF_OTHER,
[D3DRS_STENCILENABLE] = NINE_STATE_DSA,
[D3DRS_STENCILFAIL] = NINE_STATE_DSA,
@@ -1394,7 +1507,7 @@ const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] =
[D3DRS_VERTEXBLEND] = NINE_STATE_FF_OTHER,
[D3DRS_CLIPPLANEENABLE] = NINE_STATE_RASTERIZER,
[D3DRS_POINTSIZE] = NINE_STATE_RASTERIZER,
- [D3DRS_POINTSIZE_MIN] = NINE_STATE_MISC_CONST,
+ [D3DRS_POINTSIZE_MIN] = NINE_STATE_RASTERIZER,
[D3DRS_POINTSPRITEENABLE] = NINE_STATE_RASTERIZER,
[D3DRS_POINTSCALEENABLE] = NINE_STATE_FF_OTHER,
[D3DRS_POINTSCALE_A] = NINE_STATE_FF_OTHER,
@@ -1404,7 +1517,7 @@ const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] =
[D3DRS_MULTISAMPLEMASK] = NINE_STATE_SAMPLE_MASK,
[D3DRS_PATCHEDGESTYLE] = NINE_STATE_UNHANDLED,
[D3DRS_DEBUGMONITORTOKEN] = NINE_STATE_UNHANDLED,
- [D3DRS_POINTSIZE_MAX] = NINE_STATE_MISC_CONST,
+ [D3DRS_POINTSIZE_MAX] = NINE_STATE_RASTERIZER,
[D3DRS_INDEXEDVERTEXBLENDENABLE] = NINE_STATE_FF_OTHER,
[D3DRS_COLORWRITEENABLE] = NINE_STATE_BLEND,
[D3DRS_TWEENFACTOR] = NINE_STATE_FF_OTHER,
@@ -1446,6 +1559,8 @@ const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] =
[D3DRS_BLENDOPALPHA] = NINE_STATE_BLEND
};
+/* Misc */
+
D3DMATRIX *
nine_state_access_transform(struct nine_state *state, D3DTRANSFORMSTATETYPE t,
boolean alloc)
@@ -1601,4 +1716,3 @@ const char *nine_d3drs_to_string(DWORD State)
return "(invalid)";
}
}
-
diff --git a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h
index 2bf3f63..b34da70 100644
--- a/src/gallium/state_trackers/nine/nine_state.h
+++ b/src/gallium/state_trackers/nine/nine_state.h
@@ -33,8 +33,7 @@
#define NINED3DRS_VSPOINTSIZE (D3DRS_BLENDOPALPHA + 1)
#define NINED3DRS_RTMASK (D3DRS_BLENDOPALPHA + 2)
-#define NINED3DRS_ZBIASSCALE (D3DRS_BLENDOPALPHA + 3)
-#define NINED3DRS_ALPHACOVERAGE (D3DRS_BLENDOPALPHA + 4)
+#define NINED3DRS_ALPHACOVERAGE (D3DRS_BLENDOPALPHA + 3)
#define D3DRS_LAST D3DRS_BLENDOPALPHA
#define NINED3DRS_LAST NINED3DRS_ALPHACOVERAGE /* 213 */
@@ -67,17 +66,26 @@
#define NINE_STATE_BLEND_COLOR (1 << 16)
#define NINE_STATE_STENCIL_REF (1 << 17)
#define NINE_STATE_SAMPLE_MASK (1 << 18)
-#define NINE_STATE_MISC_CONST (1 << 19)
-#define NINE_STATE_FF (0x1f << 20)
-#define NINE_STATE_FF_VS (0x17 << 20)
-#define NINE_STATE_FF_PS (0x18 << 20)
-#define NINE_STATE_FF_LIGHTING (1 << 20)
-#define NINE_STATE_FF_MATERIAL (1 << 21)
-#define NINE_STATE_FF_VSTRANSF (1 << 22)
-#define NINE_STATE_FF_PSSTAGES (1 << 23)
-#define NINE_STATE_FF_OTHER (1 << 24)
-#define NINE_STATE_ALL 0x1ffffff
-#define NINE_STATE_UNHANDLED (1 << 25)
+#define NINE_STATE_FF (0x1f << 19)
+#define NINE_STATE_FF_VS (0x17 << 19)
+#define NINE_STATE_FF_PS (0x18 << 19)
+#define NINE_STATE_FF_LIGHTING (1 << 19)
+#define NINE_STATE_FF_MATERIAL (1 << 20)
+#define NINE_STATE_FF_VSTRANSF (1 << 21)
+#define NINE_STATE_FF_PSSTAGES (1 << 22)
+#define NINE_STATE_FF_OTHER (1 << 23)
+#define NINE_STATE_FOG_SHADER (1 << 24)
+#define NINE_STATE_PS1X_SHADER (1 << 25)
+#define NINE_STATE_ALL 0x3ffffff
+#define NINE_STATE_UNHANDLED (1 << 26)
+
+#define NINE_STATE_COMMIT_DSA (1 << 0)
+#define NINE_STATE_COMMIT_RASTERIZER (1 << 1)
+#define NINE_STATE_COMMIT_BLEND (1 << 2)
+#define NINE_STATE_COMMIT_CONST_VS (1 << 3)
+#define NINE_STATE_COMMIT_CONST_PS (1 << 4)
+#define NINE_STATE_COMMIT_VS (1 << 5)
+#define NINE_STATE_COMMIT_PS (1 << 6)
#define NINE_MAX_SIMULTANEOUS_RENDERTARGETS 4
@@ -94,6 +102,8 @@
NINE_MAX_CONST_I * 4 * sizeof(int))
+#define NINE_MAX_TEXTURE_STAGES 8
+
#define NINE_MAX_LIGHTS 65536
#define NINE_MAX_LIGHTS_ACTIVE 8
@@ -124,7 +134,6 @@ struct nine_state
uint16_t vs_const_b; /* NINE_MAX_CONST_B == 16 */
uint16_t ps_const_b;
uint8_t ucp;
- boolean srgb;
} changed;
struct NineSurface9 *rt[NINE_MAX_SIMULTANEOUS_RENDERTARGETS];
@@ -143,13 +152,13 @@ struct nine_state
int vs_const_i[NINE_MAX_CONST_I][4];
BOOL vs_const_b[NINE_MAX_CONST_B];
float *vs_lconstf_temp;
- uint32_t vs_key;
struct NinePixelShader9 *ps;
float *ps_const_f;
int ps_const_i[NINE_MAX_CONST_I][4];
BOOL ps_const_b[NINE_MAX_CONST_B];
- uint32_t ps_key;
+ float *ps_lconstf_temp;
+ uint32_t bumpmap_vars[6 * NINE_MAX_TEXTURE_STAGES];
struct {
void *vs;
@@ -184,13 +193,9 @@ struct nine_state
struct {
struct {
uint32_t group;
- uint32_t tex_stage[NINE_MAX_SAMPLERS][(NINED3DTSS_COUNT + 31) / 32];
+ uint32_t tex_stage[NINE_MAX_TEXTURE_STAGES][(NINED3DTSS_COUNT + 31) / 32];
uint32_t transform[(NINED3DTS_COUNT + 31) / 32];
} changed;
- struct {
- boolean vs_const;
- boolean ps_const;
- } clobber;
D3DMATRIX *transform; /* access only via nine_state_access_transform */
unsigned num_transforms;
@@ -205,8 +210,19 @@ struct nine_state
D3DMATERIAL9 material;
- DWORD tex_stage[NINE_MAX_SAMPLERS][NINED3DTSS_COUNT];
+ DWORD tex_stage[NINE_MAX_TEXTURE_STAGES][NINED3DTSS_COUNT];
} ff;
+
+ uint32_t commit;
+ struct {
+ struct pipe_depth_stencil_alpha_state dsa;
+ struct pipe_rasterizer_state rast;
+ struct pipe_blend_state blend;
+ struct pipe_constant_buffer cb_vs;
+ struct pipe_constant_buffer cb_ps;
+ struct pipe_constant_buffer cb_vs_ff;
+ struct pipe_constant_buffer cb_ps_ff;
+ } pipe;
};
/* map D3DRS -> NINE_STATE_x
@@ -220,8 +236,10 @@ extern const uint32_t nine_render_states_vertex[(NINED3DRS_COUNT + 31) / 32];
struct NineDevice9;
-boolean nine_update_state(struct NineDevice9 *, uint32_t group_mask);
+void nine_update_state_framebuffer(struct NineDevice9 *);
+boolean nine_update_state(struct NineDevice9 *);
+void nine_state_restore_non_cso(struct NineDevice9 *device);
void nine_state_set_defaults(struct NineDevice9 *, const D3DCAPS9 *,
boolean is_reset);
void nine_state_clear(struct nine_state *, const boolean device);
diff --git a/src/gallium/state_trackers/nine/pixelshader9.c b/src/gallium/state_trackers/nine/pixelshader9.c
index 3f176a3..42bc349 100644
--- a/src/gallium/state_trackers/nine/pixelshader9.c
+++ b/src/gallium/state_trackers/nine/pixelshader9.c
@@ -46,7 +46,7 @@ NinePixelShader9_ctor( struct NinePixelShader9 *This,
return hr;
if (cso) {
- This->variant.cso = cso;
+ This->ff_cso = cso;
return D3D_OK;
}
device = This->base.device;
@@ -57,6 +57,8 @@ NinePixelShader9_ctor( struct NinePixelShader9 *This,
info.const_b_base = NINE_CONST_B_BASE(device->max_ps_const_f) / 16;
info.sampler_mask_shadow = 0x0;
info.sampler_ps1xtypes = 0x0;
+ info.fog_enable = 0;
+ info.projected = 0;
hr = nine_translate_shader(device, &info);
if (FAILED(hr))
@@ -69,9 +71,13 @@ NinePixelShader9_ctor( struct NinePixelShader9 *This,
This->byte_code.size = info.byte_size;
This->variant.cso = info.cso;
+ This->last_cso = info.cso;
+ This->last_key = 0;
+
This->sampler_mask = info.sampler_mask;
This->rt_mask = info.rt_mask;
This->const_used_size = info.const_used_size;
+ This->bumpenvmat_needed = info.bumpenvmat_needed;
/* no constant relative addressing for ps */
assert(info.lconstf.data == NULL);
assert(info.lconstf.ranges == NULL);
@@ -82,11 +88,12 @@ NinePixelShader9_ctor( struct NinePixelShader9 *This,
void
NinePixelShader9_dtor( struct NinePixelShader9 *This )
{
- DBG("This=%p cso=%p\n", This, This->variant.cso);
+ DBG("This=%p\n", This);
if (This->base.device) {
struct pipe_context *pipe = This->base.device->pipe;
- struct nine_shader_variant *var = &This->variant;
+ struct nine_shader_variant64 *var = &This->variant;
+
do {
if (var->cso) {
if (This->base.device->state.cso.ps == var->cso)
@@ -95,8 +102,14 @@ NinePixelShader9_dtor( struct NinePixelShader9 *This )
}
var = var->next;
} while (var);
+
+ if (This->ff_cso) {
+ if (This->ff_cso == This->base.device->state.cso.ps)
+ pipe->bind_fs_state(pipe, NULL);
+ pipe->delete_fs_state(pipe, This->ff_cso);
+ }
}
- nine_shader_variants_free(&This->variant);
+ nine_shader_variants_free64(&This->variant);
FREE((void *)This->byte_code.tokens); /* const_cast */
@@ -124,10 +137,16 @@ NinePixelShader9_GetFunction( struct NinePixelShader9 *This,
}
void *
-NinePixelShader9_GetVariant( struct NinePixelShader9 *This,
- uint32_t key )
+NinePixelShader9_GetVariant( struct NinePixelShader9 *This )
{
- void *cso = nine_shader_variant_get(&This->variant, key);
+ void *cso;
+ uint64_t key;
+
+ key = This->next_key;
+ if (key == This->last_key)
+ return This->last_cso;
+
+ cso = nine_shader_variant_get64(&This->variant, key);
if (!cso) {
struct NineDevice9 *device = This->base.device;
struct nine_shader_info info;
@@ -139,13 +158,20 @@ NinePixelShader9_GetVariant( struct NinePixelShader9 *This,
info.byte_code = This->byte_code.tokens;
info.sampler_mask_shadow = key & 0xffff;
info.sampler_ps1xtypes = key;
+ info.fog_enable = device->state.rs[D3DRS_FOGENABLE];
+ info.fog_mode = device->state.rs[D3DRS_FOGTABLEMODE];
+ info.projected = (key >> 48) & 0xffff;
hr = nine_translate_shader(This->base.device, &info);
if (FAILED(hr))
return NULL;
- nine_shader_variant_add(&This->variant, key, info.cso);
+ nine_shader_variant_add64(&This->variant, key, info.cso);
cso = info.cso;
}
+
+ This->last_key = key;
+ This->last_cso = cso;
+
return cso;
}
diff --git a/src/gallium/state_trackers/nine/pixelshader9.h b/src/gallium/state_trackers/nine/pixelshader9.h
index 6dad1d1..e09009f 100644
--- a/src/gallium/state_trackers/nine/pixelshader9.h
+++ b/src/gallium/state_trackers/nine/pixelshader9.h
@@ -25,13 +25,16 @@
#include "iunknown.h"
#include "nine_shader.h"
+#include "nine_state.h"
+#include "basetexture9.h"
+#include "nine_ff.h"
struct nine_lconstf;
struct NinePixelShader9
{
struct NineUnknown base;
- struct nine_shader_variant variant;
+ struct nine_shader_variant64 variant;
struct {
const DWORD *tokens;
@@ -41,11 +44,17 @@ struct NinePixelShader9
unsigned const_used_size; /* in bytes */
+ uint8_t bumpenvmat_needed;
uint16_t sampler_mask;
- uint16_t sampler_mask_shadow;
uint8_t rt_mask;
uint64_t ff_key[6];
+ void *ff_cso;
+
+ uint64_t last_key;
+ void *last_cso;
+
+ uint64_t next_key;
};
static inline struct NinePixelShader9 *
NinePixelShader9( void *data )
@@ -53,9 +62,49 @@ NinePixelShader9( void *data )
return (struct NinePixelShader9 *)data;
}
+static inline BOOL
+NinePixelShader9_UpdateKey( struct NinePixelShader9 *ps,
+ struct nine_state *state )
+{
+ uint16_t samplers_shadow;
+ uint32_t samplers_ps1_types;
+ uint16_t projected;
+ uint64_t key;
+ BOOL res;
+
+ if (unlikely(ps->byte_code.version < 0x20)) {
+ /* no depth textures, but variable targets */
+ uint32_t m = ps->sampler_mask;
+ samplers_ps1_types = 0;
+ while (m) {
+ int s = ffs(m) - 1;
+ m &= ~(1 << s);
+ samplers_ps1_types |= (state->texture[s] ? state->texture[s]->pstype : 1) << (s * 2);
+ }
+ key = samplers_ps1_types;
+ } else {
+ samplers_shadow = (uint16_t)((state->samplers_shadow & NINE_PS_SAMPLERS_MASK) >> NINE_SAMPLER_PS(0));
+ key = samplers_shadow & ps->sampler_mask;
+ }
+
+ if (ps->byte_code.version < 0x30) {
+ key |= ((uint64_t)state->rs[D3DRS_FOGENABLE]) << 32;
+ key |= ((uint64_t)state->rs[D3DRS_FOGTABLEMODE]) << 33;
+ }
+
+ if (unlikely(ps->byte_code.version < 0x14)) {
+ projected = nine_ff_get_projected_key(state);
+ key |= ((uint64_t) projected) << 48;
+ }
+
+ res = ps->last_key != key;
+ if (res)
+ ps->next_key = key;
+ return res;
+}
+
void *
-NinePixelShader9_GetVariant( struct NinePixelShader9 *vs,
- uint32_t key );
+NinePixelShader9_GetVariant( struct NinePixelShader9 *ps );
/*** public ***/
diff --git a/src/gallium/state_trackers/nine/resource9.c b/src/gallium/state_trackers/nine/resource9.c
index bbc8320..6d91533 100644
--- a/src/gallium/state_trackers/nine/resource9.c
+++ b/src/gallium/state_trackers/nine/resource9.c
@@ -161,20 +161,22 @@ NineResource9_GetPrivateData( struct NineResource9 *This,
DWORD *pSizeOfData )
{
struct pheader *header;
+ DWORD sizeofdata;
DBG("This=%p refguid=%p pData=%p pSizeOfData=%p\n",
This, refguid, pData, pSizeOfData);
- user_assert(pSizeOfData, E_POINTER);
-
header = util_hash_table_get(This->pdata, refguid);
if (!header) { return D3DERR_NOTFOUND; }
+ user_assert(pSizeOfData, E_POINTER);
+ sizeofdata = *pSizeOfData;
+ *pSizeOfData = header->size;
+
if (!pData) {
- *pSizeOfData = header->size;
return D3D_OK;
}
- if (*pSizeOfData < header->size) {
+ if (sizeofdata < header->size) {
return D3DERR_MOREDATA;
}
@@ -206,10 +208,13 @@ DWORD WINAPI
NineResource9_SetPriority( struct NineResource9 *This,
DWORD PriorityNew )
{
- DWORD prev = This->priority;
-
+ DWORD prev;
DBG("This=%p, PriorityNew=%d\n", This, PriorityNew);
+ if (This->pool != D3DPOOL_MANAGED || This->type == D3DRTYPE_SURFACE)
+ return 0;
+
+ prev = This->priority;
This->priority = PriorityNew;
return prev;
}
@@ -217,6 +222,9 @@ NineResource9_SetPriority( struct NineResource9 *This,
DWORD WINAPI
NineResource9_GetPriority( struct NineResource9 *This )
{
+ if (This->pool != D3DPOOL_MANAGED || This->type == D3DRTYPE_SURFACE)
+ return 0;
+
return This->priority;
}
diff --git a/src/gallium/state_trackers/nine/stateblock9.c b/src/gallium/state_trackers/nine/stateblock9.c
index 032b9ff..6d6e1be 100644
--- a/src/gallium/state_trackers/nine/stateblock9.c
+++ b/src/gallium/state_trackers/nine/stateblock9.c
@@ -251,7 +251,7 @@ nine_state_copy_common(struct nine_state *dst,
dst->ff.material = src->ff.material;
if (mask->changed.group & NINE_STATE_FF_PSSTAGES) {
- for (s = 0; s < NINE_MAX_SAMPLERS; ++s) {
+ for (s = 0; s < NINE_MAX_TEXTURE_STAGES; ++s) {
for (i = 0; i < NINED3DTSS_COUNT; ++i)
if (mask->ff.changed.tex_stage[s][i / 32] & (1 << (i % 32)))
dst->ff.tex_stage[s][i] = src->ff.tex_stage[s][i];
diff --git a/src/gallium/state_trackers/nine/surface9.c b/src/gallium/state_trackers/nine/surface9.c
index 7533cb3..14c1ce9 100644
--- a/src/gallium/state_trackers/nine/surface9.c
+++ b/src/gallium/state_trackers/nine/surface9.c
@@ -104,11 +104,11 @@ NineSurface9_ctor( struct NineSurface9 *This,
/* Ram buffer with no parent. Has to allocate the resource itself */
if (!pResource && !pContainer) {
assert(!user_buffer);
- This->data = MALLOC(
+ This->data = align_malloc(
nine_format_get_level_alloc_size(This->base.info.format,
pDesc->Width,
pDesc->Height,
- 0));
+ 0), 32);
if (!This->data)
return E_OUTOFMEMORY;
}
@@ -273,7 +273,7 @@ NineSurface9_AddDirtyRect( struct NineSurface9 *This,
This->texture == D3DRTYPE_CUBETEXTURE ||
This->texture == D3DRTYPE_TEXTURE);
- if (This->base.pool != D3DPOOL_MANAGED)
+ if (This->base.pool == D3DPOOL_DEFAULT)
return;
/* Add a dirty rect to level 0 of the parent texture */
@@ -287,7 +287,7 @@ NineSurface9_AddDirtyRect( struct NineSurface9 *This,
NineTexture9(This->base.base.container);
NineTexture9_AddDirtyRect(tex, &dirty_rect);
- } else { /* This->texture == D3DRTYPE_CUBETEXTURE */
+ } else if (This->texture == D3DRTYPE_CUBETEXTURE) {
struct NineCubeTexture9 *ctex =
NineCubeTexture9(This->base.base.container);
@@ -323,6 +323,13 @@ NineSurface9_LockRect( struct NineSurface9 *This,
nine_D3DLOCK_to_str(Flags));
NineSurface9_Dump(This);
+ /* check if it's already locked */
+ user_assert(This->lock_count == 0, D3DERR_INVALIDCALL);
+
+ /* set pBits to NULL after lock_count check */
+ user_assert(pLockedRect, E_POINTER);
+ pLockedRect->pBits = NULL;
+
#ifdef NINE_STRICT
user_assert(This->base.pool != D3DPOOL_DEFAULT ||
(resource && (resource->flags & NINE_RESOURCE_FLAG_LOCKABLE)),
@@ -337,19 +344,17 @@ NineSurface9_LockRect( struct NineSurface9 *This,
user_assert(!((Flags & D3DLOCK_DISCARD) && (Flags & D3DLOCK_READONLY)),
D3DERR_INVALIDCALL);
- /* check if it's already locked */
- user_assert(This->lock_count == 0, D3DERR_INVALIDCALL);
- user_assert(pLockedRect, E_POINTER);
-
user_assert(This->desc.MultiSampleType == D3DMULTISAMPLE_NONE,
D3DERR_INVALIDCALL);
- if (pRect && This->base.pool == D3DPOOL_DEFAULT &&
- util_format_is_compressed(This->base.info.format)) {
+ if (pRect && This->desc.Pool == D3DPOOL_DEFAULT &&
+ compressed_format (This->desc.Format)) {
const unsigned w = util_format_get_blockwidth(This->base.info.format);
const unsigned h = util_format_get_blockheight(This->base.info.format);
- user_assert(!(pRect->left % w) && !(pRect->right % w) &&
- !(pRect->top % h) && !(pRect->bottom % h),
+ user_assert((pRect->left == 0 && pRect->right == This->desc.Width &&
+ pRect->top == 0 && pRect->bottom == This->desc.Height) ||
+ (!(pRect->left % w) && !(pRect->right % w) &&
+ !(pRect->top % h) && !(pRect->bottom % h)),
D3DERR_INVALIDCALL);
}
@@ -363,13 +368,9 @@ NineSurface9_LockRect( struct NineSurface9 *This,
usage |= PIPE_TRANSFER_DONTBLOCK;
if (pRect) {
+ /* Windows XP accepts invalid locking rectangles, Windows 7 rejects
+ * them. Use Windows XP behaviour for now. */
rect_to_pipe_box(&box, pRect);
- if (u_box_clip_2d(&box, &box, This->desc.Width,
- This->desc.Height) < 0) {
- DBG("pRect clipped by Width=%u Height=%u\n",
- This->desc.Width, This->desc.Height);
- return D3DERR_INVALIDCALL;
- }
} else {
u_box_origin_2d(This->desc.Width, This->desc.Height, &box);
}
@@ -463,140 +464,92 @@ IDirect3DSurface9Vtbl NineSurface9_vtable = {
(void *)NineSurface9_ReleaseDC
};
-HRESULT
-NineSurface9_CopySurface( struct NineSurface9 *This,
- struct NineSurface9 *From,
- const POINT *pDestPoint,
- const RECT *pSourceRect )
+/* When this function is called, we have already checked
+ * The copy regions fit the surfaces */
+void
+NineSurface9_CopyMemToDefault( struct NineSurface9 *This,
+ struct NineSurface9 *From,
+ const POINT *pDestPoint,
+ const RECT *pSourceRect )
{
struct pipe_context *pipe = This->pipe;
struct pipe_resource *r_dst = This->base.resource;
- struct pipe_resource *r_src = From->base.resource;
- struct pipe_transfer *transfer;
- struct pipe_box src_box;
struct pipe_box dst_box;
- uint8_t *p_dst;
const uint8_t *p_src;
+ int src_x, src_y, dst_x, dst_y, copy_width, copy_height;
- DBG("This=%p From=%p pDestPoint=%p pSourceRect=%p\n",
- This, From, pDestPoint, pSourceRect);
-
- assert(This->base.pool != D3DPOOL_MANAGED &&
- From->base.pool != D3DPOOL_MANAGED);
+ assert(This->base.pool == D3DPOOL_DEFAULT &&
+ From->base.pool == D3DPOOL_SYSTEMMEM);
- user_assert(This->desc.Format == From->desc.Format, D3DERR_INVALIDCALL);
+ if (pDestPoint) {
+ dst_x = pDestPoint->x;
+ dst_y = pDestPoint->y;
+ } else {
+ dst_x = 0;
+ dst_y = 0;
+ }
- dst_box.x = pDestPoint ? pDestPoint->x : 0;
- dst_box.y = pDestPoint ? pDestPoint->y : 0;
+ if (pSourceRect) {
+ src_x = pSourceRect->left;
+ src_y = pSourceRect->top;
+ copy_width = pSourceRect->right - pSourceRect->left;
+ copy_height = pSourceRect->bottom - pSourceRect->top;
+ } else {
+ src_x = 0;
+ src_y = 0;
+ copy_width = From->desc.Width;
+ copy_height = From->desc.Height;
+ }
- user_assert(dst_box.x >= 0 &&
- dst_box.y >= 0, D3DERR_INVALIDCALL);
+ u_box_2d_zslice(dst_x, dst_y, This->layer,
+ copy_width, copy_height, &dst_box);
- dst_box.z = This->layer;
- src_box.z = From->layer;
+ p_src = NineSurface9_GetSystemMemPointer(From, src_x, src_y);
- dst_box.depth = 1;
- src_box.depth = 1;
+ pipe->transfer_inline_write(pipe, r_dst, This->level,
+ 0, /* WRITE|DISCARD are implicit */
+ &dst_box, p_src, From->stride, 0);
- if (pSourceRect) {
- /* make sure it doesn't range outside the source surface */
- user_assert(pSourceRect->left >= 0 &&
- pSourceRect->right <= From->desc.Width &&
- pSourceRect->top >= 0 &&
- pSourceRect->bottom <= From->desc.Height,
- D3DERR_INVALIDCALL);
- if (rect_to_pipe_box_xy_only_clamp(&src_box, pSourceRect))
- return D3D_OK;
- } else {
- src_box.x = 0;
- src_box.y = 0;
- src_box.width = From->desc.Width;
- src_box.height = From->desc.Height;
- }
+ NineSurface9_MarkContainerDirty(This);
+}
- /* limits */
- dst_box.width = This->desc.Width - dst_box.x;
- dst_box.height = This->desc.Height - dst_box.y;
+void
+NineSurface9_CopyDefaultToMem( struct NineSurface9 *This,
+ struct NineSurface9 *From )
+{
+ struct pipe_context *pipe = This->pipe;
+ struct pipe_resource *r_src = From->base.resource;
+ struct pipe_transfer *transfer;
+ struct pipe_box src_box;
+ uint8_t *p_dst;
+ const uint8_t *p_src;
- user_assert(src_box.width <= dst_box.width &&
- src_box.height <= dst_box.height, D3DERR_INVALIDCALL);
+ assert(This->base.pool == D3DPOOL_SYSTEMMEM &&
+ From->base.pool == D3DPOOL_DEFAULT);
- dst_box.width = src_box.width;
- dst_box.height = src_box.height;
+ assert(This->desc.Width == From->desc.Width);
+ assert(This->desc.Height == From->desc.Height);
- /* check source block align for compressed textures */
- if (util_format_is_compressed(From->base.info.format) &&
- ((src_box.width != From->desc.Width) ||
- (src_box.height != From->desc.Height))) {
- const unsigned w = util_format_get_blockwidth(From->base.info.format);
- const unsigned h = util_format_get_blockheight(From->base.info.format);
- user_assert(!(src_box.width % w) &&
- !(src_box.height % h),
- D3DERR_INVALIDCALL);
- }
+ u_box_origin_2d(This->desc.Width, This->desc.Height, &src_box);
+ src_box.z = From->layer;
- /* check destination block align for compressed textures */
- if (util_format_is_compressed(This->base.info.format) &&
- ((dst_box.width != This->desc.Width) ||
- (dst_box.height != This->desc.Height) ||
- dst_box.x != 0 ||
- dst_box.y != 0)) {
- const unsigned w = util_format_get_blockwidth(This->base.info.format);
- const unsigned h = util_format_get_blockheight(This->base.info.format);
- user_assert(!(dst_box.x % w) && !(dst_box.width % w) &&
- !(dst_box.y % h) && !(dst_box.height % h),
- D3DERR_INVALIDCALL);
- }
+ p_src = pipe->transfer_map(pipe, r_src, From->level,
+ PIPE_TRANSFER_READ,
+ &src_box, &transfer);
+ p_dst = NineSurface9_GetSystemMemPointer(This, 0, 0);
- if (r_dst && r_src) {
- pipe->resource_copy_region(pipe,
- r_dst, This->level,
- dst_box.x, dst_box.y, dst_box.z,
- r_src, From->level,
- &src_box);
- } else
- if (r_dst) {
- p_src = NineSurface9_GetSystemMemPointer(From, src_box.x, src_box.y);
-
- pipe->transfer_inline_write(pipe, r_dst, This->level,
- 0, /* WRITE|DISCARD are implicit */
- &dst_box, p_src, From->stride, 0);
- } else
- if (r_src) {
- p_dst = NineSurface9_GetSystemMemPointer(This, 0, 0);
-
- p_src = pipe->transfer_map(pipe, r_src, From->level,
- PIPE_TRANSFER_READ,
- &src_box, &transfer);
- if (!p_src)
- return D3DERR_DRIVERINTERNALERROR;
-
- util_copy_rect(p_dst, This->base.info.format,
- This->stride, dst_box.x, dst_box.y,
- dst_box.width, dst_box.height,
- p_src,
- transfer->stride, src_box.x, src_box.y);
-
- pipe->transfer_unmap(pipe, transfer);
- } else {
- p_dst = NineSurface9_GetSystemMemPointer(This, 0, 0);
- p_src = NineSurface9_GetSystemMemPointer(From, 0, 0);
-
- util_copy_rect(p_dst, This->base.info.format,
- This->stride, dst_box.x, dst_box.y,
- dst_box.width, dst_box.height,
- p_src,
- From->stride, src_box.x, src_box.y);
- }
+ assert (p_src && p_dst);
- if (This->base.pool == D3DPOOL_DEFAULT)
- NineSurface9_MarkContainerDirty(This);
- if (!r_dst && This->base.resource)
- NineSurface9_AddDirtyRect(This, &dst_box);
+ util_copy_rect(p_dst, This->base.info.format,
+ This->stride, 0, 0,
+ This->desc.Width, This->desc.Height,
+ p_src,
+ transfer->stride, 0, 0);
- return D3D_OK;
+ pipe->transfer_unmap(pipe, transfer);
}
+
/* Gladly, rendering to a MANAGED surface is not permitted, so we will
* never have to do the reverse, i.e. download the surface.
*/
diff --git a/src/gallium/state_trackers/nine/surface9.h b/src/gallium/state_trackers/nine/surface9.h
index 73092ab..76156ae 100644
--- a/src/gallium/state_trackers/nine/surface9.h
+++ b/src/gallium/state_trackers/nine/surface9.h
@@ -125,11 +125,15 @@ HRESULT
NineSurface9_UploadSelf( struct NineSurface9 *This,
const struct pipe_box *damaged );
-HRESULT
-NineSurface9_CopySurface( struct NineSurface9 *This,
- struct NineSurface9 *From,
- const POINT *pDestPoint,
- const RECT *pSourceRect );
+void
+NineSurface9_CopyMemToDefault( struct NineSurface9 *This,
+ struct NineSurface9 *From,
+ const POINT *pDestPoint,
+ const RECT *pSourceRect );
+
+void
+NineSurface9_CopyDefaultToMem( struct NineSurface9 *This,
+ struct NineSurface9 *From );
static inline boolean
NineSurface9_IsOffscreenPlain (struct NineSurface9 *This )
diff --git a/src/gallium/state_trackers/nine/swapchain9.c b/src/gallium/state_trackers/nine/swapchain9.c
index a62e6ad..3f5be26 100644
--- a/src/gallium/state_trackers/nine/swapchain9.c
+++ b/src/gallium/state_trackers/nine/swapchain9.c
@@ -184,7 +184,9 @@ NineSwapChain9_Resize( struct NineSwapChain9 *This,
/* Note: It is the role of the backend to fill if necessary
* BackBufferWidth and BackBufferHeight */
- ID3DPresent_SetPresentParameters(This->present, pParams, This->mode);
+ hr = ID3DPresent_SetPresentParameters(This->present, pParams, This->mode);
+ if (hr != D3D_OK)
+ return hr;
/* When we have flip behaviour, d3d9 expects we get back the screen buffer when we flip.
* Here we don't get back the initial content of the screen. To emulate the behaviour
@@ -575,9 +577,10 @@ handle_draw_cursor_and_hud( struct NineSwapChain9 *This, struct pipe_resource *r
blit.filter = PIPE_TEX_FILTER_NEAREST;
blit.scissor_enable = FALSE;
- ID3DPresent_GetCursorPos(This->present, &device->cursor.pos);
-
- /* NOTE: blit messes up when box.x + box.width < 0, fix driver */
+ /* NOTE: blit messes up when box.x + box.width < 0, fix driver
+ * NOTE2: device->cursor.pos contains coordinates relative to the screen.
+ * This happens to be also the position of the cursor when we are fullscreen.
+ * We don't use sw cursor for Windowed mode */
blit.dst.box.x = MAX2(device->cursor.pos.x, 0) - device->cursor.hotspot.x;
blit.dst.box.y = MAX2(device->cursor.pos.y, 0) - device->cursor.hotspot.y;
blit.dst.box.width = blit.src.box.width;
@@ -587,13 +590,14 @@ handle_draw_cursor_and_hud( struct NineSwapChain9 *This, struct pipe_resource *r
blit.src.box.width, blit.src.box.height,
blit.dst.box.x, blit.dst.box.y);
+ blit.alpha_blend = TRUE;
This->pipe->blit(This->pipe, &blit);
}
if (device->hud && resource) {
hud_draw(device->hud, resource); /* XXX: no offset */
/* HUD doesn't clobber stipple */
- NineDevice9_RestoreNonCSOState(device, ~0x2);
+ nine_state_restore_non_cso(device);
}
}
@@ -704,6 +708,7 @@ present( struct NineSwapChain9 *This,
blit.mask = PIPE_MASK_RGBA;
blit.filter = PIPE_TEX_FILTER_NEAREST;
blit.scissor_enable = FALSE;
+ blit.alpha_blend = FALSE;
This->pipe->blit(This->pipe, &blit);
}
@@ -835,7 +840,7 @@ NineSwapChain9_Present( struct NineSwapChain9 *This,
ID3DPresent_WaitBufferReleased(This->present, This->present_handles[0]);
This->base.device->state.changed.group |= NINE_STATE_FB;
- nine_update_state(This->base.device, NINE_STATE_FB);
+ nine_update_state_framebuffer(This->base.device);
return hr;
}
@@ -856,6 +861,8 @@ NineSwapChain9_GetFrontBufferData( struct NineSwapChain9 *This,
DBG("GetFrontBufferData: This=%p pDestSurface=%p\n",
This, pDestSurface);
+ user_assert(dest_surface->base.pool == D3DPOOL_SYSTEMMEM, D3DERR_INVALIDCALL);
+
width = dest_surface->desc.Width;
height = dest_surface->desc.Height;
@@ -870,7 +877,7 @@ NineSwapChain9_GetFrontBufferData( struct NineSwapChain9 *This,
desc.MultiSampleQuality = 0;
desc.Width = width;
desc.Height = height;
- /* NineSurface9_CopySurface needs same format. */
+ /* NineSurface9_CopyDefaultToMem needs same format. */
desc.Format = dest_surface->desc.Format;
desc.Usage = D3DUSAGE_RENDERTARGET;
hr = NineSurface9_new(pDevice, NineUnknown(This), temp_resource, NULL, 0,
@@ -883,7 +890,7 @@ NineSwapChain9_GetFrontBufferData( struct NineSwapChain9 *This,
ID3DPresent_FrontBufferCopy(This->present, temp_handle);
- NineSurface9_CopySurface(dest_surface, temp_surface, NULL, NULL);
+ NineSurface9_CopyDefaultToMem(dest_surface, temp_surface);
ID3DPresent_DestroyD3DWindowBuffer(This->present, temp_handle);
NineUnknown_Destroy(NineUnknown(temp_surface));
diff --git a/src/gallium/state_trackers/nine/texture9.c b/src/gallium/state_trackers/nine/texture9.c
index 5900e76..bc325c1 100644
--- a/src/gallium/state_trackers/nine/texture9.c
+++ b/src/gallium/state_trackers/nine/texture9.c
@@ -101,6 +101,13 @@ NineTexture9_ctor( struct NineTexture9 *This,
if (Format != D3DFMT_NULL && pf == PIPE_FORMAT_NONE)
return D3DERR_INVALIDCALL;
+ if (compressed_format(Format)) {
+ const unsigned w = util_format_get_blockwidth(pf);
+ const unsigned h = util_format_get_blockheight(pf);
+
+ user_assert(!(Width % w) && !(Height % h), D3DERR_INVALIDCALL);
+ }
+
info->screen = screen;
info->target = PIPE_TEXTURE_2D;
info->format = pf;
@@ -152,10 +159,10 @@ NineTexture9_ctor( struct NineTexture9 *This,
* apps access sublevels of texture even if they locked only first
* level) */
level_offsets = alloca(sizeof(unsigned) * (info->last_level + 1));
- user_buffer = MALLOC(
+ user_buffer = align_malloc(
nine_format_get_size_and_offsets(pf, level_offsets,
Width, Height,
- info->last_level));
+ info->last_level), 32);
This->managed_buffer = user_buffer;
if (!This->managed_buffer)
return E_OUTOFMEMORY;
@@ -202,6 +209,9 @@ NineTexture9_ctor( struct NineTexture9 *This,
return hr;
}
+ /* Textures start initially dirty */
+ This->dirty_rect.width = Width;
+ This->dirty_rect.height = Height;
This->dirty_rect.depth = 1; /* width == 0 means empty, depth stays 1 */
if (pSharedHandle && !*pSharedHandle) {/* Pool == D3DPOOL_SYSTEMMEM */
@@ -219,7 +229,8 @@ NineTexture9_dtor( struct NineTexture9 *This )
if (This->surfaces) {
/* The surfaces should have 0 references and be unbound now. */
for (l = 0; l <= This->base.base.info.last_level; ++l)
- NineUnknown_Destroy(&This->surfaces[l]->base.base);
+ if (This->surfaces[l])
+ NineUnknown_Destroy(&This->surfaces[l]->base.base);
FREE(This->surfaces);
}
@@ -295,18 +306,22 @@ NineTexture9_AddDirtyRect( struct NineTexture9 *This,
pDirtyRect ? pDirtyRect->left : 0, pDirtyRect ? pDirtyRect->top : 0,
pDirtyRect ? pDirtyRect->right : 0, pDirtyRect ? pDirtyRect->bottom : 0);
- /* Tracking dirty regions on DEFAULT or SYSTEMMEM resources is pointless,
+ /* Tracking dirty regions on DEFAULT resources is pointless,
* because we always write to the final storage. Just marked it dirty in
* case we need to generate mip maps.
*/
- if (This->base.base.pool != D3DPOOL_MANAGED) {
- if (This->base.base.usage & D3DUSAGE_AUTOGENMIPMAP)
+ if (This->base.base.pool == D3DPOOL_DEFAULT) {
+ if (This->base.base.usage & D3DUSAGE_AUTOGENMIPMAP) {
This->base.dirty_mip = TRUE;
+ BASETEX_REGISTER_UPDATE(&This->base);
+ }
return D3D_OK;
}
- This->base.managed.dirty = TRUE;
- BASETEX_REGISTER_UPDATE(&This->base);
+ if (This->base.base.pool == D3DPOOL_MANAGED) {
+ This->base.managed.dirty = TRUE;
+ BASETEX_REGISTER_UPDATE(&This->base);
+ }
if (!pDirtyRect) {
u_box_origin_2d(This->base.base.info.width0,
diff --git a/src/gallium/state_trackers/nine/vertexshader9.c b/src/gallium/state_trackers/nine/vertexshader9.c
index bbd5ce9..fdfb79a 100644
--- a/src/gallium/state_trackers/nine/vertexshader9.c
+++ b/src/gallium/state_trackers/nine/vertexshader9.c
@@ -48,9 +48,10 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This,
return hr;
if (cso) {
- This->variant.cso = cso;
+ This->ff_cso = cso;
return D3D_OK;
}
+
device = This->base.device;
info.type = PIPE_SHADER_VERTEX;
@@ -59,6 +60,7 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This,
info.const_b_base = NINE_CONST_B_BASE(device->max_vs_const_f) / 16;
info.sampler_mask_shadow = 0x0;
info.sampler_ps1xtypes = 0x0;
+ info.fog_enable = 0;
hr = nine_translate_shader(device, &info);
if (FAILED(hr))
@@ -71,6 +73,9 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This,
This->byte_code.size = info.byte_size;
This->variant.cso = info.cso;
+ This->last_cso = info.cso;
+ This->last_key = 0;
+
This->const_used_size = info.const_used_size;
This->lconstf = info.lconstf;
This->sampler_mask = info.sampler_mask;
@@ -87,11 +92,12 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This,
void
NineVertexShader9_dtor( struct NineVertexShader9 *This )
{
- DBG("This=%p cso=%p\n", This, This->variant.cso);
+ DBG("This=%p\n", This);
if (This->base.device) {
struct pipe_context *pipe = This->base.device->pipe;
struct nine_shader_variant *var = &This->variant;
+
do {
if (var->cso) {
if (This->base.device->state.cso.vs == var->cso)
@@ -100,6 +106,12 @@ NineVertexShader9_dtor( struct NineVertexShader9 *This )
}
var = var->next;
} while (var);
+
+ if (This->ff_cso) {
+ if (This->ff_cso == This->base.device->state.cso.vs)
+ pipe->bind_vs_state(pipe, NULL);
+ pipe->delete_vs_state(pipe, This->ff_cso);
+ }
}
nine_shader_variants_free(&This->variant);
@@ -130,10 +142,16 @@ NineVertexShader9_GetFunction( struct NineVertexShader9 *This,
}
void *
-NineVertexShader9_GetVariant( struct NineVertexShader9 *This,
- uint32_t key )
+NineVertexShader9_GetVariant( struct NineVertexShader9 *This )
{
- void *cso = nine_shader_variant_get(&This->variant, key);
+ void *cso;
+ uint32_t key;
+
+ key = This->next_key;
+ if (key == This->last_key)
+ return This->last_cso;
+
+ cso = nine_shader_variant_get(&This->variant, key);
if (!cso) {
struct NineDevice9 *device = This->base.device;
struct nine_shader_info info;
@@ -144,6 +162,7 @@ NineVertexShader9_GetVariant( struct NineVertexShader9 *This,
info.const_b_base = NINE_CONST_B_BASE(device->max_vs_const_f) / 16;
info.byte_code = This->byte_code.tokens;
info.sampler_mask_shadow = key & 0xf;
+ info.fog_enable = device->state.rs[D3DRS_FOGENABLE];
hr = nine_translate_shader(This->base.device, &info);
if (FAILED(hr))
@@ -151,6 +170,10 @@ NineVertexShader9_GetVariant( struct NineVertexShader9 *This,
nine_shader_variant_add(&This->variant, key, info.cso);
cso = info.cso;
}
+
+ This->last_key = key;
+ This->last_cso = cso;
+
return cso;
}
diff --git a/src/gallium/state_trackers/nine/vertexshader9.h b/src/gallium/state_trackers/nine/vertexshader9.h
index 66c602c..15c3f4f 100644
--- a/src/gallium/state_trackers/nine/vertexshader9.h
+++ b/src/gallium/state_trackers/nine/vertexshader9.h
@@ -25,6 +25,7 @@
#include "iunknown.h"
#include "nine_shader.h"
+#include "nine_state.h"
struct NineVertexShader9
{
@@ -43,7 +44,6 @@ struct NineVertexShader9
} byte_code;
uint8_t sampler_mask;
- uint8_t sampler_mask_shadow;
boolean position_t; /* if true, disable vport transform */
boolean point_size; /* if true, set rasterizer.point_size_per_vertex to 1 */
@@ -54,7 +54,13 @@ struct NineVertexShader9
const struct pipe_stream_output_info *so;
- uint64_t ff_key[2];
+ uint64_t ff_key[3];
+ void *ff_cso;
+
+ uint32_t last_key;
+ void *last_cso;
+
+ uint32_t next_key;
};
static inline struct NineVertexShader9 *
NineVertexShader9( void *data )
@@ -62,9 +68,29 @@ NineVertexShader9( void *data )
return (struct NineVertexShader9 *)data;
}
+/* Recompute the variant key of vertex shader `vs` from `state`.
+ *
+ * The low byte of the key holds the vertex-stage shadow-sampler bits,
+ * restricted to the samplers the shader uses (vs->sampler_mask).  When
+ * vs->byte_code.version is below 0x30 the D3DRS_FOGENABLE render state is
+ * OR-ed in, shifted left by 8.
+ *
+ * Returns non-zero when the computed key differs from vs->last_key; only in
+ * that case is vs->next_key updated.
+ */
+static inline BOOL
+NineVertexShader9_UpdateKey( struct NineVertexShader9 *vs,
+                             struct nine_state *state )
+{
+    /* Shadow bits belonging to the vertex shader sampler range. */
+    const uint8_t vs_shadow_bits =
+        (uint8_t)((state->samplers_shadow & NINE_VS_SAMPLERS_MASK) >> NINE_SAMPLER_VS(0));
+    uint32_t new_key = (uint8_t)(vs_shadow_bits & vs->sampler_mask);
+    BOOL key_changed;
+
+    if (vs->byte_code.version < 0x30)
+        new_key |= state->rs[D3DRS_FOGENABLE] << 8;
+
+    key_changed = vs->last_key != new_key;
+    if (key_changed)
+        vs->next_key = new_key;
+
+    return key_changed;
+}
+
void *
-NineVertexShader9_GetVariant( struct NineVertexShader9 *vs,
- uint32_t key );
+NineVertexShader9_GetVariant( struct NineVertexShader9 *vs );
/*** public ***/
diff --git a/src/gallium/state_trackers/nine/volume9.c b/src/gallium/state_trackers/nine/volume9.c
index 4dfc559..0b90056 100644
--- a/src/gallium/state_trackers/nine/volume9.c
+++ b/src/gallium/state_trackers/nine/volume9.c
@@ -23,6 +23,7 @@
#include "device9.h"
#include "volume9.h"
#include "basetexture9.h" /* for marking dirty */
+#include "volumetexture9.h"
#include "nine_helpers.h"
#include "nine_pipe.h"
#include "nine_dump.h"
@@ -43,7 +44,7 @@ NineVolume9_AllocateData( struct NineVolume9 *This )
DBG("(%p(This=%p),level=%u) Allocating 0x%x bytes of system memory.\n",
This->base.container, This, This->level, size);
- This->data = (uint8_t *)MALLOC(size);
+ This->data = (uint8_t *)align_malloc(size, 32);
if (!This->data)
return E_OUTOFMEMORY;
return D3D_OK;
@@ -182,47 +183,23 @@ NineVolume9_GetDesc( struct NineVolume9 *This,
return D3D_OK;
}
-static inline boolean
-NineVolume9_IsDirty(struct NineVolume9 *This)
-{
- return This->dirty_box[0].width != 0;
-}
-
inline void
NineVolume9_AddDirtyRegion( struct NineVolume9 *This,
const struct pipe_box *box )
{
- struct pipe_box cover_a, cover_b;
- float vol[2];
+ D3DBOX dirty_region;
+ struct NineVolumeTexture9 *tex = NineVolumeTexture9(This->base.container);
if (!box) {
- u_box_3d(0, 0, 0, This->desc.Width, This->desc.Height,
- This->desc.Depth, &This->dirty_box[0]);
- memset(&This->dirty_box[1], 0, sizeof(This->dirty_box[1]));
- return;
- }
- if (!This->dirty_box[0].width) {
- This->dirty_box[0] = *box;
- return;
- }
-
- u_box_union_3d(&cover_a, &This->dirty_box[0], box);
- vol[0] = u_box_volume_3d(&cover_a);
-
- if (This->dirty_box[1].width == 0) {
- vol[1] = u_box_volume_3d(&This->dirty_box[0]);
- if (vol[0] > (vol[1] * 1.5f))
- This->dirty_box[1] = *box;
- else
- This->dirty_box[0] = cover_a;
+ NineVolumeTexture9_AddDirtyBox(tex, NULL);
} else {
- u_box_union_3d(&cover_b, &This->dirty_box[1], box);
- vol[1] = u_box_volume_3d(&cover_b);
-
- if (vol[0] > vol[1])
- This->dirty_box[1] = cover_b;
- else
- This->dirty_box[0] = cover_a;
+ dirty_region.Left = box->x << This->level_actual;
+ dirty_region.Top = box->y << This->level_actual;
+ dirty_region.Front = box->z << This->level_actual;
+ dirty_region.Right = dirty_region.Left + (box->width << This->level_actual);
+ dirty_region.Bottom = dirty_region.Top + (box->height << This->level_actual);
+ dirty_region.Back = dirty_region.Front + (box->depth << This->level_actual);
+ NineVolumeTexture9_AddDirtyBox(tex, &dirty_region);
}
}
@@ -254,21 +231,26 @@ NineVolume9_LockBox( struct NineVolume9 *This,
pBox ? pBox->Front : 0, pBox ? pBox->Back : 0,
nine_D3DLOCK_to_str(Flags));
+ /* check if it's already locked */
+ user_assert(This->lock_count == 0, D3DERR_INVALIDCALL);
+
+ /* set pBits to NULL after lock_count check */
+ user_assert(pLockedVolume, E_POINTER);
+ pLockedVolume->pBits = NULL;
+
user_assert(This->desc.Pool != D3DPOOL_DEFAULT ||
(This->desc.Usage & D3DUSAGE_DYNAMIC), D3DERR_INVALIDCALL);
user_assert(!((Flags & D3DLOCK_DISCARD) && (Flags & D3DLOCK_READONLY)),
D3DERR_INVALIDCALL);
- user_assert(This->lock_count == 0, D3DERR_INVALIDCALL);
- user_assert(pLockedVolume, E_POINTER);
-
- if (pBox && This->desc.Pool == D3DPOOL_DEFAULT &&
- util_format_is_compressed(This->info.format)) {
+ if (pBox && compressed_format (This->desc.Format)) { /* For volume all pools are checked */
const unsigned w = util_format_get_blockwidth(This->info.format);
const unsigned h = util_format_get_blockheight(This->info.format);
- user_assert(!(pBox->Left % w) && !(pBox->Right % w) &&
- !(pBox->Top % h) && !(pBox->Bottom % h),
+ user_assert((pBox->Left == 0 && pBox->Right == This->desc.Width &&
+ pBox->Top == 0 && pBox->Bottom == This->desc.Height) ||
+ (!(pBox->Left % w) && !(pBox->Right % w) &&
+ !(pBox->Top % h) && !(pBox->Bottom % h)),
D3DERR_INVALIDCALL);
}
@@ -312,8 +294,7 @@ NineVolume9_LockBox( struct NineVolume9 *This,
if (!(Flags & (D3DLOCK_NO_DIRTY_UPDATE | D3DLOCK_READONLY))) {
NineVolume9_MarkContainerDirty(This);
- if (This->desc.Pool == D3DPOOL_MANAGED)
- NineVolume9_AddDirtyRegion(This, &box);
+ NineVolume9_AddDirtyRegion(This, &box);
}
++This->lock_count;
@@ -333,42 +314,31 @@ NineVolume9_UnlockBox( struct NineVolume9 *This )
return D3D_OK;
}
-
+/* When this function is called, we have already checked
+ * that the copy regions fit the volumes. */
HRESULT
-NineVolume9_CopyVolume( struct NineVolume9 *This,
- struct NineVolume9 *From,
- unsigned dstx, unsigned dsty, unsigned dstz,
- struct pipe_box *pSrcBox )
+NineVolume9_CopyMemToDefault( struct NineVolume9 *This,
+ struct NineVolume9 *From,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_box *pSrcBox )
{
struct pipe_context *pipe = This->pipe;
struct pipe_resource *r_dst = This->resource;
- struct pipe_resource *r_src = From->resource;
- struct pipe_transfer *transfer;
struct pipe_box src_box;
struct pipe_box dst_box;
- uint8_t *p_dst;
const uint8_t *p_src;
DBG("This=%p From=%p dstx=%u dsty=%u dstz=%u pSrcBox=%p\n",
This, From, dstx, dsty, dstz, pSrcBox);
- assert(This->desc.Pool != D3DPOOL_MANAGED &&
- From->desc.Pool != D3DPOOL_MANAGED);
- user_assert(This->desc.Format == From->desc.Format, D3DERR_INVALIDCALL);
+ assert(This->desc.Pool == D3DPOOL_DEFAULT &&
+ From->desc.Pool == D3DPOOL_SYSTEMMEM);
dst_box.x = dstx;
dst_box.y = dsty;
dst_box.z = dstz;
if (pSrcBox) {
- /* make sure it doesn't range outside the source volume */
- user_assert(pSrcBox->x >= 0 &&
- (pSrcBox->width - pSrcBox->x) <= From->desc.Width &&
- pSrcBox->y >= 0 &&
- (pSrcBox->height - pSrcBox->y) <= From->desc.Height &&
- pSrcBox->z >= 0 &&
- (pSrcBox->depth - pSrcBox->z) <= From->desc.Depth,
- D3DERR_INVALIDCALL);
src_box = *pSrcBox;
} else {
src_box.x = 0;
@@ -378,101 +348,54 @@ NineVolume9_CopyVolume( struct NineVolume9 *This,
src_box.height = From->desc.Height;
src_box.depth = From->desc.Depth;
}
- /* limits */
- dst_box.width = This->desc.Width - dst_box.x;
- dst_box.height = This->desc.Height - dst_box.y;
- dst_box.depth = This->desc.Depth - dst_box.z;
-
- user_assert(src_box.width <= dst_box.width &&
- src_box.height <= dst_box.height &&
- src_box.depth <= dst_box.depth, D3DERR_INVALIDCALL);
dst_box.width = src_box.width;
dst_box.height = src_box.height;
dst_box.depth = src_box.depth;
- if (r_dst && r_src) {
- pipe->resource_copy_region(pipe,
- r_dst, This->level,
- dst_box.x, dst_box.y, dst_box.z,
- r_src, From->level,
- &src_box);
- } else
- if (r_dst) {
- p_src = NineVolume9_GetSystemMemPointer(From,
- src_box.x, src_box.y, src_box.z);
-
- pipe->transfer_inline_write(pipe, r_dst, This->level,
- 0, /* WRITE|DISCARD are implicit */
- &dst_box, p_src,
- From->stride, From->layer_stride);
- } else
- if (r_src) {
- p_dst = NineVolume9_GetSystemMemPointer(This, 0, 0, 0);
- p_src = pipe->transfer_map(pipe, r_src, From->level,
- PIPE_TRANSFER_READ,
- &src_box, &transfer);
- if (!p_src)
- return D3DERR_DRIVERINTERNALERROR;
-
- util_copy_box(p_dst, This->info.format,
- This->stride, This->layer_stride,
- dst_box.x, dst_box.y, dst_box.z,
- dst_box.width, dst_box.height, dst_box.depth,
- p_src,
- transfer->stride, transfer->layer_stride,
- src_box.x, src_box.y, src_box.z);
+ p_src = NineVolume9_GetSystemMemPointer(From,
+ src_box.x, src_box.y, src_box.z);
- pipe->transfer_unmap(pipe, transfer);
- } else {
- p_dst = NineVolume9_GetSystemMemPointer(This, 0, 0, 0);
- p_src = NineVolume9_GetSystemMemPointer(From, 0, 0, 0);
-
- util_copy_box(p_dst, This->info.format,
- This->stride, This->layer_stride,
- dst_box.x, dst_box.y, dst_box.z,
- dst_box.width, dst_box.height, dst_box.depth,
- p_src,
- From->stride, From->layer_stride,
- src_box.x, src_box.y, src_box.z);
- }
+ pipe->transfer_inline_write(pipe, r_dst, This->level,
+ 0, /* WRITE|DISCARD are implicit */
+ &dst_box, p_src,
+ From->stride, From->layer_stride);
- if (This->desc.Pool == D3DPOOL_DEFAULT)
- NineVolume9_MarkContainerDirty(This);
- if (!r_dst && This->resource)
- NineVolume9_AddDirtyRegion(This, &dst_box);
+ NineVolume9_MarkContainerDirty(This);
return D3D_OK;
}
HRESULT
-NineVolume9_UploadSelf( struct NineVolume9 *This )
+NineVolume9_UploadSelf( struct NineVolume9 *This,
+ const struct pipe_box *damaged )
{
struct pipe_context *pipe = This->pipe;
struct pipe_resource *res = This->resource;
+ struct pipe_box box;
uint8_t *ptr;
- unsigned i;
- DBG("This=%p dirty=%i data=%p res=%p\n", This, NineVolume9_IsDirty(This),
+ DBG("This=%p damaged=%p data=%p res=%p\n", This, damaged,
This->data, res);
assert(This->desc.Pool == D3DPOOL_MANAGED);
-
- if (!NineVolume9_IsDirty(This))
- return D3D_OK;
assert(res);
- for (i = 0; i < Elements(This->dirty_box); ++i) {
- const struct pipe_box *box = &This->dirty_box[i];
- if (box->width == 0)
- break;
- ptr = NineVolume9_GetSystemMemPointer(This, box->x, box->y, box->z);
-
- pipe->transfer_inline_write(pipe, res, This->level,
- 0,
- box, ptr, This->stride, This->layer_stride);
+ if (damaged) {
+ box = *damaged;
+ } else {
+ box.x = 0;
+ box.y = 0;
+ box.z = 0;
+ box.width = This->desc.Width;
+ box.height = This->desc.Height;
+ box.depth = This->desc.Depth;
}
- NineVolume9_ClearDirtyRegion(This);
+
+ ptr = NineVolume9_GetSystemMemPointer(This, box.x, box.y, box.z);
+
+ pipe->transfer_inline_write(pipe, res, This->level, 0, &box,
+ ptr, This->stride, This->layer_stride);
return D3D_OK;
}
diff --git a/src/gallium/state_trackers/nine/volume9.h b/src/gallium/state_trackers/nine/volume9.h
index fae2431..26ca8a3 100644
--- a/src/gallium/state_trackers/nine/volume9.h
+++ b/src/gallium/state_trackers/nine/volume9.h
@@ -50,8 +50,6 @@ struct NineVolume9
struct pipe_transfer *transfer;
unsigned lock_count;
- struct pipe_box dirty_box[2];
-
struct pipe_context *pipe;
/* for [GS]etPrivateData/FreePrivateData */
@@ -85,20 +83,15 @@ void
NineVolume9_AddDirtyRegion( struct NineVolume9 *This,
const struct pipe_box *box );
-static inline void
-NineVolume9_ClearDirtyRegion( struct NineVolume9 *This )
-{
- memset(&This->dirty_box, 0, sizeof(This->dirty_box));
-}
-
HRESULT
-NineVolume9_CopyVolume( struct NineVolume9 *This,
- struct NineVolume9 *From,
- unsigned dstx, unsigned dsty, unsigned dstz,
- struct pipe_box *pSrcBox );
+NineVolume9_CopyMemToDefault( struct NineVolume9 *This,
+ struct NineVolume9 *From,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_box *pSrcBox );
HRESULT
-NineVolume9_UploadSelf( struct NineVolume9 *This );
+NineVolume9_UploadSelf( struct NineVolume9 *This,
+ const struct pipe_box *damaged );
/*** Direct3D public ***/
diff --git a/src/gallium/state_trackers/nine/volumetexture9.c b/src/gallium/state_trackers/nine/volumetexture9.c
index 1193e12..e5b2b53 100644
--- a/src/gallium/state_trackers/nine/volumetexture9.c
+++ b/src/gallium/state_trackers/nine/volumetexture9.c
@@ -64,6 +64,13 @@ NineVolumeTexture9_ctor( struct NineVolumeTexture9 *This,
if (Format == D3DFMT_ATI1 || Format == D3DFMT_ATI2)
return D3DERR_INVALIDCALL;
+ if (compressed_format(Format)) {
+ const unsigned w = util_format_get_blockwidth(pf);
+ const unsigned h = util_format_get_blockheight(pf);
+ /* Compressed formats are not compressed on depth component */
+ user_assert(!(Width % w) && !(Height % h), D3DERR_INVALIDCALL);
+ }
+
info->screen = pParams->device->screen;
info->target = PIPE_TEXTURE_3D;
info->format = pf;
@@ -116,6 +123,9 @@ NineVolumeTexture9_ctor( struct NineVolumeTexture9 *This,
return hr;
}
+ /* Textures start initially dirty */
+ NineVolumeTexture9_AddDirtyBox(This, NULL);
+
return D3D_OK;
}
@@ -193,12 +203,14 @@ NineVolumeTexture9_AddDirtyBox( struct NineVolumeTexture9 *This,
{
DBG("This=%p pDirtybox=%p\n", This, pDirtyBox);
- if (This->base.base.pool != D3DPOOL_MANAGED) {
+ if (This->base.base.pool == D3DPOOL_DEFAULT) {
return D3D_OK;
}
- This->base.managed.dirty = TRUE;
- BASETEX_REGISTER_UPDATE(&This->base);
+ if (This->base.base.pool == D3DPOOL_MANAGED) {
+ This->base.managed.dirty = TRUE;
+ BASETEX_REGISTER_UPDATE(&This->base);
+ }
if (!pDirtyBox) {
This->dirty_box.x = 0;
diff --git a/src/gallium/targets/d3dadapter9/Makefile.am b/src/gallium/targets/d3dadapter9/Makefile.am
index fe5b0b1..e26ca33 100644
--- a/src/gallium/targets/d3dadapter9/Makefile.am
+++ b/src/gallium/targets/d3dadapter9/Makefile.am
@@ -54,6 +54,7 @@ pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = d3d.pc
d3dadapter9_la_SOURCES = \
+ description.c \
getproc.c \
drm.c
diff --git a/src/gallium/targets/d3dadapter9/description.c b/src/gallium/targets/d3dadapter9/description.c
new file mode 100644
index 0000000..c0a8678
--- /dev/null
+++ b/src/gallium/targets/d3dadapter9/description.c
@@ -0,0 +1,324 @@
+/*
+ * Copyright 2015 Patrick Rudolph <siro@das-labor.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include <string.h>
+#include "adapter9.h"
+
+#define DBG_CHANNEL DBG_ADAPTER
+
+/* prototypes */
+void
+d3d_match_vendor_id( D3DADAPTER_IDENTIFIER9* drvid,
+ unsigned fallback_ven,
+ unsigned fallback_dev,
+ const char* fallback_name );
+void d3d_fill_driver_version(D3DADAPTER_IDENTIFIER9* drvid);
+void d3d_fill_cardname(D3DADAPTER_IDENTIFIER9* drvid);
+
+enum d3d_vendor_id
+{
+ HW_VENDOR_SOFTWARE = 0x0000,
+ HW_VENDOR_AMD = 0x1002,
+ HW_VENDOR_NVIDIA = 0x10de,
+ HW_VENDOR_VMWARE = 0x15ad,
+ HW_VENDOR_INTEL = 0x8086,
+};
+
+struct card_lookup_table {
+ const char *mesaname;
+ const char *d3d9name;
+}
+cards_amd[] = {
+ {"HAWAII", "AMD Radeon R9 290"},
+ {"KAVERI", "AMD Radeon(TM) R7 Graphics"},
+ {"KABINI", "AMD Radeon HD 8400 / R3 Series"},
+ {"BONAIRE", "AMD Radeon HD 8770"},
+ {"OLAND", "AMD Radeon HD 8670"},
+ {"HAINAN", "AMD Radeon HD 8600M Series"},
+ {"TAHITI", "AMD Radeon HD 7900 Series"},
+ {"PITCAIRN", "AMD Radeon HD 7800 Series"},
+ {"CAPE VERDE", "AMD Radeon HD 7700 Series"},
+ {"ARUBA", "AMD Radeon HD 7660D"},
+ {"CAYMAN", "AMD Radeon HD 6900 Series"},
+ {"BARTS", "AMD Radeon HD 6800 Series"},
+ {"TURKS", "AMD Radeon HD 6600 Series"},
+ {"SUMO2", "AMD Radeon HD 6410D"},
+ {"SUMO", "AMD Radeon HD 6550D"},
+ {"CAICOS", "AMD Radeon HD 6400 Series"},
+ {"PALM", "AMD Radeon HD 6300 series Graphics"},
+ {"HEMLOCK", "ATI Radeon HD 5900 Series"},
+ {"CYPRESS", "ATI Radeon HD 5800 Series"},
+ {"JUNIPER", "ATI Radeon HD 5700 Series"},
+ {"REDWOOD", "ATI Radeon HD 5600 Series"},
+ {"CEDAR", "ATI Radeon HD 5500 Series"},
+ {"R700", "ATI Radeon HD 4800 Series"},
+ {"RV790", "ATI Radeon HD 4800 Series"},
+ {"RV770", "ATI Radeon HD 4800 Series"},
+ {"RV740", "ATI Radeon HD 4700 Series"},
+ {"RV730", "ATI Radeon HD 4600 Series"},
+ {"RV710", "ATI Radeon HD 4350"},
+ {"RS880", "ATI Mobility Radeon HD 4200"},
+ {"RS780", "ATI Radeon HD 3200 Graphics"},
+ {"R680", "ATI Radeon HD 2900 XT"},
+ {"R600", "ATI Radeon HD 2900 XT"},
+ {"RV670", "ATI Radeon HD 2900 XT"},
+ {"RV635", "ATI Mobility Radeon HD 2600"},
+ {"RV630", "ATI Mobility Radeon HD 2600"},
+ {"RV620", "ATI Mobility Radeon HD 2350"},
+ {"RV610", "ATI Mobility Radeon HD 2350"},
+ {"R580", "ATI Radeon X1600 Series"},
+ {"R520", "ATI Radeon X1600 Series"},
+ {"RV570", "ATI Radeon X1600 Series"},
+ {"RV560", "ATI Radeon X1600 Series"},
+ {"RV535", "ATI Radeon X1600 Series"},
+ {"RV530", "ATI Radeon X1600 Series"},
+ {"RV516", "ATI Radeon X700 SE"},
+ {"RV515", "ATI Radeon X700 SE"},
+ {"R481", "ATI Radeon X700 SE"},
+ {"R480", "ATI Radeon X700 SE"},
+ {"R430", "ATI Radeon X700 SE"},
+ {"R423", "ATI Radeon X700 SE"},
+ {"R420", "ATI Radeon X700 SE"},
+ {"R410", "ATI Radeon X700 SE"},
+ {"RV410", "ATI Radeon X700 SE"},
+ {"RS740", "ATI RADEON XPRESS 200M Series"},
+ {"RS690", "ATI RADEON XPRESS 200M Series"},
+ {"RS600", "ATI RADEON XPRESS 200M Series"},
+ {"RS485", "ATI RADEON XPRESS 200M Series"},
+ {"RS482", "ATI RADEON XPRESS 200M Series"},
+ {"RS480", "ATI RADEON XPRESS 200M Series"},
+ {"RS400", "ATI RADEON XPRESS 200M Series"},
+ {"R360", "ATI Radeon 9500"},
+ {"R350", "ATI Radeon 9500"},
+ {"R300", "ATI Radeon 9500"},
+ {"RV370", "ATI Radeon 9500"},
+ {"RV360", "ATI Radeon 9500"},
+ {"RV351", "ATI Radeon 9500"},
+ {"RV350", "ATI Radeon 9500"},
+},
+cards_nvidia[] =
+{
+ {"NV124", "NVIDIA GeForce GTX 970"},
+ {"NV117", "NVIDIA GeForce GTX 750"},
+ {"NVF1", "NVIDIA GeForce GTX 780 Ti"},
+ {"NVF0", "NVIDIA GeForce GTX 780"},
+ {"NVE6", "NVIDIA GeForce GTX 770M"},
+ {"NVE4", "NVIDIA GeForce GTX 680"},
+ {"NVD9", "NVIDIA GeForce GT 520"},
+ {"NVCF", "NVIDIA GeForce GTX 550 Ti"},
+ {"NVCE", "NVIDIA GeForce GTX 560"},
+ {"NVC8", "NVIDIA GeForce GTX 570"},
+ {"NVC4", "NVIDIA GeForce GTX 460"},
+ {"NVC3", "NVIDIA GeForce GT 440"},
+ {"NVC1", "NVIDIA GeForce GT 420"},
+ {"NVC0", "NVIDIA GeForce GTX 480"},
+ {"NVAF", "NVIDIA GeForce GT 320M"},
+ {"NVAC", "NVIDIA GeForce 8200"},
+ {"NVAA", "NVIDIA GeForce 8200"},
+ {"NVA8", "NVIDIA GeForce 210"},
+ {"NVA5", "NVIDIA GeForce GT 220"},
+ {"NVA3", "NVIDIA GeForce GT 240"},
+ {"NVA0", "NVIDIA GeForce GTX 280"},
+ {"NV98", "NVIDIA GeForce 9200"},
+ {"NV96", "NVIDIA GeForce 9400 GT"},
+ {"NV94", "NVIDIA GeForce 9600 GT"},
+ {"NV92", "NVIDIA GeForce 9800 GT"},
+ {"NV86", "NVIDIA GeForce 8500 GT"},
+ {"NV84", "NVIDIA GeForce 8600 GT"},
+ {"NV50", "NVIDIA GeForce 8800 GTX"},
+ {"NV68", "NVIDIA GeForce 6200"},
+ {"NV67", "NVIDIA GeForce 6200"},
+ {"NV63", "NVIDIA GeForce 6200"},
+ {"NV4E", "NVIDIA GeForce 6200"},
+ {"NV4C", "NVIDIA GeForce 6200"},
+ {"NV4B", "NVIDIA GeForce 7600 GT"},
+ {"NV4A", "NVIDIA GeForce 6200"},
+ {"NV49", "NVIDIA GeForce 7800 GT"},
+ {"NV47", "NVIDIA GeForce 7800 GT"},
+ {"NV46", "NVIDIA GeForce Go 7400",},
+ {"NV45", "NVIDIA GeForce 6800"},
+ {"NV44", "NVIDIA GeForce 6200"},
+ {"NV43", "NVIDIA GeForce 6600 GT"},
+ {"NV42", "NVIDIA GeForce 6800"},
+ {"NV41", "NVIDIA GeForce 6800"},
+ {"NV40", "NVIDIA GeForce 6800"},
+ {"NV38", "NVIDIA GeForce FX 5800"},
+ {"NV36", "NVIDIA GeForce FX 5800"},
+ {"NV35", "NVIDIA GeForce FX 5800"},
+ {"NV34", "NVIDIA GeForce FX 5200"},
+ {"NV31", "NVIDIA GeForce FX 5600"},
+ {"NV30", "NVIDIA GeForce FX 5800"},
+ {"nv28", "NVIDIA GeForce4 Ti 4200"},
+ {"nv25", "NVIDIA GeForce4 Ti 4200"},
+ {"nv20", "NVIDIA GeForce3"},
+ {"nv1F", "NVIDIA GeForce4 MX 460"},
+ {"nv1A", "NVIDIA GeForce2 GTS/GeForce2 Pro"},
+ {"nv18", "NVIDIA GeForce4 MX 460"},
+ {"nv17", "NVIDIA GeForce4 MX 460"},
+ {"nv16", "NVIDIA GeForce2 GTS/GeForce2 Pro"},
+ {"nv15", "NVIDIA GeForce2 GTS/GeForce2 Pro"},
+ {"nv11", "NVIDIA GeForce2 MX/MX 400"},
+ {"nv10", "NVIDIA GeForce 256"},
+},
+cards_vmware[] =
+{
+ {"SVGA3D", "VMware SVGA 3D (Microsoft Corporation - WDDM)"},
+},
+cards_intel[] =
+{
+ {"Haswell Mobile", "Intel(R) Haswell Mobile"},
+ {"Ivybridge Server", "Intel(R) Ivybridge Server"},
+ {"Ivybridge Mobile", "Intel(R) Ivybridge Mobile"},
+ {"Ivybridge Desktop", "Intel(R) Ivybridge Desktop"},
+ {"Sandybridge Server", "Intel(R) Sandybridge Server"},
+ {"Sandybridge Mobile", "Intel(R) Sandybridge Mobile"},
+ {"Sandybridge Desktop", "Intel(R) Sandybridge Desktop"},
+ {"Ironlake Mobile", "Intel(R) Ironlake Mobile"},
+ {"Ironlake Desktop", "Intel(R) Ironlake Desktop"},
+ {"B43", "Intel(R) B43"},
+ {"G41", "Intel(R) G41"},
+ {"G45", "Intel(R) G45/G43"},
+ {"Q45", "Intel(R) Q45/Q43"},
+ {"Integrated Graphics Device", "Intel(R) Integrated Graphics Device"},
+ {"GM45", "Mobile Intel(R) GM45 Express Chipset Family"},
+ {"965GME", "Intel(R) 965GME"},
+ {"965GM", "Mobile Intel(R) 965 Express Chipset Family"},
+ {"946GZ", "Intel(R) 946GZ"},
+ {"965G", "Intel(R) 965G"},
+ {"965Q", "Intel(R) 965Q"},
+ {"Pineview M", "Intel(R) IGD"},
+ {"Pineview G", "Intel(R) IGD"},
+ {"IGD", "Intel(R) IGD"},
+ {"Q33", "Intel(R) Q33"},
+ {"G33", "Intel(R) G33"},
+ {"Q35", "Intel(R) Q35"},
+ {"945GME", "Intel(R) 945GME"},
+ {"945GM", "Mobile Intel(R) 945GM Express Chipset Family"},
+ {"945G", "Intel(R) 945G"},
+ {"915GM", "Mobile Intel(R) 915GM/GMS,910GML Express Chipset Family"},
+ {"E7221G", "Intel(R) E7221G"},
+ {"915G", "Intel(R) 82915G/GV/910GL Express Chipset Family"},
+ {"865G", "Intel(R) 82865G Graphics Controller"},
+ {"845G", "Intel(R) 845G"},
+ {"855GM", "Intel(R) 82852/82855 GM/GME Graphics Controller"},
+ {"830M", "Intel(R) 82830M Graphics Controller"},
+};
+
+/* Override VendorId, DeviceId and Description for unknown vendors.
+ *
+ * If drvid->VendorId is one of the vendors handled in this file (Intel,
+ * VMware, AMD, NVIDIA) the identifier is left untouched.  Otherwise the
+ * vendor and device ids are replaced by fallback_ven / fallback_dev and the
+ * description by fallback_name (truncated if it does not fit).
+ */
+void
+d3d_match_vendor_id( D3DADAPTER_IDENTIFIER9* drvid,
+                     unsigned fallback_ven,
+                     unsigned fallback_dev,
+                     const char* fallback_name )
+{
+    if (drvid->VendorId == HW_VENDOR_INTEL ||
+        drvid->VendorId == HW_VENDOR_VMWARE ||
+        drvid->VendorId == HW_VENDOR_AMD ||
+        drvid->VendorId == HW_VENDOR_NVIDIA)
+        return;
+
+    /* Print both ids as zero-padded 4-digit hex; "0x4%x" would emit a
+     * literal '4' in front of the unpadded value. */
+    DBG("unknown vendor 0x%04x, emulating 0x%04x\n", drvid->VendorId, fallback_ven);
+    drvid->VendorId = fallback_ven;
+    drvid->DeviceId = fallback_dev;
+
+    /* strncpy() does not NUL-terminate on truncation, so copy at most
+     * size - 1 bytes and terminate explicitly. */
+    strncpy(drvid->Description, fallback_name, sizeof(drvid->Description) - 1);
+    drvid->Description[sizeof(drvid->Description) - 1] = '\0';
+}
+
+/* Fill in the driver file name and driver version for the adapter.
+ *
+ * Each vendor handled here gets a fixed driver DLL name and a fixed
+ * high/low version pair (presumably chosen to look like that vendor's
+ * Windows driver -- confirm).  Any other VendorId leaves drvid unchanged.
+ */
+void d3d_fill_driver_version(D3DADAPTER_IDENTIFIER9* drvid) {
+    if (drvid->VendorId == HW_VENDOR_INTEL) {
+        strncpy(drvid->Driver, "igdumd32.dll", sizeof(drvid->Driver));
+        drvid->DriverVersionHighPart = 0x0006000F;
+        drvid->DriverVersionLowPart = 0x000A0682;
+    } else if (drvid->VendorId == HW_VENDOR_VMWARE) {
+        strncpy(drvid->Driver, "vm3dum.dll", sizeof(drvid->Driver));
+        drvid->DriverVersionHighPart = 0x0006000E;
+        drvid->DriverVersionLowPart = 0x0001046E;
+    } else if (drvid->VendorId == HW_VENDOR_AMD) {
+        strncpy(drvid->Driver, "atiumdag.dll", sizeof(drvid->Driver));
+        drvid->DriverVersionHighPart = 0x00060011;
+        drvid->DriverVersionLowPart = 0x000A0500;
+    } else if (drvid->VendorId == HW_VENDOR_NVIDIA) {
+        strncpy(drvid->Driver, "nvd3dum.dll", sizeof(drvid->Driver));
+        drvid->DriverVersionHighPart = 0x00060012;
+        drvid->DriverVersionLowPart = 0x000D0FD4;
+    }
+    /* Unknown vendors: nothing to fill in. */
+}
+
+/* Replace drvid->Description with a name taken from a lookup table.
+ *
+ * The first entry whose mesaname occurs as a substring of the current
+ * description wins.  If no entry matches, entry 0 of the table is used as
+ * the fall-back.  `cards` must contain at least one entry.
+ */
+static void
+fill_cardname_from_table( D3DADAPTER_IDENTIFIER9* drvid,
+                          const struct card_lookup_table *cards,
+                          unsigned num_cards )
+{
+    const char *name = NULL;
+    unsigned i;
+
+    for (i = 0; i < num_cards; i++) {
+        if (strstr(drvid->Description, cards[i].mesaname)) {
+            name = cards[i].d3d9name;
+            break;
+        }
+    }
+
+    if (!name) {
+        /* use a fall-back if nothing matches; log the string that was
+         * actually searched, before it gets overwritten below */
+        DBG("Unknown card name %s!\n", drvid->Description);
+        name = cards[0].d3d9name;
+    }
+
+    /* strncpy() does not NUL-terminate on truncation, so copy at most
+     * size - 1 bytes and terminate explicitly. */
+    strncpy(drvid->Description, name, sizeof(drvid->Description) - 1);
+    drvid->Description[sizeof(drvid->Description) - 1] = '\0';
+}
+
+/* Try to match the device name and override it with a Windows-like device
+ * name.
+ *
+ * The table is selected by drvid->VendorId; for vendors without a table the
+ * description is left unchanged.
+ */
+void d3d_fill_cardname(D3DADAPTER_IDENTIFIER9* drvid) {
+    switch (drvid->VendorId) {
+    case HW_VENDOR_INTEL:
+        fill_cardname_from_table(drvid, cards_intel,
+                                 sizeof(cards_intel) / sizeof(cards_intel[0]));
+        break;
+    case HW_VENDOR_VMWARE:
+        fill_cardname_from_table(drvid, cards_vmware,
+                                 sizeof(cards_vmware) / sizeof(cards_vmware[0]));
+        break;
+    case HW_VENDOR_AMD:
+        fill_cardname_from_table(drvid, cards_amd,
+                                 sizeof(cards_amd) / sizeof(cards_amd[0]));
+        break;
+    case HW_VENDOR_NVIDIA:
+        fill_cardname_from_table(drvid, cards_nvidia,
+                                 sizeof(cards_nvidia) / sizeof(cards_nvidia[0]));
+        break;
+    default:
+        break;
+    }
+}
diff --git a/src/gallium/targets/d3dadapter9/drm.c b/src/gallium/targets/d3dadapter9/drm.c
index 680f516..fabc820 100644
--- a/src/gallium/targets/d3dadapter9/drm.c
+++ b/src/gallium/targets/d3dadapter9/drm.c
@@ -46,12 +46,6 @@
#define DBG_CHANNEL DBG_ADAPTER
-#define VERSION_DWORD(hi, lo) \
- ((DWORD)( \
- ((DWORD)((hi) & 0xFFFF) << 16) | \
- (DWORD)((lo) & 0xFFFF) \
- ))
-
const char __driConfigOptionsNine[] =
DRI_CONF_BEGIN
DRI_CONF_SECTION_PERFORMANCE
@@ -63,12 +57,21 @@ DRI_CONF_BEGIN
DRI_CONF_SECTION_END
DRI_CONF_END;
-/* Regarding os versions, we should not define our own as that would simply be
- * weird. Defaulting to Win2k/XP seems sane considering the origin of D3D9. The
- * driver also defaults to being a generic D3D9 driver, which of course only
- * matters if you're actually using the DDI. */
-#define VERSION_HIGH VERSION_DWORD(0x0006, 0x000E) /* winxp, d3d9 */
-#define VERSION_LOW VERSION_DWORD(0x0000, 0x0001) /* version, build */
+/* define fallback value here: NVIDIA GeForce GTX 970 */
+#define FALLBACK_NAME "NV124"
+#define FALLBACK_DEVID 0x13C2
+#define FALLBACK_VENID 0x10de
+
+/* prototypes */
+void
+d3d_match_vendor_id( D3DADAPTER_IDENTIFIER9* drvid,
+ unsigned fallback_ven,
+ unsigned fallback_dev,
+ const char* fallback_name );
+
+void d3d_fill_driver_version(D3DADAPTER_IDENTIFIER9* drvid);
+
+void d3d_fill_cardname(D3DADAPTER_IDENTIFIER9* drvid);
struct d3dadapter9drm_context
{
@@ -152,9 +155,9 @@ get_bus_info( int fd,
*subsysid = 0;
*revision = 0;
} else {
- DBG("Unable to detect card. Fake GTX 680.\n");
- *vendorid = 0x10de; /* NV GTX 680 */
- *deviceid = 0x1180;
+ DBG("Unable to detect card. Faking %s\n", FALLBACK_NAME);
+ *vendorid = FALLBACK_VENID;
+ *deviceid = FALLBACK_DEVID;
*subsysid = 0;
*revision = 0;
}
@@ -169,33 +172,23 @@ read_descriptor( struct d3dadapter9_context *ctx,
memset(drvid, 0, sizeof(*drvid));
get_bus_info(fd, &drvid->VendorId, &drvid->DeviceId,
&drvid->SubSysId, &drvid->Revision);
+ snprintf(drvid->DeviceName, sizeof(drvid->DeviceName),
+ "Gallium 0.4 with %s", ctx->hal->get_vendor(ctx->hal));
+ strncpy(drvid->Description, ctx->hal->get_name(ctx->hal),
+ sizeof(drvid->Description));
+
+ /* choose fall-back vendor if necessary to allow
+ * the following functions to return sane results */
+ d3d_match_vendor_id(drvid, FALLBACK_VENID, FALLBACK_DEVID, FALLBACK_NAME);
+ /* fill in driver name and version info */
+ d3d_fill_driver_version(drvid);
+ /* override Description field with Windows like names */
+ d3d_fill_cardname(drvid);
+
+ /* this driver isn't WHQL certified */
+ drvid->WHQLLevel = 0;
- strncpy(drvid->Driver, "libd3dadapter9.so", sizeof(drvid->Driver));
- strncpy(drvid->DeviceName, ctx->hal->get_name(ctx->hal), 32);
- snprintf(drvid->Description, sizeof(drvid->Description),
- "Gallium 0.4 with %s", ctx->hal->get_vendor(ctx->hal));
-
- drvid->DriverVersionLowPart = VERSION_LOW;
- drvid->DriverVersionHighPart = VERSION_HIGH;
-
- /* To make a pseudo-real GUID we use the PCI bus data and some string */
- drvid->DeviceIdentifier.Data1 = drvid->VendorId;
- drvid->DeviceIdentifier.Data2 = drvid->DeviceId;
- drvid->DeviceIdentifier.Data3 = drvid->SubSysId;
- memcpy(drvid->DeviceIdentifier.Data4, "Gallium3D", 8);
-
- drvid->WHQLLevel = 1; /* This fakes WHQL validaion */
-
- /* XXX Fake NVIDIA binary driver on Windows.
- *
- * OS version: 4=95/98/NT4, 5=2000, 6=2000/XP, 7=Vista, 8=Win7
- */
- strncpy(drvid->Driver, "nvd3dum.dll", sizeof(drvid->Driver));
- strncpy(drvid->Description, "NVIDIA GeForce GTX 680", sizeof(drvid->Description));
- drvid->DriverVersionLowPart = VERSION_DWORD(12, 6658); /* minor, build */
- drvid->DriverVersionHighPart = VERSION_DWORD(6, 15); /* OS, major */
- drvid->SubSysId = 0;
- drvid->Revision = 0;
+ /* this value is fixed */
drvid->DeviceIdentifier.Data1 = 0xaeb2cdd4;
drvid->DeviceIdentifier.Data2 = 0x6e41;
drvid->DeviceIdentifier.Data3 = 0x43ea;
@@ -207,7 +200,6 @@ read_descriptor( struct d3dadapter9_context *ctx,
drvid->DeviceIdentifier.Data4[5] = 0x76;
drvid->DeviceIdentifier.Data4[6] = 0x07;
drvid->DeviceIdentifier.Data4[7] = 0x81;
- drvid->WHQLLevel = 0;
}
static HRESULT WINAPI
diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk
index 7168e1d..a33d7f8 100644
--- a/src/gallium/targets/dri/Android.mk
+++ b/src/gallium/targets/dri/Android.mk
@@ -77,8 +77,8 @@ gallium_DRIVERS += libmesa_pipe_r600
LOCAL_CFLAGS += -DGALLIUM_R600
endif
ifneq ($(filter radeonsi,$(MESA_GPU_DRIVERS)),)
-gallium_DRIVERS += libmesa_pipe_radeonsi
-LOCAL_SHARED_LIBRARIES += libLLVM
+gallium_DRIVERS += libmesa_pipe_radeonsi libmesa_winsys_amdgpu
+LOCAL_SHARED_LIBRARIES += libLLVM libdrm_amdgpu
LOCAL_CFLAGS += -DGALLIUM_RADEONSI
endif
gallium_DRIVERS += libmesa_winsys_radeon libmesa_pipe_radeon
diff --git a/src/gallium/winsys/amdgpu/drm/Android.mk b/src/gallium/winsys/amdgpu/drm/Android.mk
index 7d507aa..5773234 100644
--- a/src/gallium/winsys/amdgpu/drm/Android.mk
+++ b/src/gallium/winsys/amdgpu/drm/Android.mk
@@ -30,6 +30,16 @@ include $(CLEAR_VARS)
LOCAL_SRC_FILES := $(C_SOURCES)
+LOCAL_CFLAGS := \
+ $(AMDGPU_CFLAGS) \
+ -DBRAHMA_BUILD=1
+
+LOCAL_C_INCLUDES := \
+ $(LOCAL_PATH)/addrlib \
+ $(LOCAL_PATH)/addrlib/core \
+ $(LOCAL_PATH)/addrlib/inc/chip/r800 \
+ $(LOCAL_PATH)/addrlib/r800/chip
+
LOCAL_SHARED_LIBRARIES := libdrm libdrm_amdgpu
LOCAL_MODULE := libmesa_winsys_amdgpu
diff --git a/src/gallium/winsys/amdgpu/drm/Makefile.sources b/src/gallium/winsys/amdgpu/drm/Makefile.sources
index 6b33841..2363004 100644
--- a/src/gallium/winsys/amdgpu/drm/Makefile.sources
+++ b/src/gallium/winsys/amdgpu/drm/Makefile.sources
@@ -11,9 +11,7 @@ C_SOURCES := \
addrlib/core/addrobject.h \
addrlib/inc/chip/r800/si_gb_reg.h \
addrlib/inc/lnx_common_defs.h \
- addrlib/r800/chip/si_ci_merged_enum.h \
addrlib/r800/chip/si_ci_vi_merged_enum.h \
- addrlib/r800/chip/si_enum.h \
addrlib/r800/ciaddrlib.cpp \
addrlib/r800/ciaddrlib.h \
addrlib/r800/egbaddrlib.cpp \
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
index 0842259..12c6b62 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
@@ -77,8 +77,8 @@ struct amdgpu_cs {
int buffer_indices_hashlist[512];
- unsigned used_vram;
- unsigned used_gart;
+ uint64_t used_vram;
+ uint64_t used_gart;
unsigned max_dependencies;
};
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 7a267f9..f04a696 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -97,22 +97,17 @@ static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
{
int i;
- csc->buf = MALLOC(ws->ib_max_size);
- if (!csc->buf)
- return FALSE;
csc->fd = ws->fd;
csc->nrelocs = 512;
csc->relocs_bo = (struct radeon_bo**)
CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
if (!csc->relocs_bo) {
- FREE(csc->buf);
return FALSE;
}
csc->relocs = (struct drm_radeon_cs_reloc*)
CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
if (!csc->relocs) {
- FREE(csc->buf);
FREE(csc->relocs_bo);
return FALSE;
}
@@ -165,7 +160,6 @@ static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
radeon_cs_context_cleanup(csc);
FREE(csc->relocs_bo);
FREE(csc->relocs);
- FREE(csc->buf);
}
@@ -206,7 +200,7 @@ radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
cs->cst = &cs->csc2;
cs->base.buf = cs->csc->buf;
cs->base.ring_type = ring_type;
- cs->base.max_dw = ws->ib_max_size / 4;
+ cs->base.max_dw = ARRAY_SIZE(cs->csc->buf);
p_atomic_inc(&ws->num_cs);
return &cs->base;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
index ab15494..6ceb8e9 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
@@ -30,7 +30,7 @@
#include "radeon_drm_bo.h"
struct radeon_cs_context {
- uint32_t *buf;
+ uint32_t buf[16 * 1024];
int fd;
struct drm_radeon_cs cs;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index b70bbaa..f7784fb 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -395,20 +395,16 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
}
ws->info.r600_virtual_address = FALSE;
- ws->ib_max_size = 64 * 1024;
-
if (ws->info.drm_minor >= 13) {
+ uint32_t ib_vm_max_size;
+
ws->info.r600_virtual_address = TRUE;
if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL,
&ws->va_start))
ws->info.r600_virtual_address = FALSE;
-
- if (radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
- &ws->ib_max_size))
- ws->ib_max_size *= 4; /* the kernel returns the size in dwords */
- else
+ if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
+ &ib_vm_max_size))
ws->info.r600_virtual_address = FALSE;
-
radeon_get_drm_value(ws->fd, RADEON_INFO_VA_UNMAP_WORKING, NULL,
&ws->va_unmap_working);
}
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
index c1a8d6a..308b5bd 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
@@ -73,7 +73,6 @@ struct radeon_drm_winsys {
enum radeon_generation gen;
struct radeon_info info;
- uint32_t ib_max_size;
uint32_t va_start;
uint32_t va_unmap_working;
uint32_t accel_working2;
diff --git a/src/gallium/winsys/sw/kms-dri/Makefile.am b/src/gallium/winsys/sw/kms-dri/Makefile.am
index 7f26b1b..8162553 100644
--- a/src/gallium/winsys/sw/kms-dri/Makefile.am
+++ b/src/gallium/winsys/sw/kms-dri/Makefile.am
@@ -31,5 +31,3 @@ AM_CFLAGS = \
noinst_LTLIBRARIES = libswkmsdri.la
libswkmsdri_la_SOURCES = $(C_SOURCES)
-
-EXTRA_DIST = SConscript