summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bruce Cherniak <bruce.cherniak@intel.com> 2016-09-27 13:27:08 -0500
committer: Tim Rowley <timothy.o.rowley@intel.com> 2016-10-03 09:57:45 -0500
commit: 903d00cd32137161c0b57bcba95af51e47d91fa5 (patch)
tree: b285c268643da1614be965e13ec95a8287e5bd83
parent: cdac0427331442213a2cb8ed5a71057e1bb9793e (diff)
download: external_mesa3d-903d00cd32137161c0b57bcba95af51e47d91fa5.zip
external_mesa3d-903d00cd32137161c0b57bcba95af51e47d91fa5.tar.gz
external_mesa3d-903d00cd32137161c0b57bcba95af51e47d91fa5.tar.bz2
swr: Removed stalling SwrWaitForIdle from queries.
A previous fundamental change in stats gathering added a temporary SwrWaitForIdle to begin_query and end_query. The code has been reworked to remove the stall.

Reviewed-by: George Kyriazis <george.kyriazis@intel.com>
-rw-r--r-- src/gallium/drivers/swr/swr_context.cpp 33
-rw-r--r-- src/gallium/drivers/swr/swr_context.h 11
-rw-r--r-- src/gallium/drivers/swr/swr_query.cpp 152
-rw-r--r-- src/gallium/drivers/swr/swr_query.h 10
4 files changed, 87 insertions, 119 deletions
diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp
index 15e60cd..cbc60e0 100644
--- a/src/gallium/drivers/swr/swr_context.cpp
+++ b/src/gallium/drivers/swr/swr_context.cpp
@@ -24,6 +24,7 @@
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
+#include "util/u_atomic.h"
extern "C" {
#include "util/u_transfer.h"
@@ -352,9 +353,9 @@ swr_UpdateStats(HANDLE hPrivateContext, const SWR_STATS *pStats)
if (!pDC)
return;
- struct swr_context *ctx = (struct swr_context *)pDC->swr_ctx;
+ struct swr_query_result *pqr = (struct swr_query_result *)pDC->pStats;
- SWR_STATS *pSwrStats = &ctx->stats;
+ SWR_STATS *pSwrStats = &pqr->core;
pSwrStats->DepthPassCount += pStats->DepthPassCount;
pSwrStats->PsInvocations += pStats->PsInvocations;
@@ -369,22 +370,24 @@ swr_UpdateStatsFE(HANDLE hPrivateContext, const SWR_STATS_FE *pStats)
if (!pDC)
return;
- struct swr_context *ctx = (struct swr_context *)pDC->swr_ctx;
+ struct swr_query_result *pqr = (struct swr_query_result *)pDC->pStats;
- SWR_STATS_FE *pSwrStats = &ctx->statsFE;
- pSwrStats->IaVertices += pStats->IaVertices;
- pSwrStats->IaPrimitives += pStats->IaPrimitives;
- pSwrStats->VsInvocations += pStats->VsInvocations;
- pSwrStats->HsInvocations += pStats->HsInvocations;
- pSwrStats->DsInvocations += pStats->DsInvocations;
- pSwrStats->GsInvocations += pStats->GsInvocations;
- pSwrStats->CInvocations += pStats->CInvocations;
- pSwrStats->CPrimitives += pStats->CPrimitives;
- pSwrStats->GsPrimitives += pStats->GsPrimitives;
+ SWR_STATS_FE *pSwrStats = &pqr->coreFE;
+ p_atomic_add(&pSwrStats->IaVertices, pStats->IaVertices);
+ p_atomic_add(&pSwrStats->IaPrimitives, pStats->IaPrimitives);
+ p_atomic_add(&pSwrStats->VsInvocations, pStats->VsInvocations);
+ p_atomic_add(&pSwrStats->HsInvocations, pStats->HsInvocations);
+ p_atomic_add(&pSwrStats->DsInvocations, pStats->DsInvocations);
+ p_atomic_add(&pSwrStats->GsInvocations, pStats->GsInvocations);
+ p_atomic_add(&pSwrStats->CInvocations, pStats->CInvocations);
+ p_atomic_add(&pSwrStats->CPrimitives, pStats->CPrimitives);
+ p_atomic_add(&pSwrStats->GsPrimitives, pStats->GsPrimitives);
for (unsigned i = 0; i < 4; i++) {
- pSwrStats->SoPrimStorageNeeded[i] += pStats->SoPrimStorageNeeded[i];
- pSwrStats->SoNumPrimsWritten[i] += pStats->SoNumPrimsWritten[i];
+ p_atomic_add(&pSwrStats->SoPrimStorageNeeded[i],
+ pStats->SoPrimStorageNeeded[i]);
+ p_atomic_add(&pSwrStats->SoNumPrimsWritten[i],
+ pStats->SoNumPrimsWritten[i]);
}
}
diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h
index 6854d69..eecfe0d 100644
--- a/src/gallium/drivers/swr/swr_context.h
+++ b/src/gallium/drivers/swr/swr_context.h
@@ -92,7 +92,7 @@ struct swr_draw_context {
float userClipPlanes[PIPE_MAX_CLIP_PLANES][4];
SWR_SURFACE_STATE renderTargets[SWR_NUM_ATTACHMENTS];
- void *swr_ctx;
+ void *pStats;
};
/* gen_llvm_types FINI */
@@ -159,9 +159,6 @@ struct swr_context {
/* SWR private state - draw context */
struct swr_draw_context swrDC;
- SWR_STATS stats;
- SWR_STATS_FE statsFE;
-
unsigned dirty; /**< Mask of SWR_NEW_x flags */
};
@@ -172,11 +169,13 @@ swr_context(struct pipe_context *pipe)
}
static INLINE void
-swr_update_draw_context(struct swr_context *ctx)
+swr_update_draw_context(struct swr_context *ctx,
+ struct swr_query_result *pqr = nullptr)
{
swr_draw_context *pDC =
(swr_draw_context *)SwrGetPrivateContextState(ctx->swrContext);
- ctx->swrDC.swr_ctx = ctx;
+ if (pqr)
+ ctx->swrDC.pStats = pqr;
memcpy(pDC, &ctx->swrDC, sizeof(swr_draw_context));
}
diff --git a/src/gallium/drivers/swr/swr_query.cpp b/src/gallium/drivers/swr/swr_query.cpp
index c51c529..8bb0b16 100644
--- a/src/gallium/drivers/swr/swr_query.cpp
+++ b/src/gallium/drivers/swr/swr_query.cpp
@@ -71,48 +71,6 @@ swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
}
-static void
-swr_gather_stats(struct pipe_context *pipe, struct swr_query *pq)
-{
- struct swr_context *ctx = swr_context(pipe);
-
- assert(pq->result);
- struct swr_query_result *result = pq->result;
- boolean enable_stats = pq->enable_stats;
-
- /* A few results don't require the core, so don't involve it */
- switch (pq->type) {
- case PIPE_QUERY_TIMESTAMP:
- case PIPE_QUERY_TIME_ELAPSED:
- result->timestamp = swr_get_timestamp(pipe->screen);
- break;
- case PIPE_QUERY_TIMESTAMP_DISJOINT:
- case PIPE_QUERY_GPU_FINISHED:
- /* nothing to do here */
- break;
- default:
- /* TODO: should fence instead of stalling pipeline */
- SwrWaitForIdle(ctx->swrContext);
- memcpy(&result->core, &ctx->stats, sizeof(result->core));
- memcpy(&result->coreFE, &ctx->statsFE, sizeof(result->coreFE));
-
-#if 0
- if (!pq->fence) {
- struct swr_screen *screen = swr_screen(pipe->screen);
- swr_fence_reference(pipe->screen, &pq->fence, screen->flush_fence);
- }
- swr_fence_submit(ctx, pq->fence);
-#endif
-
- /* Only change stat collection if there are no active queries */
- if (ctx->active_queries == 0)
- SwrEnableStats(ctx->swrContext, enable_stats);
-
- break;
- }
-}
-
-
static boolean
swr_get_query_result(struct pipe_context *pipe,
struct pipe_query *q,
@@ -120,8 +78,6 @@ swr_get_query_result(struct pipe_context *pipe,
union pipe_query_result *result)
{
struct swr_query *pq = swr_query(q);
- struct swr_query_result *start = &pq->start;
- struct swr_query_result *end = &pq->end;
unsigned index = pq->index;
if (pq->fence) {
@@ -132,40 +88,37 @@ swr_get_query_result(struct pipe_context *pipe,
swr_fence_reference(pipe->screen, &pq->fence, NULL);
}
- /* XXX: Need to handle counter rollover */
-
+ /* All values are reset to 0 at swr_begin_query, except starting timestamp.
+ * Counters become simply end values. */
switch (pq->type) {
/* Booleans */
case PIPE_QUERY_OCCLUSION_PREDICATE:
- result->b = end->core.DepthPassCount != start->core.DepthPassCount;
+ result->b = pq->result.core.DepthPassCount != 0;
break;
case PIPE_QUERY_GPU_FINISHED:
result->b = TRUE;
break;
/* Counters */
case PIPE_QUERY_OCCLUSION_COUNTER:
- result->u64 = end->core.DepthPassCount - start->core.DepthPassCount;
+ result->u64 = pq->result.core.DepthPassCount;
break;
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_TIME_ELAPSED:
- result->u64 = end->timestamp - start->timestamp;
+ result->u64 = pq->result.timestamp_end - pq->result.timestamp_start;
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
- result->u64 = end->coreFE.IaPrimitives - start->coreFE.IaPrimitives;
+ result->u64 = pq->result.coreFE.IaPrimitives;
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
- result->u64 = end->coreFE.SoNumPrimsWritten[index]
- - start->coreFE.SoNumPrimsWritten[index];
+ result->u64 = pq->result.coreFE.SoNumPrimsWritten[index];
break;
/* Structures */
case PIPE_QUERY_SO_STATISTICS: {
struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
- struct SWR_STATS_FE *start = &pq->start.coreFE;
- struct SWR_STATS_FE *end = &pq->end.coreFE;
so_stats->num_primitives_written =
- end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];
+ pq->result.coreFE.SoNumPrimsWritten[index];
so_stats->primitives_storage_needed =
- end->SoPrimStorageNeeded[index] - start->SoPrimStorageNeeded[index];
+ pq->result.coreFE.SoPrimStorageNeeded[index];
} break;
case PIPE_QUERY_TIMESTAMP_DISJOINT:
/* os_get_time_nano returns nanoseconds */
@@ -175,29 +128,23 @@ swr_get_query_result(struct pipe_context *pipe,
case PIPE_QUERY_PIPELINE_STATISTICS: {
struct pipe_query_data_pipeline_statistics *p_stats =
&result->pipeline_statistics;
- struct SWR_STATS *start = &pq->start.core;
- struct SWR_STATS *end = &pq->end.core;
- struct SWR_STATS_FE *startFE = &pq->start.coreFE;
- struct SWR_STATS_FE *endFE = &pq->end.coreFE;
- p_stats->ia_vertices = endFE->IaVertices - startFE->IaVertices;
- p_stats->ia_primitives = endFE->IaPrimitives - startFE->IaPrimitives;
- p_stats->vs_invocations = endFE->VsInvocations - startFE->VsInvocations;
- p_stats->gs_invocations = endFE->GsInvocations - startFE->GsInvocations;
- p_stats->gs_primitives = endFE->GsPrimitives - startFE->GsPrimitives;
- p_stats->c_invocations = endFE->CPrimitives - startFE->CPrimitives;
- p_stats->c_primitives = endFE->CPrimitives - startFE->CPrimitives;
- p_stats->ps_invocations = end->PsInvocations - start->PsInvocations;
- p_stats->hs_invocations = endFE->HsInvocations - startFE->HsInvocations;
- p_stats->ds_invocations = endFE->DsInvocations - startFE->DsInvocations;
- p_stats->cs_invocations = end->CsInvocations - start->CsInvocations;
+ p_stats->ia_vertices = pq->result.coreFE.IaVertices;
+ p_stats->ia_primitives = pq->result.coreFE.IaPrimitives;
+ p_stats->vs_invocations = pq->result.coreFE.VsInvocations;
+ p_stats->gs_invocations = pq->result.coreFE.GsInvocations;
+ p_stats->gs_primitives = pq->result.coreFE.GsPrimitives;
+ p_stats->c_invocations = pq->result.coreFE.CPrimitives;
+ p_stats->c_primitives = pq->result.coreFE.CPrimitives;
+ p_stats->ps_invocations = pq->result.core.PsInvocations;
+ p_stats->hs_invocations = pq->result.coreFE.HsInvocations;
+ p_stats->ds_invocations = pq->result.coreFE.DsInvocations;
+ p_stats->cs_invocations = pq->result.core.CsInvocations;
} break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
- struct SWR_STATS_FE *start = &pq->start.coreFE;
- struct SWR_STATS_FE *end = &pq->end.coreFE;
uint64_t num_primitives_written =
- end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];
+ pq->result.coreFE.SoNumPrimsWritten[index];
uint64_t primitives_storage_needed =
- end->SoPrimStorageNeeded[index] - start->SoPrimStorageNeeded[index];
+ pq->result.coreFE.SoPrimStorageNeeded[index];
result->b = num_primitives_written > primitives_storage_needed;
}
break;
@@ -215,21 +162,27 @@ swr_begin_query(struct pipe_context *pipe, struct pipe_query *q)
struct swr_context *ctx = swr_context(pipe);
struct swr_query *pq = swr_query(q);
- assert(!pq->enable_stats && "swr_begin_query: Query is already active!");
-
/* Initialize Results */
- memset(&pq->start, 0, sizeof(pq->start));
- memset(&pq->end, 0, sizeof(pq->end));
+ memset(&pq->result, 0, sizeof(pq->result));
+ switch (pq->type) {
+ case PIPE_QUERY_TIMESTAMP:
+ /* nothing to do */
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ pq->result.timestamp_start = swr_get_timestamp(pipe->screen);
+ break;
+ default:
+ /* Core counters required. Update draw context with location to
+ * store results. */
+ swr_update_draw_context(ctx, &pq->result);
- /* Gather start stats and enable SwrCore counters */
- pq->result = &pq->start;
- pq->enable_stats = TRUE;
- swr_gather_stats(pipe, pq);
- ctx->active_queries++;
+ /* Only change stat collection if there are no active queries */
+ if (ctx->active_queries == 0)
+ SwrEnableStats(ctx->swrContext, TRUE);
+ break;
+ }
- /* override start timestamp to 0 for TIMESTAMP query */
- if (pq->type == PIPE_QUERY_TIMESTAMP)
- pq->start.timestamp = 0;
+ ctx->active_queries++;
return true;
}
@@ -244,10 +197,27 @@ swr_end_query(struct pipe_context *pipe, struct pipe_query *q)
&& "swr_end_query, there are no active queries!");
ctx->active_queries--;
- /* Gather end stats and disable SwrCore counters */
- pq->result = &pq->end;
- pq->enable_stats = FALSE;
- swr_gather_stats(pipe, pq);
+ switch (pq->type) {
+ case PIPE_QUERY_TIMESTAMP:
+ case PIPE_QUERY_TIME_ELAPSED:
+ pq->result.timestamp_end = swr_get_timestamp(pipe->screen);
+ break;
+ default:
+ /* Stats are updated asynchronously, a fence is used to signal
+ * completion. */
+ if (!pq->fence) {
+ struct swr_screen *screen = swr_screen(pipe->screen);
+ swr_fence_reference(pipe->screen, &pq->fence, screen->flush_fence);
+ }
+ swr_fence_submit(ctx, pq->fence);
+
+ /* Only change stat collection if there are no active queries */
+ if (ctx->active_queries == 0)
+ SwrEnableStats(ctx->swrContext, FALSE);
+
+ break;
+ }
+
return true;
}
diff --git a/src/gallium/drivers/swr/swr_query.h b/src/gallium/drivers/swr/swr_query.h
index 931d687..c5160ce 100644
--- a/src/gallium/drivers/swr/swr_query.h
+++ b/src/gallium/drivers/swr/swr_query.h
@@ -30,20 +30,16 @@
struct swr_query_result {
SWR_STATS core;
SWR_STATS_FE coreFE;
- uint64_t timestamp;
+ uint64_t timestamp_start;
+ uint64_t timestamp_end;
};
struct swr_query {
unsigned type; /* PIPE_QUERY_* */
unsigned index;
- struct swr_query_result *result;
- struct swr_query_result start;
- struct swr_query_result end;
-
+ struct swr_query_result result;
struct pipe_fence_handle *fence;
-
- boolean enable_stats;
};
extern void swr_query_init(struct pipe_context *pipe);