From 8dfc9f038ee3f6a57f0a3f3cc641b0866a6111b7 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Wed, 16 Oct 2013 11:51:22 -0700
Subject: i965/fs: Use the gen7 scratch read opcode when possible.

This avoids a lot of message setup we had to do otherwise. Improves
GLB2.7 performance with register spilling force enabled by 1.6442% +/-
0.553218% (n=4).

v2: Use BRW_PREDICATE_NONE, improve a comment (by Paul).

Reviewed-by: Paul Berry
---
 src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp')

diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 5093dd5..9e4de29 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -342,6 +342,18 @@ schedule_node::set_latency_gen7(bool is_haswell)
       latency = 200;
       break;
 
+   case SHADER_OPCODE_GEN7_SCRATCH_READ:
+      /* Testing a load from offset 0, that had been previously written:
+       *
+       * send(8) g114<1>UW g0<8,8,1>F data (0, 0, 0) mlen 1 rlen 1 { align1 WE_normal 1Q };
+       * mov(8) null g114<8,8,1>F { align1 WE_normal 1Q };
+       *
+       * The cycles spent seemed to be grouped around 40-50 (as low as 38),
+       * then around 140. Presumably this is cache hit vs miss.
+       */
+      latency = 50;
+      break;
+
    default:
       /* 2 cycles:
        * mul(8) g4<1>F g2<0,1,0>F 0.5F { align1 WE_normal 1Q };
-- 
cgit v1.1