src/mesa/drivers/dri/i965/intel_pixel_read.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268

/*
 * Copyright 2003 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "main/enums.h"
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/fbobject.h"
#include "main/image.h"
#include "main/bufferobj.h"
#include "main/readpix.h"
#include "main/state.h"
#include "main/glformats.h"
#include "drivers/common/meta.h"

#include "brw_context.h"
#include "intel_screen.h"
#include "intel_batchbuffer.h"
#include "intel_blit.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_pixel.h"
#include "intel_buffer_objects.h"
#include "intel_tiled_memcpy.h"

#define FILE_DEBUG_FLAG DEBUG_PIXEL

/**
 * \brief A fast path for glReadPixels
 *
 * This fast path is taken when the source format is BGRA, RGBA,
 * A or L and when the texture memory is X- or Y-tiled.  It downloads
 * the source data by directly mapping the memory without a GTT fence.
 * This then needs to be de-tiled on the CPU before presenting the data to
 * the user in the linear fashion.
 *
 * This is a performance win over the conventional texture download path.
 * In the conventional texture download path, the texture is either mapped
 * through the GTT or copied to a linear buffer with the blitter before
 * handing off to a software path.  This allows us to avoid round-tripping
 * through the GPU (in the case where we would be blitting) and do only a
 * single copy operation.
 *
 * \param ctx      GL context; the source is ctx->ReadBuffer->_ColorReadBuffer.
 * \param xoffset  X of the first pixel to read, in GL window coordinates.
 * \param yoffset  Y of the first row to read, in GL window coordinates.
 * \param width    Width of the rectangle in pixels.
 * \param height   Height of the rectangle in rows.
 * \param format   Client pixel format.
 * \param type     Client pixel type.
 * \param pixels   Client destination memory (must not be a PBO offset).
 * \param pack     Pixel pack state describing the layout of \p pixels.
 *
 * \return true if the pixels were copied; false if this path cannot handle
 *         the request and the caller must fall back to another path.  No
 *         client memory is written when false is returned.
 */
static bool
intel_readpixels_tiled_memcpy(struct gl_context * ctx,
                              GLint xoffset, GLint yoffset,
                              GLsizei width, GLsizei height,
                              GLenum format, GLenum type,
                              GLvoid * pixels,
                              const struct gl_pixelstore_attrib *pack)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer;

   /* This path supports reading from color buffers only */
   if (rb == NULL)
      return false;

   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   int dst_pitch;

   /* The miptree's buffer. */
   drm_intel_bo *bo;

   int error = 0;

   uint32_t cpp;
   mem_copy_fn mem_copy = NULL;

   /* This fastpath is restricted to specific renderbuffer types:
    * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support
    * more types.
    */
   if (!brw->has_llc ||
       !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
       pixels == NULL ||
       _mesa_is_bufferobj(pack->BufferObj) ||
       pack->Alignment > 4 ||
       pack->SkipPixels > 0 ||
       pack->SkipRows > 0 ||
       (pack->RowLength != 0 && pack->RowLength != width) ||
       pack->SwapBytes ||
       pack->LsbFirst ||
       pack->Invert)
      return false;

   /* Only a simple blit, no scale, bias or other mapping. */
   if (ctx->_ImageTransferState)
      return false;

   /* It is possible that the renderbuffer (or underlying texture) is
    * multisampled.  Since ReadPixels from a multisampled buffer requires a
    * multisample resolve, we can't handle this here
    */
   if (rb->NumSamples > 1)
      return false;

   /* We can't handle copying from RGBX or BGRX because the tiled_memcpy
    * function doesn't set the last channel to 1. Note this checks BaseFormat
    * rather than TexFormat in case the RGBX format is being simulated with an
    * RGBA format.
    */
   if (rb->_BaseFormat == GL_RGB)
      return false;

   /* This path performs no clipping: the rectangle is handed to
    * tiled_to_linear() as-is, so any part of it lying outside the
    * renderbuffer would be read from memory that does not belong to the
    * image.  Only accept non-empty rectangles that are fully contained in
    * the renderbuffer and leave everything else to the fallback path.  The
    * comparisons are arranged so that offset + size is never computed and
    * therefore cannot overflow.
    */
   if (xoffset < 0 || yoffset < 0 || width <= 0 || height <= 0 ||
       (GLuint) xoffset > rb->Width ||
       (GLuint) width > rb->Width - (GLuint) xoffset ||
       (GLuint) yoffset > rb->Height ||
       (GLuint) height > rb->Height - (GLuint) yoffset)
      return false;

   if (!intel_get_memcpy(rb->Format, format, type, &mem_copy, &cpp))
      return false;

   if (!irb->mt ||
       (irb->mt->tiling != I915_TILING_X &&
       irb->mt->tiling != I915_TILING_Y)) {
      /* The algorithm is written only for X- or Y-tiled memory. */
      return false;
   }

   /* Since we are going to read raw data to the miptree, we need to resolve
    * any pending fast color clears before we start.
    */
   intel_miptree_resolve_color(brw, irb->mt, 0);

   bo = irb->mt->bo;

   if (drm_intel_bo_references(brw->batch.bo, bo)) {
      perf_debug("Flushing before mapping a referenced bo.\n");
      intel_batchbuffer_flush(brw);
   }

   error = brw_bo_map(brw, bo, false /* write enable */, "miptree");
   if (error) {
      DBG("%s: failed to map bo\n", __func__);
      return false;
   }

   /* Translate from renderbuffer coordinates to coordinates within the
    * miptree's buffer by adding the offset of the attached level/layer.
    */
   xoffset += irb->mt->level[irb->mt_level].slice[irb->mt_layer].x_offset;
   yoffset += irb->mt->level[irb->mt_level].slice[irb->mt_layer].y_offset;

   dst_pitch = _mesa_image_row_stride(pack, width, format, type);

   /* For a window-system renderbuffer, the buffer is actually flipped
    * vertically, so we need to handle that.  Since the detiling function
    * can only really work in the forwards direction, we have to be a
    * little creative.  First, we compute the Y-offset of the first row of
    * the renderbuffer (in renderbuffer coordinates).  We then match that
    * with the last row of the client's data.  Finally, we give
    * tiled_to_linear a negative pitch so that it walks through the
    * client's data backwards as it walks through the renderbuffer forwards.
    */
   if (rb->Name == 0) {
      yoffset = rb->Height - yoffset - height;
      pixels += (ptrdiff_t) (height - 1) * dst_pitch;
      dst_pitch = -dst_pitch;
   }

   /* We postponed printing this message until having committed to executing
    * the function.
    */
   DBG("%s: x,y=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x "
       "mesa_format=0x%x tiling=%d "
       "pack=(alignment=%d row_length=%d skip_pixels=%d skip_rows=%d)\n",
       __func__, xoffset, yoffset, width, height,
       format, type, rb->Format, irb->mt->tiling,
       pack->Alignment, pack->RowLength, pack->SkipPixels,
       pack->SkipRows);

   /* The destination pointer is biased backwards by the source offsets so
    * that the pixel at (xoffset, yoffset) in the source lands exactly on
    * "pixels".
    */
   tiled_to_linear(
      xoffset * cpp, (xoffset + width) * cpp,
      yoffset, yoffset + height,
      pixels - (ptrdiff_t) yoffset * dst_pitch - (ptrdiff_t) xoffset * cpp,
      bo->virtual + irb->mt->offset,
      dst_pitch, irb->mt->pitch,
      brw->has_swizzling,
      irb->mt->tiling,
      mem_copy
   );

   drm_intel_bo_unmap(bo);
   return true;
}

/**
 * Driver implementation of glReadPixels.
 *
 * Tries, in order: a meta-based PBO download (only when a pack buffer object
 * is bound), the tiled-memcpy fast path, and finally the generic
 * _mesa_readpixels() implementation.
 *
 * \param ctx     GL context.
 * \param x       X of the first pixel to read.
 * \param y       Y of the first row to read.
 * \param width   Width of the rectangle in pixels.
 * \param height  Height of the rectangle in rows.
 * \param format  Client pixel format.
 * \param type    Client pixel type.
 * \param pack    Pixel pack state.
 * \param pixels  Client destination pointer, or an offset into the bound
 *                pack buffer object.
 */
void
intelReadPixels(struct gl_context * ctx,
                GLint x, GLint y, GLsizei width, GLsizei height,
                GLenum format, GLenum type,
                const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
{
   bool ok;

   struct brw_context *brw = brw_context(ctx);
   bool dirty;

   DBG("%s\n", __func__);

   /* glReadPixels() wont dirty the front buffer, so reset the dirty
    * flag after calling intel_prepare_render().
    *
    * This is done before any of the paths below because all of them read
    * from the current read buffer, including the fast paths that return
    * early.
    * NOTE(review): presumably intel_prepare_render() is what revalidates the
    * window-system buffers; confirm against its definition.
    */
   dirty = brw->front_buffer_dirty;
   intel_prepare_render(brw);
   brw->front_buffer_dirty = dirty;

   if (_mesa_is_bufferobj(pack->BufferObj)) {
      if (_mesa_meta_pbo_GetTexSubImage(ctx, 2, NULL, x, y, 0, width, height, 1,
                                        format, type, pixels, pack)) {
         /* _mesa_meta_pbo_GetTexSubImage() implements PBO transfers by
          * binding the user-provided BO as a fake framebuffer and rendering
          * to it.  This breaks the invariant of the GL that nothing is able
          * to render to a BO, causing nondeterministic corruption issues
          * because the render cache is not coherent with a number of other
          * caches that the BO could potentially be bound to afterwards.
          *
          * This could be solved in the same way that we guarantee texture
          * coherency after a texture is attached to a framebuffer and
          * rendered to, but that would involve checking *all* BOs bound to
          * the pipeline for the case we need to emit a cache flush due to
          * previous rendering to any of them -- Including vertex, index,
          * uniform, atomic counter, shader image, transform feedback,
          * indirect draw buffers, etc.
          *
          * That would increase the per-draw call overhead even though it's
          * very unlikely that any of the BOs bound to the pipeline has been
          * rendered to via a PBO at any point, so it seems better to just
          * flush here unconditionally.
          */
         brw_emit_mi_flush(brw);
         return;
      }

      perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
   }

   ok = intel_readpixels_tiled_memcpy(ctx, x, y, width, height,
                                      format, type, pixels, pack);
   if(ok)
      return;

   /* Update Mesa state before calling _mesa_readpixels().
    * XXX this may not be needed since ReadPixels no longer uses the
    * span code.
    */

   if (ctx->NewState)
      _mesa_update_state(ctx);

   _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels);

   /* There's an intel_prepare_render() call in intelSpanRenderStart(). */
   brw->front_buffer_dirty = dirty;
}