summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--libpixelflinger/Android.mk18
-rw-r--r--libpixelflinger/codeflinger/ARMAssembler.cpp12
-rw-r--r--libpixelflinger/codeflinger/ARMAssembler.h1
-rw-r--r--libpixelflinger/codeflinger/ARMAssemblerInterface.h3
-rw-r--r--libpixelflinger/codeflinger/ARMAssemblerProxy.cpp4
-rw-r--r--libpixelflinger/codeflinger/ARMAssemblerProxy.h1
-rw-r--r--libpixelflinger/codeflinger/disassem.c11
-rw-r--r--libpixelflinger/codeflinger/load_store.cpp20
-rw-r--r--libpixelflinger/col32cb16blend.S31
-rw-r--r--libpixelflinger/col32cb16blend_neon.S32
-rw-r--r--libpixelflinger/raster.cpp2
-rw-r--r--libpixelflinger/scanline.cpp905
-rw-r--r--libpixelflinger/tests/codegen/Android.mk5
-rw-r--r--libpixelflinger/tests/codegen/codegen.cpp49
14 files changed, 975 insertions, 119 deletions
diff --git a/libpixelflinger/Android.mk b/libpixelflinger/Android.mk
index 6491d24..ed2ab5e 100644
--- a/libpixelflinger/Android.mk
+++ b/libpixelflinger/Android.mk
@@ -2,17 +2,6 @@ LOCAL_PATH:= $(call my-dir)
include $(CLEAR_VARS)
#
-# ARMv6 specific objects
-#
-
-ifeq ($(TARGET_ARCH),arm)
-LOCAL_ASFLAGS := -march=armv6
-LOCAL_SRC_FILES := rotate90CW_4x4_16v6.S
-LOCAL_MODULE := libpixelflinger_armv6
-include $(BUILD_STATIC_LIBRARY)
-endif
-
-#
# C/C++ and ARMv5 objects
#
@@ -77,10 +66,6 @@ ifneq ($(BUILD_TINY_ANDROID),true)
LOCAL_SHARED_LIBRARIES += libhardware_legacy
LOCAL_CFLAGS += -DWITH_LIB_HARDWARE
endif
-
-ifeq ($(TARGET_ARCH),arm)
-LOCAL_WHOLE_STATIC_LIBRARIES := libpixelflinger_armv6
-endif
include $(BUILD_SHARED_LIBRARY)
#
@@ -91,9 +76,6 @@ include $(CLEAR_VARS)
LOCAL_MODULE:= libpixelflinger_static
LOCAL_SRC_FILES := $(PIXELFLINGER_SRC_FILES)
LOCAL_CFLAGS := $(PIXELFLINGER_CFLAGS)
-ifeq ($(TARGET_ARCH),arm)
-LOCAL_WHOLE_STATIC_LIBRARIES := libpixelflinger_armv6
-endif
include $(BUILD_STATIC_LIBRARY)
diff --git a/libpixelflinger/codeflinger/ARMAssembler.cpp b/libpixelflinger/codeflinger/ARMAssembler.cpp
index d3720c3..4726a08 100644
--- a/libpixelflinger/codeflinger/ARMAssembler.cpp
+++ b/libpixelflinger/codeflinger/ARMAssembler.cpp
@@ -334,7 +334,7 @@ void ARMAssembler::LDM(int cc, int dir,
void ARMAssembler::STM(int cc, int dir,
int Rn, int W, uint32_t reg_list)
-{ // FA EA FD ED IB IA DB DA
+{ // ED FD EA FA IB IA DB DA
const uint8_t P[8] = { 0, 1, 0, 1, 1, 0, 1, 0 };
const uint8_t U[8] = { 0, 0, 1, 1, 1, 1, 0, 0 };
*mPC++ = (cc<<28) | (4<<25) | (uint32_t(P[dir])<<24) |
@@ -433,6 +433,16 @@ void ARMAssembler::UXTB16(int cc, int Rd, int Rm, int rotate)
{
*mPC++ = (cc<<28) | 0x6CF0070 | (Rd<<12) | ((rotate >> 3) << 10) | Rm;
}
+#if 0
+#pragma mark -
+#pragma mark Bit manipulation (ARMv7+ only)...
+#endif
+
+// Bit manipulation (ARMv7+ only)...
+void ARMAssembler::UBFX(int cc, int Rd, int Rn, int lsb, int width)
+{
+    // Emits one instruction word laid out as:
+    //   cc[31:28] 0111111[27:21] (width-1)[20:16] Rd[15:12]
+    //   lsb[11:7] 101[6:4] Rn[3:0]
+    const int fixedBits = 0x7E00000 | 0x50;
+    const int operands  = ((width-1)<<16) | (Rd<<12) | (lsb<<7) | Rn;
+    *mPC++ = (cc<<28) | fixedBits | operands;
+}
}; // namespace android
diff --git a/libpixelflinger/codeflinger/ARMAssembler.h b/libpixelflinger/codeflinger/ARMAssembler.h
index a667cb5..e7f038a 100644
--- a/libpixelflinger/codeflinger/ARMAssembler.h
+++ b/libpixelflinger/codeflinger/ARMAssembler.h
@@ -124,6 +124,7 @@ public:
virtual void SMLAW(int cc, int y,
int Rd, int Rm, int Rs, int Rn);
virtual void UXTB16(int cc, int Rd, int Rm, int rotate);
+ virtual void UBFX(int cc, int Rd, int Rn, int lsb, int width);
private:
ARMAssembler(const ARMAssembler& rhs);
diff --git a/libpixelflinger/codeflinger/ARMAssemblerInterface.h b/libpixelflinger/codeflinger/ARMAssemblerInterface.h
index ff6af2a..796342a 100644
--- a/libpixelflinger/codeflinger/ARMAssemblerInterface.h
+++ b/libpixelflinger/codeflinger/ARMAssemblerInterface.h
@@ -206,6 +206,9 @@ public:
// byte/half word extract...
virtual void UXTB16(int cc, int Rd, int Rm, int rotate) = 0;
+ // bit manipulation...
+ virtual void UBFX(int cc, int Rd, int Rn, int lsb, int width) = 0;
+
// -----------------------------------------------------------------------
// convenience...
// -----------------------------------------------------------------------
diff --git a/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp b/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp
index 7c422db..c57d7da 100644
--- a/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp
+++ b/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp
@@ -199,5 +199,9 @@ void ARMAssemblerProxy::UXTB16(int cc, int Rd, int Rm, int rotate) {
mTarget->UXTB16(cc, Rd, Rm, rotate);
}
+// Forwards the UBFX request, with its arguments unchanged, to mTarget.
+void ARMAssemblerProxy::UBFX(int cc, int Rd, int Rn, int lsb, int width) {
+mTarget->UBFX(cc, Rd, Rn, lsb, width);
+}
+
}; // namespace android
diff --git a/libpixelflinger/codeflinger/ARMAssemblerProxy.h b/libpixelflinger/codeflinger/ARMAssemblerProxy.h
index 9134cce..8c7f270 100644
--- a/libpixelflinger/codeflinger/ARMAssemblerProxy.h
+++ b/libpixelflinger/codeflinger/ARMAssemblerProxy.h
@@ -115,6 +115,7 @@ public:
int Rd, int Rm, int Rs, int Rn);
virtual void UXTB16(int cc, int Rd, int Rm, int rotate);
+ virtual void UBFX(int cc, int Rd, int Rn, int lsb, int width);
private:
ARMAssemblerInterface* mTarget;
diff --git a/libpixelflinger/codeflinger/disassem.c b/libpixelflinger/codeflinger/disassem.c
index c17f3ec..aeb8034 100644
--- a/libpixelflinger/codeflinger/disassem.c
+++ b/libpixelflinger/codeflinger/disassem.c
@@ -81,6 +81,8 @@
* g - 2nd fp operand (register) (bits 16-18)
* h - 3rd fp operand (register/immediate) (bits 0-4)
* j - xtb rotate literal (bits 10-11)
+ * i - bfx lsb literal (bits 7-11)
+ * w - bfx width literal (bits 16-20)
* b - branch address
* t - thumb branch address (bits 24, 0-23)
* k - breakpoint comment (bits 0-3, 8-19)
@@ -124,6 +126,7 @@ static const struct arm32_insn arm32_i[] = {
{ 0x0fe000f0, 0x00a00090, "umlal", "Sdnms" },
{ 0x0fe000f0, 0x00e00090, "smlal", "Sdnms" },
{ 0x0fff03f0, 0x06cf0070, "uxtb16", "dmj" },
+ { 0x0fe00070, 0x07e00050, "ubfx", "dmiw" },
{ 0x0d700000, 0x04200000, "strt", "daW" },
{ 0x0d700000, 0x04300000, "ldrt", "daW" },
{ 0x0d700000, 0x04600000, "strbt", "daW" },
@@ -412,6 +415,14 @@ disasm(const disasm_interface_t *di, u_int loc, int altfmt)
case 'j':
di->di_printf("ror #%d", ((insn >> 10) & 3) << 3);
break;
+ /* i - bfx lsb literal (bits 7-11) */
+ case 'i':
+ di->di_printf("#%d", (insn >> 7) & 31);
+ break;
+ /* w - bfx width literal (bits 16-20) */
+ case 'w':
+ di->di_printf("#%d", 1 + ((insn >> 16) & 31));
+ break;
/* b - branch address */
case 'b':
branch = ((insn << 2) & 0x03ffffff);
diff --git a/libpixelflinger/codeflinger/load_store.cpp b/libpixelflinger/codeflinger/load_store.cpp
index 93c5825..ed20a00 100644
--- a/libpixelflinger/codeflinger/load_store.cpp
+++ b/libpixelflinger/codeflinger/load_store.cpp
@@ -18,9 +18,12 @@
#include <assert.h>
#include <stdio.h>
#include <cutils/log.h>
-
#include "codeflinger/GGLAssembler.h"
+#ifdef __ARM_ARCH__
+#include <machine/cpu-features.h>
+#endif
+
namespace android {
// ----------------------------------------------------------------------------
@@ -110,6 +113,20 @@ void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
assert(maskLen<=8);
assert(h);
+#if __ARM_ARCH__ >= 7
+ const int mask = (1<<maskLen)-1;
+ if ((h == bits) && !l && (s != d.reg)) {
+ MOV(AL, 0, d.reg, s); // component = packed;
+ } else if ((h == bits) && l) {
+ MOV(AL, 0, d.reg, reg_imm(s, LSR, l)); // component = packed >> l;
+ } else if (!l && isValidImmediate(mask)) {
+ AND(AL, 0, d.reg, s, imm(mask)); // component = packed & mask;
+ } else if (!l && isValidImmediate(~mask)) {
+ BIC(AL, 0, d.reg, s, imm(~mask)); // component = packed & mask;
+ } else {
+ UBFX(AL, d.reg, s, l, maskLen); // component = (packed & mask) >> l;
+ }
+#else
if (h != bits) {
const int mask = ((1<<maskLen)-1) << l;
if (isValidImmediate(mask)) {
@@ -132,6 +149,7 @@ void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
if (s != d.reg) {
MOV(AL, 0, d.reg, s);
}
+#endif
d.s = maskLen;
}
diff --git a/libpixelflinger/col32cb16blend.S b/libpixelflinger/col32cb16blend.S
index 1450bde..1831255 100644
--- a/libpixelflinger/col32cb16blend.S
+++ b/libpixelflinger/col32cb16blend.S
@@ -1,20 +1,19 @@
/* libs/pixelflinger/col32cb16blend.S
-**
-** (C) COPYRIGHT 2009 ARM Limited.
-**
-** Licensed under the Apache License, Version 2.0 (the "License");
-** you may not use this file except in compliance with the License.
-** You may obtain a copy of the License at
-**
-** http://www.apache.org/licenses/LICENSE-2.0
-**
-** Unless required by applicable law or agreed to in writing, software
-** distributed under the License is distributed on an "AS IS" BASIS,
-** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-** See the License for the specific language governing permissions and
-** limitations under the License.
-**
-*/
+ *
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
.text
.align
diff --git a/libpixelflinger/col32cb16blend_neon.S b/libpixelflinger/col32cb16blend_neon.S
index 17b0d01..cbd54d1 100644
--- a/libpixelflinger/col32cb16blend_neon.S
+++ b/libpixelflinger/col32cb16blend_neon.S
@@ -1,20 +1,20 @@
/* libs/pixelflinger/col32cb16blend_neon.S
-**
-** (C) COPYRIGHT 2009 ARM Limited.
-**
-** Licensed under the Apache License, Version 2.0 (the "License");
-** you may not use this file except in compliance with the License.
-** You may obtain a copy of the License at
-**
-** http://www.apache.org/licenses/LICENSE-2.0
-**
-** Unless required by applicable law or agreed to in writing, software
-** distributed under the License is distributed on an "AS IS" BASIS,
-** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-** See the License for the specific language governing permissions and
-** limitations under the License.
-**
-*/
+ *
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
.text
.align
diff --git a/libpixelflinger/raster.cpp b/libpixelflinger/raster.cpp
index d751202..32b2a97 100644
--- a/libpixelflinger/raster.cpp
+++ b/libpixelflinger/raster.cpp
@@ -143,7 +143,7 @@ void ggl_copyPixels(void* con, GGLint xs, GGLint ys,
using namespace android;
-GGLint gglBitBlti(GGLContext* con, int tmu, GGLint crop[4], GGLint where[4])
+GGLint gglBitBlit(GGLContext* con, int tmu, GGLint crop[4], GGLint where[4])
{
GGL_CONTEXT(c, (void*)con);
diff --git a/libpixelflinger/scanline.cpp b/libpixelflinger/scanline.cpp
index a2f43eb..8fba147 100644
--- a/libpixelflinger/scanline.cpp
+++ b/libpixelflinger/scanline.cpp
@@ -1,6 +1,6 @@
/* libs/pixelflinger/scanline.cpp
**
-** Copyright 2006, The Android Open Source Project
+** Copyright 2006-2011, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
@@ -57,6 +57,11 @@
#define DEBUG__CODEGEN_ONLY 0
+/* Set to 1 to dump to the log the states that need a new
+ * code-generated scanline callback, i.e. those that don't
+ * have a corresponding shortcut function.
+ */
+#define DEBUG_NEEDS 0
#define ASSEMBLY_SCRATCH_SIZE 2048
@@ -79,8 +84,21 @@ static void scanline(context_t* c);
static void scanline_perspective(context_t* c);
static void scanline_perspective_single(context_t* c);
static void scanline_t32cb16blend(context_t* c);
+static void scanline_t32cb16blend_dither(context_t* c);
+static void scanline_t32cb16blend_srca(context_t* c);
+static void scanline_t32cb16blend_clamp(context_t* c);
+static void scanline_t32cb16blend_clamp_dither(context_t* c);
+static void scanline_t32cb16blend_clamp_mod(context_t* c);
+static void scanline_x32cb16blend_clamp_mod(context_t* c);
+static void scanline_t32cb16blend_clamp_mod_dither(context_t* c);
+static void scanline_x32cb16blend_clamp_mod_dither(context_t* c);
static void scanline_t32cb16(context_t* c);
+static void scanline_t32cb16_dither(context_t* c);
+static void scanline_t32cb16_clamp(context_t* c);
+static void scanline_t32cb16_clamp_dither(context_t* c);
static void scanline_col32cb16blend(context_t* c);
+static void scanline_t16cb16_clamp(context_t* c);
+static void scanline_t16cb16blend_clamp_mod(context_t* c);
static void scanline_memcpy(context_t* c);
static void scanline_memset8(context_t* c);
static void scanline_memset16(context_t* c);
@@ -99,6 +117,13 @@ extern "C" void scanline_col32cb16blend_arm(uint16_t *dst, uint32_t col, size_t
// ----------------------------------------------------------------------------
+/* Packs a 32-bit pixel (red in bits 0-7, green in bits 8-15, blue in
+ * bits 16-23) into RGB565 by keeping the top 5/6/5 bits of each channel.
+ * Bits 24-31 of the input are ignored.
+ */
+static inline uint16_t convertAbgr8888ToRgb565(uint32_t pix)
+{
+    const uint32_t red   = (pix <<  8) & 0xf800;
+    const uint32_t green = (pix >>  5) & 0x07e0;
+    const uint32_t blue  = (pix >> 19) & 0x001f;
+    return uint16_t(red | green | blue);
+}
+
struct shortcut_t {
needs_filter_t filter;
const char* desc;
@@ -107,13 +132,95 @@ struct shortcut_t {
};
// Keep in sync with needs
+
+/* To understand the values here, have a look at:
+ * system/core/include/private/pixelflinger/ggl_context.h
+ *
+ * Especially the lines defining and using GGL_RESERVE_NEEDS
+ *
+ * Quick reminders:
+ * - the last nibble of the first value is the destination buffer format.
+ * - the last nibble of the third value is the source texture format
+ * - formats: 4=rgb565 1=abgr8888 2=xbgr8888
+ *
+ * In the descriptions below:
+ *
+ * SRC means we copy the source pixels to the destination
+ *
+ * SRC_OVER means we blend the source pixels to the destination
+ * with dstFactor = 1-srcA, srcFactor=1 (premultiplied source).
+ * This mode is otherwise called 'blend'.
+ *
+ * SRCA_OVER means we blend the source pixels to the destination
+ * with dstFactor = 1-srcA, srcFactor = srcA (non-premul source).
+ * This mode is otherwise called 'blend_srca'.
+ *
+ * clamp means we fetch source pixels from a texture with u/v clamping
+ *
+ * mod means the source pixels are modulated (multiplied) by the
+ * a/r/g/b of the current context's color. Typically used for
+ * fade-in / fade-out.
+ *
+ * dither means we dither 32 bit values to 16 bits
+ */
static shortcut_t shortcuts[] = {
{ { { 0x03515104, 0x00000077, { 0x00000A01, 0x00000000 } },
{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
- "565 fb, 8888 tx, blend", scanline_t32cb16blend, init_y_noop },
+ "565 fb, 8888 tx, blend SRC_OVER", scanline_t32cb16blend, init_y_noop },
{ { { 0x03010104, 0x00000077, { 0x00000A01, 0x00000000 } },
{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
- "565 fb, 8888 tx", scanline_t32cb16, init_y_noop },
+ "565 fb, 8888 tx, SRC", scanline_t32cb16, init_y_noop },
+ /* same as first entry, but with dithering */
+ { { { 0x03515104, 0x00000177, { 0x00000A01, 0x00000000 } },
+ { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+ "565 fb, 8888 tx, blend SRC_OVER dither", scanline_t32cb16blend_dither, init_y_noop },
+ /* same as second entry, but with dithering */
+ { { { 0x03010104, 0x00000177, { 0x00000A01, 0x00000000 } },
+ { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+ "565 fb, 8888 tx, SRC dither", scanline_t32cb16_dither, init_y_noop },
+ /* this is used during the boot animation - CHEAT: ignore dithering */
+ { { { 0x03545404, 0x00000077, { 0x00000A01, 0x00000000 } },
+ { 0xFFFFFFFF, 0xFFFFFEFF, { 0xFFFFFFFF, 0x0000003F } } },
+ "565 fb, 8888 tx, blend dst:ONE_MINUS_SRCA src:SRCA", scanline_t32cb16blend_srca, init_y_noop },
+ /* special case for arbitrary texture coordinates (think scaling) */
+ { { { 0x03515104, 0x00000077, { 0x00000001, 0x00000000 } },
+ { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+ "565 fb, 8888 tx, SRC_OVER clamp", scanline_t32cb16blend_clamp, init_y },
+ { { { 0x03515104, 0x00000177, { 0x00000001, 0x00000000 } },
+ { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+ "565 fb, 8888 tx, SRC_OVER clamp dither", scanline_t32cb16blend_clamp_dither, init_y },
+ /* another case used during emulation */
+ { { { 0x03515104, 0x00000077, { 0x00001001, 0x00000000 } },
+ { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+ "565 fb, 8888 tx, SRC_OVER clamp modulate", scanline_t32cb16blend_clamp_mod, init_y },
+ /* and this */
+ { { { 0x03515104, 0x00000077, { 0x00001002, 0x00000000 } },
+ { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+ "565 fb, x888 tx, SRC_OVER clamp modulate", scanline_x32cb16blend_clamp_mod, init_y },
+ { { { 0x03515104, 0x00000177, { 0x00001001, 0x00000000 } },
+ { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+ "565 fb, 8888 tx, SRC_OVER clamp modulate dither", scanline_t32cb16blend_clamp_mod_dither, init_y },
+ { { { 0x03515104, 0x00000177, { 0x00001002, 0x00000000 } },
+ { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+ "565 fb, x888 tx, SRC_OVER clamp modulate dither", scanline_x32cb16blend_clamp_mod_dither, init_y },
+ { { { 0x03010104, 0x00000077, { 0x00000001, 0x00000000 } },
+ { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+ "565 fb, 8888 tx, SRC clamp", scanline_t32cb16_clamp, init_y },
+ { { { 0x03010104, 0x00000077, { 0x00000002, 0x00000000 } },
+ { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+ "565 fb, x888 tx, SRC clamp", scanline_t32cb16_clamp, init_y },
+ { { { 0x03010104, 0x00000177, { 0x00000001, 0x00000000 } },
+ { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+ "565 fb, 8888 tx, SRC clamp dither", scanline_t32cb16_clamp_dither, init_y },
+ { { { 0x03010104, 0x00000177, { 0x00000002, 0x00000000 } },
+ { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+ "565 fb, x888 tx, SRC clamp dither", scanline_t32cb16_clamp_dither, init_y },
+ { { { 0x03010104, 0x00000077, { 0x00000004, 0x00000000 } },
+ { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+ "565 fb, 565 tx, SRC clamp", scanline_t16cb16_clamp, init_y },
+ { { { 0x03515104, 0x00000077, { 0x00001004, 0x00000000 } },
+ { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
+ "565 fb, 565 tx, SRC_OVER clamp", scanline_t16cb16blend_clamp_mod, init_y },
{ { { 0x03515104, 0x00000077, { 0x00000000, 0x00000000 } },
{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0xFFFFFFFF } } },
"565 fb, 8888 fixed color", scanline_col32cb16blend, init_y_packed },
@@ -243,6 +350,12 @@ static void pick_scanline(context_t* c)
}
}
+#if DEBUG_NEEDS
+    /* Test the value, not mere definedness: DEBUG_NEEDS is always
+     * defined (to 0 by default), so #ifdef would log unconditionally.
+     */
+    LOGI("Needs: n=0x%08x p=0x%08x t0=0x%08x t1=0x%08x",
+         c->state.needs.n, c->state.needs.p,
+         c->state.needs.t[0], c->state.needs.t[1]);
+#endif
+
#endif // DEBUG__CODEGEN_ONLY
c->init_y = init_y;
@@ -797,6 +910,678 @@ discard:
#pragma mark Scanline
#endif
+/* Used to parse a 32-bit source texture linearly. Usage is:
+ *
+ * horz_iterator32 hi(context);
+ * while (...) {
+ * uint32_t src_pixel = hi.get_pixel32();
+ * ...
+ * }
+ *
+ * Use only for one-to-one texture mapping.
+ */
+struct horz_iterator32 {
+    /* Points m_src at the texel of texture unit 0 that corresponds to
+     * the left edge (xl) of the current scanline (y). */
+    horz_iterator32(context_t* c) {
+        const int col = c->iterators.xl;
+        const int row = c->iterators.y;
+        texture_t& tex = c->state.texture[0];
+        // texture origin (is0/it0 shifted down by 16) plus the pixel position
+        const int32_t u = (tex.shade.is0>>16) + col;
+        const int32_t v = (tex.shade.it0>>16) + row;
+        uint32_t* const base = reinterpret_cast<uint32_t*>(tex.surface.data);
+        m_src = base + (u+(tex.surface.stride*v));
+    }
+    /* Returns the current 32-bit texel and moves one texel to the right. */
+    uint32_t get_pixel32() {
+        const uint32_t texel = *m_src;
+        ++m_src;
+        return texel;
+    }
+protected:
+    uint32_t* m_src;
+};
+
+/* A variant for 16-bit source textures. */
+struct horz_iterator16 {
+    /* Points m_src at the texel of texture unit 0 that corresponds to
+     * the left edge (xl) of the current scanline (y). */
+    horz_iterator16(context_t* c) {
+        const int col = c->iterators.xl;
+        const int row = c->iterators.y;
+        texture_t& tex = c->state.texture[0];
+        // texture origin (is0/it0 shifted down by 16) plus the pixel position
+        const int32_t u = (tex.shade.is0>>16) + col;
+        const int32_t v = (tex.shade.it0>>16) + row;
+        uint16_t* const base = reinterpret_cast<uint16_t*>(tex.surface.data);
+        m_src = base + (u+(tex.surface.stride*v));
+    }
+    /* Returns the current 16-bit texel and moves one texel to the right. */
+    uint16_t get_pixel16() {
+        const uint16_t texel = *m_src;
+        ++m_src;
+        return texel;
+    }
+protected:
+    uint16_t* m_src;
+};
+
+/* A clamp iterator is used to iterate inside a texture with GGL_CLAMP.
+ * After initialization, call get_pixel16() or get_pixel32() to get the
+ * current texture pixel value.
+ */
+struct clamp_iterator {
+    /* Captures the s/t starting values and per-pixel steps of texture
+     * unit 0 for the scanline that starts at c->iterators.xl. */
+    clamp_iterator(context_t* c) {
+        texture_t& tex = c->state.texture[0];
+        texture_iterators_t& it = tex.iterators;
+        const int left = c->iterators.xl;
+
+        m_ds = it.dsdx;
+        m_dt = it.dtdx;
+        m_s = (left * it.dsdx) + it.ydsdy;
+        m_t = (left * it.dtdx) + it.ydtdy;
+
+        m_data = tex.surface.data;
+        m_stride = tex.surface.stride;
+        m_width_m1 = tex.surface.width - 1;
+        m_height_m1 = tex.surface.height - 1;
+    }
+    /* Returns the 16-bit texel at the current (clamped) position, then
+     * steps to the next pixel. */
+    uint16_t get_pixel16() {
+        int u, v;
+        get_uv(u, v);
+        return reinterpret_cast<uint16_t*>(m_data)[u + (m_stride*v)];
+    }
+    /* Returns the 32-bit texel at the current (clamped) position, then
+     * steps to the next pixel. */
+    uint32_t get_pixel32() {
+        int u, v;
+        get_uv(u, v);
+        return reinterpret_cast<uint32_t*>(m_data)[u + (m_stride*v)];
+    }
+private:
+    /* Limits coord to [0, last]; the lower bound is applied first. */
+    static int clamp_coord(int coord, int last) {
+        if (coord < 0)
+            coord = 0;
+        if (coord > last)
+            coord = last;
+        return coord;
+    }
+    /* Yields the clamped texel coordinates (s and t shifted down by 16)
+     * and advances s/t by one pixel step. */
+    void get_uv(int& u, int& v) {
+        u = clamp_coord(m_s >> 16, m_width_m1);
+        v = clamp_coord(m_t >> 16, m_height_m1);
+        m_s += m_ds;
+        m_t += m_dt;
+    }
+
+    GGLfixed m_s, m_t;
+    GGLfixed m_ds, m_dt;
+    int m_width_m1, m_height_m1;
+    uint8_t* m_data;
+    int m_stride;
+};
+
+/*
+ * The 'horizontal clamp iterator' variant corresponds to the case where
+ * the 'v' coordinate doesn't change along the scanline. This avoids one
+ * multiplication and some extra additions / checks per pixel, which matters
+ * when the blending/processing operation that follows is very fast.
+ */
+/* Returns non-zero when texture unit 0 has a zero dtdx step, i.e. the
+ * 't' coordinate does not change along a scanline. */
+static int is_context_horizontal(const context_t* c) {
+    const texture_iterators_t& it = c->state.texture[0].iterators;
+    if (it.dtdx == 0)
+        return 1;
+    return 0;
+}
+
+struct horz_clamp_iterator {
+    /* Returns the 16-bit texel at the current clamped column of the row
+     * selected by init(), then steps to the next pixel. */
+    uint16_t get_pixel16() {
+        const int u = next_u();
+        return reinterpret_cast<const uint16_t*>(m_data)[u];
+    }
+    /* Returns the 32-bit texel at the current clamped column of the row
+     * selected by init(), then steps to the next pixel. */
+    uint32_t get_pixel32() {
+        const int u = next_u();
+        return reinterpret_cast<const uint32_t*>(m_data)[u];
+    }
+protected:
+    void init(const context_t* c, int shift);
+    /* Takes the column from m_s (shifted down by 16), advances m_s by
+     * m_ds and returns the column limited to [0, m_width_m1]. */
+    int next_u() {
+        int column = m_s >> 16;
+        m_s += m_ds;
+        if (column < 0)
+            column = 0;
+        if (column > m_width_m1)
+            column = m_width_m1;
+        return column;
+    }
+    GGLfixed m_s;
+    GGLfixed m_ds;
+    int m_width_m1;
+    const uint8_t* m_data;
+};
+
+/* Prepares the iterator for the scanline starting at c->iterators.xl:
+ * records the 's' start value and step, then points m_data at the start
+ * of the (clamped) texture row. 'shift' scales the row offset
+ * (stride * row) into bytes: 1 for 16-bit texels, 2 for 32-bit texels.
+ */
+void horz_clamp_iterator::init(const context_t* c, int shift)
+{
+    const texture_t& tex = c->state.texture[0];
+    const texture_iterators_t& it = tex.iterators;
+    const int left = c->iterators.xl;
+
+    m_s = (left * it.dsdx) + it.ydsdy;
+    m_ds = it.dsdx;
+    m_width_m1 = tex.surface.width-1;
+
+    // The row is computed once and clamped to [0, height-1].
+    const GGLfixed t = (left * it.dtdx) + it.ydtdy;
+    const int rows = (int)tex.surface.height;
+    int row = t >> 16;
+    if (row < 0) {
+        row = 0;
+    } else if (row >= rows) {
+        row = rows-1;
+    }
+
+    m_data = tex.surface.data;
+    m_data += (tex.surface.stride*row) << shift;
+}
+
+struct horz_clamp_iterator16 : horz_clamp_iterator {
+    /* 16-bit texels: row offsets are shifted left by 1 (2 bytes each). */
+    horz_clamp_iterator16(const context_t* c) {
+        const int log2BytesPerTexel = 1;
+        init(c, log2BytesPerTexel);
+    }
+};
+
+struct horz_clamp_iterator32 : horz_clamp_iterator {
+    /* 32-bit texels: row offsets are shifted left by 2 (4 bytes each).
+     * Takes a const context, like init() and horz_clamp_iterator16;
+     * callers holding a non-const context_t* are unaffected. */
+    horz_clamp_iterator32(const context_t* c) {
+        init(c,2);
+    }
+};
+
+/* This is used to perform dithering operations.
+ */
+struct ditherer {
+    /* Selects the dither-matrix row for scanline y and the starting
+     * column for c->iterators.xl. */
+    ditherer(const context_t* c) {
+        const int col = c->iterators.xl;
+        const int row = c->iterators.y;
+        m_index = col & GGL_DITHER_MASK;
+        m_line = &c->ditherMatrix[ ((row & GGL_DITHER_MASK)<<GGL_DITHER_ORDER_SHIFT) ];
+    }
+    /* Skips one pixel without reading a threshold. */
+    void step(void) {
+        m_index++;
+    }
+    /* Returns the threshold for the current pixel and moves to the next. */
+    int get_value(void) {
+        const int threshold = m_line[m_index & GGL_DITHER_MASK];
+        m_index++;
+        return threshold;
+    }
+    /* Dithers a pixel with red in bits 0-7, green in bits 8-15 and blue
+     * in bits 16-23 down to RGB565; consumes one threshold. */
+    uint16_t abgr8888ToRgb565(uint32_t s) {
+        uint32_t red   = s & 0xff;
+        uint32_t green = (s >> 8) & 0xff;
+        uint32_t blue  = (s >> 16) & 0xff;
+        return rgb888ToRgb565(red, green, blue);
+    }
+    /* The following assumes that r/g/b are in the 0..255 range each.
+     * Note that r, g and b are modified in place (biased and saturated). */
+    uint16_t rgb888ToRgb565(uint32_t& r, uint32_t& g, uint32_t &b) {
+        const int threshold = get_value();
+        /* dither is on GGL_DITHER_BITS, and each of r, g, b is on 8 bits */
+        r += (threshold >> (GGL_DITHER_BITS-8 +5));
+        g += (threshold >> (GGL_DITHER_BITS-8 +6));
+        b += (threshold >> (GGL_DITHER_BITS-8 +5));
+        // saturate each channel at 0xff before truncating to 5/6/5 bits
+        if (r > 0xff) { r = 0xff; }
+        if (g > 0xff) { g = 0xff; }
+        if (b > 0xff) { b = 0xff; }
+        const uint32_t packed = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3);
+        return uint16_t(packed);
+    }
+protected:
+    const uint8_t* m_line;
+    int m_index;
+};
+
+/* This structure is used to blend (SRC_OVER) 32-bit source pixels
+ * onto 16-bit destination ones. Usage is simply:
+ *
+ * blender.write(<32-bit-src-pixel-value>,<ptr-to-16-bit-dest-pixel>)
+ */
+struct blender_32to16 {
+    blender_32to16(context_t* c) { }
+
+    /* Blends one source pixel over *dst without dithering.
+     * A source word of 0 leaves *dst untouched; a source alpha of 0xff
+     * overwrites *dst with the converted source pixel. */
+    void write(uint32_t s, uint16_t* dst) {
+        if (s == 0)
+            return;
+        s = GGL_RGBA_TO_HOST(s);
+        const int alpha = (s>>24);
+        if (alpha == 0xff) {
+            *dst = convertAbgr8888ToRgb565(s);
+            return;
+        }
+        // destination weight: 0x100 minus alpha rescaled from 0..255 to 0..256
+        const int dstWeight = 0x100 - (alpha + (alpha>>7));
+        const uint16_t back = *dst;
+        int red   = (s >> (   3))&0x1F;
+        int green = (s >> ( 8+2))&0x3F;
+        int blue  = (s >> (16+3))&0x1F;
+        red   += (dstWeight*((back>>11)&0x1f))>>8;
+        green += (dstWeight*((back>>5)&0x3f))>>8;
+        blue  += (dstWeight*((back)&0x1f))>>8;
+        *dst = uint16_t((red<<11)|(green<<5)|blue);
+    }
+
+    /* Same as above, with dithering. Exactly one dither position is
+     * consumed per call, whichever path is taken. */
+    void write(uint32_t s, uint16_t* dst, ditherer& di) {
+        if (s == 0) {
+            di.step();
+            return;
+        }
+        s = GGL_RGBA_TO_HOST(s);
+        const int alpha = (s>>24);
+        if (alpha == 0xff) {
+            *dst = di.abgr8888ToRgb565(s);
+            return;
+        }
+        const int threshold = di.get_value() << (8 - GGL_DITHER_BITS);
+        const int dstWeight = 0x100 - (alpha + (alpha>>7));
+        const uint16_t back = *dst;
+        int red   = (s >> (   3))&0x1F;
+        int green = (s >> ( 8+2))&0x3F;
+        int blue  = (s >> (16+3))&0x1F;
+        red   = ((red   << 8) + dstWeight*((back>>11)&0x1f) + threshold)>>8;
+        green = ((green << 8) + dstWeight*((back>>5)&0x3f) + threshold)>>8;
+        blue  = ((blue  << 8) + dstWeight*((back)&0x1f) + threshold)>>8;
+        // the added threshold can push a channel past its maximum
+        if (red > 0x1f) { red = 0x1f; }
+        if (green > 0x3f) { green = 0x3f; }
+        if (blue > 0x1f) { blue = 0x1f; }
+        *dst = uint16_t((red<<11)|(green<<5)|blue);
+    }
+};
+
+/* This blender does the same for the 'blend_srca' operation,
+ * where dstFactor = 1-srcA and srcFactor = srcA.
+ */
+struct blender_32to16_srcA {
+    blender_32to16_srcA(const context_t* c) { }
+
+    /* Writes srcA*src + (1-srcA)*dst into *dst, channel by channel.
+     * A source word of 0 leaves *dst untouched. */
+    void write(uint32_t s, uint16_t* dst) {
+        if (!s) {
+            return;
+        }
+        const uint16_t back = *dst;
+        s = GGL_RGBA_TO_HOST(s);
+
+        const int alpha = (s>>24);
+        // alpha rescaled from 0..255 to 0..256, and its complement
+        const int srcWeight = (alpha + (alpha>>7));
+        const int dstWeight = 0x100-srcWeight;
+
+        int red   = (s >> (   3))&0x1F;
+        int green = (s >> ( 8+2))&0x3F;
+        int blue  = (s >> (16+3))&0x1F;
+        red   = (srcWeight*red   + dstWeight*((back>>11)&0x1f))>>8;
+        green = (srcWeight*green + dstWeight*((back>>5)&0x3f))>>8;
+        blue  = (srcWeight*blue  + dstWeight*((back)&0x1f))>>8;
+        *dst = uint16_t((red<<11)|(green<<5)|blue);
+    }
+};
+
+/* Common init code for the modulating blenders */
+struct blender_modulate {
+    /* Reads the context's r/g/b/a iterator values, reduces them to
+     * 8 bits and stores them as modulation factors. */
+    void init(const context_t* c) {
+        m_r = to_factor(c->iterators.ydrdy >> (GGL_COLOR_BITS-8));
+        m_g = to_factor(c->iterators.ydgdy >> (GGL_COLOR_BITS-8));
+        m_b = to_factor(c->iterators.ydbdy >> (GGL_COLOR_BITS-8));
+        m_a = to_factor(c->iterators.ydady >> (GGL_COLOR_BITS-8));
+    }
+protected:
+    /* Adds bit 7 back in, so that 255 maps to 256. */
+    static int to_factor(int value) {
+        return value + (value >> 7);
+    }
+    int m_r, m_g, m_b, m_a;
+};
+
+/* This blender does a normal blend after modulation.
+ */
+struct blender_32to16_modulate : blender_modulate {
+    blender_32to16_modulate(const context_t* c) {
+        init(c);
+    }
+
+    /* Modulates the source pixel by m_r/m_g/m_b/m_a, then blends it
+     * over *dst. A source word of 0 leaves *dst untouched. */
+    void write(uint32_t s, uint16_t* dst) {
+        if (!s) {
+            return;
+        }
+        s = GGL_RGBA_TO_HOST(s);
+
+        /* Modulate: alpha ends up on 8 bits, R/G/B are kept in
+         * 5.8 / 6.8 / 5.8 fixed-point format */
+        const uint32_t alpha = ((s >> 24) * m_a) >> 8;
+        uint32_t red   = (( s        & 0xff) * m_r) >> (8 - 5);
+        uint32_t green = (((s >> 8)  & 0xff) * m_g) >> (8 - 6);
+        uint32_t blue  = (((s >> 16) & 0xff) * m_b) >> (8 - 5);
+
+        /* Normal blend against the destination */
+        const int dstWeight = 0x100 - (alpha + (alpha>>7));
+        const uint16_t back = *dst;
+        const int backR = (back>>11)&0x1f;
+        const int backG = (back>>5)&0x3f;
+        const int backB = (back)&0x1f;
+        red   = (red   + dstWeight*backR)>>8;
+        green = (green + dstWeight*backG)>>8;
+        blue  = (blue  + dstWeight*backB)>>8;
+        *dst = uint16_t((red<<11)|(green<<5)|blue);
+    }
+
+    /* Same as above, with dithering. One dither position is consumed
+     * per call, including for a source word of 0. */
+    void write(uint32_t s, uint16_t* dst, ditherer& di) {
+        if (!s) {
+            di.step();
+            return;
+        }
+        s = GGL_RGBA_TO_HOST(s);
+
+        /* Modulate: alpha ends up on 8 bits, R/G/B are kept in
+         * 5.8 / 6.8 / 5.8 fixed-point format */
+        const uint32_t alpha = ((s >> 24) * m_a) >> 8;
+        uint32_t red   = (( s        & 0xff) * m_r) >> (8 - 5);
+        uint32_t green = (((s >> 8)  & 0xff) * m_g) >> (8 - 6);
+        uint32_t blue  = (((s >> 16) & 0xff) * m_b) >> (8 - 5);
+
+        /* Threshold scaled to 0.8 fixed-point format */
+        const int threshold = di.get_value() << (8 - GGL_DITHER_BITS);
+        const int dstWeight = 0x100 - (alpha + (alpha>>7));
+        const uint16_t back = *dst;
+        const int backR = (back>>11)&0x1f;
+        const int backG = (back>>5)&0x3f;
+        const int backB = (back)&0x1f;
+        red   = (red   + dstWeight*backR + threshold)>>8;
+        green = (green + dstWeight*backG + threshold)>>8;
+        blue  = (blue  + dstWeight*backB + threshold)>>8;
+        // the added threshold can push a channel past its maximum
+        if (red > 0x1f) { red = 0x1f; }
+        if (green > 0x3f) { green = 0x3f; }
+        if (blue > 0x1f) { blue = 0x1f; }
+        *dst = uint16_t((red<<11)|(green<<5)|blue);
+    }
+};
+
+/* Same as blender_32to16_modulate, except that the input is xRGB instead of
+ * ARGB: the source's top byte is ignored and only the context alpha is used. */
+struct blender_x32to16_modulate : blender_modulate {
+    blender_x32to16_modulate(const context_t* c) {
+        init(c);
+    }
+
+    /* Modulates the source R/G/B by m_r/m_g/m_b and blends the result
+     * over *dst with a destination weight of (0x100 - m_a). */
+    void write(uint32_t s, uint16_t* dst) {
+        s = GGL_RGBA_TO_HOST(s);
+
+        /* R/G/B are kept in 5.8 / 6.8 / 5.8 fixed-point format */
+        uint32_t red   = (( s        & 0xff) * m_r) >> (8 - 5);
+        uint32_t green = (((s >> 8)  & 0xff) * m_g) >> (8 - 6);
+        uint32_t blue  = (((s >> 16) & 0xff) * m_b) >> (8 - 5);
+
+        const int dstWeight = 0x100 - m_a;
+        const uint16_t back = *dst;
+        const int backR = (back>>11)&0x1f;
+        const int backG = (back>>5)&0x3f;
+        const int backB = (back)&0x1f;
+        red   = (red   + dstWeight*backR)>>8;
+        green = (green + dstWeight*backG)>>8;
+        blue  = (blue  + dstWeight*backB)>>8;
+        *dst = uint16_t((red<<11)|(green<<5)|blue);
+    }
+
+    /* Same as above, with dithering; consumes one dither position. */
+    void write(uint32_t s, uint16_t* dst, ditherer& di) {
+        s = GGL_RGBA_TO_HOST(s);
+
+        /* R/G/B are kept in 5.8 / 6.8 / 5.8 fixed-point format */
+        uint32_t red   = (( s        & 0xff) * m_r) >> (8 - 5);
+        uint32_t green = (((s >> 8)  & 0xff) * m_g) >> (8 - 6);
+        uint32_t blue  = (((s >> 16) & 0xff) * m_b) >> (8 - 5);
+
+        /* Normal blend, with the dither threshold added before the shift */
+        const int threshold = di.get_value() << (8 - GGL_DITHER_BITS);
+        const int dstWeight = 0x100 - m_a;
+        const uint16_t back = *dst;
+        const int backR = (back>>11)&0x1f;
+        const int backG = (back>>5)&0x3f;
+        const int backB = (back)&0x1f;
+        red   = (red   + dstWeight*backR + threshold)>>8;
+        green = (green + dstWeight*backG + threshold)>>8;
+        blue  = (blue  + dstWeight*backB + threshold)>>8;
+        // the added threshold can push a channel past its maximum
+        if (red > 0x1f) { red = 0x1f; }
+        if (green > 0x3f) { green = 0x3f; }
+        if (blue > 0x1f) { blue = 0x1f; }
+        *dst = uint16_t((red<<11)|(green<<5)|blue);
+    }
+};
+
+/* Same as above, but source is 16bit rgb565 */
+struct blender_16to16_modulate : blender_modulate {
+    blender_16to16_modulate(const context_t* c) {
+        init(c);
+    }
+
+    /* Modulates an RGB565 source pixel by m_r/m_g/m_b and blends it
+     * over *dst with a destination weight of (0x100 - m_a). */
+    void write(uint16_t s16, uint16_t* dst) {
+        const uint32_t src = s16;
+
+        // 5/6/5-bit channels times the factors: no pre-shift is needed
+        uint32_t red   = (src >> 11) * m_r;
+        uint32_t green = ((src >> 5) & 0x3f) * m_g;
+        uint32_t blue  = (src & 0x1f) * m_b;
+
+        const int dstWeight = 0x100 - m_a;
+        const uint16_t back = *dst;
+        const int backR = (back>>11)&0x1f;
+        const int backG = (back>>5)&0x3f;
+        const int backB = (back)&0x1f;
+        red   = (red   + dstWeight*backR)>>8;
+        green = (green + dstWeight*backG)>>8;
+        blue  = (blue  + dstWeight*backB)>>8;
+        *dst = uint16_t((red<<11)|(green<<5)|blue);
+    }
+};
+
+/* This is used to iterate over a 16-bit destination color buffer.
+ * Usage is:
+ *
+ * dst_iterator16 di(context);
+ * while (di.count--) {
+ * <do stuff with dest pixel at di.dst>
+ * di.dst++;
+ * }
+ */
+struct dst_iterator16 {
+    /* Sets 'dst' to the pixel at (xl, y) in the context's color buffer
+     * and 'count' to the scanline width (xr - xl). */
+    dst_iterator16(const context_t* c) {
+        const surface_t* cb = &(c->state.buffers.color);
+        const int left = c->iterators.xl;
+        const int32_t row = c->iterators.y;
+        uint16_t* const base = reinterpret_cast<uint16_t*>(cb->data);
+        dst = base + (left+(cb->stride*row));
+        count = c->iterators.xr - left;
+    }
+    int count;
+    uint16_t* dst;
+};
+
+
+/* SRC copy of a clamped 32-bit texture to a 16-bit color buffer. */
+static void scanline_t32cb16_clamp(context_t* c)
+{
+    dst_iterator16 out(c);
+
+    if (!is_context_horizontal(c)) {
+        /* General case */
+        clamp_iterator src(c);
+        for (; out.count--; out.dst++) {
+            *out.dst = convertAbgr8888ToRgb565(src.get_pixel32());
+        }
+        return;
+    }
+
+    /* Special case for simple horizontal scaling */
+    horz_clamp_iterator32 src(c);
+    for (; out.count--; out.dst++) {
+        *out.dst = convertAbgr8888ToRgb565(src.get_pixel32());
+    }
+}
+
+/* SRC copy of a 32-bit texture (read linearly) to a 16-bit color buffer,
+ * with dithering. */
+static void scanline_t32cb16_dither(context_t* c)
+{
+    horz_iterator32 src(c);
+    dst_iterator16 out(c);
+    ditherer dither(c);
+
+    for (; out.count--; out.dst++) {
+        *out.dst = dither.abgr8888ToRgb565(src.get_pixel32());
+    }
+}
+
+/* SRC copy of a clamped 32-bit texture to a 16-bit color buffer, with
+ * dithering. */
+static void scanline_t32cb16_clamp_dither(context_t* c)
+{
+    dst_iterator16 out(c);
+    ditherer dither(c);
+
+    if (!is_context_horizontal(c)) {
+        /* General case */
+        clamp_iterator src(c);
+        for (; out.count--; out.dst++) {
+            *out.dst = dither.abgr8888ToRgb565(src.get_pixel32());
+        }
+        return;
+    }
+
+    /* Special case for simple horizontal scaling */
+    horz_clamp_iterator32 src(c);
+    for (; out.count--; out.dst++) {
+        *out.dst = dither.abgr8888ToRgb565(src.get_pixel32());
+    }
+}
+
+/* SRC_OVER blend of a 32-bit texture (read linearly) onto a 16-bit color
+ * buffer, with dithering. */
+static void scanline_t32cb16blend_dither(context_t* c)
+{
+    dst_iterator16 out(c);
+    ditherer dither(c);
+    blender_32to16 blend(c);
+    horz_iterator32 src(c);
+
+    for (; out.count--; out.dst++) {
+        blend.write(src.get_pixel32(), out.dst, dither);
+    }
+}
+
+/* SRC_OVER blend of a clamped 32-bit texture onto a 16-bit color buffer. */
+static void scanline_t32cb16blend_clamp(context_t* c)
+{
+    dst_iterator16 out(c);
+    blender_32to16 blend(c);
+
+    if (!is_context_horizontal(c)) {
+        /* General case */
+        clamp_iterator src(c);
+        for (; out.count--; out.dst++) {
+            blend.write(src.get_pixel32(), out.dst);
+        }
+        return;
+    }
+
+    /* 't' is constant along the scanline: use the cheaper iterator */
+    horz_clamp_iterator32 src(c);
+    for (; out.count--; out.dst++) {
+        blend.write(src.get_pixel32(), out.dst);
+    }
+}
+
+/* SRC_OVER blend of a clamped 32-bit texture onto a 16-bit color buffer,
+ * with dithering. */
+static void scanline_t32cb16blend_clamp_dither(context_t* c)
+{
+    dst_iterator16 out(c);
+    ditherer dither(c);
+    blender_32to16 blend(c);
+    clamp_iterator src(c);
+
+    for (; out.count--; out.dst++) {
+        blend.write(src.get_pixel32(), out.dst, dither);
+    }
+}
+
+/* SRC_OVER blend of a clamped, color-modulated 32-bit texture onto a
+ * 16-bit color buffer. */
+void scanline_t32cb16blend_clamp_mod(context_t* c)
+{
+    dst_iterator16 out(c);
+    blender_32to16_modulate blend(c);
+    clamp_iterator src(c);
+
+    for (; out.count--; out.dst++) {
+        blend.write(src.get_pixel32(), out.dst);
+    }
+}
+
+/* SRC_OVER blend of a clamped, color-modulated 32-bit texture onto a
+ * 16-bit color buffer, with dithering. */
+void scanline_t32cb16blend_clamp_mod_dither(context_t* c)
+{
+    dst_iterator16 out(c);
+    blender_32to16_modulate blend(c);
+    ditherer dither(c);
+    clamp_iterator src(c);
+
+    for (; out.count--; out.dst++) {
+        blend.write(src.get_pixel32(), out.dst, dither);
+    }
+}
+
+/* Variant of scanline_t32cb16blend_clamp_mod with a xRGB texture */
+void scanline_x32cb16blend_clamp_mod(context_t* c)
+{
+    // Uses the x32 blender: the source pixel's top byte is not read.
+    dst_iterator16 out(c);
+    blender_x32to16_modulate blend(c);
+    clamp_iterator src(c);
+
+    for (; out.count--; out.dst++) {
+        blend.write(src.get_pixel32(), out.dst);
+    }
+}
+
+/* Dithering variant of scanline_x32cb16blend_clamp_mod. */
+void scanline_x32cb16blend_clamp_mod_dither(context_t* c)
+{
+    dst_iterator16 out(c);
+    blender_x32to16_modulate blend(c);
+    ditherer dither(c);
+    clamp_iterator src(c);
+
+    for (; out.count--; out.dst++) {
+        blend.write(src.get_pixel32(), out.dst, dither);
+    }
+}
+
+/* SRC copy of a clamped 16-bit texture to a 16-bit color buffer; texels
+ * are copied without any conversion. */
+void scanline_t16cb16_clamp(context_t* c)
+{
+    dst_iterator16 out(c);
+
+    if (!is_context_horizontal(c)) {
+        /* General case */
+        clamp_iterator src(c);
+        for (; out.count--; out.dst++) {
+            *out.dst = src.get_pixel16();
+        }
+        return;
+    }
+
+    /* Special case for simple horizontal scaling */
+    horz_clamp_iterator16 src(c);
+    for (; out.count--; out.dst++) {
+        *out.dst = src.get_pixel16();
+    }
+}
+
+
+
template <typename T, typename U>
static inline __attribute__((const))
T interpolate(int y, T v0, U dvdx, U dvdy) {
@@ -1322,30 +2107,24 @@ void scanline_t32cb16(context_t* c)
if (ct==1 || uint32_t(dst)&2) {
last_one:
s = GGL_RGBA_TO_HOST( *src++ );
- sR = (s >> ( 3))&0x1F;
- sG = (s >> ( 8+2))&0x3F;
- sB = (s >> (16+3))&0x1F;
- *dst++ = uint16_t((sR<<11)|(sG<<5)|sB);
+ *dst++ = convertAbgr8888ToRgb565(s);
ct--;
}
while (ct >= 2) {
+#if BYTE_ORDER == BIG_ENDIAN
s = GGL_RGBA_TO_HOST( *src++ );
- sR = (s >> ( 3))&0x1F;
- sG = (s >> ( 8+2))&0x3F;
- sB = (s >> (16+3))&0x1F;
- d = (sR<<11)|(sG<<5)|sB;
-
+ d = convertAbgr8888ToRgb565_hi16(s);
+
+ s = GGL_RGBA_TO_HOST( *src++ );
+ d |= convertAbgr8888ToRgb565(s);
+#else
s = GGL_RGBA_TO_HOST( *src++ );
- sR = (s >> ( 3))&0x1F;
- sG = (s >> ( 8+2))&0x3F;
- sB = (s >> (16+3))&0x1F;
- d |= ((sR<<11)|(sG<<5)|sB)<<16;
+ d = convertAbgr8888ToRgb565(s);
-#if BYTE_ORDER == BIG_ENDIAN
- d = (d>>16) | (d<<16);
+ s = GGL_RGBA_TO_HOST( *src++ );
+ d |= convertAbgr8888ToRgb565(s) << 16;
#endif
-
*dst32++ = d;
ct -= 2;
}
@@ -1357,6 +2136,7 @@ last_one:
void scanline_t32cb16blend(context_t* c)
{
+#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__arm__))
int32_t x = c->iterators.xl;
size_t ct = c->iterators.xr - x;
int32_t y = c->iterators.y;
@@ -1368,33 +2148,55 @@ void scanline_t32cb16blend(context_t* c)
const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
uint32_t *src = reinterpret_cast<uint32_t*>(tex->data)+(u+(tex->stride*v));
-#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__arm__))
scanline_t32cb16blend_arm(dst, src, ct);
#else
- while (ct--) {
- uint32_t s = *src++;
- if (!s) {
- dst++;
- continue;
- }
- uint16_t d = *dst;
- s = GGL_RGBA_TO_HOST(s);
- int sR = (s >> ( 3))&0x1F;
- int sG = (s >> ( 8+2))&0x3F;
- int sB = (s >> (16+3))&0x1F;
- int sA = (s>>24);
- int f = 0x100 - (sA + (sA>>7));
- int dR = (d>>11)&0x1f;
- int dG = (d>>5)&0x3f;
- int dB = (d)&0x1f;
- sR += (f*dR)>>8;
- sG += (f*dG)>>8;
- sB += (f*dB)>>8;
- *dst++ = uint16_t((sR<<11)|(sG<<5)|sB);
+ dst_iterator16 di(c);
+ horz_iterator32 hi(c);
+ blender_32to16 bl(c);
+ while (di.count--) {
+ uint32_t s = hi.get_pixel32();
+ bl.write(s, di.dst);
+ di.dst++;
}
#endif
}
+void scanline_t32cb16blend_srca(context_t* c) // Blends 32-bit values from a horz_iterator32 into the dst_iterator16 span through blender_32to16_srcA.
+{
+ dst_iterator16 di(c); // destination cursor built from c; this function uses di.dst and di.count
+ horz_iterator32 hi(c); // pixel source, read 32 bits at a time (not a clamp iterator, unlike the *_clamp_* scanlines)
+ blender_32to16_srcA blender(c); // NOTE(review): "srcA" presumably means the source-alpha blend mode -- confirm against the blender's definition
+
+ while (di.count--) { // runs di.count times, advancing di.dst by one on each pass
+ uint32_t s = hi.get_pixel32();
+ blender.write(s,di.dst); // two-argument write: no ditherer
+ di.dst++;
+ }
+}
+
+void scanline_t16cb16blend_clamp_mod(context_t* c) // Blends 16-bit values from a clamp_iterator into the destination through blender_16to16_modulate, with two shortcuts keyed on the value a.
+{
+ const int a = c->iterators.ydady >> (GGL_COLOR_BITS-8); // presumably the constant alpha reduced to 8 bits -- confirm the meaning of iterators.ydady
+ if (a == 0) { // shortcut: return without touching the destination
+ return;
+ }
+
+ if (a == 255) { // shortcut: delegate to the non-blending copy in scanline_t16cb16_clamp
+ scanline_t16cb16_clamp(c);
+ return;
+ }
+
+ dst_iterator16 di(c); // destination cursor built from c; this function uses di.dst and di.count
+ blender_16to16_modulate blender(c);
+ clamp_iterator ci(c); // pixel source, read 16 bits at a time here
+
+ while (di.count--) { // runs di.count times, advancing di.dst by one on each pass
+ uint16_t s = ci.get_pixel16();
+ blender.write(s, di.dst); // two-argument write: no ditherer
+ di.dst++;
+ }
+}
+
void scanline_memcpy(context_t* c)
{
int32_t x = c->iterators.xl;
@@ -1518,26 +2320,3 @@ void rect_memcpy(context_t* c, size_t yc)
// ----------------------------------------------------------------------------
}; // namespace android
-using namespace android;
-extern "C" void ggl_test_codegen(uint32_t n, uint32_t p, uint32_t t0, uint32_t t1)
-{
-#if ANDROID_ARM_CODEGEN
- GGLContext* c;
- gglInit(&c);
- needs_t needs;
- needs.n = n;
- needs.p = p;
- needs.t[0] = t0;
- needs.t[1] = t1;
- sp<ScanlineAssembly> a(new ScanlineAssembly(needs, ASSEMBLY_SCRATCH_SIZE));
- GGLAssembler assembler( new ARMAssembler(a) );
- int err = assembler.scanline(needs, (context_t*)c);
- if (err != 0) {
- printf("error %08x (%s)\n", err, strerror(-err));
- }
- gglUninit(c);
-#else
- printf("This test runs only on ARM\n");
-#endif
-}
-
diff --git a/libpixelflinger/tests/codegen/Android.mk b/libpixelflinger/tests/codegen/Android.mk
index 1bc4214..aa320fc 100644
--- a/libpixelflinger/tests/codegen/Android.mk
+++ b/libpixelflinger/tests/codegen/Android.mk
@@ -2,12 +2,15 @@ LOCAL_PATH:= $(call my-dir)
include $(CLEAR_VARS)
LOCAL_SRC_FILES:= \
- codegen.cpp
+ codegen.cpp.arm
LOCAL_SHARED_LIBRARIES := \
libcutils \
libpixelflinger
+LOCAL_C_INCLUDES := \
+ system/core/libpixelflinger
+
LOCAL_MODULE:= test-opengl-codegen
LOCAL_MODULE_TAGS := tests
diff --git a/libpixelflinger/tests/codegen/codegen.cpp b/libpixelflinger/tests/codegen/codegen.cpp
index 1865888..94e2481 100644
--- a/libpixelflinger/tests/codegen/codegen.cpp
+++ b/libpixelflinger/tests/codegen/codegen.cpp
@@ -1,9 +1,54 @@
#include <stdio.h>
#include <stdint.h>
-extern "C" void ggl_test_codegen(
- uint32_t n, uint32_t p, uint32_t t0, uint32_t t1);
+#include "private/pixelflinger/ggl_context.h"
+#include "buffer.h"
+#include "scanline.h"
+
+#include "codeflinger/CodeCache.h"
+#include "codeflinger/GGLAssembler.h"
+#include "codeflinger/ARMAssembler.h"
+
+#if defined(__arm__)
+# define ANDROID_ARM_CODEGEN 1
+#else
+# define ANDROID_ARM_CODEGEN 0
+#endif
+
+#define ASSEMBLY_SCRATCH_SIZE 2048
+
+using namespace android;
+
+class ScanlineAssembly : public Assembly { // Assembly subclass that also remembers the needs_t it was created for.
+ AssemblyKey<needs_t> mKey; // key built from the needs passed to the constructor
+public:
+ ScanlineAssembly(needs_t needs, size_t size) // size is forwarded to the Assembly base; needs initialises mKey
+ : Assembly(size), mKey(needs) { }
+ const AssemblyKey<needs_t>& key() const { return mKey; } // read-only access to the stored key
+};
+
+static void ggl_test_codegen(uint32_t n, uint32_t p, uint32_t t0, uint32_t t1) // Runs GGLAssembler::scanline() once for the needs described by (n, p, t0, t1) and prints any error; on non-ARM builds it only prints a notice.
+{
+#if ANDROID_ARM_CODEGEN
+ GGLContext* c;
+ gglInit(&c); // paired with gglUninit(c) at the end of this branch
+ needs_t needs; // filled field by field from the four arguments
+ needs.n = n;
+ needs.p = p;
+ needs.t[0] = t0;
+ needs.t[1] = t1;
+ sp<ScanlineAssembly> a(new ScanlineAssembly(needs, ASSEMBLY_SCRATCH_SIZE)); // assembly created with ASSEMBLY_SCRATCH_SIZE as its size argument
+ GGLAssembler assembler( new ARMAssembler(a) ); // the ARMAssembler is constructed on the assembly above
+ int err = assembler.scanline(needs, (context_t*)c); // the GGLContext pointer is cast to context_t* for the call
+ if (err != 0) {
+ printf("error %08x (%s)\n", err, strerror(-err)); // NOTE(review): err is negated for strerror, so it is presumably a negative errno value; <string.h> is not among the includes visible in this hunk -- confirm it arrives transitively
+ }
+ gglUninit(c);
+#else
+ printf("This test runs only on ARM\n");
+#endif
+}
int main(int argc, char** argv)
{