diff options
Diffstat (limited to 'libpixelflinger/include')
-rw-r--r-- | libpixelflinger/include/pixelflinger/format.h | 136 | ||||
-rw-r--r-- | libpixelflinger/include/pixelflinger/pixelflinger.h | 330 | ||||
-rw-r--r-- | libpixelflinger/include/private/pixelflinger/ggl_context.h | 565 | ||||
-rw-r--r-- | libpixelflinger/include/private/pixelflinger/ggl_fixed.h | 633 |
4 files changed, 1664 insertions, 0 deletions
diff --git a/libpixelflinger/include/pixelflinger/format.h b/libpixelflinger/include/pixelflinger/format.h new file mode 100644 index 0000000..82eeca4 --- /dev/null +++ b/libpixelflinger/include/pixelflinger/format.h @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2005 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_PIXELFLINGER_FORMAT_H +#define ANDROID_PIXELFLINGER_FORMAT_H + +#include <stdint.h> +#include <sys/types.h> + +enum GGLPixelFormat { + // these constants need to match those + // in graphics/PixelFormat.java, ui/PixelFormat.h, BlitHardware.h + GGL_PIXEL_FORMAT_UNKNOWN = 0, + GGL_PIXEL_FORMAT_NONE = 0, + + GGL_PIXEL_FORMAT_RGBA_8888 = 1, // 4x8-bit ARGB + GGL_PIXEL_FORMAT_RGBX_8888 = 2, // 3x8-bit RGB stored in 32-bit chunks + GGL_PIXEL_FORMAT_RGB_888 = 3, // 3x8-bit RGB + GGL_PIXEL_FORMAT_RGB_565 = 4, // 16-bit RGB + GGL_PIXEL_FORMAT_BGRA_8888 = 5, // 4x8-bit BGRA + GGL_PIXEL_FORMAT_RGBA_5551 = 6, // 16-bit RGBA + GGL_PIXEL_FORMAT_RGBA_4444 = 7, // 16-bit RGBA + + GGL_PIXEL_FORMAT_A_8 = 8, // 8-bit A + GGL_PIXEL_FORMAT_L_8 = 9, // 8-bit L (R=G=B = L) + GGL_PIXEL_FORMAT_LA_88 = 0xA, // 16-bit LA + GGL_PIXEL_FORMAT_RGB_332 = 0xB, // 8-bit RGB (non paletted) + + // reserved range. don't use. + GGL_PIXEL_FORMAT_RESERVED_10 = 0x10, + GGL_PIXEL_FORMAT_RESERVED_11 = 0x11, + GGL_PIXEL_FORMAT_RESERVED_12 = 0x12, + GGL_PIXEL_FORMAT_RESERVED_13 = 0x13, + GGL_PIXEL_FORMAT_RESERVED_14 = 0x14, + GGL_PIXEL_FORMAT_RESERVED_15 = 0x15, + GGL_PIXEL_FORMAT_RESERVED_16 = 0x16, + GGL_PIXEL_FORMAT_RESERVED_17 = 0x17, + + // reserved/special formats + GGL_PIXEL_FORMAT_Z_16 = 0x18, + GGL_PIXEL_FORMAT_S_8 = 0x19, + GGL_PIXEL_FORMAT_SZ_24 = 0x1A, + GGL_PIXEL_FORMAT_SZ_8 = 0x1B, + + // reserved range. don't use. + GGL_PIXEL_FORMAT_RESERVED_20 = 0x20, + GGL_PIXEL_FORMAT_RESERVED_21 = 0x21, +}; + +enum GGLFormatComponents { + GGL_STENCIL_INDEX = 0x1901, + GGL_DEPTH_COMPONENT = 0x1902, + GGL_ALPHA = 0x1906, + GGL_RGB = 0x1907, + GGL_RGBA = 0x1908, + GGL_LUMINANCE = 0x1909, + GGL_LUMINANCE_ALPHA = 0x190A, +}; + +enum GGLFormatComponentIndex { + GGL_INDEX_ALPHA = 0, + GGL_INDEX_RED = 1, + GGL_INDEX_GREEN = 2, + GGL_INDEX_BLUE = 3, + GGL_INDEX_STENCIL = 0, + GGL_INDEX_DEPTH = 1, + GGL_INDEX_Y = 0, + GGL_INDEX_CB = 1, + GGL_INDEX_CR = 2, +}; + +typedef struct { +#ifdef __cplusplus + enum { + ALPHA = GGL_INDEX_ALPHA, + RED = GGL_INDEX_RED, + GREEN = GGL_INDEX_GREEN, + BLUE = GGL_INDEX_BLUE, + STENCIL = GGL_INDEX_STENCIL, + DEPTH = GGL_INDEX_DEPTH, + LUMA = GGL_INDEX_Y, + CHROMAB = GGL_INDEX_CB, + CHROMAR = GGL_INDEX_CR, + }; + inline uint32_t mask(int i) const { + return ((1<<(c[i].h-c[i].l))-1)<<c[i].l; + } + inline uint32_t bits(int i) const { + return c[i].h - c[i].l; + } +#endif + uint8_t size; // bytes per pixel + uint8_t bitsPerPixel; + union { + struct { + uint8_t ah; // alpha high bit position + 1 + uint8_t al; // alpha low bit position + uint8_t rh; // red high bit position + 1 + uint8_t rl; // red low bit position + uint8_t gh; // green high bit position + 1 + uint8_t gl; // green low bit position + uint8_t bh; // blue high bit position + 1 + uint8_t bl; // blue low bit position + }; + struct { + uint8_t h; + uint8_t l; + } __attribute__((__packed__)) c[4]; + } __attribute__((__packed__)); + uint16_t components; // GGLFormatComponents +} GGLFormat; + + +#ifdef __cplusplus +extern "C" const GGLFormat* gglGetPixelFormatTable(size_t* numEntries = 0); +#else +const GGLFormat* gglGetPixelFormatTable(size_t* numEntries); +#endif + + +// ---------------------------------------------------------------------------- + +#endif // ANDROID_PIXELFLINGER_FORMAT_H diff --git a/libpixelflinger/include/pixelflinger/pixelflinger.h b/libpixelflinger/include/pixelflinger/pixelflinger.h new file mode 100644 index 0000000..8a2b442 --- /dev/null +++ b/libpixelflinger/include/pixelflinger/pixelflinger.h @@ -0,0 +1,330 @@ +/* + * Copyright (C) 2007 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_PIXELFLINGER_H +#define ANDROID_PIXELFLINGER_H + +#include <stdint.h> +#include <sys/types.h> + +#include <pixelflinger/format.h> + +// GGL types + +typedef int8_t GGLbyte; // b +typedef int16_t GGLshort; // s +typedef int32_t GGLint; // i +typedef ssize_t GGLsizei; // i +typedef int32_t GGLfixed; // x +typedef int32_t GGLclampx; // x +typedef float GGLfloat; // f +typedef float GGLclampf; // f +typedef double GGLdouble; // d +typedef double GGLclampd; // d +typedef uint8_t GGLubyte; // ub +typedef uint8_t GGLboolean; // ub +typedef uint16_t GGLushort; // us +typedef uint32_t GGLuint; // ui +typedef unsigned int GGLenum; // ui +typedef unsigned int GGLbitfield; // ui +typedef void GGLvoid; +typedef int32_t GGLfixed32; +typedef int32_t GGLcolor; +typedef int32_t GGLcoord; + +// ---------------------------------------------------------------------------- + +#define GGL_MAX_VIEWPORT_DIMS 4096 +#define GGL_MAX_TEXTURE_SIZE 4096 +#define GGL_MAX_ALIASED_POINT_SIZE 0x7FFFFFF +#define GGL_MAX_SMOOTH_POINT_SIZE 2048 +#define GGL_MAX_SMOOTH_LINE_WIDTH 2048 + +// ---------------------------------------------------------------------------- + +// All these names are compatible with their OpenGL equivalents +// some of them are listed only for completeness +enum GGLNames { + GGL_FALSE = 0, + GGL_TRUE = 1, + + // enable/disable + GGL_SCISSOR_TEST = 0x0C11, + GGL_TEXTURE_2D = 0x0DE1, + GGL_ALPHA_TEST = 0x0BC0, + GGL_BLEND = 0x0BE2, + GGL_COLOR_LOGIC_OP = 0x0BF2, + GGL_DITHER = 0x0BD0, + GGL_STENCIL_TEST = 0x0B90, + GGL_DEPTH_TEST = 0x0B71, + GGL_AA = 0x80000001, + GGL_W_LERP = 0x80000004, + GGL_POINT_SMOOTH_NICE = 0x80000005, + + // buffers, pixel drawing/reading + GGL_COLOR = 0x1800, + + // fog + GGL_FOG = 0x0B60, + + // shade model + GGL_FLAT = 0x1D00, + GGL_SMOOTH = 0x1D01, + + // Texture parameter name + GGL_TEXTURE_MIN_FILTER = 0x2801, + GGL_TEXTURE_MAG_FILTER = 0x2800, + GGL_TEXTURE_WRAP_S = 0x2802, + GGL_TEXTURE_WRAP_T = 0x2803, + GGL_TEXTURE_WRAP_R = 0x2804, + + // Texture Filter + GGL_NEAREST = 0x2600, + GGL_LINEAR = 0x2601, + GGL_NEAREST_MIPMAP_NEAREST = 0x2700, + GGL_LINEAR_MIPMAP_NEAREST = 0x2701, + GGL_NEAREST_MIPMAP_LINEAR = 0x2702, + GGL_LINEAR_MIPMAP_LINEAR = 0x2703, + + // Texture Wrap Mode + GGL_CLAMP = 0x2900, + GGL_REPEAT = 0x2901, + GGL_CLAMP_TO_EDGE = 0x812F, + + // Texture Env Mode + GGL_REPLACE = 0x1E01, + GGL_MODULATE = 0x2100, + GGL_DECAL = 0x2101, + GGL_ADD = 0x0104, + + // Texture Env Parameter + GGL_TEXTURE_ENV_MODE = 0x2200, + GGL_TEXTURE_ENV_COLOR = 0x2201, + + // Texture Env Target + GGL_TEXTURE_ENV = 0x2300, + + // Texture coord generation + GGL_TEXTURE_GEN_MODE = 0x2500, + GGL_S = 0x2000, + GGL_T = 0x2001, + GGL_R = 0x2002, + GGL_Q = 0x2003, + GGL_ONE_TO_ONE = 0x80000002, + GGL_AUTOMATIC = 0x80000003, + + // AlphaFunction + GGL_NEVER = 0x0200, + GGL_LESS = 0x0201, + GGL_EQUAL = 0x0202, + GGL_LEQUAL = 0x0203, + GGL_GREATER = 0x0204, + GGL_NOTEQUAL = 0x0205, + GGL_GEQUAL = 0x0206, + GGL_ALWAYS = 0x0207, + + // LogicOp + GGL_CLEAR = 0x1500, // 0 + GGL_AND = 0x1501, // s & d + GGL_AND_REVERSE = 0x1502, // s & ~d + GGL_COPY = 0x1503, // s + GGL_AND_INVERTED = 0x1504, // ~s & d + GGL_NOOP = 0x1505, // d + GGL_XOR = 0x1506, // s ^ d + GGL_OR = 0x1507, // s | d + GGL_NOR = 0x1508, // ~(s | d) + GGL_EQUIV = 0x1509, // ~(s ^ d) + GGL_INVERT = 0x150A, // ~d + GGL_OR_REVERSE = 0x150B, // s | ~d + GGL_COPY_INVERTED = 0x150C, // ~s + GGL_OR_INVERTED = 0x150D, // ~s | d + GGL_NAND = 0x150E, // ~(s & d) + GGL_SET = 0x150F, // 1 + + // blending equation & function + GGL_ZERO = 0, // SD + GGL_ONE = 1, // SD + GGL_SRC_COLOR = 0x0300, // D + GGL_ONE_MINUS_SRC_COLOR = 0x0301, // D + GGL_SRC_ALPHA = 0x0302, // SD + GGL_ONE_MINUS_SRC_ALPHA = 0x0303, // SD + GGL_DST_ALPHA = 0x0304, // SD + GGL_ONE_MINUS_DST_ALPHA = 0x0305, // SD + GGL_DST_COLOR = 0x0306, // S + GGL_ONE_MINUS_DST_COLOR = 0x0307, // S + GGL_SRC_ALPHA_SATURATE = 0x0308, // S + + // clear bits + GGL_DEPTH_BUFFER_BIT = 0x00000100, + GGL_STENCIL_BUFFER_BIT = 0x00000400, + GGL_COLOR_BUFFER_BIT = 0x00004000, + + // errors + GGL_NO_ERROR = 0, + GGL_INVALID_ENUM = 0x0500, + GGL_INVALID_VALUE = 0x0501, + GGL_INVALID_OPERATION = 0x0502, + GGL_STACK_OVERFLOW = 0x0503, + GGL_STACK_UNDERFLOW = 0x0504, + GGL_OUT_OF_MEMORY = 0x0505 +}; + +// ---------------------------------------------------------------------------- + +typedef struct { + GGLsizei version; // always set to sizeof(GGLSurface) + GGLuint width; // width in pixels + GGLuint height; // height in pixels + GGLint stride; // stride in pixels + GGLubyte* data; // pointer to the bits + GGLubyte format; // pixel format + GGLubyte rfu[3]; // must be zero + // these values are dependent on the used format + union { + GGLint compressedFormat; + GGLint vstride; + }; + void* reserved; +} GGLSurface; + + +typedef struct { + // immediate rendering + void (*pointx)(void *con, const GGLcoord* v, GGLcoord r); + void (*linex)(void *con, + const GGLcoord* v0, const GGLcoord* v1, GGLcoord width); + void (*recti)(void* c, GGLint l, GGLint t, GGLint r, GGLint b); + void (*trianglex)(void* c, + GGLcoord const* v0, GGLcoord const* v1, GGLcoord const* v2); + + // scissor + void (*scissor)(void* c, GGLint x, GGLint y, GGLsizei width, GGLsizei height); + + // Set the textures and color buffers + void (*activeTexture)(void* c, GGLuint tmu); + void (*bindTexture)(void* c, const GGLSurface* surface); + void (*colorBuffer)(void* c, const GGLSurface* surface); + void (*readBuffer)(void* c, const GGLSurface* surface); + void (*depthBuffer)(void* c, const GGLSurface* surface); + void (*bindTextureLod)(void* c, GGLuint tmu, const GGLSurface* surface); + + // enable/disable features + void (*enable)(void* c, GGLenum name); + void (*disable)(void* c, GGLenum name); + void (*enableDisable)(void* c, GGLenum name, GGLboolean en); + + // specify the fragment's color + void (*shadeModel)(void* c, GGLenum mode); + void (*color4xv)(void* c, const GGLclampx* color); + // specify color iterators (16.16) + void (*colorGrad12xv)(void* c, const GGLcolor* grad); + + // specify Z coordinate iterators (0.32) + void (*zGrad3xv)(void* c, const GGLfixed32* grad); + + // specify W coordinate iterators (16.16) + void (*wGrad3xv)(void* c, const GGLfixed* grad); + + // specify fog iterator & color (16.16) + void (*fogGrad3xv)(void* c, const GGLfixed* grad); + void (*fogColor3xv)(void* c, const GGLclampx* color); + + // specify blending parameters + void (*blendFunc)(void* c, GGLenum src, GGLenum dst); + void (*blendFuncSeparate)(void* c, GGLenum src, GGLenum dst, + GGLenum srcAlpha, GGLenum dstAplha); + + // texture environnement (REPLACE / MODULATE / DECAL / BLEND) + void (*texEnvi)(void* c, GGLenum target, + GGLenum pname, + GGLint param); + + void (*texEnvxv)(void* c, GGLenum target, + GGLenum pname, const GGLfixed* params); + + // texture parameters (Wrapping, filter) + void (*texParameteri)(void* c, GGLenum target, + GGLenum pname, + GGLint param); + + // texture iterators (16.16) + void (*texCoord2i)(void* c, GGLint s, GGLint t); + void (*texCoord2x)(void* c, GGLfixed s, GGLfixed t); + + // s, dsdx, dsdy, scale, t, dtdx, dtdy, tscale + // This api uses block floating-point for S and T texture coordinates. + // All values are given in 16.16, scaled by 'scale'. In other words, + // set scale to 0, for 16.16 values. + void (*texCoordGradScale8xv)(void* c, GGLint tmu, const int32_t* grad8); + + void (*texGeni)(void* c, GGLenum coord, GGLenum pname, GGLint param); + + // masking + void (*colorMask)(void* c, GGLboolean red, + GGLboolean green, + GGLboolean blue, + GGLboolean alpha); + + void (*depthMask)(void* c, GGLboolean flag); + + void (*stencilMask)(void* c, GGLuint mask); + + // alpha func + void (*alphaFuncx)(void* c, GGLenum func, GGLclampx ref); + + // depth func + void (*depthFunc)(void* c, GGLenum func); + + // logic op + void (*logicOp)(void* c, GGLenum opcode); + + // clear + void (*clear)(void* c, GGLbitfield mask); + void (*clearColorx)(void* c, + GGLclampx r, GGLclampx g, GGLclampx b, GGLclampx a); + void (*clearDepthx)(void* c, GGLclampx depth); + void (*clearStencil)(void* c, GGLint s); + + // framebuffer operations + void (*copyPixels)(void* c, GGLint x, GGLint y, + GGLsizei width, GGLsizei height, GGLenum type); + void (*rasterPos2x)(void* c, GGLfixed x, GGLfixed y); + void (*rasterPos2i)(void* c, GGLint x, GGLint y); +} GGLContext; + +// ---------------------------------------------------------------------------- + +#ifdef __cplusplus +extern "C" { +#endif + +// construct / destroy the context +ssize_t gglInit(GGLContext** context); +ssize_t gglUninit(GGLContext* context); + +GGLint gglBitBlit( + GGLContext* c, + int tmu, + GGLint crop[4], + GGLint where[4]); + +#ifdef __cplusplus +}; +#endif + +// ---------------------------------------------------------------------------- + +#endif // ANDROID_PIXELFLINGER_H diff --git a/libpixelflinger/include/private/pixelflinger/ggl_context.h b/libpixelflinger/include/private/pixelflinger/ggl_context.h new file mode 100644 index 0000000..d43655c --- /dev/null +++ b/libpixelflinger/include/private/pixelflinger/ggl_context.h @@ -0,0 +1,565 @@ +/* + * Copyright (C) 2006 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_GGL_CONTEXT_H +#define ANDROID_GGL_CONTEXT_H + +#include <stdint.h> +#include <stddef.h> +#include <string.h> +#include <sys/types.h> +#include <endian.h> + +#include <pixelflinger/pixelflinger.h> +#include <private/pixelflinger/ggl_fixed.h> + +namespace android { + +// ---------------------------------------------------------------------------- + +#if BYTE_ORDER == LITTLE_ENDIAN + +inline uint32_t GGL_RGBA_TO_HOST(uint32_t v) { + return v; +} +inline uint32_t GGL_HOST_TO_RGBA(uint32_t v) { + return v; +} + +#else + +inline uint32_t GGL_RGBA_TO_HOST(uint32_t v) { +#if defined(__mips__) && __mips==32 && __mips_isa_rev>=2 + uint32_t r; + __asm__("wsbh %0, %1;" + "rotr %0, %0, 16" + : "=r" (r) + : "r" (v) + ); + return r; +#else + return (v<<24) | (v>>24) | ((v<<8)&0xff0000) | ((v>>8)&0xff00); +#endif +} +inline uint32_t GGL_HOST_TO_RGBA(uint32_t v) { +#if defined(__mips__) && __mips==32 && __mips_isa_rev>=2 + uint32_t r; + __asm__("wsbh %0, %1;" + "rotr %0, %0, 16" + : "=r" (r) + : "r" (v) + ); + return r; +#else + return (v<<24) | (v>>24) | ((v<<8)&0xff0000) | ((v>>8)&0xff00); +#endif +} + +#endif + +// ---------------------------------------------------------------------------- + +const int GGL_DITHER_BITS = 6; // dither weights stored on 6 bits +const int GGL_DITHER_ORDER_SHIFT= 3; +const int GGL_DITHER_ORDER = (1<<GGL_DITHER_ORDER_SHIFT); +const int GGL_DITHER_SIZE = GGL_DITHER_ORDER * GGL_DITHER_ORDER; +const int GGL_DITHER_MASK = GGL_DITHER_ORDER-1; + +// ---------------------------------------------------------------------------- + +const int GGL_SUBPIXEL_BITS = 4; + +// TRI_FRACTION_BITS defines the number of bits we want to use +// for the sub-pixel coordinates during the edge stepping, the +// value shouldn't be more than 7, or bad things are going to +// happen when drawing large triangles (8 doesn't work because +// 32 bit muls will loose the sign bit) + +#define TRI_FRACTION_BITS (GGL_SUBPIXEL_BITS) +#define TRI_ONE (1 << TRI_FRACTION_BITS) +#define TRI_HALF (1 << (TRI_FRACTION_BITS-1)) +#define TRI_FROM_INT(x) ((x) << TRI_FRACTION_BITS) +#define TRI_FRAC(x) ((x) & (TRI_ONE-1)) +#define TRI_FLOOR(x) ((x) & ~(TRI_ONE-1)) +#define TRI_CEIL(x) (((x) + (TRI_ONE-1)) & ~(TRI_ONE-1)) +#define TRI_ROUND(x) (((x) + TRI_HALF ) & ~(TRI_ONE-1)) + +#define TRI_ROUDNING (1 << (16 - TRI_FRACTION_BITS - 1)) +#define TRI_FROM_FIXED(x) (((x)+TRI_ROUDNING) >> (16-TRI_FRACTION_BITS)) + +#define TRI_SNAP_NEXT_HALF(x) (TRI_CEIL((x)+TRI_HALF) - TRI_HALF) +#define TRI_SNAP_PREV_HALF(x) (TRI_CEIL((x)-TRI_HALF) - TRI_HALF) + +// ---------------------------------------------------------------------------- + +const int GGL_COLOR_BITS = 24; + +// To maintain 8-bits color chanels, with a maximum GGLSurface +// size of 4096 and GGL_SUBPIXEL_BITS=4, we need 8 + 12 + 4 = 24 bits +// for encoding the color iterators + +inline GGLcolor gglFixedToIteratedColor(GGLfixed c) { + return (c << 8) - c; +} + +// ---------------------------------------------------------------------------- + +template<bool> struct CTA; +template<> struct CTA<true> { }; + +#define GGL_CONTEXT(con, c) context_t *con = static_cast<context_t *>(c) +#define GGL_OFFSETOF(field) uintptr_t(&(((context_t*)0)->field)) +#define GGL_INIT_PROC(p, f) p.f = ggl_ ## f; +#define GGL_BETWEEN(x, L, H) (uint32_t((x)-(L)) <= ((H)-(L))) + +#define ggl_likely(x) __builtin_expect(!!(x), 1) +#define ggl_unlikely(x) __builtin_expect(!!(x), 0) + +const int GGL_TEXTURE_UNIT_COUNT = 2; +const int GGL_TMU_STATE = 0x00000001; +const int GGL_CB_STATE = 0x00000002; +const int GGL_PIXEL_PIPELINE_STATE = 0x00000004; + +// ---------------------------------------------------------------------------- + +#define GGL_RESERVE_NEEDS(name, l, s) \ + const uint32_t GGL_NEEDS_##name##_MASK = (((1LU<<(s))-1)<<l); \ + const uint32_t GGL_NEEDS_##name##_SHIFT = (l); + +#define GGL_BUILD_NEEDS(val, name) \ + (((val)<<(GGL_NEEDS_##name##_SHIFT)) & GGL_NEEDS_##name##_MASK) + +#define GGL_READ_NEEDS(name, n) \ + (uint32_t(n & GGL_NEEDS_##name##_MASK) >> GGL_NEEDS_##name##_SHIFT) + +#define GGL_NEED_MASK(name) (uint32_t(GGL_NEEDS_##name##_MASK)) +#define GGL_NEED(name, val) GGL_BUILD_NEEDS(val, name) + +GGL_RESERVE_NEEDS( CB_FORMAT, 0, 6 ) +GGL_RESERVE_NEEDS( SHADE, 6, 1 ) +GGL_RESERVE_NEEDS( W, 7, 1 ) +GGL_RESERVE_NEEDS( BLEND_SRC, 8, 4 ) +GGL_RESERVE_NEEDS( BLEND_DST, 12, 4 ) +GGL_RESERVE_NEEDS( BLEND_SRCA, 16, 4 ) +GGL_RESERVE_NEEDS( BLEND_DSTA, 20, 4 ) +GGL_RESERVE_NEEDS( LOGIC_OP, 24, 4 ) +GGL_RESERVE_NEEDS( MASK_ARGB, 28, 4 ) + +GGL_RESERVE_NEEDS( P_ALPHA_TEST, 0, 3 ) +GGL_RESERVE_NEEDS( P_AA, 3, 1 ) +GGL_RESERVE_NEEDS( P_DEPTH_TEST, 4, 3 ) +GGL_RESERVE_NEEDS( P_MASK_Z, 7, 1 ) +GGL_RESERVE_NEEDS( P_DITHER, 8, 1 ) +GGL_RESERVE_NEEDS( P_FOG, 9, 1 ) +GGL_RESERVE_NEEDS( P_RESERVED1, 10,22 ) + +GGL_RESERVE_NEEDS( T_FORMAT, 0, 6 ) +GGL_RESERVE_NEEDS( T_RESERVED0, 6, 1 ) +GGL_RESERVE_NEEDS( T_POT, 7, 1 ) +GGL_RESERVE_NEEDS( T_S_WRAP, 8, 2 ) +GGL_RESERVE_NEEDS( T_T_WRAP, 10, 2 ) +GGL_RESERVE_NEEDS( T_ENV, 12, 3 ) +GGL_RESERVE_NEEDS( T_LINEAR, 15, 1 ) + +const int GGL_NEEDS_WRAP_CLAMP_TO_EDGE = 0; +const int GGL_NEEDS_WRAP_REPEAT = 1; +const int GGL_NEEDS_WRAP_11 = 2; + +inline uint32_t ggl_wrap_to_needs(uint32_t e) { + switch (e) { + case GGL_CLAMP: return GGL_NEEDS_WRAP_CLAMP_TO_EDGE; + case GGL_REPEAT: return GGL_NEEDS_WRAP_REPEAT; + } + return 0; +} + +inline uint32_t ggl_blendfactor_to_needs(uint32_t b) { + if (b <= 1) return b; + return (b & 0xF)+2; +} + +inline uint32_t ggl_needs_to_blendfactor(uint32_t n) { + if (n <= 1) return n; + return (n - 2) + 0x300; +} + +inline uint32_t ggl_env_to_needs(uint32_t e) { + switch (e) { + case GGL_REPLACE: return 0; + case GGL_MODULATE: return 1; + case GGL_DECAL: return 2; + case GGL_BLEND: return 3; + case GGL_ADD: return 4; + } + return 0; +} + +inline uint32_t ggl_needs_to_env(uint32_t n) { + const uint32_t envs[] = { GGL_REPLACE, GGL_MODULATE, + GGL_DECAL, GGL_BLEND, GGL_ADD }; + return envs[n]; + +} + +// ---------------------------------------------------------------------------- + +enum { + GGL_ENABLE_BLENDING = 0x00000001, + GGL_ENABLE_SMOOTH = 0x00000002, + GGL_ENABLE_AA = 0x00000004, + GGL_ENABLE_LOGIC_OP = 0x00000008, + GGL_ENABLE_ALPHA_TEST = 0x00000010, + GGL_ENABLE_SCISSOR_TEST = 0x00000020, + GGL_ENABLE_TMUS = 0x00000040, + GGL_ENABLE_DEPTH_TEST = 0x00000080, + GGL_ENABLE_STENCIL_TEST = 0x00000100, + GGL_ENABLE_W = 0x00000200, + GGL_ENABLE_DITHER = 0x00000400, + GGL_ENABLE_FOG = 0x00000800, + GGL_ENABLE_POINT_AA_NICE= 0x00001000 +}; + +// ---------------------------------------------------------------------------- + +class needs_filter_t; +struct needs_t { + inline int match(const needs_filter_t& filter); + inline bool operator == (const needs_t& rhs) const { + return (n==rhs.n) && + (p==rhs.p) && + (t[0]==rhs.t[0]) && + (t[1]==rhs.t[1]); + } + inline bool operator != (const needs_t& rhs) const { + return !operator == (rhs); + } + uint32_t n; + uint32_t p; + uint32_t t[GGL_TEXTURE_UNIT_COUNT]; +}; + +inline int compare_type(const needs_t& lhs, const needs_t& rhs) { + return memcmp(&lhs, &rhs, sizeof(needs_t)); +} + +struct needs_filter_t { + needs_t value; + needs_t mask; +}; + +int needs_t::match(const needs_filter_t& filter) { + uint32_t result = + ((filter.value.n ^ n) & filter.mask.n) | + ((filter.value.p ^ p) & filter.mask.p) | + ((filter.value.t[0] ^ t[0]) & filter.mask.t[0]) | + ((filter.value.t[1] ^ t[1]) & filter.mask.t[1]); + return (result == 0); +} + +// ---------------------------------------------------------------------------- + +struct context_t; +class Assembly; + +struct blend_state_t { + uint32_t src; + uint32_t dst; + uint32_t src_alpha; + uint32_t dst_alpha; + uint8_t reserved; + uint8_t alpha_separate; + uint8_t operation; + uint8_t equation; +}; + +struct mask_state_t { + uint8_t color; + uint8_t depth; + uint32_t stencil; +}; + +struct clear_state_t { + GGLclampx r; + GGLclampx g; + GGLclampx b; + GGLclampx a; + GGLclampx depth; + GGLint stencil; + uint32_t colorPacked; + uint32_t depthPacked; + uint32_t stencilPacked; + uint32_t dirty; +}; + +struct fog_state_t { + uint8_t color[4]; +}; + +struct logic_op_state_t { + uint16_t opcode; +}; + +struct alpha_test_state_t { + uint16_t func; + GGLcolor ref; +}; + +struct depth_test_state_t { + uint16_t func; + GGLclampx clearValue; +}; + +struct scissor_t { + uint32_t user_left; + uint32_t user_right; + uint32_t user_top; + uint32_t user_bottom; + uint32_t left; + uint32_t right; + uint32_t top; + uint32_t bottom; +}; + +struct pixel_t { + uint32_t c[4]; + uint8_t s[4]; +}; + +struct surface_t { + union { + GGLSurface s; + // Keep the following struct field types in line with the corresponding + // GGLSurface fields to avoid mismatches leading to errors. + struct { + GGLsizei reserved; + GGLuint width; + GGLuint height; + GGLint stride; + GGLubyte* data; + GGLubyte format; + GGLubyte dirty; + GGLubyte pad[2]; + }; + }; + void (*read) (const surface_t* s, context_t* c, + uint32_t x, uint32_t y, pixel_t* pixel); + void (*write)(const surface_t* s, context_t* c, + uint32_t x, uint32_t y, const pixel_t* pixel); +}; + +// ---------------------------------------------------------------------------- + +struct texture_shade_t { + union { + struct { + int32_t is0; + int32_t idsdx; + int32_t idsdy; + int sscale; + int32_t it0; + int32_t idtdx; + int32_t idtdy; + int tscale; + }; + struct { + int32_t v; + int32_t dx; + int32_t dy; + int scale; + } st[2]; + }; +}; + +struct texture_iterators_t { + // these are not encoded in the same way than in the + // texture_shade_t structure + union { + struct { + GGLfixed ydsdy; + GGLfixed dsdx; + GGLfixed dsdy; + int sscale; + GGLfixed ydtdy; + GGLfixed dtdx; + GGLfixed dtdy; + int tscale; + }; + struct { + GGLfixed ydvdy; + GGLfixed dvdx; + GGLfixed dvdy; + int scale; + } st[2]; + }; +}; + +struct texture_t { + surface_t surface; + texture_iterators_t iterators; + texture_shade_t shade; + uint32_t s_coord; + uint32_t t_coord; + uint16_t s_wrap; + uint16_t t_wrap; + uint16_t min_filter; + uint16_t mag_filter; + uint16_t env; + uint8_t env_color[4]; + uint8_t enable; + uint8_t dirty; +}; + +struct raster_t { + GGLfixed x; + GGLfixed y; +}; + +struct framebuffer_t { + surface_t color; + surface_t read; + surface_t depth; + surface_t stencil; + int16_t *coverage; + size_t coverageBufferSize; +}; + +// ---------------------------------------------------------------------------- + +struct iterators_t { + int32_t xl; + int32_t xr; + int32_t y; + GGLcolor ydady; + GGLcolor ydrdy; + GGLcolor ydgdy; + GGLcolor ydbdy; + GGLfixed ydzdy; + GGLfixed ydwdy; + GGLfixed ydfdy; +}; + +struct shade_t { + GGLcolor a0; + GGLcolor dadx; + GGLcolor dady; + GGLcolor r0; + GGLcolor drdx; + GGLcolor drdy; + GGLcolor g0; + GGLcolor dgdx; + GGLcolor dgdy; + GGLcolor b0; + GGLcolor dbdx; + GGLcolor dbdy; + uint32_t z0; + GGLfixed32 dzdx; + GGLfixed32 dzdy; + GGLfixed w0; + GGLfixed dwdx; + GGLfixed dwdy; + uint32_t f0; + GGLfixed dfdx; + GGLfixed dfdy; +}; + +// these are used in the generated code +// we use this mirror structure to improve +// data locality in the pixel pipeline +struct generated_tex_vars_t { + uint32_t width; + uint32_t height; + uint32_t stride; + uintptr_t data; + int32_t dsdx; + int32_t dtdx; + int32_t spill[2]; +}; + +struct generated_vars_t { + struct { + int32_t c; + int32_t dx; + } argb[4]; + int32_t aref; + int32_t dzdx; + int32_t zbase; + int32_t f; + int32_t dfdx; + int32_t spill[3]; + generated_tex_vars_t texture[GGL_TEXTURE_UNIT_COUNT]; + int32_t rt; + int32_t lb; +}; + +// ---------------------------------------------------------------------------- + +struct state_t { + framebuffer_t buffers; + texture_t texture[GGL_TEXTURE_UNIT_COUNT]; + scissor_t scissor; + raster_t raster; + blend_state_t blend; + alpha_test_state_t alpha_test; + depth_test_state_t depth_test; + mask_state_t mask; + clear_state_t clear; + fog_state_t fog; + logic_op_state_t logic_op; + uint32_t enables; + uint32_t enabled_tmu; + needs_t needs; +}; + +// ---------------------------------------------------------------------------- + +struct context_t { + GGLContext procs; + state_t state; + shade_t shade; + iterators_t iterators; + generated_vars_t generated_vars __attribute__((aligned(32))); + uint8_t ditherMatrix[GGL_DITHER_SIZE] __attribute__((aligned(32))); + uint32_t packed; + uint32_t packed8888; + const GGLFormat* formats; + uint32_t dirty; + texture_t* activeTMU; + uint32_t activeTMUIndex; + + void (*init_y)(context_t* c, int32_t y); + void (*step_y)(context_t* c); + void (*scanline)(context_t* c); + void (*span)(context_t* c); + void (*rect)(context_t* c, size_t yc); + + void* base; + Assembly* scanline_as; + GGLenum error; +}; + +// ---------------------------------------------------------------------------- + +void ggl_init_context(context_t* context); +void ggl_uninit_context(context_t* context); +void ggl_error(context_t* c, GGLenum error); +int64_t ggl_system_time(); + +// ---------------------------------------------------------------------------- + +}; + +#endif // ANDROID_GGL_CONTEXT_H + diff --git a/libpixelflinger/include/private/pixelflinger/ggl_fixed.h b/libpixelflinger/include/private/pixelflinger/ggl_fixed.h new file mode 100644 index 0000000..787f620 --- /dev/null +++ b/libpixelflinger/include/private/pixelflinger/ggl_fixed.h @@ -0,0 +1,633 @@ +/* + * Copyright (C) 2005 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_GGL_FIXED_H +#define ANDROID_GGL_FIXED_H + +#include <math.h> +#include <pixelflinger/pixelflinger.h> + +// ---------------------------------------------------------------------------- + +#define CONST __attribute__((const)) +#define ALWAYS_INLINE __attribute__((always_inline)) + +const GGLfixed FIXED_BITS = 16; +const GGLfixed FIXED_EPSILON = 1; +const GGLfixed FIXED_ONE = 1L<<FIXED_BITS; +const GGLfixed FIXED_HALF = 1L<<(FIXED_BITS-1); +const GGLfixed FIXED_MIN = 0x80000000L; +const GGLfixed FIXED_MAX = 0x7FFFFFFFL; + +inline GGLfixed gglIntToFixed(GGLfixed i) ALWAYS_INLINE ; +inline GGLfixed gglFixedToIntRound(GGLfixed f) ALWAYS_INLINE ; +inline GGLfixed gglFixedToIntFloor(GGLfixed f) ALWAYS_INLINE ; +inline GGLfixed gglFixedToIntCeil(GGLfixed f) ALWAYS_INLINE ; +inline GGLfixed gglFracx(GGLfixed v) ALWAYS_INLINE ; +inline GGLfixed gglFloorx(GGLfixed v) ALWAYS_INLINE ; +inline GGLfixed gglCeilx(GGLfixed v) ALWAYS_INLINE ; +inline GGLfixed gglCenterx(GGLfixed v) ALWAYS_INLINE ; +inline GGLfixed gglRoundx(GGLfixed v) ALWAYS_INLINE ; + +GGLfixed gglIntToFixed(GGLfixed i) { + return i<<FIXED_BITS; +} +GGLfixed gglFixedToIntRound(GGLfixed f) { + return (f + FIXED_HALF)>>FIXED_BITS; +} +GGLfixed gglFixedToIntFloor(GGLfixed f) { + return f>>FIXED_BITS; +} +GGLfixed gglFixedToIntCeil(GGLfixed f) { + return (f + ((1<<FIXED_BITS) - 1))>>FIXED_BITS; +} + +GGLfixed gglFracx(GGLfixed v) { + return v & ((1<<FIXED_BITS)-1); +} +GGLfixed gglFloorx(GGLfixed v) { + return gglFixedToIntFloor(v)<<FIXED_BITS; +} +GGLfixed gglCeilx(GGLfixed v) { + return gglFixedToIntCeil(v)<<FIXED_BITS; +} +GGLfixed gglCenterx(GGLfixed v) { + return gglFloorx(v + FIXED_HALF) | FIXED_HALF; +} +GGLfixed gglRoundx(GGLfixed v) { + return gglFixedToIntRound(v)<<FIXED_BITS; +} + +// conversion from (unsigned) int, short, byte to fixed... +#define GGL_B_TO_X(_x) GGLfixed( ((int32_t(_x)+1)>>1)<<10 ) +#define GGL_S_TO_X(_x) GGLfixed( ((int32_t(_x)+1)>>1)<<2 ) +#define GGL_I_TO_X(_x) GGLfixed( ((int32_t(_x)>>1)+1)>>14 ) +#define GGL_UB_TO_X(_x) GGLfixed( uint32_t(_x) + \ + (uint32_t(_x)<<8) + \ + (uint32_t(_x)>>7) ) +#define GGL_US_TO_X(_x) GGLfixed( (_x) + ((_x)>>15) ) +#define GGL_UI_TO_X(_x) GGLfixed( (((_x)>>1)+1)>>15 ) + +// ---------------------------------------------------------------------------- + +GGLfixed gglPowx(GGLfixed x, GGLfixed y) CONST; +GGLfixed gglSqrtx(GGLfixed a) CONST; +GGLfixed gglSqrtRecipx(GGLfixed x) CONST; +GGLfixed gglFastDivx(GGLfixed n, GGLfixed d) CONST; +int32_t gglMulDivi(int32_t a, int32_t b, int32_t c); + +int32_t gglRecipQNormalized(int32_t x, int* exponent); +int32_t gglRecipQ(GGLfixed x, int q) CONST; + +inline GGLfixed gglRecip(GGLfixed x) CONST; +inline GGLfixed gglRecip(GGLfixed x) { + return gglRecipQ(x, 16); +} + +inline GGLfixed gglRecip28(GGLfixed x) CONST; +int32_t gglRecip28(GGLfixed x) { + return gglRecipQ(x, 28); +} + +// ---------------------------------------------------------------------------- + +#if defined(__arm__) && !defined(__thumb__) + +// inline ARM implementations +inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) CONST; +inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) { + GGLfixed result, t; + if (__builtin_constant_p(shift)) { + asm("smull %[lo], %[hi], %[x], %[y] \n" + "movs %[lo], %[lo], lsr %[rshift] \n" + "adc %[lo], %[lo], %[hi], lsl %[lshift] \n" + : [lo]"=r"(result), [hi]"=r"(t), [x]"=r"(x) + : "%[x]"(x), [y]"r"(y), [lshift] "I"(32-shift), [rshift] "I"(shift) + : "cc" + ); + } else { + asm("smull %[lo], %[hi], %[x], %[y] \n" + "movs %[lo], %[lo], lsr %[rshift] \n" + "adc %[lo], %[lo], %[hi], lsl %[lshift] \n" + : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x) + : "%[x]"(x), [y]"r"(y), [lshift] "r"(32-shift), [rshift] "r"(shift) + : "cc" + ); + } + return result; +} + +inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST; +inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) { + GGLfixed result, t; + if (__builtin_constant_p(shift)) { + asm("smull %[lo], %[hi], %[x], %[y] \n" + "add %[lo], %[a], %[lo], lsr %[rshift] \n" + "add %[lo], %[lo], %[hi], lsl %[lshift] \n" + : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x) + : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "I"(32-shift), [rshift] "I"(shift) + ); + } else { + asm("smull %[lo], %[hi], %[x], %[y] \n" + "add %[lo], %[a], %[lo], lsr %[rshift] \n" + "add %[lo], %[lo], %[hi], lsl %[lshift] \n" + : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x) + : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "r"(32-shift), [rshift] "r"(shift) + ); + } + return result; +} + +inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST; +inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) { + GGLfixed result, t; + if (__builtin_constant_p(shift)) { + asm("smull %[lo], %[hi], %[x], %[y] \n" + "rsb %[lo], %[a], %[lo], lsr %[rshift] \n" + "add %[lo], %[lo], %[hi], lsl %[lshift] \n" + : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x) + : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "I"(32-shift), [rshift] "I"(shift) + ); + } else { + asm("smull %[lo], %[hi], %[x], %[y] \n" + "rsb %[lo], %[a], %[lo], lsr %[rshift] \n" + "add %[lo], %[lo], %[hi], lsl %[lshift] \n" + : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x) + : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "r"(32-shift), [rshift] "r"(shift) + ); + } + return result; +} + +inline int64_t gglMulii(int32_t x, int32_t y) CONST; +inline int64_t gglMulii(int32_t x, int32_t y) +{ + // 64-bits result: r0=low, r1=high + union { + struct { + int32_t lo; + int32_t hi; + } s; + int64_t res; + }; + asm("smull %0, %1, %2, %3 \n" + : "=r"(s.lo), "=&r"(s.hi) + : "%r"(x), "r"(y) + : + ); + return res; +} +#elif defined(__mips__) && __mips_isa_rev < 6 + +/*inline MIPS implementations*/ +inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST; +inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) { + GGLfixed result,tmp,tmp1,tmp2; + + if (__builtin_constant_p(shift)) { + if (shift == 0) { + asm ("mult %[a], %[b] \t\n" + "mflo %[res] \t\n" + : [res]"=&r"(result),[tmp]"=&r"(tmp) + : [a]"r"(a),[b]"r"(b) + : "%hi","%lo" + ); + } else if (shift == 32) + { + asm ("mult %[a], %[b] \t\n" + "li %[tmp],1\t\n" + "sll %[tmp],%[tmp],0x1f\t\n" + "mflo %[res] \t\n" + "addu %[tmp1],%[tmp],%[res] \t\n" + "sltu %[tmp1],%[tmp1],%[tmp]\t\n" /*obit*/ + "sra %[tmp],%[tmp],0x1f \t\n" + "mfhi %[res] \t\n" + "addu %[res],%[res],%[tmp]\t\n" + "addu %[res],%[res],%[tmp1]\t\n" + : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1) + : [a]"r"(a),[b]"r"(b),[shift]"I"(shift) + : "%hi","%lo" + ); + } else if ((shift >0) && (shift < 32)) + { + asm ("mult %[a], %[b] \t\n" + "li %[tmp],1 \t\n" + "sll %[tmp],%[tmp],%[shiftm1] \t\n" + "mflo %[res] \t\n" + "addu %[tmp1],%[tmp],%[res] \t\n" + "sltu %[tmp1],%[tmp1],%[tmp] \t\n" /*obit?*/ + "addu %[res],%[res],%[tmp] \t\n" + "mfhi %[tmp] \t\n" + "addu %[tmp],%[tmp],%[tmp1] \t\n" + "sll %[tmp],%[tmp],%[lshift] \t\n" + "srl %[res],%[res],%[rshift] \t\n" + "or %[res],%[res],%[tmp] \t\n" + : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2) + : [a]"r"(a),[b]"r"(b),[lshift]"I"(32-shift),[rshift]"I"(shift),[shiftm1]"I"(shift-1) + : "%hi","%lo" + ); + } else { + asm ("mult %[a], %[b] \t\n" + "li %[tmp],1 \t\n" + "sll %[tmp],%[tmp],%[shiftm1] \t\n" + "mflo %[res] \t\n" + "addu %[tmp1],%[tmp],%[res] \t\n" + "sltu %[tmp1],%[tmp1],%[tmp] \t\n" /*obit?*/ + "sra %[tmp2],%[tmp],0x1f \t\n" + "addu %[res],%[res],%[tmp] \t\n" + "mfhi %[tmp] \t\n" + "addu %[tmp],%[tmp],%[tmp2] \t\n" + "addu %[tmp],%[tmp],%[tmp1] \t\n" /*tmp=hi*/ + "srl %[tmp2],%[res],%[rshift] \t\n" + "srav %[res], %[tmp],%[rshift]\t\n" + "sll %[tmp],%[tmp],1 \t\n" + "sll %[tmp],%[tmp],%[norbits] \t\n" + "or %[tmp],%[tmp],%[tmp2] \t\n" + "movz %[res],%[tmp],%[bit5] \t\n" + : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2) + : [a]"r"(a),[b]"r"(b),[norbits]"I"(~(shift)),[rshift]"I"(shift),[shiftm1] "I"(shift-1),[bit5]"I"(shift & 0x20) + : "%hi","%lo" + ); + } + } else { + asm ("mult %[a], %[b] \t\n" + "li %[tmp],1 \t\n" + "sll %[tmp],%[tmp],%[shiftm1] \t\n" + "mflo %[res] \t\n" + "addu %[tmp1],%[tmp],%[res] \t\n" + "sltu %[tmp1],%[tmp1],%[tmp] \t\n" /*obit?*/ + "sra %[tmp2],%[tmp],0x1f \t\n" + "addu %[res],%[res],%[tmp] \t\n" + "mfhi %[tmp] \t\n" + "addu %[tmp],%[tmp],%[tmp2] \t\n" + "addu %[tmp],%[tmp],%[tmp1] \t\n" /*tmp=hi*/ + "srl %[tmp2],%[res],%[rshift] \t\n" + "srav %[res], %[tmp],%[rshift]\t\n" + "sll %[tmp],%[tmp],1 \t\n" + "sll %[tmp],%[tmp],%[norbits] \t\n" + "or %[tmp],%[tmp],%[tmp2] \t\n" + "movz %[res],%[tmp],%[bit5] \t\n" + : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2) + : [a]"r"(a),[b]"r"(b),[norbits]"r"(~(shift)),[rshift] "r"(shift),[shiftm1]"r"(shift-1),[bit5] "r"(shift & 0x20) + : "%hi","%lo" + ); + } + + return result; +} + +inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST; +inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) { + GGLfixed result,t,tmp1,tmp2; + + if (__builtin_constant_p(shift)) { + if (shift == 0) { + asm ("mult %[a], %[b] \t\n" + "mflo %[lo] \t\n" + "addu %[lo],%[lo],%[c] \t\n" + : [lo]"=&r"(result) + : [a]"r"(a),[b]"r"(b),[c]"r"(c) + : "%hi","%lo" + ); + } else if (shift == 32) { + asm ("mult %[a], %[b] \t\n" + "mfhi %[lo] \t\n" + "addu %[lo],%[lo],%[c] \t\n" + : [lo]"=&r"(result) + : [a]"r"(a),[b]"r"(b),[c]"r"(c) + : "%hi","%lo" + ); + } else if ((shift>0) && (shift<32)) { + asm ("mult %[a], %[b] \t\n" + "mflo %[res] \t\n" + "mfhi %[t] \t\n" + "srl %[res],%[res],%[rshift] \t\n" + "sll %[t],%[t],%[lshift] \t\n" + "or %[res],%[res],%[t] \t\n" + "addu %[res],%[res],%[c] \t\n" + : [res]"=&r"(result),[t]"=&r"(t) + : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift) + : "%hi","%lo" + ); + } else { + asm ("mult %[a], %[b] \t\n" + "nor %[tmp1],$zero,%[shift]\t\n" + "mflo %[res] \t\n" + "mfhi %[t] \t\n" + "srl %[res],%[res],%[shift] \t\n" + "sll %[tmp2],%[t],1 \t\n" + "sllv %[tmp2],%[tmp2],%[tmp1] \t\n" + "or %[tmp1],%[tmp2],%[res] \t\n" + "srav %[res],%[t],%[shift] \t\n" + "andi %[tmp2],%[shift],0x20\t\n" + "movz %[res],%[tmp1],%[tmp2]\t\n" + "addu %[res],%[res],%[c] \t\n" + : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2) + : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift) + : "%hi","%lo" + ); + } + } else { + asm ("mult %[a], %[b] \t\n" + "nor %[tmp1],$zero,%[shift]\t\n" + "mflo %[res] \t\n" + "mfhi %[t] \t\n" + "srl %[res],%[res],%[shift] \t\n" + "sll %[tmp2],%[t],1 \t\n" + "sllv %[tmp2],%[tmp2],%[tmp1] \t\n" + "or %[tmp1],%[tmp2],%[res] \t\n" + "srav %[res],%[t],%[shift] \t\n" + "andi %[tmp2],%[shift],0x20\t\n" + "movz %[res],%[tmp1],%[tmp2]\t\n" + "addu %[res],%[res],%[c] \t\n" + : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2) + : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift) + : "%hi","%lo" + ); + } + return result; +} + +inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST; +inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) { + GGLfixed result,t,tmp1,tmp2; + + if (__builtin_constant_p(shift)) { + if (shift == 0) { + asm ("mult %[a], %[b] \t\n" + "mflo %[lo] \t\n" + "subu %[lo],%[lo],%[c] \t\n" + : [lo]"=&r"(result) + : [a]"r"(a),[b]"r"(b),[c]"r"(c) + : "%hi","%lo" + ); + } else if (shift == 32) { + asm ("mult %[a], %[b] \t\n" + "mfhi %[lo] \t\n" + "subu %[lo],%[lo],%[c] \t\n" + : [lo]"=&r"(result) + : [a]"r"(a),[b]"r"(b),[c]"r"(c) + : "%hi","%lo" + ); + } else if ((shift>0) && (shift<32)) { + asm ("mult %[a], %[b] \t\n" + "mflo %[res] \t\n" + "mfhi %[t] \t\n" + "srl %[res],%[res],%[rshift] \t\n" + "sll %[t],%[t],%[lshift] \t\n" + "or %[res],%[res],%[t] \t\n" + "subu %[res],%[res],%[c] \t\n" + : [res]"=&r"(result),[t]"=&r"(t) + : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift) + : "%hi","%lo" + ); + } else { + asm ("mult %[a], %[b] \t\n" + "nor %[tmp1],$zero,%[shift]\t\n" + "mflo %[res] \t\n" + "mfhi %[t] \t\n" + "srl %[res],%[res],%[shift] \t\n" + "sll %[tmp2],%[t],1 \t\n" + "sllv %[tmp2],%[tmp2],%[tmp1] \t\n" + "or %[tmp1],%[tmp2],%[res] \t\n" + "srav %[res],%[t],%[shift] \t\n" + "andi %[tmp2],%[shift],0x20\t\n" + "movz %[res],%[tmp1],%[tmp2]\t\n" + "subu %[res],%[res],%[c] \t\n" + : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2) + : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift) + : "%hi","%lo" + ); + } + } else { + asm ("mult %[a], %[b] \t\n" + "nor %[tmp1],$zero,%[shift]\t\n" + "mflo %[res] \t\n" + "mfhi %[t] \t\n" + "srl %[res],%[res],%[shift] \t\n" + "sll %[tmp2],%[t],1 \t\n" + "sllv %[tmp2],%[tmp2],%[tmp1] \t\n" + "or %[tmp1],%[tmp2],%[res] \t\n" + "srav %[res],%[t],%[shift] \t\n" + "andi %[tmp2],%[shift],0x20\t\n" + "movz %[res],%[tmp1],%[tmp2]\t\n" + "subu %[res],%[res],%[c] \t\n" + : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2) + : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift) + : "%hi","%lo" + ); + } + return result; +} + +inline int64_t gglMulii(int32_t x, int32_t y) CONST; +inline int64_t gglMulii(int32_t x, int32_t y) { + union { + struct { +#if defined(__MIPSEL__) + int32_t lo; + int32_t hi; +#elif defined(__MIPSEB__) + int32_t hi; + int32_t lo; +#endif + } s; + int64_t res; + }u; + asm("mult %2, %3 \t\n" + "mfhi %1 \t\n" + "mflo %0 \t\n" + : "=r"(u.s.lo), "=&r"(u.s.hi) + : "%r"(x), "r"(y) + : "%hi","%lo" + ); + return u.res; +} + +#elif defined(__aarch64__) + +// inline AArch64 implementations + +inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) CONST; +inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) +{ + GGLfixed result; + GGLfixed round; + + asm("mov %x[round], #1 \n" + "lsl %x[round], %x[round], %x[shift] \n" + "lsr %x[round], %x[round], #1 \n" + "smaddl %x[result], %w[x], %w[y],%x[round] \n" + "lsr %x[result], %x[result], %x[shift] \n" + : [round]"=&r"(round), [result]"=&r"(result) \ + : [x]"r"(x), [y]"r"(y), [shift] "r"(shift) \ + : + ); + return result; +} +inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST; +inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) +{ + GGLfixed result; + asm("smull %x[result], %w[x], %w[y] \n" + "lsr %x[result], %x[result], %x[shift] \n" + "add %w[result], %w[result], %w[a] \n" + : [result]"=&r"(result) \ + : [x]"r"(x), [y]"r"(y), [a]"r"(a), [shift] "r"(shift) \ + : + ); + return result; +} + +inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST; +inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) +{ + + GGLfixed result; + int rshift; + + asm("smull %x[result], %w[x], %w[y] \n" + "lsr %x[result], %x[result], %x[shift] \n" + "sub %w[result], %w[result], %w[a] \n" + : [result]"=&r"(result) \ + : [x]"r"(x), [y]"r"(y), [a]"r"(a), [shift] "r"(shift) \ + : + ); + return result; +} +inline int64_t gglMulii(int32_t x, int32_t y) CONST; +inline int64_t gglMulii(int32_t x, int32_t y) +{ + int64_t res; + asm("smull %x0, %w1, %w2 \n" + : "=r"(res) + : "%r"(x), "r"(y) + : + ); + return res; +} + +#else // ---------------------------------------------------------------------- + +inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST; +inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) { + return GGLfixed((int64_t(a)*b + (1<<(shift-1)))>>shift); +} +inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST; +inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) { + return GGLfixed((int64_t(a)*b)>>shift) + c; +} +inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST; +inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) { + return GGLfixed((int64_t(a)*b)>>shift) - c; +} +inline int64_t gglMulii(int32_t a, int32_t b) CONST; +inline int64_t gglMulii(int32_t a, int32_t b) { + return int64_t(a)*b; +} + +#endif + +// ------------------------------------------------------------------------ + +inline GGLfixed gglMulx(GGLfixed a, GGLfixed b) CONST; +inline GGLfixed gglMulx(GGLfixed a, GGLfixed b) { + return gglMulx(a, b, 16); +} +inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c) CONST; +inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c) { + return gglMulAddx(a, b, c, 16); +} +inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c) CONST; +inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c) { + return gglMulSubx(a, b, c, 16); +} + +// ------------------------------------------------------------------------ + +inline int32_t gglClz(int32_t x) CONST; +inline int32_t gglClz(int32_t x) +{ +#if (defined(__arm__) && !defined(__thumb__)) || defined(__mips__) || defined(__aarch64__) + return __builtin_clz(x); +#else + if (!x) return 32; + int32_t exp = 31; + if (x & 0xFFFF0000) { exp -=16; x >>= 16; } + if (x & 0x0000ff00) { exp -= 8; x >>= 8; } + if (x & 0x000000f0) { exp -= 4; x >>= 4; } + if (x & 0x0000000c) { exp -= 2; x >>= 2; } + if (x & 0x00000002) { exp -= 1; } + return exp; +#endif +} + +// ------------------------------------------------------------------------ + +int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i) CONST; + +inline int32_t gglDivQ16(GGLfixed n, GGLfixed d) CONST; +inline int32_t gglDivQ16(GGLfixed n, GGLfixed d) { + return gglDivQ(n, d, 16); +} + +inline int32_t gglDivx(GGLfixed n, GGLfixed d) CONST; +inline int32_t gglDivx(GGLfixed n, GGLfixed d) { + return gglDivQ(n, d, 16); +} + +// ------------------------------------------------------------------------ + +inline GGLfixed gglRecipFast(GGLfixed x) CONST; +inline GGLfixed gglRecipFast(GGLfixed x) +{ + // This is a really bad approximation of 1/x, but it's also + // very fast. x must be strictly positive. + // if x between [0.5, 1[ , then 1/x = 3-2*x + // (we use 2.30 fixed-point) + const int32_t lz = gglClz(x); + return (0xC0000000 - (x << (lz - 1))) >> (30-lz); +} + +// ------------------------------------------------------------------------ + +inline GGLfixed gglClampx(GGLfixed c) CONST; +inline GGLfixed gglClampx(GGLfixed c) +{ +#if defined(__thumb__) + // clamp without branches + c &= ~(c>>31); c = FIXED_ONE - c; + c &= ~(c>>31); c = FIXED_ONE - c; +#else +#if defined(__arm__) + // I don't know why gcc thinks its smarter than me! The code below + // clamps to zero in one instruction, but gcc won't generate it and + // replace it by a cmp + movlt (it's quite amazing actually). + asm("bic %0, %1, %1, asr #31\n" : "=r"(c) : "r"(c)); +#elif defined(__aarch64__) + asm("bic %w0, %w1, %w1, asr #31\n" : "=r"(c) : "r"(c)); +#else + c &= ~(c>>31); +#endif + if (c>FIXED_ONE) + c = FIXED_ONE; +#endif + return c; +} + +// ------------------------------------------------------------------------ + +#endif // ANDROID_GGL_FIXED_H |