aboutsummaryrefslogtreecommitdiffstats
path: root/android/skin/argb.h
diff options
context:
space:
mode:
authorThe Android Open Source Project <initial-contribution@android.com>2009-02-10 15:43:59 -0800
committerThe Android Open Source Project <initial-contribution@android.com>2009-02-10 15:43:59 -0800
commitc27f813900a3c114562efbb8df1065e94766fc48 (patch)
treed95919283707dcab61009e27007374a745c9541e /android/skin/argb.h
parent0852ad57fa372f9b2854e4df685eaba8d8ef6790 (diff)
downloadexternal_qemu-c27f813900a3c114562efbb8df1065e94766fc48.zip
external_qemu-c27f813900a3c114562efbb8df1065e94766fc48.tar.gz
external_qemu-c27f813900a3c114562efbb8df1065e94766fc48.tar.bz2
auto import from //branches/cupcake/...@130745
Diffstat (limited to 'android/skin/argb.h')
-rw-r--r--android/skin/argb.h856
1 files changed, 856 insertions, 0 deletions
diff --git a/android/skin/argb.h b/android/skin/argb.h
new file mode 100644
index 0000000..b3f0a6d
--- /dev/null
+++ b/android/skin/argb.h
@@ -0,0 +1,856 @@
+/* Copyright (C) 2007-2008 The Android Open Source Project
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+** GNU General Public License for more details.
+*/
+/* this file contains template code and may be included multiple times */
+
+#ifndef ARGB_T_DEFINED
+#define ARGB_T_DEFINED
+
+#if USE_MMX
+#include <mmintrin.h>
+
+typedef __m64 mmx_t;
+typedef mmx_t argb_t;
+
+static inline mmx_t
+mmx_load8888( unsigned value, mmx_t zero )
+{
+ return _mm_unpacklo_pi8( _mm_cvtsi32_si64 (value), zero);
+}
+
+static inline unsigned
+mmx_save8888( mmx_t argb, mmx_t zero )
+{
+ return (unsigned) _mm_cvtsi64_si32( _mm_packs_pu16( argb, zero ) );
+}
+
+static inline mmx_t
+mmx_expand16( int value )
+{
+ mmx_t t1 = _mm_cvtsi32_si64( value );
+ return _mm_packs_pi32( t1, t1 );
+}
+
+static inline int
+mmx_makescale( double s )
+{
+ return (int)(s*(1 << 16));
+}
+
+static inline mmx_t
+mmx_mulshift( mmx_t argb, int multiplier, int rshift, mmx_t zero )
+{
+ mmx_t ar = _mm_unpackhi_pi16(argb, zero );
+ mmx_t gb = _mm_unpacklo_pi16(argb, zero );
+ mmx_t mult = mmx_expand16(multiplier);
+
+ ar = _mm_srli_pi32( _mm_madd_pi16( ar, mult ), rshift );
+ gb = _mm_srli_pi32( _mm_madd_pi16( gb, mult ), rshift );
+
+ return _mm_packs_pi32( gb, ar );
+}
+
+static inline mmx_t
+mmx_interp255( mmx_t m1, mmx_t m2, mmx_t zero, int alpha )
+{
+ mmx_t mult, mult2, t1, t2, r1, r2;
+
+ // m1 = [ a1 | r1 | g1 | b1 ]
+ // m2 = [ a2 | r2 | g2 | b2 ]
+ alpha = (alpha << 16) | (alpha ^ 255);
+ mult = _mm_cvtsi32_si64( alpha ); // mult = [ 0 | 0 | a | 1-a ]
+ mult2 = _mm_slli_si64( mult, 32 ); // mult2 = [ a | 1-a | 0 | 0 ]
+ mult = _mm_or_si64( mult, mult2 ); // mults = [ a | 1-a | a | 1-a ]
+
+ t1 = _mm_unpackhi_pi16( m1, m2 ); // t1 = [ a2 | a1 | r2 | r1 ]
+ r1 = _mm_madd_pi16( t1, mult ); // r1 = [ ra | rr ]
+
+ t2 = _mm_unpacklo_pi16( m1, m2 ); // t1 = [ g2 | g1 | b2 | b1 ]
+ r2 = _mm_madd_pi16( t2, mult ); // r2 = [ rg | rb ]
+
+ r1 = _mm_srli_pi32( r1, 8 );
+ r2 = _mm_srli_pi32( r2, 8 );
+
+ return _mm_packs_pi32( r2, r1 );
+}
+
+#define ARGB_DECL_ZERO() mmx_t _zero = _mm_setzero_si64()
+#define ARGB_DECL(x) mmx_t x
+#define ARGB_DECL2(x1,x2) mmx_t x1, x2
+#define ARGB_ZERO(x) x = _zero
+#define ARGB_UNPACK(x,v) x = mmx_load8888((v), _zero)
+#define ARGB_PACK(x) mmx_save8888(x, _zero)
+#define ARGB_COPY(x,y) x = y
+#define ARGB_SUM(x1,x2,x3) x1 = _mm_add_pi32(x2, x3)
+#define ARGB_REDUCE(x,red) \
+ ({ \
+ int _red = (red) >> 8; \
+ if (_red < 256) \
+ x = mmx_mulshift( x, _red, 8, _zero ); \
+ })
+
+#define ARGB_INTERP255(x1,x2,x3,alpha) \
+ x1 = mmx_interp255( x2, x3, _zero, (alpha))
+
+#define ARGB_ADDW_11(x1,x2,x3) \
+ ARGB_SUM(x1,x2,x3)
+
+#define ARGB_ADDW_31(x1,x2,x3) \
+ ({ \
+ mmx_t _t1 = _mm_add_pi16(x2, x3); \
+ mmx_t _t2 = _mm_slli_pi16(x2, 1); \
+ x1 = _mm_add_pi16(_t1, _t2); \
+ })
+
+#define ARGB_ADDW_13(x1,x2,x3) \
+ ({ \
+ mmx_t _t1 = _mm_add_pi16(x2, x3); \
+ mmx_t _t2 = _mm_slli_pi16(x3, 1); \
+ x1 = _mm_add_pi16(_t1, _t2); \
+ })
+
+#define ARGB_SHR(x1,x2,s) \
+ x1 = _mm_srli_pi16(x2, s)
+
+
+#define ARGB_MULSHIFT(x1,x2,v,s) \
+ x1 = mmx_mulshift(x2, v, s, _zero)
+
+#define ARGB_DONE _mm_empty()
+
+#define ARGB_RESCALE_SHIFT 10
+#define ARGB_DECL_SCALE(s2,s) int s2 = (int)((s)*(s)*(1 << ARGB_RESCALE_SHIFT))
+#define ARGB_RESCALE(x,s2) x = mmx_mulshift( x, s2, ARGB_RESCALE_SHIFT, _zero )
+
+#else /* !USE_MMX */
+
+typedef uint32_t argb_t;
+
+#define ARGB_DECL_ZERO() argb_t _zero = 0
+#define ARGB_DECL(x) argb_t x##_ag, x##_rb
+#define ARGB_DECL2(x1,x2) argb_t x1##_ag, x1##_rb, x2##_ag, x2##_rb
+#define ARGB_ZERO(x) (x##_ag = x##_rb = 0)
+#define ARGB_COPY(x,y) (x##_ag = y##_ag, x##_rb = y##_rb)
+
+#define ARGB_UNPACK(x,v) \
+ ({ \
+ argb_t _v = (argb_t)(v); \
+ x##_ag = (_v >> 8) & 0xff00ff; \
+ x##_rb = (_v) & 0xff00ff; \
+ })
+
+#define ARGB_PACK(x) (uint32_t)(((x##_ag) << 8) | x##_rb)
+
+#define ARGB_SUM(x1,x2,x3) \
+ ({ \
+ x1##_ag = x2##_ag + x3##_ag; \
+ x1##_rb = x2##_rb + x3##_rb; \
+ })
+
+#define ARGB_REDUCE(x,red) \
+ ({ \
+ int _red = (red) >> 8; \
+ if (_red < 256) { \
+ x##_ag = ((x##_ag*_red) >> 8) & 0xff00ff; \
+ x##_rb = ((x##_rb*_red) >> 8) & 0xff00ff; \
+ } \
+ })
+
+#define ARGB_INTERP255(x1,x2,x3,alpha) \
+ ({ \
+ int _alpha = (alpha); \
+ int _ialpha; \
+ _alpha += _alpha >> 8; \
+ _ialpha = 256 - _alpha; \
+ x1##_ag = ((x2##_ag*_ialpha + x3##_ag*_alpha) >> 8) & 0xff00ff; \
+ x1##_rb = ((x2##_rb*_ialpha + x3##_rb*_alpha) >> 8) & 0xff00ff; \
+ })
+
+#define ARGB_ADDW_11(x1,x2,x3) \
+ ({ \
+ x1##_ag = (x2##_ag + x3##_ag); \
+ x1##_rb = (x2##_rb + x3##_rb); \
+ })
+
+#define ARGB_ADDW_31(x1,x2,x3) \
+ ({ \
+ x1##_ag = (3*x2##_ag + x3##_ag); \
+ x1##_rb = (3*x2##_rb + x3##_rb); \
+ })
+
+#define ARGB_ADDW_13(x1,x2,x3) \
+ ({ \
+ x1##_ag = (x2##_ag + 3*x3##_ag); \
+ x1##_rb = (x2##_rb + 3*x3##_rb); \
+ })
+
+#define ARGB_MULSHIFT(x1,x2,v,s) \
+ ({ \
+ unsigned _vv = (v); \
+ x1##_ag = ((x2##_ag * _vv) >> (s)) & 0xff00ff; \
+ x1##_rb = ((x2##_rb * _vv) >> (s)) & 0xff00ff; \
+ })
+
+#define ARGB_SHR(x1,x2,s) \
+ ({ \
+ int _s = (s); \
+ x1##_ag = (x2##_ag >> _s) & 0xff00ff; \
+ x1##_rb = (x2##_rb >> _s) & 0xff00ff; \
+ })
+
+#define ARGB_DONE ((void)0)
+
+#define ARGB_RESCALE_SHIFT 8
+#define ARGB_DECL_SCALE(s2,s) int s2 = (int)((s)*(s)*(1 << ARGB_RESCALE_SHIFT))
+#define ARGB_RESCALE(x,scale2) ARGB_MULSHIFT(x,x,scale2,ARGB_RESCALE_SHIFT)
+
+#endif /* !USE_MMX */
+
+#define ARGB_ADD(x1,x2) ARGB_SUM(x1,x1,x2)
+#define ARGB_READ(x,p) ARGB_UNPACK(x,*(uint32_t*)(p))
+#define ARGB_WRITE(x,p) *(uint32_t*)(p) = ARGB_PACK(x)
+
+#endif /* !ARGB_T_DEFINED */
+
+
+
+#ifdef ARGB_SCALE_GENERIC
+static void
+ARGB_SCALE_GENERIC( ScaleOp* op )
+{
+ int dst_pitch = op->dst_pitch;
+ int src_pitch = op->src_pitch;
+ uint8_t* dst_line = op->dst_line;
+ uint8_t* src_line = op->src_line;
+ ARGB_DECL_SCALE(scale2, op->scale);
+ int h;
+ int sx = op->sx;
+ int sy = op->sy;
+ int ix = op->ix;
+ int iy = op->iy;
+
+ src_line += (sx >> 16)*4 + (sy >> 16)*src_pitch;
+ sx &= 0xffff;
+ sy &= 0xffff;
+
+ for ( h = op->rd.h; h > 0; h-- ) {
+ uint8_t* dst = dst_line;
+ uint8_t* src = src_line;
+ uint8_t* dst_end = dst + 4*op->rd.w;
+ int sx1 = sx;
+ int sy1 = sy;
+
+ for ( ; dst < dst_end; ) {
+ int sx2 = sx1 + ix;
+ int sy2 = sy1 + iy;
+
+ ARGB_DECL_ZERO();
+ ARGB_DECL(spix);
+ ARGB_DECL(pix);
+ ARGB_ZERO(pix);
+
+ /* the current destination pixel maps to the (sx1,sy1)-(sx2,sy2)
+ * source square, we're going to compute the sum of its pixels'
+ * colors... simple box filtering
+ */
+ {
+ int gsy, gsx;
+ for ( gsy = 0; gsy < sy2; gsy += 65536 ) {
+ for ( gsx = 0; gsx < sx2; gsx += 65536 ) {
+ uint8_t* s = src + (gsx >> 16)*4 + (gsy >> 16)*src_pitch;
+ int xmin = gsx, xmax = gsx + 65536, ymin = gsy, ymax = gsy + 65536;
+ unsigned ww, hh;
+ unsigned red;
+
+ if (xmin < sx1) xmin = sx1;
+ if (xmax > sx2) xmax = sx2;
+ if (ymin < sy1) ymin = sy1;
+ if (ymax > sy2) ymax = sy2;
+
+ ww = (unsigned)(xmax-xmin);
+ red = ww;
+
+ hh = (unsigned)(ymax-ymin);
+ red = (hh < 65536) ? (red*hh >> 16U) : red;
+
+ ARGB_READ(spix,s);
+ ARGB_REDUCE(spix,red);
+ ARGB_ADD(pix,spix);
+ }
+ }
+ }
+
+ ARGB_RESCALE(pix,scale2);
+ ARGB_WRITE(pix,dst);
+
+ sx1 = sx2;
+ src += (sx1 >> 16)*4;
+ sx1 &= 0xffff;
+ dst += 4;
+ }
+
+ sy += iy;
+ src_line += (sy >> 16)*src_pitch;
+ sy &= 0xffff;
+
+ dst_line += dst_pitch;
+ }
+ ARGB_DONE;
+}
+#endif
+#undef ARGB_SCALE_GENERIC
+
+
+#ifdef ARGB_SCALE_05_TO_10
+static inline int cross( int x, int y ) {
+ if (x == 65536 && y == 65536)
+ return 65536;
+
+ return (int)((unsigned)x * (unsigned)y >> 16U);
+}
+
+static void
+scale_05_to_10( ScaleOp* op )
+{
+ int dst_pitch = op->dst_pitch;
+ int src_pitch = op->src_pitch;
+ uint8_t* dst_line = op->dst_line;
+ uint8_t* src_line = op->src_line;
+ ARGB_DECL_SCALE(scale2, op->scale);
+ int h;
+ int sx = op->sx;
+ int sy = op->sy;
+ int ix = op->ix;
+ int iy = op->iy;
+
+ src_line += (sx >> 16)*4 + (sy >> 16)*src_pitch;
+ sx &= 0xffff;
+ sy &= 0xffff;
+
+ for ( h = op->rd.h; h > 0; h-- ) {
+ uint8_t* dst = dst_line;
+ uint8_t* src = src_line;
+ uint8_t* dst_end = dst + 4*op->rd.w;
+ int sx1 = sx;
+ int sy1 = sy;
+
+ for ( ; dst < dst_end; ) {
+ int sx2 = sx1 + ix;
+ int sy2 = sy1 + iy;
+
+ ARGB_DECL_ZERO();
+ ARGB_DECL2(spix, pix);
+
+ int off = src_pitch;
+ int fx1 = sx1 & 0xffff;
+ int fx2 = sx2 & 0xffff;
+ int fy1 = sy1 & 0xffff;
+ int fy2 = sy2 & 0xffff;
+
+ int center_x = ((sx1 >> 16) + 1) < ((sx2-1) >> 16);
+ int center_y = ((sy1 >> 16) + 1) < ((sy2-1) >> 16);
+
+ ARGB_ZERO(pix);
+
+ if (fx2 == 0) {
+ fx2 = 65536;
+ }
+ if (fy2 == 0) {
+ fy2 = 65536;
+ }
+ fx1 = 65536 - fx1;
+ fy1 = 65536 - fy1;
+
+ /** TOP BAND
+ **/
+
+ /* top-left pixel */
+ ARGB_READ(spix,src);
+ ARGB_REDUCE(spix,cross(fx1,fy1));
+ ARGB_ADD(pix,spix);
+
+ /* top-center pixel, if any */
+ ARGB_READ(spix,src + 4);
+ if (center_x) {
+ ARGB_REDUCE(spix,fy1);
+ ARGB_ADD(pix,spix);
+ ARGB_READ(spix,src + 8);
+ }
+
+ /* top-right pixel */
+ ARGB_REDUCE(spix,cross(fx2,fy1));
+ ARGB_ADD(pix,spix);
+
+ /** MIDDLE BAND, IF ANY
+ **/
+ if (center_y) {
+ /* left-middle pixel */
+ ARGB_READ(spix,src + off);
+ ARGB_REDUCE(spix,fx1);
+ ARGB_ADD(pix,spix);
+
+ /* center pixel, if any */
+ ARGB_READ(spix,src + off + 4);
+ if (center_x) {
+ ARGB_ADD(pix,spix);
+ ARGB_READ(spix,src + off + 8);
+ }
+
+ /* right-middle pixel */
+ ARGB_REDUCE(spix,fx2);
+ ARGB_ADD(pix,spix);
+
+ off += src_pitch;
+ }
+
+ /** BOTTOM BAND
+ **/
+ /* left-bottom pixel */
+ ARGB_READ(spix,src + off);
+ ARGB_REDUCE(spix,cross(fx1,fy2));
+ ARGB_ADD(pix,spix);
+
+ /* center-bottom, if any */
+ ARGB_READ(spix,src + off + 4);
+ if (center_x) {
+ ARGB_REDUCE(spix,fy2);
+ ARGB_ADD(pix,spix);
+ ARGB_READ(spix,src + off + 8);
+ }
+
+ /* right-bottom pixel */
+ ARGB_REDUCE(spix,cross(fx2,fy2));
+ ARGB_ADD(pix,spix);
+
+ /** WRITE IT
+ **/
+ ARGB_RESCALE(pix,scale2);
+ ARGB_WRITE(pix,dst);
+
+ sx1 = sx2;
+ src += (sx1 >> 16)*4;
+ sx1 &= 0xffff;
+ dst += 4;
+ }
+
+ sy += iy;
+ src_line += (sy >> 16)*src_pitch;
+ sy &= 0xffff;
+
+ dst_line += dst_pitch;
+ }
+ ARGB_DONE;
+}
+#endif
+#undef ARGB_SCALE_05_TO_10
+
+
+#ifdef ARGB_SCALE_UP_BILINEAR
+static void
+scale_up_bilinear( ScaleOp* op )
+{
+ int dst_pitch = op->dst_pitch;
+ int src_pitch = op->src_pitch;
+ uint8_t* dst_line = op->dst_line;
+ uint8_t* src_line = op->src_line;
+ int sx = op->sx;
+ int sy = op->sy;
+ int ix = op->ix;
+ int iy = op->iy;
+ int xlimit, ylimit;
+ int h, sx0;
+
+ /* the center pixel is at (sx+ix/2, sy+iy/2), we then want to get */
+ /* the four nearest source pixels, which are at (0.5,0.5) offsets */
+
+ sx = sx + ix/2 - 32768;
+ sy = sy + iy/2 - 32768;
+
+ xlimit = (op->src_w-1);
+ ylimit = (op->src_h-1);
+
+ sx0 = sx;
+
+ for ( h = op->rd.h; h > 0; h-- ) {
+ uint8_t* dst = dst_line;
+ uint8_t* dst_end = dst + 4*op->rd.w;
+
+ sx = sx0;
+ for ( ; dst < dst_end; ) {
+ int ex1, ex2, ey1, ey2, alpha;
+ uint8_t* s;
+
+ ARGB_DECL_ZERO();
+ ARGB_DECL2(spix1,spix2);
+ ARGB_DECL2(pix3,pix4);
+ ARGB_DECL(pix);
+
+ /* find the four neighbours */
+ ex1 = (sx >> 16);
+ ey1 = (sy >> 16);
+ ex2 = (sx+65535) >> 16;
+ ey2 = (sy+65535) >> 16;
+
+ if (ex1 < 0) ex1 = 0; else if (ex1 > xlimit) ex1 = xlimit;
+ if (ey1 < 0) ey1 = 0; else if (ey1 > ylimit) ey1 = ylimit;
+ if (ex2 < 0) ex2 = 0; else if (ex2 > xlimit) ex2 = xlimit;
+ if (ey2 < 0) ey2 = 0; else if (ey2 > ylimit) ey2 = ylimit;
+
+ ex2 = (ex2-ex1)*4;
+ ey2 = (ey2-ey1)*src_pitch;
+
+ /* interpolate */
+ s = src_line + ex1*4 + ey1*src_pitch;
+ ARGB_READ(spix1, s);
+ ARGB_READ(spix2, s+ex2);
+
+ alpha = (sx >> 8) & 0xff;
+ ARGB_INTERP255(pix3,spix1,spix2,alpha);
+
+ s += ey2;
+ ARGB_READ(spix1, s);
+ ARGB_READ(spix2, s+ex2);
+
+ ARGB_INTERP255(pix4,spix1,spix2,alpha);
+
+ alpha = (sy >> 8) & 0xff;
+ ARGB_INTERP255(pix,pix3,pix4,alpha);
+
+ ARGB_WRITE(pix,dst);
+
+ sx += ix;
+ dst += 4;
+ }
+
+ sy += iy;
+ dst_line += dst_pitch;
+ }
+ ARGB_DONE;
+}
+#endif
+#undef ARGB_SCALE_UP_BILINEAR
+
+#ifdef ARGB_SCALE_UP_QUICK_4x4
+static void
+ARGB_SCALE_UP_QUICK_4x4( ScaleOp* op )
+{
+ int dst_pitch = op->dst_pitch;
+ int src_pitch = op->src_pitch;
+ uint8_t* dst_line = op->dst_line;
+ uint8_t* src_line = op->src_line;
+ int sx = op->sx;
+ int sy = op->sy;
+ int ix = op->ix;
+ int iy = op->iy;
+ int xlimit, ylimit;
+ int h, sx0;
+
+ /* the center pixel is at (sx+ix/2, sy+iy/2), we then want to get */
+ /* the four nearest source pixels, which are at (0.5,0.5) offsets */
+
+ sx = sx + ix/2 - 32768;
+ sy = sy + iy/2 - 32768;
+
+ xlimit = (op->src_w-1);
+ ylimit = (op->src_h-1);
+
+ sx0 = sx;
+
+ for ( h = op->rd.h; h > 0; h-- ) {
+ uint8_t* dst = dst_line;
+ uint8_t* dst_end = dst + 4*op->rd.w;
+
+ sx = sx0;
+ for ( ; dst < dst_end; ) {
+ int ex1, ex2, ey1, ey2;
+ uint8_t* p;
+ ARGB_DECL_ZERO();
+ ARGB_DECL(pix);
+ ARGB_DECL2(spix1, spix2);
+ ARGB_DECL2(pix3, pix4);
+
+ /* find the four neighbours */
+ ex1 = (sx >> 16);
+ ey1 = (sy >> 16);
+ ex2 = (sx+65535) >> 16;
+ ey2 = (sy+65535) >> 16;
+
+ if (ex1 < 0) ex1 = 0; else if (ex1 > xlimit) ex1 = xlimit;
+ if (ey1 < 0) ey1 = 0; else if (ey1 > ylimit) ey1 = ylimit;
+ if (ex2 < 0) ex2 = 0; else if (ex2 > xlimit) ex2 = xlimit;
+ if (ey2 < 0) ey2 = 0; else if (ey2 > ylimit) ey2 = ylimit;
+
+ /* interpolate */
+ p = (src_line + ex1*4 + ey1*src_pitch);
+
+ ex2 = (ex2-ex1)*4;
+ ey2 = (ey2-ey1)*src_pitch;
+
+ switch (((sx >> 14) & 3) | ((sy >> 12) & 12)) {
+ case 0:
+ *(uint32_t*)dst = *(uint32_t*)p;
+ break;
+
+ /* top-line is easy */
+ case 1:
+ ARGB_READ(spix1, p);
+ ARGB_READ(spix2, p+ex2);
+ ARGB_ADDW_31(pix,spix1,spix2);
+ ARGB_SHR(pix,pix,2);
+ ARGB_WRITE(pix, dst);
+ break;
+
+ case 2:
+ ARGB_READ(spix1, p);
+ ARGB_READ(spix2, p+ex2);
+ ARGB_ADDW_11(pix, spix1, spix2);
+ ARGB_SHR(pix,pix,1);
+ ARGB_WRITE(pix, dst);
+ break;
+
+ case 3:
+ ARGB_READ(spix1, p);
+ ARGB_READ(spix2, p+ex2);
+ ARGB_ADDW_13(pix,spix1,spix2);
+ ARGB_SHR(pix,pix,2);
+ ARGB_WRITE(pix, dst);
+ break;
+
+ /* second line is harder */
+ case 4:
+ ARGB_READ(spix1, p);
+ ARGB_READ(spix2, p+ey2);
+ ARGB_ADDW_31(pix,spix1,spix2);
+ ARGB_SHR(pix,pix,2);
+ ARGB_WRITE(pix, dst);
+ break;
+
+ case 5:
+ ARGB_READ(spix1, p);
+ ARGB_READ(spix2, p+ex2);
+ ARGB_ADDW_31(pix3,spix1,spix2);
+ p += ey2;
+ ARGB_READ(spix1, p);
+ ARGB_READ(spix2, p+ex2);
+ ARGB_ADDW_31(pix4,spix1,spix2);
+
+ ARGB_ADDW_31(pix,pix3,pix4);
+ ARGB_SHR(pix,pix,4);
+ ARGB_WRITE(pix,dst);
+ break;
+
+ case 6:
+ ARGB_READ(spix1, p);
+ ARGB_READ(spix2, p+ex2);
+ ARGB_ADDW_11(pix3,spix1,spix2);
+ p += ey2;
+ ARGB_READ(spix1, p);
+ ARGB_READ(spix2, p+ex2);
+ ARGB_ADDW_11(pix4,spix1,spix2);
+
+ ARGB_ADDW_31(pix,pix3,pix4);
+ ARGB_SHR(pix,pix,3);
+ ARGB_WRITE(pix,dst);
+ break;
+
+ case 7:
+ ARGB_READ(spix1, p);
+ ARGB_READ(spix2, p+ex2);
+ ARGB_ADDW_13(pix3,spix1,spix2);
+ p += ey2;
+ ARGB_READ(spix1, p);
+ ARGB_READ(spix2, p+ex2);
+ ARGB_ADDW_13(pix4,spix1,spix2);
+
+ ARGB_ADDW_31(pix,pix3,pix4);
+ ARGB_SHR(pix,pix,4);
+ ARGB_WRITE(pix,dst);
+ break;
+
+ /* third line */
+ case 8:
+ ARGB_READ(spix1, p);
+ ARGB_READ(spix2, p+ey2);
+ ARGB_ADDW_11(pix,spix1,spix2);
+ ARGB_SHR(pix,pix,1);
+ ARGB_WRITE(pix, dst);
+ break;
+
+ case 9:
+ ARGB_READ(spix1, p);
+ ARGB_READ(spix2, p+ex2);
+ ARGB_ADDW_31(pix3,spix1,spix2);
+ p += ey2;
+ ARGB_READ(spix1, p);
+ ARGB_READ(spix2, p+ex2);
+ ARGB_ADDW_31(pix4,spix1,spix2);
+
+ ARGB_ADDW_11(pix,pix3,pix4);
+ ARGB_SHR(pix,pix,3);
+ ARGB_WRITE(pix,dst);
+ break;
+
+ case 10:
+ ARGB_READ(spix1, p);
+ ARGB_READ(spix2, p+ex2);
+ ARGB_ADDW_11(pix3,spix1,spix2);
+ p += ey2;
+ ARGB_READ(spix1, p);
+ ARGB_READ(spix2, p+ex2);
+ ARGB_ADDW_11(pix4,spix1,spix2);
+
+ ARGB_ADDW_11(pix,pix3,pix4);
+ ARGB_SHR(pix,pix,2);
+ ARGB_WRITE(pix,dst);
+ break;
+
+ case 11:
+ ARGB_READ(spix1, p);
+ ARGB_READ(spix2, p+ex2);
+ ARGB_ADDW_13(pix3,spix1,spix2);
+ p += ey2;
+ ARGB_READ(spix1, p);
+ ARGB_READ(spix2, p+ex2);
+ ARGB_ADDW_13(pix4,spix1,spix2);
+
+ ARGB_ADDW_11(pix,pix3,pix4);
+ ARGB_SHR(pix,pix,3);
+ ARGB_WRITE(pix,dst);
+ break;
+
+ /* last line */
+ case 12:
+ ARGB_READ(spix1, p);
+ ARGB_READ(spix2, p+ey2);
+ ARGB_ADDW_13(pix,spix1,spix2);
+ ARGB_SHR(pix,pix,2);
+ ARGB_WRITE(pix, dst);
+ break;
+
+ case 13:
+ ARGB_READ(spix1, p);
+ ARGB_READ(spix2, p+ex2);
+ ARGB_ADDW_31(pix3,spix1,spix2);
+ p += ey2;
+ ARGB_READ(spix1, p);
+ ARGB_READ(spix2, p+ex2);
+ ARGB_ADDW_31(pix4,spix1,spix2);
+
+ ARGB_ADDW_13(pix,pix3,pix4);
+ ARGB_SHR(pix,pix,4);
+ ARGB_WRITE(pix,dst);
+ break;
+
+ case 14:
+ ARGB_READ(spix1, p);
+ ARGB_READ(spix2, p+ex2);
+ ARGB_ADDW_11(pix3,spix1,spix2);
+ p += ey2;
+ ARGB_READ(spix1, p);
+ ARGB_READ(spix2, p+ex2);
+ ARGB_ADDW_11(pix4,spix1,spix2);
+
+ ARGB_ADDW_13(pix,pix3,pix4);
+ ARGB_SHR(pix,pix,3);
+ ARGB_WRITE(pix,dst);
+ break;
+
+ default:
+ ARGB_READ(spix1, p);
+ ARGB_READ(spix2, p+ex2);
+ ARGB_ADDW_13(pix3,spix1,spix2);
+ p += ey2;
+ ARGB_READ(spix1, p);
+ ARGB_READ(spix2, p+ex2);
+ ARGB_ADDW_13(pix4,spix1,spix2);
+
+ ARGB_ADDW_13(pix,pix3,pix4);
+ ARGB_SHR(pix,pix,4);
+ ARGB_WRITE(pix,dst);
+ }
+ sx += ix;
+ dst += 4;
+ }
+
+ sy += iy;
+ dst_line += dst_pitch;
+ }
+ ARGB_DONE;
+}
+#endif
+#undef ARGB_SCALE_UP_QUICK_4x4
+
+
+#ifdef ARGB_SCALE_NEAREST
+/* this version scales up with nearest neighbours - looks crap */
+static void
+ARGB_SCALE_NEAREST( ScaleOp* op )
+{
+ int dst_pitch = op->dst_pitch;
+ int src_pitch = op->src_pitch;
+ uint8_t* dst_line = op->dst_line;
+ uint8_t* src_line = op->src_line;
+ int sx = op->sx;
+ int sy = op->sy;
+ int ix = op->ix;
+ int iy = op->iy;
+ int xlimit, ylimit;
+ int h, sx0;
+
+ /* the center pixel is at (sx+ix/2, sy+iy/2), we then want to get */
+ /* the four nearest source pixels, which are at (0.5,0.5) offsets */
+
+ sx = sx + ix/2 - 32768;
+ sy = sy + iy/2 - 32768;
+
+ xlimit = (op->src_w-1);
+ ylimit = (op->src_h-1);
+
+ sx0 = sx;
+
+ for ( h = op->rd.h; h > 0; h-- ) {
+ uint8_t* dst = dst_line;
+ uint8_t* dst_end = dst + 4*op->rd.w;
+
+ sx = sx0;
+ for ( ; dst < dst_end; ) {
+ int ex1, ex2, ey1, ey2;
+ unsigned* p;
+
+ /* find the top-left neighbour */
+ ex1 = (sx >> 16);
+ ey1 = (sy >> 16);
+ ex2 = ex1+1;
+ ey2 = ey1+1;
+
+ if (ex1 < 0) ex1 = 0; else if (ex1 > xlimit) ex1 = xlimit;
+ if (ey1 < 0) ey1 = 0; else if (ey1 > ylimit) ey1 = ylimit;
+ if (ex2 < 0) ex2 = 0; else if (ex2 > xlimit) ex2 = xlimit;
+ if (ey2 < 0) ey2 = 0; else if (ey2 > ylimit) ey2 = ylimit;
+
+ p = (unsigned*)(src_line + ex1*4 + ey1*src_pitch);
+ if ((sx & 0xffff) >= 32768)
+ p += (ex2-ex1);
+ if ((sy & 0xffff) >= 32768)
+ p = (unsigned*)((char*)p + (ey2-ey1)*src_pitch);
+
+ *(unsigned*)dst = p[0];
+
+ sx += ix;
+ dst += 4;
+ }
+
+ sy += iy;
+ dst_line += dst_pitch;
+ }
+}
+#endif
+#undef ARGB_SCALE_NEAREST