| field | value | date |
|---|---|---|
| author | The Android Open Source Project <initial-contribution@android.com> | 2009-03-03 19:32:55 -0800 |
| committer | The Android Open Source Project <initial-contribution@android.com> | 2009-03-03 19:32:55 -0800 |
| commit | dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0 (patch) | |
| tree | 2ba8d1a0846d69b18f623515e8d9b5d9fe38b590 /libpixelflinger/codeflinger/load_store.cpp | |
| parent | e54eebbf1a908d65ee8cf80bab62821c05666d70 (diff) | |
auto import from //depot/cupcake/@135843
Diffstat (limited to 'libpixelflinger/codeflinger/load_store.cpp')
-rw-r--r-- | libpixelflinger/codeflinger/load_store.cpp | 378 |
1 file changed, 378 insertions, 0 deletions
diff --git a/libpixelflinger/codeflinger/load_store.cpp b/libpixelflinger/codeflinger/load_store.cpp
new file mode 100644
index 0000000..93c5825
--- /dev/null
+++ b/libpixelflinger/codeflinger/load_store.cpp
@@ -0,0 +1,378 @@
+/* libs/pixelflinger/codeflinger/load_store.cpp
+**
+** Copyright 2006, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+**     http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+#include <assert.h>
+#include <stdio.h>
+#include <cutils/log.h>
+
+#include "codeflinger/GGLAssembler.h"
+
+namespace android {
+
+// ----------------------------------------------------------------------------
+
+void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
+{
+    const int bits = addr.size;
+    const int inc = (flags & WRITE_BACK)?1:0;
+    switch (bits) {
+    case 32:
+        if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
+        else        STR(AL, s.reg, addr.reg);
+        break;
+    case 24:
+        // 24 bits formats are a little special and used only for RGB
+        // 0x00BBGGRR is unpacked as R,G,B
+        STRB(AL, s.reg, addr.reg, immed12_pre(0));
+        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
+        STRB(AL, s.reg, addr.reg, immed12_pre(1));
+        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
+        STRB(AL, s.reg, addr.reg, immed12_pre(2));
+        if (!(s.flags & CORRUPTIBLE)) {
+            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
+        }
+        if (inc)
+            ADD(AL, 0, addr.reg, addr.reg, imm(3));
+        break;
+    case 16:
+        if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
+        else        STRH(AL, s.reg, addr.reg);
+        break;
+    case 8:
+        if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
+        else        STRB(AL, s.reg, addr.reg);
+        break;
+    }
+}
+
+void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
+{
+    Scratch scratches(registerFile());
+    int s0;
+
+    const int bits = addr.size;
+    const int inc = (flags & WRITE_BACK)?1:0;
+    switch (bits) {
+    case 32:
+        if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
+        else        LDR(AL, s.reg, addr.reg);
+        break;
+    case 24:
+        // 24 bits formats are a little special and used only for RGB
+        // R,G,B is packed as 0x00BBGGRR
+        s0 = scratches.obtain();
+        if (s.reg != addr.reg) {
+            LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
+            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
+            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
+            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
+            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
+        } else {
+            int s1 = scratches.obtain();
+            LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
+            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
+            ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
+            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
+            ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
+        }
+        if (inc)
+            ADD(AL, 0, addr.reg, addr.reg, imm(3));
+        break;
+    case 16:
+        if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
+        else        LDRH(AL, s.reg, addr.reg);
+        break;
+    case 8:
+        if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
+        else        LDRB(AL, s.reg, addr.reg);
+        break;
+    }
+}
+
+void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
+{
+    const int maskLen = h-l;
+
+    assert(maskLen<=8);
+    assert(h);
+
+    if (h != bits) {
+        const int mask = ((1<<maskLen)-1) << l;
+        if (isValidImmediate(mask)) {
+            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
+        } else if (isValidImmediate(~mask)) {
+            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
+        } else {
+            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
+            l += 32-h;
+            h = 32;
+        }
+        s = d.reg;
+    }
+
+    if (l) {
+        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
+        s = d.reg;
+    }
+
+    if (s != d.reg) {
+        MOV(AL, 0, d.reg, s);
+    }
+
+    d.s = maskLen;
+}
+
+void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
+{
+    extract(d,  s.reg,
+                s.format.c[component].h,
+                s.format.c[component].l,
+                s.size());
+}
+
+void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
+{
+    integer_t r(d.reg, 32, d.flags);
+    extract(r,  s.reg,
+                s.format.c[component].h,
+                s.format.c[component].l,
+                s.size());
+    d = component_t(r);
+}
+
+
+void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
+{
+    if (s.l || (s.flags & CLEAR_HI)) {
+        extract(d, s.reg, s.h, s.l, 32);
+        expand(d, d, dbits);
+    } else {
+        expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
+    }
+}
+
+void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
+{
+    integer_t r(d.reg, 32, d.flags);
+    expand(r, s, dbits);
+    d = component_t(r);
+}
+
+void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
+{
+    assert(src.size());
+
+    int sbits = src.size();
+    int s = src.reg;
+    int d = dst.reg;
+
+    // be sure to set 'dst' after we read 'src' as they may be identical
+    dst.s = dbits;
+    dst.flags = 0;
+
+    if (dbits<=sbits) {
+        if (s != d) {
+            MOV(AL, 0, d, s);
+        }
+        return;
+    }
+
+    if (sbits == 1) {
+        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
+            // d = (s<<dbits) - s;
+        return;
+    }
+
+    if (dbits % sbits) {
+        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
+            // d = s << (dbits-sbits);
+        dbits -= sbits;
+        do {
+            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
+                // d |= d >> sbits;
+            dbits -= sbits;
+            sbits *= 2;
+        } while(dbits>0);
+        return;
+    }
+
+    dbits -= sbits;
+    do {
+        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
+            // d |= d<<sbits;
+        s = d;
+        dbits -= sbits;
+        if (sbits*2 < dbits) {
+            sbits *= 2;
+        }
+    } while(dbits>0);
+}
+
+void GGLAssembler::downshift(
+        pixel_t& d, int component, component_t s, const reg_t& dither)
+{
+    const needs_t& needs = mBuilderContext.needs;
+    Scratch scratches(registerFile());
+
+    int sh = s.h;
+    int sl = s.l;
+    int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
+    int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
+    int sbits = sh - sl;
+
+    int dh = d.format.c[component].h;
+    int dl = d.format.c[component].l;
+    int dbits = dh - dl;
+    int dithering = 0;
+
+    LOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);
+
+    if (sbits>dbits) {
+        // see if we need to dither
+        dithering = mDithering;
+    }
+
+    int ireg = d.reg;
+    if (!(d.flags & FIRST)) {
+        if (s.flags & CORRUPTIBLE) {
+            ireg = s.reg;
+        } else {
+            ireg = scratches.obtain();
+        }
+    }
+    d.flags &= ~FIRST;
+
+    if (maskHiBits) {
+        // we need to mask the high bits (and possibly the lowbits too)
+        // and we might be able to use immediate mask.
+        if (!dithering) {
+            // we don't do this if we only have maskLoBits because we can
+            // do it more efficiently below (in the case where dl=0)
+            const int offset = sh - dbits;
+            if (dbits<=8 && offset >= 0) {
+                const uint32_t mask = ((1<<dbits)-1) << offset;
+                if (isValidImmediate(mask) || isValidImmediate(~mask)) {
+                    build_and_immediate(ireg, s.reg, mask, 32);
+                    sl = offset;
+                    s.reg = ireg;
+                    sbits = dbits;
+                    maskLoBits = maskHiBits = 0;
+                }
+            }
+        } else {
+            // in the dithering case though, we need to preserve the lower bits
+            const uint32_t mask = ((1<<sbits)-1) << sl;
+            if (isValidImmediate(mask) || isValidImmediate(~mask)) {
+                build_and_immediate(ireg, s.reg, mask, 32);
+                s.reg = ireg;
+                maskLoBits = maskHiBits = 0;
+            }
+        }
+    }
+
+    // XXX: we could special case (maskHiBits & !maskLoBits)
+    // like we do for maskLoBits below, but it happens very rarely
+    // that we have maskHiBits only and the conditions necessary to lead
+    // to better code (like doing d |= s << 24)
+
+    if (maskHiBits) {
+        MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
+        sl += 32-sh;
+        sh = 32;
+        s.reg = ireg;
+        maskHiBits = 0;
+    }
+
+    //  Downsampling should be performed as follows:
+    //  V * ((1<<dbits)-1) / ((1<<sbits)-1)
+    //  V * [(1<<dbits)/((1<<sbits)-1)  -  1/((1<<sbits)-1)]
+    //  V * [1/((1<<sbits)-1)>>dbits - 1/((1<<sbits)-1)]
+    //  V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits
+    //  V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits))
+    //
+    //  By approximating (1>>dbits) and (1>>sbits) to 0:
+    //
+    //  V>>(sbits-dbits) - V>>sbits
+    //
+    //  A good approximation is V>>(sbits-dbits),
+    //  but better one (needed for dithering) is:
+    //
+    //  (V>>(sbits-dbits)<<sbits - V)>>sbits
+    //  (V<<dbits - V)>>sbits
+    //  (V - V>>dbits)>>(sbits-dbits)
+
+    // Dithering is done here
+    if (dithering) {
+        comment("dithering");
+        if (sl) {
+            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
+            sh -= sl;
+            sl = 0;
+            s.reg = ireg;
+        }
+        // scaling (V-V>>dbits)
+        SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
+        const int shift = (GGL_DITHER_BITS - (sbits-dbits));
+        if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
+        else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
+        else                ADD(AL, 0, ireg, ireg, dither.reg);
+        s.reg = ireg;
+    }
+
+    if ((maskLoBits|dithering) && (sh > dbits)) {
+        int shift = sh-dbits;
+        if (dl) {
+            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
+            if (ireg == d.reg) {
+                MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
+            } else {
+                ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
+            }
+        } else {
+            if (ireg == d.reg) {
+                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
+            } else {
+                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
+            }
+        }
+    } else {
+        int shift = sh-dh;
+        if (shift>0) {
+            if (ireg == d.reg) {
+                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
+            } else {
+                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
+            }
+        } else if (shift<0) {
+            if (ireg == d.reg) {
+                MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
+            } else {
+                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
+            }
+        } else {
+            if (ireg == d.reg) {
+                if (s.reg != d.reg) {
+                    MOV(AL, 0, d.reg, s.reg);
+                }
+            } else {
+                ORR(AL, 0, d.reg, d.reg, s.reg);
+            }
+        }
+    }
+}
+
+}; // namespace android
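For readers following the diff, the scalar arithmetic that `expand()` and `downshift()` emit ARM code for can be sketched in plain C++. The snippet below is an editorial illustration, not part of the commit, and the helper names `expand_bits` and `downshift_bits` are invented for the example: it shows the bit replication used to widen a narrow channel (e.g. a 5-bit RGB565 component to 8 bits) and the `V >> (sbits - dbits)` approximation used to narrow it again.

```cpp
// Host-side sketch of the math the generated code performs (illustrative only).
#include <cstdio>

// Widen a value holding 'sbits' significant bits to 'dbits' bits by
// replicating its bits downwards -- the same result as the bit-replication
// sequences emitted by GGLAssembler::expand().
static unsigned expand_bits(unsigned v, int sbits, int dbits) {
    unsigned r = v << (dbits - sbits);          // place source in the high bits
    for (int filled = dbits - sbits; filled > 0; filled -= sbits)
        r |= r >> sbits;                        // replicate into the low bits
    return r;
}

// Narrow an 'sbits'-wide value to 'dbits' bits using the approximation
// discussed in the downshift() comments: V >> (sbits - dbits).
static unsigned downshift_bits(unsigned v, int sbits, int dbits) {
    return v >> (sbits - dbits);
}

int main() {
    unsigned r5   = 0x15;                       // 5-bit RGB565 red/blue component
    unsigned r8   = expand_bits(r5, 5, 8);      // 0b10101 -> 0b10101101 (0xAD)
    unsigned back = downshift_bits(r8, 8, 5);   // 0xAD >> 3 == 0x15 again
    std::printf("0x%02X -> 0x%02X -> 0x%02X\n", r5, r8, back);
    return 0;
}
```

For the 5-to-8-bit case this reproduces the familiar `(v << 3) | (v >> 2)` expansion, which is why the round trip through `downshift_bits` returns the original 5-bit value.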