summary | refs | log | tree | commit | diff | stats
path: root/libpixelflinger/codeflinger/load_store.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'libpixelflinger/codeflinger/load_store.cpp')
-rw-r--r-- libpixelflinger/codeflinger/load_store.cpp 378
1 files changed, 378 insertions, 0 deletions
diff --git a/libpixelflinger/codeflinger/load_store.cpp b/libpixelflinger/codeflinger/load_store.cpp
new file mode 100644
index 0000000..93c5825
--- /dev/null
+++ b/libpixelflinger/codeflinger/load_store.cpp
@@ -0,0 +1,378 @@
+/* libs/pixelflinger/codeflinger/load_store.cpp
+**
+** Copyright 2006, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+** http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+#include <assert.h>
+#include <stdio.h>
+#include <cutils/log.h>
+
+#include "codeflinger/GGLAssembler.h"
+
+namespace android {
+
+// ----------------------------------------------------------------------------
+
+// Emits the instruction(s) that write the pixel held in register 's' to the
+// address held in 'addr'.  addr.size selects the access width (32, 24, 16 or
+// 8 bits; any other value emits nothing).  When WRITE_BACK is set in 'flags'
+// the address register is additionally advanced by the size of the pixel.
+void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
+{
+    const bool writeBack = (flags & WRITE_BACK) != 0;
+    const int bits = addr.size;
+
+    if (bits == 32) {
+        if (!writeBack) {
+            STR(AL, s.reg, addr.reg);
+        } else {
+            STR(AL, s.reg, addr.reg, immed12_post(4));
+        }
+    } else if (bits == 24) {
+        // Only RGB uses this size: 0x00BBGGRR is written out as R, G, B.
+        // Before every byte but the first, rotate the next one into the
+        // low 8 bits of the register.
+        for (int byte = 0; byte < 3; byte++) {
+            if (byte != 0) {
+                MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
+            }
+            STRB(AL, s.reg, addr.reg, immed12_pre(byte));
+        }
+        if ((s.flags & CORRUPTIBLE) == 0) {
+            // 8 + 8 + 16 = 32: complete the rotation so that the caller's
+            // register ends up holding its original value again
+            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
+        }
+        if (writeBack) {
+            ADD(AL, 0, addr.reg, addr.reg, imm(3));
+        }
+    } else if (bits == 16) {
+        if (!writeBack) {
+            STRH(AL, s.reg, addr.reg);
+        } else {
+            STRH(AL, s.reg, addr.reg, immed8_post(2));
+        }
+    } else if (bits == 8) {
+        if (!writeBack) {
+            STRB(AL, s.reg, addr.reg);
+        } else {
+            STRB(AL, s.reg, addr.reg, immed12_post(1));
+        }
+    }
+}
+
+// Emits the instruction(s) that read one pixel from the address held in
+// 'addr' into register 's'.  addr.size selects the access width (32, 24, 16
+// or 8 bits; any other value emits nothing).  When WRITE_BACK is set in
+// 'flags' the address register is additionally advanced by the pixel size.
+void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
+{
+    Scratch scratches(registerFile());
+
+    const bool writeBack = (flags & WRITE_BACK) != 0;
+
+    switch (addr.size) {
+    case 32: {
+        if (writeBack) {
+            LDR(AL, s.reg, addr.reg, immed12_post(4));
+        } else {
+            LDR(AL, s.reg, addr.reg);
+        }
+        break;
+    }
+    case 24: {
+        // Only RGB uses this size: the bytes R, G, B in memory are
+        // assembled into 0x00BBGGRR.
+        const int byteReg = scratches.obtain();
+        // Build the pixel directly in s.reg, unless s.reg is also the
+        // address register: the address is still needed for the G and B
+        // loads, so in that case accumulate in a second scratch register.
+        int accReg = s.reg;
+        if (s.reg == addr.reg) {
+            accReg = scratches.obtain();
+        }
+        LDRB(AL, accReg, addr.reg, immed12_pre(0));             // R
+        LDRB(AL, byteReg, addr.reg, immed12_pre(1));            // G
+        ORR(AL, 0, accReg, accReg, reg_imm(byteReg, LSL, 8));
+        LDRB(AL, byteReg, addr.reg, immed12_pre(2));            // B
+        // the last ORR always lands in s.reg
+        ORR(AL, 0, s.reg, accReg, reg_imm(byteReg, LSL, 16));
+        if (writeBack) {
+            ADD(AL, 0, addr.reg, addr.reg, imm(3));
+        }
+        break;
+    }
+    case 16: {
+        if (writeBack) {
+            LDRH(AL, s.reg, addr.reg, immed8_post(2));
+        } else {
+            LDRH(AL, s.reg, addr.reg);
+        }
+        break;
+    }
+    case 8: {
+        if (writeBack) {
+            LDRB(AL, s.reg, addr.reg, immed12_post(1));
+        } else {
+            LDRB(AL, s.reg, addr.reg);
+        }
+        break;
+    }
+    }
+}
+
+// Emits code that isolates bits [l, h) of register 's' and leaves them,
+// shifted down to bit 0, in d.reg.  'bits' is the number of significant bits
+// in 's': when h == bits nothing lies above the field, so no masking is
+// emitted.  On return d.s holds the width of the extracted field (h - l).
+void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
+{
+    const int maskLen = h-l;
+
+    assert(maskLen<=8);
+    assert(h);
+
+    const int dstReg = d.reg;
+    int srcReg = s;     // register currently holding the (partial) result
+    int lowBit = l;     // position of the field's lowest bit in srcReg
+
+    if (h != bits) {
+        // there is something above the field that has to be removed
+        const int mask = ((1<<maskLen)-1) << l;
+        if (isValidImmediate(mask)) {
+            // component = packed & mask
+            AND(AL, 0, dstReg, srcReg, imm(mask));
+        } else if (isValidImmediate(~mask)) {
+            // same thing, expressed as "clear every bit outside the mask"
+            BIC(AL, 0, dstReg, srcReg, imm(~mask));
+        } else {
+            // neither immediate can be encoded: push the field up against
+            // bit 31 so the bits above it are shifted out; the right shift
+            // below brings it back down
+            const int up = 32-h;
+            MOV(AL, 0, dstReg, reg_imm(srcReg, LSL, up));
+            lowBit += up;
+        }
+        srcReg = dstReg;
+    }
+
+    if (lowBit != 0) {
+        // component = packed >> lowBit
+        MOV(AL, 0, dstReg, reg_imm(srcReg, LSR, lowBit));
+        srcReg = dstReg;
+    }
+
+    if (srcReg != dstReg) {
+        // nothing was emitted above; the value still has to reach d.reg
+        MOV(AL, 0, dstReg, srcReg);
+    }
+
+    d.s = maskLen;
+}
+
+// Convenience overload: extracts component 'component' of pixel 's' into
+// 'd', taking the bit range from the pixel's format description and the
+// pixel size from s.size().
+void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
+{
+    const int hi = s.format.c[component].h;
+    const int lo = s.format.c[component].l;
+    extract(d, s.reg, hi, lo, s.size());
+}
+
+// Same as the integer_t overload, but for a component_t destination: the
+// extraction is done through a temporary integer_t that shares d's register
+// and flags, and 'd' is then rebuilt from that temporary.
+void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
+{
+    const int hi = s.format.c[component].h;
+    const int lo = s.format.c[component].l;
+
+    integer_t tmp(d.reg, 32, d.flags);
+    extract(tmp, s.reg, hi, lo, s.size());
+    d = component_t(tmp);
+}
+
+
+// Expands component 's' to 'dbits' bits into 'd'.  A component that already
+// starts at bit 0 and is not flagged CLEAR_HI is expanded directly; any
+// other component is first extracted into 'd' and then expanded in place.
+void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
+{
+    const bool mustExtractFirst = s.l || (s.flags & CLEAR_HI);
+    if (!mustExtractFirst) {
+        expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
+        return;
+    }
+
+    extract(d, s.reg, s.h, s.l, 32);
+    expand(d, d, dbits);
+}
+
+// component_t flavour of expand(): the work is done on a temporary integer_t
+// built from d's register and flags, and 'd' is then rebuilt from it.
+void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
+{
+    integer_t tmp(d.reg, 32, d.flags);
+    expand(tmp, s, dbits);
+    d = component_t(tmp);
+}
+
+// Emits code that widens the src.size()-bit value in src.reg to 'dbits'
+// bits in dst.reg by replicating its bit pattern.  'dst' is updated to
+// describe the result (size = dbits, flags = 0).  If 'dbits' is not larger
+// than the source size the value is only copied (when the registers differ).
+void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
+{
+    assert(src.size());
+
+    // Read everything needed from 'src' before touching 'dst': both may
+    // refer to the same object.
+    const int dstReg = dst.reg;
+    int srcReg = src.reg;
+    int width = src.size();
+
+    dst.flags = 0;
+    dst.s = dbits;
+
+    if (width >= dbits) {
+        // nothing to expand
+        if (srcReg != dstReg) {
+            MOV(AL, 0, dstReg, srcReg);
+        }
+        return;
+    }
+
+    if (width == 1) {
+        // d = (s << dbits) - s
+        RSB(AL, 0, dstReg, srcReg, reg_imm(srcReg, LSL, dbits));
+        return;
+    }
+
+    // bits still to be filled in once the source pattern has been placed
+    int remaining = dbits - width;
+
+    if ((dbits % width) != 0) {
+        // Target width is not a multiple of the source width: put the
+        // source in the top bits, then fill downwards with copies of what
+        // is already there, doubling the chunk size every step.
+        MOV(AL, 0, dstReg, reg_imm(srcReg, LSL, remaining));
+        for (;;) {
+            // d |= d >> width
+            ORR(AL, 0, dstReg, dstReg, reg_imm(dstReg, LSR, width));
+            remaining -= width;
+            width *= 2;
+            if (remaining <= 0) {
+                break;
+            }
+        }
+        return;
+    }
+
+    // Target width is a multiple of the source width: grow upwards from
+    // bit 0.  The first step reads the source register, later ones read
+    // the partially built result.
+    for (;;) {
+        // d = s | (s << width)
+        ORR(AL, 0, dstReg, srcReg, reg_imm(srcReg, LSL, width));
+        srcReg = dstReg;
+        remaining -= width;
+        if (width*2 < remaining) {
+            width *= 2;
+        }
+        if (remaining <= 0) {
+            break;
+        }
+    }
+}
+
+void GGLAssembler::downshift(
+ pixel_t& d, int component, component_t s, const reg_t& dither)
+{ /*
+ * Emits the code that narrows source component 's' (bits [s.l, s.h) of
+ * s.reg) to the width of destination component 'component' of pixel 'd'
+ * and places it at that component's bit position inside d.reg.
+ *
+ *  d         - destination pixel; d.format.c[component] supplies the target
+ *              bit range [dl, dh).  If FIRST is set in d.flags the result is
+ *              MOVed into d.reg, otherwise it is ORed into d.reg.  FIRST is
+ *              cleared in either case.
+ *  component - index into d.format.c[].
+ *  s         - source component; taken by value because the local copy's
+ *              reg is redirected as intermediate results are produced.
+ *  dither    - register whose value is added before the final right shift;
+ *              only read when mDithering is non-zero and the source is wider
+ *              than the destination.
+ *
+ * An error is logged if the source is narrower than the destination; no
+ * early return follows the log call.
+ */
+ const needs_t& needs = mBuilderContext.needs; // NOTE(review): 'needs' is not read anywhere in this function
+ Scratch scratches(registerFile());
+
+ int sh = s.h;
+ int sl = s.l;
+ int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0; // nothing to clear above a field that ends at bit 32
+ int maskLoBits = (sl!=0) ? ((s.flags & CLEAR_LO)?1:0) : 0; // nothing to clear below a field that starts at bit 0
+ int sbits = sh - sl;
+
+ int dh = d.format.c[component].h;
+ int dl = d.format.c[component].l;
+ int dbits = dh - dl;
+ int dithering = 0;
+
+ LOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);
+
+ if (sbits>dbits) {
+ // see if we need to dither
+ dithering = mDithering;
+ }
+
+ int ireg = d.reg; // working register; stays d.reg only when FIRST is set
+ if (!(d.flags & FIRST)) {
+ if (s.flags & CORRUPTIBLE) {
+ ireg = s.reg; // the source register may be overwritten, so work in place
+ } else {
+ ireg = scratches.obtain();
+ }
+ }
+ d.flags &= ~FIRST;
+
+ if (maskHiBits) {
+ // we need to mask the high bits (and possibly the lowbits too)
+ // and we might be able to use immediate mask.
+ if (!dithering) {
+ // we don't do this if we only have maskLoBits because we can
+ // do it more efficiently below (in the case where dl=0)
+ const int offset = sh - dbits; // lowest source bit that survives the reduction
+ if (dbits<=8 && offset >= 0) {
+ const uint32_t mask = ((1<<dbits)-1) << offset;
+ if (isValidImmediate(mask) || isValidImmediate(~mask)) {
+ build_and_immediate(ireg, s.reg, mask, 32);
+ sl = offset; // from here on the source is the dbits-wide field [offset, sh)
+ s.reg = ireg;
+ sbits = dbits;
+ maskLoBits = maskHiBits = 0;
+ }
+ }
+ } else {
+ // in the dithering case though, we need to preserve the lower bits
+ const uint32_t mask = ((1<<sbits)-1) << sl;
+ if (isValidImmediate(mask) || isValidImmediate(~mask)) {
+ build_and_immediate(ireg, s.reg, mask, 32);
+ s.reg = ireg;
+ maskLoBits = maskHiBits = 0;
+ }
+ }
+ }
+
+ // XXX: we could special case (maskHiBits & !maskLoBits)
+ // like we do for maskLoBits below, but it happens very rarely
+ // that we have maskHiBits only and the conditions necessary to lead
+ // to better code (like doing d |= s << 24)
+
+ if (maskHiBits) { // still set: no immediate mask was emitted above
+ MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh)); // move the field's top up to bit 32 so the bits above it are shifted out
+ sl += 32-sh;
+ sh = 32;
+ s.reg = ireg;
+ maskHiBits = 0;
+ }
+
+ // Downsampling should be performed as follows:
+ // V * ((1<<dbits)-1) / ((1<<sbits)-1)
+ // V * [(1<<dbits)/((1<<sbits)-1) - 1/((1<<sbits)-1)]
+ // V * [1/((1<<sbits)-1)>>dbits - 1/((1<<sbits)-1)]
+ // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits
+ // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits))
+ //
+ // By approximating (1>>dbits) and (1>>sbits) to 0:
+ //
+ // V>>(sbits-dbits) - V>>sbits
+ //
+ // A good approximation is V>>(sbits-dbits),
+ // but better one (needed for dithering) is:
+ //
+ // (V>>(sbits-dbits)<<sbits - V)>>sbits
+ // (V<<dbits - V)>>sbits
+ // (V - V>>dbits)>>(sbits-dbits)
+
+ // Dithering is done here
+ if (dithering) {
+ comment("dithering");
+ if (sl) {
+ MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl)); // bring the field down to bit 0 first
+ sh -= sl;
+ sl = 0;
+ s.reg = ireg;
+ }
+ // scaling (V-V>>dbits)
+ SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
+ const int shift = (GGL_DITHER_BITS - (sbits-dbits)); // presumably rescales the dither value to the (sbits-dbits) bits dropped below - TODO confirm
+ if (shift>0) ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
+ else if (shift<0) ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
+ else ADD(AL, 0, ireg, ireg, dither.reg);
+ s.reg = ireg;
+ }
+
+ if ((maskLoBits|dithering) && (sh > dbits)) {
+ int shift = sh-dbits; // the right shift drops everything below the top dbits of the field
+ if (dl) {
+ MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
+ if (ireg == d.reg) { // first component: overwrite d.reg
+ MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
+ } else { // later component: merge into d.reg
+ ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
+ }
+ } else {
+ if (ireg == d.reg) {
+ MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
+ } else {
+ ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
+ }
+ }
+ } else {
+ int shift = sh-dh; // single shift that lines the top of the source field up with the top of the destination field
+ if (shift>0) {
+ if (ireg == d.reg) {
+ MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
+ } else {
+ ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
+ }
+ } else if (shift<0) {
+ if (ireg == d.reg) {
+ MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
+ } else {
+ ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
+ }
+ } else { // already aligned: plain copy or merge
+ if (ireg == d.reg) {
+ if (s.reg != d.reg) {
+ MOV(AL, 0, d.reg, s.reg);
+ }
+ } else {
+ ORR(AL, 0, d.reg, d.reg, s.reg);
+ }
+ }
+ }
+}
+
+}; // namespace android