author    The Android Open Source Project <initial-contribution@android.com>  2009-03-03 19:32:55 -0800
committer The Android Open Source Project <initial-contribution@android.com>  2009-03-03 19:32:55 -0800
commit    dd7bc3319deb2b77c5d07a51b7d6cd7e11b5beb0 (patch)
tree      2ba8d1a0846d69b18f623515e8d9b5d9fe38b590 /libpixelflinger/codeflinger
parent    e54eebbf1a908d65ee8cf80bab62821c05666d70 (diff)
auto import from //depot/cupcake/@135843
Diffstat (limited to 'libpixelflinger/codeflinger')
-rw-r--r--  libpixelflinger/codeflinger/ARMAssembler.cpp          |  428
-rw-r--r--  libpixelflinger/codeflinger/ARMAssembler.h            |  155
-rw-r--r--  libpixelflinger/codeflinger/ARMAssemblerInterface.cpp |  173
-rw-r--r--  libpixelflinger/codeflinger/ARMAssemblerInterface.h   |  324
-rw-r--r--  libpixelflinger/codeflinger/ARMAssemblerProxy.cpp     |  200
-rw-r--r--  libpixelflinger/codeflinger/ARMAssemblerProxy.h       |  123
-rw-r--r--  libpixelflinger/codeflinger/CodeCache.cpp             |  151
-rw-r--r--  libpixelflinger/codeflinger/CodeCache.h               |  134
-rw-r--r--  libpixelflinger/codeflinger/GGLAssembler.cpp          | 1150
-rw-r--r--  libpixelflinger/codeflinger/GGLAssembler.h            |  554
-rw-r--r--  libpixelflinger/codeflinger/armreg.h                  |  300
-rw-r--r--  libpixelflinger/codeflinger/blending.cpp              |  682
-rw-r--r--  libpixelflinger/codeflinger/disassem.c                |  702
-rw-r--r--  libpixelflinger/codeflinger/disassem.h                |   65
-rw-r--r--  libpixelflinger/codeflinger/load_store.cpp            |  378
-rw-r--r--  libpixelflinger/codeflinger/texturing.cpp             | 1251
16 files changed, 6770 insertions, 0 deletions
diff --git a/libpixelflinger/codeflinger/ARMAssembler.cpp b/libpixelflinger/codeflinger/ARMAssembler.cpp
new file mode 100644
index 0000000..ff7b0b3
--- /dev/null
+++ b/libpixelflinger/codeflinger/ARMAssembler.cpp
@@ -0,0 +1,428 @@
+/* libs/pixelflinger/codeflinger/ARMAssembler.cpp
+**
+** Copyright 2006, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+** http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+#define LOG_TAG "ARMAssembler"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <cutils/log.h>
+#include <cutils/properties.h>
+
+#if defined(WITH_LIB_HARDWARE)
+#include <hardware_legacy/qemu_tracing.h>
+#endif
+
+#include <private/pixelflinger/ggl_context.h>
+
+#include "codeflinger/ARMAssembler.h"
+#include "codeflinger/CodeCache.h"
+#include "codeflinger/disassem.h"
+
+// ----------------------------------------------------------------------------
+
+namespace android {
+
+// ----------------------------------------------------------------------------
+#if 0
+#pragma mark -
+#pragma mark ARMAssembler...
+#endif
+
+ARMAssembler::ARMAssembler(const sp<Assembly>& assembly)
+ : ARMAssemblerInterface(),
+ mAssembly(assembly)
+{
+ mBase = mPC = (uint32_t *)assembly->base();
+ mDuration = ggl_system_time();
+#if defined(WITH_LIB_HARDWARE)
+ mQemuTracing = true;
+#endif
+}
+
+ARMAssembler::~ARMAssembler()
+{
+}
+
+uint32_t* ARMAssembler::pc() const
+{
+ return mPC;
+}
+
+uint32_t* ARMAssembler::base() const
+{
+ return mBase;
+}
+
+void ARMAssembler::reset()
+{
+ mBase = mPC = (uint32_t *)mAssembly->base();
+ mBranchTargets.clear();
+ mLabels.clear();
+ mLabelsInverseMapping.clear();
+ mComments.clear();
+}
+
+// ----------------------------------------------------------------------------
+
+void ARMAssembler::disassemble(const char* name)
+{
+ if (name) {
+ printf("%s:\n", name);
+ }
+ size_t count = pc()-base();
+ uint32_t* i = base();
+ while (count--) {
+ ssize_t label = mLabelsInverseMapping.indexOfKey(i);
+ if (label >= 0) {
+ printf("%s:\n", mLabelsInverseMapping.valueAt(label));
+ }
+ ssize_t comment = mComments.indexOfKey(i);
+ if (comment >= 0) {
+ printf("; %s\n", mComments.valueAt(comment));
+ }
+ printf("%08x: %08x ", int(i), int(i[0]));
+ ::disassemble((u_int)i);
+ i++;
+ }
+}
+
+void ARMAssembler::comment(const char* string)
+{
+ mComments.add(mPC, string);
+}
+
+void ARMAssembler::label(const char* theLabel)
+{
+ mLabels.add(theLabel, mPC);
+ mLabelsInverseMapping.add(mPC, theLabel);
+}
+
+void ARMAssembler::B(int cc, const char* label)
+{
+ mBranchTargets.add(branch_target_t(label, mPC));
+ *mPC++ = (cc<<28) | (0xA<<24) | 0;
+}
+
+void ARMAssembler::BL(int cc, const char* label)
+{
+ mBranchTargets.add(branch_target_t(label, mPC));
+ *mPC++ = (cc<<28) | (0xB<<24) | 0;
+}
+
+#if 0
+#pragma mark -
+#pragma mark Prolog/Epilog & Generate...
+#endif
+
+
+void ARMAssembler::prolog()
+{
+ // write dummy prolog code
+ mPrologPC = mPC;
+ STM(AL, FD, SP, 1, LSAVED);
+}
+
+void ARMAssembler::epilog(uint32_t touched)
+{
+ touched &= LSAVED;
+ if (touched) {
+ // write prolog code
+ uint32_t* pc = mPC;
+ mPC = mPrologPC;
+ STM(AL, FD, SP, 1, touched | LLR);
+ mPC = pc;
+ // write epilog code
+ LDM(AL, FD, SP, 1, touched | LLR);
+ BX(AL, LR);
+ } else { // heh, no registers to save!
+ // write prolog code
+ uint32_t* pc = mPC;
+ mPC = mPrologPC;
+ MOV(AL, 0, R0, R0); // NOP
+ mPC = pc;
+ // write epilog code
+ BX(AL, LR);
+ }
+}
+
+int ARMAssembler::generate(const char* name)
+{
+ // fixup all the branches
+ size_t count = mBranchTargets.size();
+ while (count--) {
+ const branch_target_t& bt = mBranchTargets[count];
+ uint32_t* target_pc = mLabels.valueFor(bt.label);
+ LOG_ALWAYS_FATAL_IF(!target_pc,
+ "error resolving branch targets, target_pc is null");
+ int32_t offset = int32_t(target_pc - (bt.pc+2));
+ *bt.pc |= offset & 0xFFFFFF;
+ }
+
+ mAssembly->resize( int(pc()-base())*4 );
+
+ // the instruction cache is flushed by CodeCache
+ const int64_t duration = ggl_system_time() - mDuration;
+ const char * const format = "generated %s (%d ins) at [%p:%p] in %lld ns\n";
+ LOGI(format, name, int(pc()-base()), base(), pc(), duration);
+
+#if defined(WITH_LIB_HARDWARE)
+ if (__builtin_expect(mQemuTracing, 0)) {
+ int err = qemu_add_mapping(int(base()), name);
+ mQemuTracing = (err >= 0);
+ }
+#endif
+
+ char value[PROPERTY_VALUE_MAX];
+ property_get("debug.pf.disasm", value, "0");
+ if (atoi(value) != 0) {
+ printf(format, name, int(pc()-base()), base(), pc(), duration);
+ disassemble(name);
+ }
+
+ return NO_ERROR;
+}
+
+uint32_t* ARMAssembler::pcForLabel(const char* label)
+{
+ return mLabels.valueFor(label);
+}
+
+// ----------------------------------------------------------------------------
+
+#if 0
+#pragma mark -
+#pragma mark Data Processing...
+#endif
+
+void ARMAssembler::dataProcessing(int opcode, int cc,
+ int s, int Rd, int Rn, uint32_t Op2)
+{
+ *mPC++ = (cc<<28) | (opcode<<21) | (s<<20) | (Rn<<16) | (Rd<<12) | Op2;
+}
+
+#if 0
+#pragma mark -
+#pragma mark Multiply...
+#endif
+
+// multiply...
+void ARMAssembler::MLA(int cc, int s,
+ int Rd, int Rm, int Rs, int Rn) {
+ if (Rd == Rm) { int t = Rm; Rm=Rs; Rs=t; }
+ LOG_FATAL_IF(Rd==Rm, "MLA(r%u,r%u,r%u,r%u)", Rd,Rm,Rs,Rn);
+ *mPC++ = (cc<<28) | (1<<21) | (s<<20) |
+ (Rd<<16) | (Rn<<12) | (Rs<<8) | 0x90 | Rm;
+}
+void ARMAssembler::MUL(int cc, int s,
+ int Rd, int Rm, int Rs) {
+ if (Rd == Rm) { int t = Rm; Rm=Rs; Rs=t; }
+ LOG_FATAL_IF(Rd==Rm, "MUL(r%u,r%u,r%u)", Rd,Rm,Rs);
+ *mPC++ = (cc<<28) | (s<<20) | (Rd<<16) | (Rs<<8) | 0x90 | Rm;
+}
+void ARMAssembler::UMULL(int cc, int s,
+ int RdLo, int RdHi, int Rm, int Rs) {
+ LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi,
+ "UMULL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs);
+ *mPC++ = (cc<<28) | (1<<23) | (s<<20) |
+ (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm;
+}
+void ARMAssembler::UMUAL(int cc, int s,
+ int RdLo, int RdHi, int Rm, int Rs) {
+ LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi,
+ "UMUAL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs);
+ *mPC++ = (cc<<28) | (1<<23) | (1<<21) | (s<<20) |
+ (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm;
+}
+void ARMAssembler::SMULL(int cc, int s,
+ int RdLo, int RdHi, int Rm, int Rs) {
+ LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi,
+ "SMULL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs);
+ *mPC++ = (cc<<28) | (1<<23) | (1<<22) | (s<<20) |
+ (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm;
+}
+void ARMAssembler::SMUAL(int cc, int s,
+ int RdLo, int RdHi, int Rm, int Rs) {
+ LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi,
+ "SMUAL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs);
+ *mPC++ = (cc<<28) | (1<<23) | (1<<22) | (1<<21) | (s<<20) |
+ (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm;
+}
+
+#if 0
+#pragma mark -
+#pragma mark Branches...
+#endif
+
+// branches...
+void ARMAssembler::B(int cc, uint32_t* pc)
+{
+ int32_t offset = int32_t(pc - (mPC+2));
+ *mPC++ = (cc<<28) | (0xA<<24) | (offset & 0xFFFFFF);
+}
+
+void ARMAssembler::BL(int cc, uint32_t* pc)
+{
+ int32_t offset = int32_t(pc - (mPC+2));
+ *mPC++ = (cc<<28) | (0xB<<24) | (offset & 0xFFFFFF);
+}
+
+void ARMAssembler::BX(int cc, int Rn)
+{
+ *mPC++ = (cc<<28) | 0x12FFF10 | Rn;
+}
+
+#if 0
+#pragma mark -
+#pragma mark Data Transfer...
+#endif
+
+// data transfer...
+void ARMAssembler::LDR(int cc, int Rd, int Rn, uint32_t offset) {
+ *mPC++ = (cc<<28) | (1<<26) | (1<<20) | (Rn<<16) | (Rd<<12) | offset;
+}
+void ARMAssembler::LDRB(int cc, int Rd, int Rn, uint32_t offset) {
+ *mPC++ = (cc<<28) | (1<<26) | (1<<22) | (1<<20) | (Rn<<16) | (Rd<<12) | offset;
+}
+void ARMAssembler::STR(int cc, int Rd, int Rn, uint32_t offset) {
+ *mPC++ = (cc<<28) | (1<<26) | (Rn<<16) | (Rd<<12) | offset;
+}
+void ARMAssembler::STRB(int cc, int Rd, int Rn, uint32_t offset) {
+ *mPC++ = (cc<<28) | (1<<26) | (1<<22) | (Rn<<16) | (Rd<<12) | offset;
+}
+
+void ARMAssembler::LDRH(int cc, int Rd, int Rn, uint32_t offset) {
+ *mPC++ = (cc<<28) | (1<<20) | (Rn<<16) | (Rd<<12) | 0xB0 | offset;
+}
+void ARMAssembler::LDRSB(int cc, int Rd, int Rn, uint32_t offset) {
+ *mPC++ = (cc<<28) | (1<<20) | (Rn<<16) | (Rd<<12) | 0xD0 | offset;
+}
+void ARMAssembler::LDRSH(int cc, int Rd, int Rn, uint32_t offset) {
+ *mPC++ = (cc<<28) | (1<<20) | (Rn<<16) | (Rd<<12) | 0xF0 | offset;
+}
+void ARMAssembler::STRH(int cc, int Rd, int Rn, uint32_t offset) {
+ *mPC++ = (cc<<28) | (Rn<<16) | (Rd<<12) | 0xB0 | offset;
+}
+
+#if 0
+#pragma mark -
+#pragma mark Block Data Transfer...
+#endif
+
+// block data transfer...
+void ARMAssembler::LDM(int cc, int dir,
+ int Rn, int W, uint32_t reg_list)
+{ // ED FD EA FA IB IA DB DA
+ const uint8_t P[8] = { 1, 0, 1, 0, 1, 0, 1, 0 };
+ const uint8_t U[8] = { 1, 1, 0, 0, 1, 1, 0, 0 };
+ *mPC++ = (cc<<28) | (4<<25) | (uint32_t(P[dir])<<24) |
+ (uint32_t(U[dir])<<23) | (1<<20) | (W<<21) | (Rn<<16) | reg_list;
+}
+
+void ARMAssembler::STM(int cc, int dir,
+ int Rn, int W, uint32_t reg_list)
+{ // FA EA FD ED IB IA DB DA
+ const uint8_t P[8] = { 0, 1, 0, 1, 1, 0, 1, 0 };
+ const uint8_t U[8] = { 0, 0, 1, 1, 1, 1, 0, 0 };
+ *mPC++ = (cc<<28) | (4<<25) | (uint32_t(P[dir])<<24) |
+ (uint32_t(U[dir])<<23) | (0<<20) | (W<<21) | (Rn<<16) | reg_list;
+}
+
+#if 0
+#pragma mark -
+#pragma mark Special...
+#endif
+
+// special...
+void ARMAssembler::SWP(int cc, int Rn, int Rd, int Rm) {
+ *mPC++ = (cc<<28) | (2<<23) | (Rn<<16) | (Rd << 12) | 0x90 | Rm;
+}
+void ARMAssembler::SWPB(int cc, int Rn, int Rd, int Rm) {
+ *mPC++ = (cc<<28) | (2<<23) | (1<<22) | (Rn<<16) | (Rd << 12) | 0x90 | Rm;
+}
+void ARMAssembler::SWI(int cc, uint32_t comment) {
+ *mPC++ = (cc<<28) | (0xF<<24) | comment;
+}
+
+#if 0
+#pragma mark -
+#pragma mark DSP instructions...
+#endif
+
+// DSP instructions...
+void ARMAssembler::PLD(int Rn, uint32_t offset) {
+ LOG_ALWAYS_FATAL_IF(!((offset&(1<<24)) && !(offset&(1<<21))),
+ "PLD only P=1, W=0");
+ *mPC++ = 0xF550F000 | (Rn<<16) | offset;
+}
+
+void ARMAssembler::CLZ(int cc, int Rd, int Rm)
+{
+    *mPC++ = (cc<<28) | 0x16F0F10 | (Rd<<12) | Rm;
+}
+
+void ARMAssembler::QADD(int cc, int Rd, int Rm, int Rn)
+{
+ *mPC++ = (cc<<28) | 0x1000050 | (Rn<<16) | (Rd<<12) | Rm;
+}
+
+void ARMAssembler::QDADD(int cc, int Rd, int Rm, int Rn)
+{
+ *mPC++ = (cc<<28) | 0x1400050 | (Rn<<16) | (Rd<<12) | Rm;
+}
+
+void ARMAssembler::QSUB(int cc, int Rd, int Rm, int Rn)
+{
+ *mPC++ = (cc<<28) | 0x1200050 | (Rn<<16) | (Rd<<12) | Rm;
+}
+
+void ARMAssembler::QDSUB(int cc, int Rd, int Rm, int Rn)
+{
+ *mPC++ = (cc<<28) | 0x1600050 | (Rn<<16) | (Rd<<12) | Rm;
+}
+
+void ARMAssembler::SMUL(int cc, int xy,
+ int Rd, int Rm, int Rs)
+{
+ *mPC++ = (cc<<28) | 0x1600080 | (Rd<<16) | (Rs<<8) | (xy<<4) | Rm;
+}
+
+void ARMAssembler::SMULW(int cc, int y,
+ int Rd, int Rm, int Rs)
+{
+ *mPC++ = (cc<<28) | 0x12000A0 | (Rd<<16) | (Rs<<8) | (y<<4) | Rm;
+}
+
+void ARMAssembler::SMLA(int cc, int xy,
+ int Rd, int Rm, int Rs, int Rn)
+{
+ *mPC++ = (cc<<28) | 0x1000080 | (Rd<<16) | (Rn<<12) | (Rs<<8) | (xy<<4) | Rm;
+}
+
+void ARMAssembler::SMLAL(int cc, int xy,
+ int RdHi, int RdLo, int Rs, int Rm)
+{
+ *mPC++ = (cc<<28) | 0x1400080 | (RdHi<<16) | (RdLo<<12) | (Rs<<8) | (xy<<4) | Rm;
+}
+
+void ARMAssembler::SMLAW(int cc, int y,
+ int Rd, int Rm, int Rs, int Rn)
+{
+ *mPC++ = (cc<<28) | 0x1200080 | (Rd<<16) | (Rn<<12) | (Rs<<8) | (y<<4) | Rm;
+}
+
+}; // namespace android
+
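Note: B(cc, label) and BL(cc, label) emit a placeholder word whose low 24 bits
generate() patches once labels are resolved. The offset is counted in 32-bit
words relative to the branch address plus two words, since an ARM core's PC
reads eight bytes ahead of the executing instruction. A minimal sketch of the
fixup arithmetic (patch_branch() is a hypothetical helper, not part of this
file):

    #include <cstdint>

    // Mirrors the fixup loop in ARMAssembler::generate(): the low 24 bits
    // of a B/BL word hold a signed word offset relative to PC+8.
    uint32_t patch_branch(uint32_t placeholder, const uint32_t* branch_pc,
                          const uint32_t* target_pc)
    {
        int32_t offset = int32_t(target_pc - (branch_pc + 2)); // word offset
        return placeholder | (offset & 0xFFFFFF);              // low 24 bits
    }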
diff --git a/libpixelflinger/codeflinger/ARMAssembler.h b/libpixelflinger/codeflinger/ARMAssembler.h
new file mode 100644
index 0000000..8837e07
--- /dev/null
+++ b/libpixelflinger/codeflinger/ARMAssembler.h
@@ -0,0 +1,155 @@
+/* libs/pixelflinger/codeflinger/ARMAssembler.h
+**
+** Copyright 2006, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+** http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+#ifndef ANDROID_ARMASSEMBLER_H
+#define ANDROID_ARMASSEMBLER_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <utils/Vector.h>
+#include <utils/KeyedVector.h>
+
+#include "tinyutils/smartpointer.h"
+#include "codeflinger/ARMAssemblerInterface.h"
+#include "codeflinger/CodeCache.h"
+
+namespace android {
+
+// ----------------------------------------------------------------------------
+
+class ARMAssembler : public ARMAssemblerInterface
+{
+public:
+ ARMAssembler(const sp<Assembly>& assembly);
+ virtual ~ARMAssembler();
+
+ uint32_t* base() const;
+ uint32_t* pc() const;
+
+
+ void disassemble(const char* name);
+
+ // ------------------------------------------------------------------------
+ // ARMAssemblerInterface...
+ // ------------------------------------------------------------------------
+
+ virtual void reset();
+
+ virtual int generate(const char* name);
+
+ virtual void prolog();
+ virtual void epilog(uint32_t touched);
+ virtual void comment(const char* string);
+
+ virtual void dataProcessing(int opcode, int cc, int s,
+ int Rd, int Rn,
+ uint32_t Op2);
+ virtual void MLA(int cc, int s,
+ int Rd, int Rm, int Rs, int Rn);
+ virtual void MUL(int cc, int s,
+ int Rd, int Rm, int Rs);
+ virtual void UMULL(int cc, int s,
+ int RdLo, int RdHi, int Rm, int Rs);
+ virtual void UMUAL(int cc, int s,
+ int RdLo, int RdHi, int Rm, int Rs);
+ virtual void SMULL(int cc, int s,
+ int RdLo, int RdHi, int Rm, int Rs);
+ virtual void SMUAL(int cc, int s,
+ int RdLo, int RdHi, int Rm, int Rs);
+
+ virtual void B(int cc, uint32_t* pc);
+ virtual void BL(int cc, uint32_t* pc);
+ virtual void BX(int cc, int Rn);
+ virtual void label(const char* theLabel);
+ virtual void B(int cc, const char* label);
+ virtual void BL(int cc, const char* label);
+
+ virtual uint32_t* pcForLabel(const char* label);
+
+ virtual void LDR (int cc, int Rd,
+ int Rn, uint32_t offset = immed12_pre(0));
+ virtual void LDRB(int cc, int Rd,
+ int Rn, uint32_t offset = immed12_pre(0));
+ virtual void STR (int cc, int Rd,
+ int Rn, uint32_t offset = immed12_pre(0));
+ virtual void STRB(int cc, int Rd,
+ int Rn, uint32_t offset = immed12_pre(0));
+ virtual void LDRH (int cc, int Rd,
+ int Rn, uint32_t offset = immed8_pre(0));
+ virtual void LDRSB(int cc, int Rd,
+ int Rn, uint32_t offset = immed8_pre(0));
+ virtual void LDRSH(int cc, int Rd,
+ int Rn, uint32_t offset = immed8_pre(0));
+ virtual void STRH (int cc, int Rd,
+ int Rn, uint32_t offset = immed8_pre(0));
+ virtual void LDM(int cc, int dir,
+ int Rn, int W, uint32_t reg_list);
+ virtual void STM(int cc, int dir,
+ int Rn, int W, uint32_t reg_list);
+
+ virtual void SWP(int cc, int Rn, int Rd, int Rm);
+ virtual void SWPB(int cc, int Rn, int Rd, int Rm);
+ virtual void SWI(int cc, uint32_t comment);
+
+ virtual void PLD(int Rn, uint32_t offset);
+ virtual void CLZ(int cc, int Rd, int Rm);
+ virtual void QADD(int cc, int Rd, int Rm, int Rn);
+ virtual void QDADD(int cc, int Rd, int Rm, int Rn);
+ virtual void QSUB(int cc, int Rd, int Rm, int Rn);
+ virtual void QDSUB(int cc, int Rd, int Rm, int Rn);
+ virtual void SMUL(int cc, int xy,
+ int Rd, int Rm, int Rs);
+ virtual void SMULW(int cc, int y,
+ int Rd, int Rm, int Rs);
+ virtual void SMLA(int cc, int xy,
+ int Rd, int Rm, int Rs, int Rn);
+ virtual void SMLAL(int cc, int xy,
+ int RdHi, int RdLo, int Rs, int Rm);
+ virtual void SMLAW(int cc, int y,
+ int Rd, int Rm, int Rs, int Rn);
+
+private:
+ ARMAssembler(const ARMAssembler& rhs);
+ ARMAssembler& operator = (const ARMAssembler& rhs);
+
+ sp<Assembly> mAssembly;
+ uint32_t* mBase;
+ uint32_t* mPC;
+ uint32_t* mPrologPC;
+ int64_t mDuration;
+#if defined(WITH_LIB_HARDWARE)
+ bool mQemuTracing;
+#endif
+
+ struct branch_target_t {
+ inline branch_target_t() : label(0), pc(0) { }
+ inline branch_target_t(const char* l, uint32_t* p)
+ : label(l), pc(p) { }
+ const char* label;
+ uint32_t* pc;
+ };
+
+ Vector<branch_target_t> mBranchTargets;
+ KeyedVector< const char*, uint32_t* > mLabels;
+ KeyedVector< uint32_t*, const char* > mLabelsInverseMapping;
+ KeyedVector< uint32_t*, const char* > mComments;
+};
+
+}; // namespace android
+
+#endif //ANDROID_ARMASSEMBLER_H
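Note: a minimal end-to-end sketch of driving this class directly, assuming an
Assembly buffer is available (in the real pipeline GGLAssembler does the
emission; the label and buffer size here are illustrative):

    sp<Assembly> block = new Assembly(1024);      // room for the code
    ARMAssembler as(block);

    as.prolog();                                  // dummy STM, rewritten by epilog()
    as.label("loop");
    as.SUB(ARMAssembler::AL, ARMAssembler::S,     // subs r0, r0, #1
           ARMAssembler::R0, ARMAssembler::R0, ARMAssembler::imm(1));
    as.B(ARMAssembler::NE, "loop");               // placeholder, fixed in generate()
    as.epilog(0);                                 // no callee-saved register touched
    as.generate("countdown");                     // resolves "loop", logs timing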
diff --git a/libpixelflinger/codeflinger/ARMAssemblerInterface.cpp b/libpixelflinger/codeflinger/ARMAssemblerInterface.cpp
new file mode 100644
index 0000000..7fa0de0
--- /dev/null
+++ b/libpixelflinger/codeflinger/ARMAssemblerInterface.cpp
@@ -0,0 +1,173 @@
+/* libs/pixelflinger/codeflinger/ARMAssemblerInterface.cpp
+**
+** Copyright 2006, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+** http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+
+#include <errno.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <cutils/log.h>
+#include "codeflinger/ARMAssemblerInterface.h"
+
+namespace android {
+
+// ----------------------------------------------------------------------------
+
+ARMAssemblerInterface::~ARMAssemblerInterface()
+{
+}
+
+int ARMAssemblerInterface::buildImmediate(
+ uint32_t immediate, uint32_t& rot, uint32_t& imm)
+{
+ rot = 0;
+ imm = immediate;
+ if (imm > 0x7F) { // skip the easy cases
+ while (!(imm&3) || (imm&0xFC000000)) {
+ uint32_t newval;
+ newval = imm >> 2;
+ newval |= (imm&3) << 30;
+ imm = newval;
+ rot += 2;
+ if (rot == 32) {
+ rot = 0;
+ break;
+ }
+ }
+ }
+ rot = (16 - (rot>>1)) & 0xF;
+
+ if (imm>=0x100)
+ return -EINVAL;
+
+ if (((imm>>(rot<<1)) | (imm<<(32-(rot<<1)))) != immediate)
+ return -1;
+
+ return 0;
+}
+
+// shifters...
+
+bool ARMAssemblerInterface::isValidImmediate(uint32_t immediate)
+{
+ uint32_t rot, imm;
+ return buildImmediate(immediate, rot, imm) == 0;
+}
+
+uint32_t ARMAssemblerInterface::imm(uint32_t immediate)
+{
+ uint32_t rot, imm;
+ int err = buildImmediate(immediate, rot, imm);
+
+ LOG_ALWAYS_FATAL_IF(err==-EINVAL,
+ "immediate %08x cannot be encoded",
+ immediate);
+
+ LOG_ALWAYS_FATAL_IF(err,
+ "immediate (%08x) encoding bogus!",
+ immediate);
+
+ return (1<<25) | (rot<<8) | imm;
+}
+
+uint32_t ARMAssemblerInterface::reg_imm(int Rm, int type, uint32_t shift)
+{
+ return ((shift&0x1F)<<7) | ((type&0x3)<<5) | (Rm&0xF);
+}
+
+uint32_t ARMAssemblerInterface::reg_rrx(int Rm)
+{
+ return (ROR<<5) | (Rm&0xF);
+}
+
+uint32_t ARMAssemblerInterface::reg_reg(int Rm, int type, int Rs)
+{
+ return ((Rs&0xF)<<8) | ((type&0x3)<<5) | (1<<4) | (Rm&0xF);
+}
+
+// addressing modes...
+// LDR(B)/STR(B)/PLD (immediate and Rm can be negative, which indicates U=0)
+uint32_t ARMAssemblerInterface::immed12_pre(int32_t immed12, int W)
+{
+ LOG_ALWAYS_FATAL_IF(abs(immed12) >= 0x800,
+ "LDR(B)/STR(B)/PLD immediate too big (%08x)",
+ immed12);
+ return (1<<24) | (((uint32_t(immed12)>>31)^1)<<23) |
+ ((W&1)<<21) | (abs(immed12)&0x7FF);
+}
+
+uint32_t ARMAssemblerInterface::immed12_post(int32_t immed12)
+{
+ LOG_ALWAYS_FATAL_IF(abs(immed12) >= 0x800,
+ "LDR(B)/STR(B)/PLD immediate too big (%08x)",
+ immed12);
+
+ return (((uint32_t(immed12)>>31)^1)<<23) | (abs(immed12)&0x7FF);
+}
+
+uint32_t ARMAssemblerInterface::reg_scale_pre(int Rm, int type,
+ uint32_t shift, int W)
+{
+ return (1<<25) | (1<<24) |
+ (((uint32_t(Rm)>>31)^1)<<23) | ((W&1)<<21) |
+ reg_imm(abs(Rm), type, shift);
+}
+
+uint32_t ARMAssemblerInterface::reg_scale_post(int Rm, int type, uint32_t shift)
+{
+ return (1<<25) | (((uint32_t(Rm)>>31)^1)<<23) | reg_imm(abs(Rm), type, shift);
+}
+
+// LDRH/LDRSB/LDRSH/STRH (immediate and Rm can be negative, which indicates U=0)
+uint32_t ARMAssemblerInterface::immed8_pre(int32_t immed8, int W)
+{
+ uint32_t offset = abs(immed8);
+
+ LOG_ALWAYS_FATAL_IF(abs(immed8) >= 0x100,
+ "LDRH/LDRSB/LDRSH/STRH immediate too big (%08x)",
+ immed8);
+
+ return (1<<24) | (1<<22) | (((uint32_t(immed8)>>31)^1)<<23) |
+ ((W&1)<<21) | (((offset&0xF0)<<4)|(offset&0xF));
+}
+
+uint32_t ARMAssemblerInterface::immed8_post(int32_t immed8)
+{
+ uint32_t offset = abs(immed8);
+
+ LOG_ALWAYS_FATAL_IF(abs(immed8) >= 0x100,
+ "LDRH/LDRSB/LDRSH/STRH immediate too big (%08x)",
+ immed8);
+
+ return (1<<22) | (((uint32_t(immed8)>>31)^1)<<23) |
+ (((offset&0xF0)<<4) | (offset&0xF));
+}
+
+uint32_t ARMAssemblerInterface::reg_pre(int Rm, int W)
+{
+ return (1<<24) | (((uint32_t(Rm)>>31)^1)<<23) | ((W&1)<<21) | (abs(Rm)&0xF);
+}
+
+uint32_t ARMAssemblerInterface::reg_post(int Rm)
+{
+ return (((uint32_t(Rm)>>31)^1)<<23) | (abs(Rm)&0xF);
+}
+
+
+}; // namespace android
+
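Note: buildImmediate() implements the classic ARM operand-2 rule: a constant
is encodable only if it equals an 8-bit value rotated right by an even
amount. A self-contained sketch of that predicate (encodable_arm_imm() is
illustrative, not part of the interface):

    #include <cstdint>

    // True iff v == imm8 ROR rot for some 8-bit imm8 and even rot in [0, 30].
    static bool encodable_arm_imm(uint32_t v)
    {
        for (uint32_t rot = 0; rot < 32; rot += 2) {
            // rotating left by rot undoes "imm8 ROR rot"
            uint32_t undone = (v << rot) | (rot ? (v >> (32 - rot)) : 0);
            if (undone <= 0xFF)
                return true;
        }
        return false;
    }
    // e.g. encodable_arm_imm(0xFF000000) is true  (0xFF ROR 8)
    //      encodable_arm_imm(0x101)      is false (set bits span 9 positions)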
diff --git a/libpixelflinger/codeflinger/ARMAssemblerInterface.h b/libpixelflinger/codeflinger/ARMAssemblerInterface.h
new file mode 100644
index 0000000..465b3bd
--- /dev/null
+++ b/libpixelflinger/codeflinger/ARMAssemblerInterface.h
@@ -0,0 +1,324 @@
+/* libs/pixelflinger/codeflinger/ARMAssemblerInterface.h
+**
+** Copyright 2006, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+** http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+
+#ifndef ANDROID_ARMASSEMBLER_INTERFACE_H
+#define ANDROID_ARMASSEMBLER_INTERFACE_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+namespace android {
+
+// ----------------------------------------------------------------------------
+
+class ARMAssemblerInterface
+{
+public:
+ virtual ~ARMAssemblerInterface();
+
+ enum {
+ EQ, NE, CS, CC, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL, NV,
+ HS = CS,
+ LO = CC
+ };
+ enum {
+ S = 1
+ };
+ enum {
+ LSL, LSR, ASR, ROR
+ };
+ enum {
+ ED, FD, EA, FA,
+ IB, IA, DB, DA
+ };
+ enum {
+ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15,
+ SP = R13,
+ LR = R14,
+ PC = R15
+ };
+ enum {
+ #define LIST(rr) L##rr=1<<rr
+ LIST(R0), LIST(R1), LIST(R2), LIST(R3), LIST(R4), LIST(R5), LIST(R6),
+ LIST(R7), LIST(R8), LIST(R9), LIST(R10), LIST(R11), LIST(R12),
+ LIST(R13), LIST(R14), LIST(R15),
+ LIST(SP), LIST(LR), LIST(PC),
+ #undef LIST
+ LSAVED = LR4|LR5|LR6|LR7|LR8|LR9|LR10|LR11 | LLR
+ };
+
+ // -----------------------------------------------------------------------
+ // shifters and addressing modes
+ // -----------------------------------------------------------------------
+
+ // shifters...
+ static bool isValidImmediate(uint32_t immed);
+ static int buildImmediate(uint32_t i, uint32_t& rot, uint32_t& imm);
+
+ static uint32_t imm(uint32_t immediate);
+ static uint32_t reg_imm(int Rm, int type, uint32_t shift);
+ static uint32_t reg_rrx(int Rm);
+ static uint32_t reg_reg(int Rm, int type, int Rs);
+
+ // addressing modes...
+ // LDR(B)/STR(B)/PLD
+ // (immediate and Rm can be negative, which indicates U=0)
+ static uint32_t immed12_pre(int32_t immed12, int W=0);
+ static uint32_t immed12_post(int32_t immed12);
+ static uint32_t reg_scale_pre(int Rm, int type=0, uint32_t shift=0, int W=0);
+ static uint32_t reg_scale_post(int Rm, int type=0, uint32_t shift=0);
+
+ // LDRH/LDRSB/LDRSH/STRH
+ // (immediate and Rm can be negative, which indicates U=0)
+ static uint32_t immed8_pre(int32_t immed8, int W=0);
+ static uint32_t immed8_post(int32_t immed8);
+ static uint32_t reg_pre(int Rm, int W=0);
+ static uint32_t reg_post(int Rm);
+
+ // -----------------------------------------------------------------------
+ // basic instructions & code generation
+ // -----------------------------------------------------------------------
+
+ // generate the code
+ virtual void reset() = 0;
+ virtual int generate(const char* name) = 0;
+ virtual void disassemble(const char* name) = 0;
+
+ // construct prolog and epilog
+ virtual void prolog() = 0;
+ virtual void epilog(uint32_t touched) = 0;
+ virtual void comment(const char* string) = 0;
+
+ // data processing...
+ enum {
+ opAND, opEOR, opSUB, opRSB, opADD, opADC, opSBC, opRSC,
+ opTST, opTEQ, opCMP, opCMN, opORR, opMOV, opBIC, opMVN
+ };
+
+ virtual void
+ dataProcessing( int opcode, int cc, int s,
+ int Rd, int Rn,
+ uint32_t Op2) = 0;
+
+ // multiply...
+ virtual void MLA(int cc, int s,
+ int Rd, int Rm, int Rs, int Rn) = 0;
+ virtual void MUL(int cc, int s,
+ int Rd, int Rm, int Rs) = 0;
+ virtual void UMULL(int cc, int s,
+ int RdLo, int RdHi, int Rm, int Rs) = 0;
+ virtual void UMUAL(int cc, int s,
+ int RdLo, int RdHi, int Rm, int Rs) = 0;
+ virtual void SMULL(int cc, int s,
+ int RdLo, int RdHi, int Rm, int Rs) = 0;
+ virtual void SMUAL(int cc, int s,
+ int RdLo, int RdHi, int Rm, int Rs) = 0;
+
+ // branches...
+ virtual void B(int cc, uint32_t* pc) = 0;
+ virtual void BL(int cc, uint32_t* pc) = 0;
+ virtual void BX(int cc, int Rn) = 0;
+
+ virtual void label(const char* theLabel) = 0;
+ virtual void B(int cc, const char* label) = 0;
+ virtual void BL(int cc, const char* label) = 0;
+
+ // valid only after generate() has been called
+ virtual uint32_t* pcForLabel(const char* label) = 0;
+
+ // data transfer...
+ virtual void LDR (int cc, int Rd,
+ int Rn, uint32_t offset = immed12_pre(0)) = 0;
+ virtual void LDRB(int cc, int Rd,
+ int Rn, uint32_t offset = immed12_pre(0)) = 0;
+ virtual void STR (int cc, int Rd,
+ int Rn, uint32_t offset = immed12_pre(0)) = 0;
+ virtual void STRB(int cc, int Rd,
+ int Rn, uint32_t offset = immed12_pre(0)) = 0;
+
+ virtual void LDRH (int cc, int Rd,
+ int Rn, uint32_t offset = immed8_pre(0)) = 0;
+ virtual void LDRSB(int cc, int Rd,
+ int Rn, uint32_t offset = immed8_pre(0)) = 0;
+ virtual void LDRSH(int cc, int Rd,
+ int Rn, uint32_t offset = immed8_pre(0)) = 0;
+ virtual void STRH (int cc, int Rd,
+ int Rn, uint32_t offset = immed8_pre(0)) = 0;
+
+ // block data transfer...
+ virtual void LDM(int cc, int dir,
+ int Rn, int W, uint32_t reg_list) = 0;
+ virtual void STM(int cc, int dir,
+ int Rn, int W, uint32_t reg_list) = 0;
+
+ // special...
+ virtual void SWP(int cc, int Rn, int Rd, int Rm) = 0;
+ virtual void SWPB(int cc, int Rn, int Rd, int Rm) = 0;
+ virtual void SWI(int cc, uint32_t comment) = 0;
+
+ // DSP instructions...
+ enum {
+ // B=0, T=1
+ // yx
+ xyBB = 0, // 0000,
+ xyTB = 2, // 0010,
+ xyBT = 4, // 0100,
+ xyTT = 6, // 0110,
+ yB = 0, // 0000,
+ yT = 4, // 0100
+ };
+
+ virtual void PLD(int Rn, uint32_t offset) = 0;
+
+ virtual void CLZ(int cc, int Rd, int Rm) = 0;
+
+ virtual void QADD(int cc, int Rd, int Rm, int Rn) = 0;
+ virtual void QDADD(int cc, int Rd, int Rm, int Rn) = 0;
+ virtual void QSUB(int cc, int Rd, int Rm, int Rn) = 0;
+ virtual void QDSUB(int cc, int Rd, int Rm, int Rn) = 0;
+
+ virtual void SMUL(int cc, int xy,
+ int Rd, int Rm, int Rs) = 0;
+ virtual void SMULW(int cc, int y,
+ int Rd, int Rm, int Rs) = 0;
+ virtual void SMLA(int cc, int xy,
+ int Rd, int Rm, int Rs, int Rn) = 0;
+ virtual void SMLAL(int cc, int xy,
+ int RdHi, int RdLo, int Rs, int Rm) = 0;
+ virtual void SMLAW(int cc, int y,
+ int Rd, int Rm, int Rs, int Rn) = 0;
+
+ // -----------------------------------------------------------------------
+ // convenience...
+ // -----------------------------------------------------------------------
+ inline void
+ ADC(int cc, int s, int Rd, int Rn, uint32_t Op2) {
+ dataProcessing(opADC, cc, s, Rd, Rn, Op2);
+ }
+ inline void
+ ADD(int cc, int s, int Rd, int Rn, uint32_t Op2) {
+ dataProcessing(opADD, cc, s, Rd, Rn, Op2);
+ }
+ inline void
+ AND(int cc, int s, int Rd, int Rn, uint32_t Op2) {
+ dataProcessing(opAND, cc, s, Rd, Rn, Op2);
+ }
+ inline void
+ BIC(int cc, int s, int Rd, int Rn, uint32_t Op2) {
+ dataProcessing(opBIC, cc, s, Rd, Rn, Op2);
+ }
+ inline void
+ EOR(int cc, int s, int Rd, int Rn, uint32_t Op2) {
+ dataProcessing(opEOR, cc, s, Rd, Rn, Op2);
+ }
+ inline void
+ MOV(int cc, int s, int Rd, uint32_t Op2) {
+ dataProcessing(opMOV, cc, s, Rd, 0, Op2);
+ }
+ inline void
+ MVN(int cc, int s, int Rd, uint32_t Op2) {
+ dataProcessing(opMVN, cc, s, Rd, 0, Op2);
+ }
+ inline void
+ ORR(int cc, int s, int Rd, int Rn, uint32_t Op2) {
+ dataProcessing(opORR, cc, s, Rd, Rn, Op2);
+ }
+ inline void
+ RSB(int cc, int s, int Rd, int Rn, uint32_t Op2) {
+ dataProcessing(opRSB, cc, s, Rd, Rn, Op2);
+ }
+ inline void
+ RSC(int cc, int s, int Rd, int Rn, uint32_t Op2) {
+ dataProcessing(opRSC, cc, s, Rd, Rn, Op2);
+ }
+ inline void
+ SBC(int cc, int s, int Rd, int Rn, uint32_t Op2) {
+ dataProcessing(opSBC, cc, s, Rd, Rn, Op2);
+ }
+ inline void
+ SUB(int cc, int s, int Rd, int Rn, uint32_t Op2) {
+ dataProcessing(opSUB, cc, s, Rd, Rn, Op2);
+ }
+ inline void
+ TEQ(int cc, int Rn, uint32_t Op2) {
+ dataProcessing(opTEQ, cc, 1, 0, Rn, Op2);
+ }
+ inline void
+ TST(int cc, int Rn, uint32_t Op2) {
+ dataProcessing(opTST, cc, 1, 0, Rn, Op2);
+ }
+ inline void
+ CMP(int cc, int Rn, uint32_t Op2) {
+ dataProcessing(opCMP, cc, 1, 0, Rn, Op2);
+ }
+ inline void
+ CMN(int cc, int Rn, uint32_t Op2) {
+ dataProcessing(opCMN, cc, 1, 0, Rn, Op2);
+ }
+
+ inline void SMULBB(int cc, int Rd, int Rm, int Rs) {
+ SMUL(cc, xyBB, Rd, Rm, Rs); }
+ inline void SMULTB(int cc, int Rd, int Rm, int Rs) {
+ SMUL(cc, xyTB, Rd, Rm, Rs); }
+ inline void SMULBT(int cc, int Rd, int Rm, int Rs) {
+ SMUL(cc, xyBT, Rd, Rm, Rs); }
+ inline void SMULTT(int cc, int Rd, int Rm, int Rs) {
+ SMUL(cc, xyTT, Rd, Rm, Rs); }
+
+ inline void SMULWB(int cc, int Rd, int Rm, int Rs) {
+ SMULW(cc, yB, Rd, Rm, Rs); }
+ inline void SMULWT(int cc, int Rd, int Rm, int Rs) {
+ SMULW(cc, yT, Rd, Rm, Rs); }
+
+ inline void
+ SMLABB(int cc, int Rd, int Rm, int Rs, int Rn) {
+ SMLA(cc, xyBB, Rd, Rm, Rs, Rn); }
+ inline void
+ SMLATB(int cc, int Rd, int Rm, int Rs, int Rn) {
+ SMLA(cc, xyTB, Rd, Rm, Rs, Rn); }
+ inline void
+ SMLABT(int cc, int Rd, int Rm, int Rs, int Rn) {
+ SMLA(cc, xyBT, Rd, Rm, Rs, Rn); }
+ inline void
+ SMLATT(int cc, int Rd, int Rm, int Rs, int Rn) {
+ SMLA(cc, xyTT, Rd, Rm, Rs, Rn); }
+
+ inline void
+ SMLALBB(int cc, int RdHi, int RdLo, int Rs, int Rm) {
+ SMLAL(cc, xyBB, RdHi, RdLo, Rs, Rm); }
+ inline void
+ SMLALTB(int cc, int RdHi, int RdLo, int Rs, int Rm) {
+ SMLAL(cc, xyTB, RdHi, RdLo, Rs, Rm); }
+ inline void
+ SMLALBT(int cc, int RdHi, int RdLo, int Rs, int Rm) {
+ SMLAL(cc, xyBT, RdHi, RdLo, Rs, Rm); }
+ inline void
+ SMLALTT(int cc, int RdHi, int RdLo, int Rs, int Rm) {
+ SMLAL(cc, xyTT, RdHi, RdLo, Rs, Rm); }
+
+ inline void
+ SMLAWB(int cc, int Rd, int Rm, int Rs, int Rn) {
+ SMLAW(cc, yB, Rd, Rm, Rs, Rn); }
+ inline void
+ SMLAWT(int cc, int Rd, int Rm, int Rs, int Rn) {
+ SMLAW(cc, yT, Rd, Rm, Rs, Rn); }
+};
+
+}; // namespace android
+
+#endif //ANDROID_ARMASSEMBLER_INTERFACE_H
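Note: the shifter helpers build the operand-2 field that the convenience
wrappers pass straight through to dataProcessing(). A usage sketch, assuming
an ARMAssemblerInterface* named a:

    // ADD r0, r1, r2, LSL #2 -- operand 2 built with reg_imm()
    a->ADD(ARMAssemblerInterface::AL, 0,
           ARMAssemblerInterface::R0, ARMAssemblerInterface::R1,
           ARMAssemblerInterface::reg_imm(ARMAssemblerInterface::R2,
                                          ARMAssemblerInterface::LSL, 2));

    // MOV r0, #0xFF000000 -- imm() computes the 8-bit/rotate encoding
    a->MOV(ARMAssemblerInterface::AL, 0, ARMAssemblerInterface::R0,
           ARMAssemblerInterface::imm(0xFF000000));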
diff --git a/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp b/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp
new file mode 100644
index 0000000..18c4618
--- /dev/null
+++ b/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp
@@ -0,0 +1,200 @@
+/* libs/pixelflinger/codeflinger/ARMAssemblerProxy.cpp
+**
+** Copyright 2006, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+** http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "codeflinger/ARMAssemblerProxy.h"
+
+namespace android {
+
+// ----------------------------------------------------------------------------
+
+ARMAssemblerProxy::ARMAssemblerProxy()
+ : mTarget(0)
+{
+}
+
+ARMAssemblerProxy::ARMAssemblerProxy(ARMAssemblerInterface* target)
+ : mTarget(target)
+{
+}
+
+ARMAssemblerProxy::~ARMAssemblerProxy()
+{
+ delete mTarget;
+}
+
+void ARMAssemblerProxy::setTarget(ARMAssemblerInterface* target)
+{
+ delete mTarget;
+ mTarget = target;
+}
+
+void ARMAssemblerProxy::reset() {
+ mTarget->reset();
+}
+int ARMAssemblerProxy::generate(const char* name) {
+ return mTarget->generate(name);
+}
+void ARMAssemblerProxy::disassemble(const char* name) {
+ return mTarget->disassemble(name);
+}
+void ARMAssemblerProxy::prolog() {
+ mTarget->prolog();
+}
+void ARMAssemblerProxy::epilog(uint32_t touched) {
+ mTarget->epilog(touched);
+}
+void ARMAssemblerProxy::comment(const char* string) {
+ mTarget->comment(string);
+}
+
+
+void ARMAssemblerProxy::dataProcessing( int opcode, int cc, int s,
+ int Rd, int Rn, uint32_t Op2)
+{
+ mTarget->dataProcessing(opcode, cc, s, Rd, Rn, Op2);
+}
+
+void ARMAssemblerProxy::MLA(int cc, int s, int Rd, int Rm, int Rs, int Rn) {
+ mTarget->MLA(cc, s, Rd, Rm, Rs, Rn);
+}
+void ARMAssemblerProxy::MUL(int cc, int s, int Rd, int Rm, int Rs) {
+ mTarget->MUL(cc, s, Rd, Rm, Rs);
+}
+void ARMAssemblerProxy::UMULL(int cc, int s,
+ int RdLo, int RdHi, int Rm, int Rs) {
+ mTarget->UMULL(cc, s, RdLo, RdHi, Rm, Rs);
+}
+void ARMAssemblerProxy::UMUAL(int cc, int s,
+ int RdLo, int RdHi, int Rm, int Rs) {
+ mTarget->UMUAL(cc, s, RdLo, RdHi, Rm, Rs);
+}
+void ARMAssemblerProxy::SMULL(int cc, int s,
+ int RdLo, int RdHi, int Rm, int Rs) {
+ mTarget->SMULL(cc, s, RdLo, RdHi, Rm, Rs);
+}
+void ARMAssemblerProxy::SMUAL(int cc, int s,
+ int RdLo, int RdHi, int Rm, int Rs) {
+ mTarget->SMUAL(cc, s, RdLo, RdHi, Rm, Rs);
+}
+
+void ARMAssemblerProxy::B(int cc, uint32_t* pc) {
+ mTarget->B(cc, pc);
+}
+void ARMAssemblerProxy::BL(int cc, uint32_t* pc) {
+ mTarget->BL(cc, pc);
+}
+void ARMAssemblerProxy::BX(int cc, int Rn) {
+ mTarget->BX(cc, Rn);
+}
+void ARMAssemblerProxy::label(const char* theLabel) {
+ mTarget->label(theLabel);
+}
+void ARMAssemblerProxy::B(int cc, const char* label) {
+ mTarget->B(cc, label);
+}
+void ARMAssemblerProxy::BL(int cc, const char* label) {
+ mTarget->BL(cc, label);
+}
+
+uint32_t* ARMAssemblerProxy::pcForLabel(const char* label) {
+ return mTarget->pcForLabel(label);
+}
+
+void ARMAssemblerProxy::LDR(int cc, int Rd, int Rn, uint32_t offset) {
+ mTarget->LDR(cc, Rd, Rn, offset);
+}
+void ARMAssemblerProxy::LDRB(int cc, int Rd, int Rn, uint32_t offset) {
+ mTarget->LDRB(cc, Rd, Rn, offset);
+}
+void ARMAssemblerProxy::STR(int cc, int Rd, int Rn, uint32_t offset) {
+ mTarget->STR(cc, Rd, Rn, offset);
+}
+void ARMAssemblerProxy::STRB(int cc, int Rd, int Rn, uint32_t offset) {
+ mTarget->STRB(cc, Rd, Rn, offset);
+}
+void ARMAssemblerProxy::LDRH(int cc, int Rd, int Rn, uint32_t offset) {
+ mTarget->LDRH(cc, Rd, Rn, offset);
+}
+void ARMAssemblerProxy::LDRSB(int cc, int Rd, int Rn, uint32_t offset) {
+ mTarget->LDRSB(cc, Rd, Rn, offset);
+}
+void ARMAssemblerProxy::LDRSH(int cc, int Rd, int Rn, uint32_t offset) {
+ mTarget->LDRSH(cc, Rd, Rn, offset);
+}
+void ARMAssemblerProxy::STRH(int cc, int Rd, int Rn, uint32_t offset) {
+ mTarget->STRH(cc, Rd, Rn, offset);
+}
+void ARMAssemblerProxy::LDM(int cc, int dir, int Rn, int W, uint32_t reg_list) {
+ mTarget->LDM(cc, dir, Rn, W, reg_list);
+}
+void ARMAssemblerProxy::STM(int cc, int dir, int Rn, int W, uint32_t reg_list) {
+ mTarget->STM(cc, dir, Rn, W, reg_list);
+}
+
+void ARMAssemblerProxy::SWP(int cc, int Rn, int Rd, int Rm) {
+ mTarget->SWP(cc, Rn, Rd, Rm);
+}
+void ARMAssemblerProxy::SWPB(int cc, int Rn, int Rd, int Rm) {
+ mTarget->SWPB(cc, Rn, Rd, Rm);
+}
+void ARMAssemblerProxy::SWI(int cc, uint32_t comment) {
+ mTarget->SWI(cc, comment);
+}
+
+
+void ARMAssemblerProxy::PLD(int Rn, uint32_t offset) {
+ mTarget->PLD(Rn, offset);
+}
+void ARMAssemblerProxy::CLZ(int cc, int Rd, int Rm) {
+ mTarget->CLZ(cc, Rd, Rm);
+}
+void ARMAssemblerProxy::QADD(int cc, int Rd, int Rm, int Rn) {
+ mTarget->QADD(cc, Rd, Rm, Rn);
+}
+void ARMAssemblerProxy::QDADD(int cc, int Rd, int Rm, int Rn) {
+ mTarget->QDADD(cc, Rd, Rm, Rn);
+}
+void ARMAssemblerProxy::QSUB(int cc, int Rd, int Rm, int Rn) {
+ mTarget->QSUB(cc, Rd, Rm, Rn);
+}
+void ARMAssemblerProxy::QDSUB(int cc, int Rd, int Rm, int Rn) {
+ mTarget->QDSUB(cc, Rd, Rm, Rn);
+}
+void ARMAssemblerProxy::SMUL(int cc, int xy, int Rd, int Rm, int Rs) {
+ mTarget->SMUL(cc, xy, Rd, Rm, Rs);
+}
+void ARMAssemblerProxy::SMULW(int cc, int y, int Rd, int Rm, int Rs) {
+ mTarget->SMULW(cc, y, Rd, Rm, Rs);
+}
+void ARMAssemblerProxy::SMLA(int cc, int xy, int Rd, int Rm, int Rs, int Rn) {
+ mTarget->SMLA(cc, xy, Rd, Rm, Rs, Rn);
+}
+void ARMAssemblerProxy::SMLAL( int cc, int xy,
+ int RdHi, int RdLo, int Rs, int Rm) {
+ mTarget->SMLAL(cc, xy, RdHi, RdLo, Rs, Rm);
+}
+void ARMAssemblerProxy::SMLAW(int cc, int y, int Rd, int Rm, int Rs, int Rn) {
+ mTarget->SMLAW(cc, y, Rd, Rm, Rs, Rn);
+}
+
+
+}; // namespace android
+
diff --git a/libpixelflinger/codeflinger/ARMAssemblerProxy.h b/libpixelflinger/codeflinger/ARMAssemblerProxy.h
new file mode 100644
index 0000000..4bdca9c
--- /dev/null
+++ b/libpixelflinger/codeflinger/ARMAssemblerProxy.h
@@ -0,0 +1,123 @@
+/* libs/pixelflinger/codeflinger/ARMAssemblerProxy.h
+**
+** Copyright 2006, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+** http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+
+#ifndef ANDROID_ARMASSEMBLER_PROXY_H
+#define ANDROID_ARMASSEMBLER_PROXY_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "codeflinger/ARMAssemblerInterface.h"
+
+namespace android {
+
+// ----------------------------------------------------------------------------
+
+class ARMAssemblerProxy : public ARMAssemblerInterface
+{
+public:
+    // ARMAssemblerProxy takes ownership of the target
+
+ ARMAssemblerProxy();
+ ARMAssemblerProxy(ARMAssemblerInterface* target);
+ virtual ~ARMAssemblerProxy();
+
+ void setTarget(ARMAssemblerInterface* target);
+
+ virtual void reset();
+ virtual int generate(const char* name);
+ virtual void disassemble(const char* name);
+
+ virtual void prolog();
+ virtual void epilog(uint32_t touched);
+ virtual void comment(const char* string);
+
+ virtual void dataProcessing(int opcode, int cc, int s,
+ int Rd, int Rn,
+ uint32_t Op2);
+ virtual void MLA(int cc, int s,
+ int Rd, int Rm, int Rs, int Rn);
+ virtual void MUL(int cc, int s,
+ int Rd, int Rm, int Rs);
+ virtual void UMULL(int cc, int s,
+ int RdLo, int RdHi, int Rm, int Rs);
+ virtual void UMUAL(int cc, int s,
+ int RdLo, int RdHi, int Rm, int Rs);
+ virtual void SMULL(int cc, int s,
+ int RdLo, int RdHi, int Rm, int Rs);
+ virtual void SMUAL(int cc, int s,
+ int RdLo, int RdHi, int Rm, int Rs);
+
+ virtual void B(int cc, uint32_t* pc);
+ virtual void BL(int cc, uint32_t* pc);
+ virtual void BX(int cc, int Rn);
+ virtual void label(const char* theLabel);
+ virtual void B(int cc, const char* label);
+ virtual void BL(int cc, const char* label);
+
+ uint32_t* pcForLabel(const char* label);
+
+ virtual void LDR (int cc, int Rd,
+ int Rn, uint32_t offset = immed12_pre(0));
+ virtual void LDRB(int cc, int Rd,
+ int Rn, uint32_t offset = immed12_pre(0));
+ virtual void STR (int cc, int Rd,
+ int Rn, uint32_t offset = immed12_pre(0));
+ virtual void STRB(int cc, int Rd,
+ int Rn, uint32_t offset = immed12_pre(0));
+ virtual void LDRH (int cc, int Rd,
+ int Rn, uint32_t offset = immed8_pre(0));
+ virtual void LDRSB(int cc, int Rd,
+ int Rn, uint32_t offset = immed8_pre(0));
+ virtual void LDRSH(int cc, int Rd,
+ int Rn, uint32_t offset = immed8_pre(0));
+ virtual void STRH (int cc, int Rd,
+ int Rn, uint32_t offset = immed8_pre(0));
+ virtual void LDM(int cc, int dir,
+ int Rn, int W, uint32_t reg_list);
+ virtual void STM(int cc, int dir,
+ int Rn, int W, uint32_t reg_list);
+
+ virtual void SWP(int cc, int Rn, int Rd, int Rm);
+ virtual void SWPB(int cc, int Rn, int Rd, int Rm);
+ virtual void SWI(int cc, uint32_t comment);
+
+ virtual void PLD(int Rn, uint32_t offset);
+ virtual void CLZ(int cc, int Rd, int Rm);
+ virtual void QADD(int cc, int Rd, int Rm, int Rn);
+ virtual void QDADD(int cc, int Rd, int Rm, int Rn);
+ virtual void QSUB(int cc, int Rd, int Rm, int Rn);
+ virtual void QDSUB(int cc, int Rd, int Rm, int Rn);
+ virtual void SMUL(int cc, int xy,
+ int Rd, int Rm, int Rs);
+ virtual void SMULW(int cc, int y,
+ int Rd, int Rm, int Rs);
+ virtual void SMLA(int cc, int xy,
+ int Rd, int Rm, int Rs, int Rn);
+ virtual void SMLAL(int cc, int xy,
+ int RdHi, int RdLo, int Rs, int Rm);
+ virtual void SMLAW(int cc, int y,
+ int Rd, int Rm, int Rs, int Rn);
+
+private:
+ ARMAssemblerInterface* mTarget;
+};
+
+}; // namespace android
+
+#endif //ANDROID_ARMASSEMBLER_PROXY_H
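Note: the proxy lets higher layers be stacked over any backend; GGLAssembler
(further below) derives from ARMAssemblerProxy and forwards every instruction
to the assembler it owns. A composition sketch:

    sp<Assembly> block = new Assembly(1024);
    ARMAssemblerInterface* backend = new ARMAssembler(block);
    GGLAssembler assembler(backend);   // the proxy's destructor deletes backend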
diff --git a/libpixelflinger/codeflinger/CodeCache.cpp b/libpixelflinger/codeflinger/CodeCache.cpp
new file mode 100644
index 0000000..29410c8
--- /dev/null
+++ b/libpixelflinger/codeflinger/CodeCache.cpp
@@ -0,0 +1,151 @@
+/* libs/pixelflinger/codeflinger/CodeCache.cpp
+**
+** Copyright 2006, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+** http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <cutils/log.h>
+#include <cutils/atomic.h>
+
+#include "codeflinger/CodeCache.h"
+
+namespace android {
+
+// ----------------------------------------------------------------------------
+
+#if defined(__arm__)
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>   // strerror(), used after cacheflush()
+#endif
+
+// ----------------------------------------------------------------------------
+
+Assembly::Assembly(size_t size)
+ : mCount(1), mSize(0)
+{
+ mBase = (uint32_t*)malloc(size);
+ if (mBase) {
+ mSize = size;
+ }
+}
+
+Assembly::~Assembly()
+{
+ free(mBase);
+}
+
+void Assembly::incStrong(const void*) const
+{
+ android_atomic_inc(&mCount);
+}
+
+void Assembly::decStrong(const void*) const
+{
+ if (android_atomic_dec(&mCount) == 1) {
+ delete this;
+ }
+}
+
+ssize_t Assembly::size() const
+{
+ if (!mBase) return NO_MEMORY;
+ return mSize;
+}
+
+uint32_t* Assembly::base() const
+{
+ return mBase;
+}
+
+ssize_t Assembly::resize(size_t newSize)
+{
+ mBase = (uint32_t*)realloc(mBase, newSize);
+ mSize = newSize;
+ return size();
+}
+
+// ----------------------------------------------------------------------------
+
+CodeCache::CodeCache(size_t size)
+ : mCacheSize(size), mCacheInUse(0)
+{
+ pthread_mutex_init(&mLock, 0);
+}
+
+CodeCache::~CodeCache()
+{
+ pthread_mutex_destroy(&mLock);
+}
+
+sp<Assembly> CodeCache::lookup(const AssemblyKeyBase& keyBase) const
+{
+ pthread_mutex_lock(&mLock);
+ sp<Assembly> r;
+ ssize_t index = mCacheData.indexOfKey(key_t(keyBase));
+ if (index >= 0) {
+ const cache_entry_t& e = mCacheData.valueAt(index);
+ e.when = mWhen++;
+ r = e.entry;
+ }
+ pthread_mutex_unlock(&mLock);
+ return r;
+}
+
+int CodeCache::cache( const AssemblyKeyBase& keyBase,
+ const sp<Assembly>& assembly)
+{
+ pthread_mutex_lock(&mLock);
+
+ const ssize_t assemblySize = assembly->size();
+ while (mCacheInUse + assemblySize > mCacheSize) {
+ // evict the LRU
+ size_t lru = 0;
+ size_t count = mCacheData.size();
+ for (size_t i=0 ; i<count ; i++) {
+ const cache_entry_t& e = mCacheData.valueAt(i);
+ if (e.when < mCacheData.valueAt(lru).when) {
+ lru = i;
+ }
+ }
+ const cache_entry_t& e = mCacheData.valueAt(lru);
+ mCacheInUse -= e.entry->size();
+ mCacheData.removeItemsAt(lru);
+ }
+
+ ssize_t err = mCacheData.add(key_t(keyBase), cache_entry_t(assembly, mWhen));
+ if (err >= 0) {
+ mCacheInUse += assemblySize;
+ mWhen++;
+ // synchronize caches...
+#if defined(__arm__)
+ const long base = long(assembly->base());
+ const long curr = base + long(assembly->size());
+ err = cacheflush(base, curr, 0);
+ LOGE_IF(err, "__ARM_NR_cacheflush error %s\n",
+ strerror(errno));
+#endif
+ }
+
+ pthread_mutex_unlock(&mLock);
+ return err;
+}
+
+// ----------------------------------------------------------------------------
+
+}; // namespace android
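Note: the cacheflush() call above is what makes the freshly written words
executable: on ARM the instruction and data caches are not coherent, so
jumping into the buffer without a flush may execute stale bytes. A hedged
sketch of invoking a generated block once cached (the signature is
illustrative; pixelflinger's real scanline functions receive the context in
R0, as GGLAssembler.cpp notes below):

    typedef void (*scanline_fn_t)(void* context);   // illustrative signature

    void run(const sp<Assembly>& block, void* context)
    {
        scanline_fn_t fn = reinterpret_cast<scanline_fn_t>(block->base());
        fn(context);   // safe only after the I-cache sync in CodeCache::cache()
    }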
diff --git a/libpixelflinger/codeflinger/CodeCache.h b/libpixelflinger/codeflinger/CodeCache.h
new file mode 100644
index 0000000..370ce17
--- /dev/null
+++ b/libpixelflinger/codeflinger/CodeCache.h
@@ -0,0 +1,134 @@
+/* libs/pixelflinger/codeflinger/CodeCache.h
+**
+** Copyright 2006, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+** http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+
+#ifndef ANDROID_CODECACHE_H
+#define ANDROID_CODECACHE_H
+
+#include <stdint.h>
+#include <pthread.h>
+#include <sys/types.h>
+
+#include <utils/KeyedVector.h>
+
+#include "tinyutils/smartpointer.h"
+
+namespace android {
+
+// ----------------------------------------------------------------------------
+
+class AssemblyKeyBase {
+public:
+ virtual ~AssemblyKeyBase() { }
+ virtual int compare_type(const AssemblyKeyBase& key) const = 0;
+};
+
+template <typename T>
+class AssemblyKey : public AssemblyKeyBase
+{
+public:
+ AssemblyKey(const T& rhs) : mKey(rhs) { }
+ virtual int compare_type(const AssemblyKeyBase& key) const {
+ const T& rhs = static_cast<const AssemblyKey&>(key).mKey;
+ return android::compare_type(mKey, rhs);
+ }
+private:
+ T mKey;
+};
+
+// ----------------------------------------------------------------------------
+
+class Assembly
+{
+public:
+ Assembly(size_t size);
+ virtual ~Assembly();
+
+ ssize_t size() const;
+ uint32_t* base() const;
+ ssize_t resize(size_t size);
+
+ // protocol for sp<>
+ void incStrong(const void* id) const;
+ void decStrong(const void* id) const;
+ typedef void weakref_type;
+
+private:
+ mutable int32_t mCount;
+ uint32_t* mBase;
+ ssize_t mSize;
+};
+
+// ----------------------------------------------------------------------------
+
+class CodeCache
+{
+public:
+// pretty simple cache API...
+ CodeCache(size_t size);
+ ~CodeCache();
+
+ sp<Assembly> lookup(const AssemblyKeyBase& key) const;
+
+ int cache( const AssemblyKeyBase& key,
+ const sp<Assembly>& assembly);
+
+private:
+ // nothing to see here...
+ struct cache_entry_t {
+ inline cache_entry_t() { }
+ inline cache_entry_t(const sp<Assembly>& a, int64_t w)
+ : entry(a), when(w) { }
+ sp<Assembly> entry;
+ mutable int64_t when;
+ };
+
+ class key_t {
+ friend int compare_type(
+ const key_value_pair_t<key_t, cache_entry_t>&,
+ const key_value_pair_t<key_t, cache_entry_t>&);
+ const AssemblyKeyBase* mKey;
+ public:
+ key_t() { };
+ key_t(const AssemblyKeyBase& k) : mKey(&k) { }
+ };
+
+ mutable pthread_mutex_t mLock;
+ mutable int64_t mWhen;
+ size_t mCacheSize;
+ size_t mCacheInUse;
+ KeyedVector<key_t, cache_entry_t> mCacheData;
+
+ friend int compare_type(
+ const key_value_pair_t<key_t, cache_entry_t>&,
+ const key_value_pair_t<key_t, cache_entry_t>&);
+};
+
+// KeyedVector uses compare_type(), which is more efficient than
+// just using operator < ()
+inline int compare_type(
+ const key_value_pair_t<CodeCache::key_t, CodeCache::cache_entry_t>& lhs,
+ const key_value_pair_t<CodeCache::key_t, CodeCache::cache_entry_t>& rhs)
+{
+ return lhs.key.mKey->compare_type(*(rhs.key.mKey));
+}
+
+// ----------------------------------------------------------------------------
+
+}; // namespace android
+
+#endif //ANDROID_CODECACHE_H
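Note: a usage sketch of the cache API; the int key stands in for
pixelflinger's real needs-based key and is purely illustrative:

    CodeCache cache(32 * 1024);               // byte budget for generated code
    AssemblyKey<int> key(42);                 // any type with a compare_type()

    sp<Assembly> block = new Assembly(1024);
    // ... emit instructions into block->base(), resize() to the final size ...
    cache.cache(key, block);                  // may evict LRU entries to fit
    sp<Assembly> again = cache.lookup(key);   // a hit refreshes the LRU stamp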
diff --git a/libpixelflinger/codeflinger/GGLAssembler.cpp b/libpixelflinger/codeflinger/GGLAssembler.cpp
new file mode 100644
index 0000000..1cd189c
--- /dev/null
+++ b/libpixelflinger/codeflinger/GGLAssembler.cpp
@@ -0,0 +1,1150 @@
+/* libs/pixelflinger/codeflinger/GGLAssembler.cpp
+**
+** Copyright 2006, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+** http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+#define LOG_TAG "GGLAssembler"
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <cutils/log.h>
+
+#include "codeflinger/GGLAssembler.h"
+
+namespace android {
+
+// ----------------------------------------------------------------------------
+
+GGLAssembler::GGLAssembler(ARMAssemblerInterface* target)
+ : ARMAssemblerProxy(target), RegisterAllocator(), mOptLevel(7)
+{
+}
+
+GGLAssembler::~GGLAssembler()
+{
+}
+
+void GGLAssembler::prolog()
+{
+ ARMAssemblerProxy::prolog();
+}
+
+void GGLAssembler::epilog(uint32_t touched)
+{
+ ARMAssemblerProxy::epilog(touched);
+}
+
+void GGLAssembler::reset(int opt_level)
+{
+ ARMAssemblerProxy::reset();
+ RegisterAllocator::reset();
+ mOptLevel = opt_level;
+}
+
+// ---------------------------------------------------------------------------
+
+int GGLAssembler::scanline(const needs_t& needs, context_t const* c)
+{
+ int err = 0;
+ int opt_level = mOptLevel;
+ while (opt_level >= 0) {
+ reset(opt_level);
+ err = scanline_core(needs, c);
+ if (err == 0)
+ break;
+ opt_level--;
+ }
+
+ // XXX: in theory, pcForLabel is not valid before generate()
+ uint32_t* fragment_start_pc = pcForLabel("fragment_loop");
+ uint32_t* fragment_end_pc = pcForLabel("epilog");
+ const int per_fragment_ops = int(fragment_end_pc - fragment_start_pc);
+
+ // build a name for our pipeline
+ char name[64];
+ sprintf(name,
+ "scanline__%08X:%08X_%08X_%08X [%3d ipp]",
+ needs.p, needs.n, needs.t[0], needs.t[1], per_fragment_ops);
+
+ if (err) {
+ LOGE("Error while generating ""%s""\n", name);
+ disassemble(name);
+ return -1;
+ }
+
+ return generate(name);
+}
+
+int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c)
+{
+ int64_t duration = ggl_system_time();
+
+ mBlendFactorCached = 0;
+ mBlending = 0;
+ mMasking = 0;
+ mAA = GGL_READ_NEEDS(P_AA, needs.p);
+ mDithering = GGL_READ_NEEDS(P_DITHER, needs.p);
+ mAlphaTest = GGL_READ_NEEDS(P_ALPHA_TEST, needs.p) + GGL_NEVER;
+ mDepthTest = GGL_READ_NEEDS(P_DEPTH_TEST, needs.p) + GGL_NEVER;
+ mFog = GGL_READ_NEEDS(P_FOG, needs.p) != 0;
+ mSmooth = GGL_READ_NEEDS(SHADE, needs.n) != 0;
+ mBuilderContext.needs = needs;
+ mBuilderContext.c = c;
+ mBuilderContext.Rctx = reserveReg(R0); // context always in R0
+ mCbFormat = c->formats[ GGL_READ_NEEDS(CB_FORMAT, needs.n) ];
+
+ // ------------------------------------------------------------------------
+
+ decodeLogicOpNeeds(needs);
+
+ decodeTMUNeeds(needs, c);
+
+ mBlendSrc = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRC, needs.n));
+ mBlendDst = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DST, needs.n));
+ mBlendSrcA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRCA, needs.n));
+ mBlendDstA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DSTA, needs.n));
+
+ if (!mCbFormat.c[GGLFormat::ALPHA].h) {
+ if ((mBlendSrc == GGL_ONE_MINUS_DST_ALPHA) ||
+ (mBlendSrc == GGL_DST_ALPHA)) {
+ mBlendSrc = GGL_ONE;
+ }
+ if ((mBlendSrcA == GGL_ONE_MINUS_DST_ALPHA) ||
+ (mBlendSrcA == GGL_DST_ALPHA)) {
+ mBlendSrcA = GGL_ONE;
+ }
+ if ((mBlendDst == GGL_ONE_MINUS_DST_ALPHA) ||
+ (mBlendDst == GGL_DST_ALPHA)) {
+ mBlendDst = GGL_ONE;
+ }
+ if ((mBlendDstA == GGL_ONE_MINUS_DST_ALPHA) ||
+ (mBlendDstA == GGL_DST_ALPHA)) {
+ mBlendDstA = GGL_ONE;
+ }
+ }
+
+ // if we need the framebuffer, read it now
+ const int blending = blending_codes(mBlendSrc, mBlendDst) |
+ blending_codes(mBlendSrcA, mBlendDstA);
+
+ // XXX: handle special cases, destination not modified...
+ if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
+ (mBlendDst==GGL_ONE) && (mBlendDstA==GGL_ONE)) {
+ // Destination unmodified (beware of logic ops)
+ } else if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
+ (mBlendDst==GGL_ZERO) && (mBlendDstA==GGL_ZERO)) {
+ // Destination is zero (beware of logic ops)
+ }
+
+ int fbComponents = 0;
+ const int masking = GGL_READ_NEEDS(MASK_ARGB, needs.n);
+ for (int i=0 ; i<4 ; i++) {
+ const int mask = 1<<i;
+ component_info_t& info = mInfo[i];
+ int fs = i==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
+ int fd = i==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
+ if (fs==GGL_SRC_ALPHA_SATURATE && i==GGLFormat::ALPHA)
+ fs = GGL_ONE;
+ info.masked = !!(masking & mask);
+ info.inDest = !info.masked && mCbFormat.c[i].h &&
+ ((mLogicOp & LOGIC_OP_SRC) || (!mLogicOp));
+ if (mCbFormat.components >= GGL_LUMINANCE &&
+ (i==GGLFormat::GREEN || i==GGLFormat::BLUE)) {
+ info.inDest = false;
+ }
+ info.needed = (i==GGLFormat::ALPHA) &&
+ (isAlphaSourceNeeded() || mAlphaTest != GGL_ALWAYS);
+ info.replaced = !!(mTextureMachine.replaced & mask);
+ info.iterated = (!info.replaced && (info.inDest || info.needed));
+ info.smooth = mSmooth && info.iterated;
+ info.fog = mFog && info.inDest && (i != GGLFormat::ALPHA);
+ info.blend = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));
+
+ mBlending |= (info.blend ? mask : 0);
+ mMasking |= (mCbFormat.c[i].h && info.masked) ? mask : 0;
+ fbComponents |= mCbFormat.c[i].h ? mask : 0;
+ }
+
+ mAllMasked = (mMasking == fbComponents);
+ if (mAllMasked) {
+ mDithering = 0;
+ }
+
+ fragment_parts_t parts;
+
+ // ------------------------------------------------------------------------
+ prolog();
+ // ------------------------------------------------------------------------
+
+ build_scanline_prolog(parts, needs);
+
+ if (registerFile().status())
+ return registerFile().status();
+
+ // ------------------------------------------------------------------------
+ label("fragment_loop");
+ // ------------------------------------------------------------------------
+ {
+ Scratch regs(registerFile());
+
+ if (mDithering) {
+ // update the dither index.
+ MOV(AL, 0, parts.count.reg,
+ reg_imm(parts.count.reg, ROR, GGL_DITHER_ORDER_SHIFT));
+ ADD(AL, 0, parts.count.reg, parts.count.reg,
+ imm( 1 << (32 - GGL_DITHER_ORDER_SHIFT)));
+ MOV(AL, 0, parts.count.reg,
+ reg_imm(parts.count.reg, ROR, 32 - GGL_DITHER_ORDER_SHIFT));
+ }
+
+ // XXX: could we do an early alpha-test here in some cases?
+ // It would probably be used only with smooth-alpha and no texture
+ // (or no alpha component in the texture).
+
+ // Early z-test
+ if (mAlphaTest==GGL_ALWAYS) {
+ build_depth_test(parts, Z_TEST|Z_WRITE);
+ } else {
+ // we cannot do the z-write here, because
+ // it might be killed by the alpha-test later
+ build_depth_test(parts, Z_TEST);
+ }
+
+ { // texture coordinates
+ Scratch scratches(registerFile());
+
+ // texel generation
+ build_textures(parts, regs);
+ }
+
+ if ((blending & (FACTOR_DST|BLEND_DST)) ||
+ (mMasking && !mAllMasked) ||
+ (mLogicOp & LOGIC_OP_DST))
+ {
+ // blending / logic_op / masking need the framebuffer
+ mDstPixel.setTo(regs.obtain(), &mCbFormat);
+
+ // load the framebuffer pixel
+ comment("fetch color-buffer");
+ load(parts.cbPtr, mDstPixel);
+ }
+
+ if (registerFile().status())
+ return registerFile().status();
+
+ pixel_t pixel;
+ int directTex = mTextureMachine.directTexture;
+ if (directTex | parts.packed) {
+ // note: we can't have both a direct texture and
+ // packed iterated color here
+ pixel = directTex ? parts.texel[directTex-1] : parts.iterated;
+ pixel.flags &= ~CORRUPTIBLE;
+ } else {
+ if (mDithering) {
+ const int ctxtReg = mBuilderContext.Rctx;
+ const int mask = GGL_DITHER_SIZE-1;
+ parts.dither = reg_t(regs.obtain());
+ AND(AL, 0, parts.dither.reg, parts.count.reg, imm(mask));
+ ADD(AL, 0, parts.dither.reg, parts.dither.reg, ctxtReg);
+ LDRB(AL, parts.dither.reg, parts.dither.reg,
+ immed12_pre(GGL_OFFSETOF(ditherMatrix)));
+ }
+
+ // allocate a register for the resulting pixel
+ pixel.setTo(regs.obtain(), &mCbFormat, FIRST);
+
+ build_component(pixel, parts, GGLFormat::ALPHA, regs);
+
+ if (mAlphaTest!=GGL_ALWAYS) {
+ // only handle the z-write part here. We know z-test
+ // was successful, as well as alpha-test.
+ build_depth_test(parts, Z_WRITE);
+ }
+
+ build_component(pixel, parts, GGLFormat::RED, regs);
+ build_component(pixel, parts, GGLFormat::GREEN, regs);
+ build_component(pixel, parts, GGLFormat::BLUE, regs);
+
+ pixel.flags |= CORRUPTIBLE;
+ }
+
+ if (registerFile().status())
+ return registerFile().status();
+
+ if (pixel.reg == -1) {
+ // be defensive here. If we get here, it's probably
+ // because this whole fragment is a no-op.
+ pixel = mDstPixel;
+ }
+
+ if (!mAllMasked) {
+ // logic operation
+ build_logic_op(pixel, regs);
+
+ // masking
+ build_masking(pixel, regs);
+
+ comment("store");
+ store(parts.cbPtr, pixel, WRITE_BACK);
+ }
+ }
+
+ if (registerFile().status())
+ return registerFile().status();
+
+ // update the iterated color...
+ if (parts.reload != 3) {
+ build_smooth_shade(parts);
+ }
+
+ // update iterated z
+ build_iterate_z(parts);
+
+ // update iterated fog
+ build_iterate_f(parts);
+
+ SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
+ B(PL, "fragment_loop");
+ label("epilog");
+ epilog(registerFile().touched());
+
+ if ((mAlphaTest!=GGL_ALWAYS) || (mDepthTest!=GGL_ALWAYS)) {
+ if (mDepthTest!=GGL_ALWAYS) {
+ label("discard_before_textures");
+ build_iterate_texture_coordinates(parts);
+ }
+ label("discard_after_textures");
+ build_smooth_shade(parts);
+ build_iterate_z(parts);
+ build_iterate_f(parts);
+ if (!mAllMasked) {
+ ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3));
+ }
+ SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
+ B(PL, "fragment_loop");
+ epilog(registerFile().touched());
+ }
+
+ return registerFile().status();
+}
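+
+// In rough C-style pseudo-code, the scanline program emitted above
+// behaves like the sketch below (dithering, fog, masking and logic ops
+// omitted; the names are illustrative, not actual pixelflinger symbols):
+//
+//     while (count >= 0) {
+//         if (depth_test_passes(z)) {
+//             texel = fetch_textures();
+//             frag  = combine(texel, iterated_color);
+//             if (alpha_test_passes(frag.a))
+//                 *cb_ptr = blend(frag, *cb_ptr);
+//         }
+//         cb_ptr++; // advance color/z/fog/texture iterators
+//         count--;
+//     }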
+
+// ---------------------------------------------------------------------------
+
+void GGLAssembler::build_scanline_prolog(
+ fragment_parts_t& parts, const needs_t& needs)
+{
+ Scratch scratches(registerFile());
+ int Rctx = mBuilderContext.Rctx;
+
+ // compute count
+ comment("compute ct (# of pixels to process)");
+ parts.count.setTo(obtainReg());
+ int Rx = scratches.obtain();
+ int Ry = scratches.obtain();
+ CONTEXT_LOAD(Rx, iterators.xl);
+ CONTEXT_LOAD(parts.count.reg, iterators.xr);
+ CONTEXT_LOAD(Ry, iterators.y);
+
+ // parts.count = iterators.xr - Rx
+ SUB(AL, 0, parts.count.reg, parts.count.reg, Rx);
+ SUB(AL, 0, parts.count.reg, parts.count.reg, imm(1));
+
+ if (mDithering) {
+ // parts.count.reg = 0xNNNNXXDD
+ // NNNN = count-1
+ // DD = dither offset
+ // XX = 0xxxxxxx (x = garbage)
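+ //
+ // Worked example (assuming a 4x4 dither matrix, i.e.
+ // GGL_DITHER_ORDER_SHIFT==2 and GGL_DITHER_MASK==3): for xl=5,
+ // xr=13, y=6 we get count-1 = 7, tx = 5&3 = 1, ty = 6&3 = 2,
+ // so DD = 1 + (2<<2) = 9 and the register holds 0x00070009.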
+ Scratch scratches(registerFile());
+ int tx = scratches.obtain();
+ int ty = scratches.obtain();
+ AND(AL, 0, tx, Rx, imm(GGL_DITHER_MASK));
+ AND(AL, 0, ty, Ry, imm(GGL_DITHER_MASK));
+ ADD(AL, 0, tx, tx, reg_imm(ty, LSL, GGL_DITHER_ORDER_SHIFT));
+ ORR(AL, 0, parts.count.reg, tx, reg_imm(parts.count.reg, LSL, 16));
+ } else {
+ // parts.count.reg = 0xNNNN0000
+ // NNNN = count-1
+ MOV(AL, 0, parts.count.reg, reg_imm(parts.count.reg, LSL, 16));
+ }
+
+ if (!mAllMasked) {
+ // compute dst ptr
+ comment("compute color-buffer pointer");
+ const int cb_bits = mCbFormat.size*8;
+ int Rs = scratches.obtain();
+ parts.cbPtr.setTo(obtainReg(), cb_bits);
+ CONTEXT_LOAD(Rs, state.buffers.color.stride);
+ CONTEXT_LOAD(parts.cbPtr.reg, state.buffers.color.data);
+ SMLABB(AL, Rs, Ry, Rs, Rx); // Rs = Rx + Ry*Rs
+ base_offset(parts.cbPtr, parts.cbPtr, Rs);
+ scratches.recycle(Rs);
+ }
+
+ // init fog
+ const int need_fog = GGL_READ_NEEDS(P_FOG, needs.p);
+ if (need_fog) {
+ comment("compute initial fog coordinate");
+ Scratch scratches(registerFile());
+ int dfdx = scratches.obtain();
+ int ydfdy = scratches.obtain();
+ int f = ydfdy;
+ CONTEXT_LOAD(dfdx, generated_vars.dfdx);
+ CONTEXT_LOAD(ydfdy, iterators.ydfdy);
+ MLA(AL, 0, f, Rx, dfdx, ydfdy);
+ CONTEXT_STORE(f, generated_vars.f);
+ }
+
+ // init Z coordinate
+ if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
+ parts.z = reg_t(obtainReg());
+ comment("compute initial Z coordinate");
+ Scratch scratches(registerFile());
+ int dzdx = scratches.obtain();
+ int ydzdy = parts.z.reg;
+ CONTEXT_LOAD(dzdx, generated_vars.dzdx); // 1.31 fixed-point
+ CONTEXT_LOAD(ydzdy, iterators.ydzdy); // 1.31 fixed-point
+ MLA(AL, 0, parts.z.reg, Rx, dzdx, ydzdy);
+
+ // we're going to index zbase off parts.count
+ // zbase = base + (xl + count + stride*y)*2
+ int Rs = dzdx;
+ int zbase = scratches.obtain();
+ CONTEXT_LOAD(Rs, state.buffers.depth.stride);
+ CONTEXT_LOAD(zbase, state.buffers.depth.data);
+ SMLABB(AL, Rs, Ry, Rs, Rx);
+ ADD(AL, 0, Rs, Rs, reg_imm(parts.count.reg, LSR, 16));
+ ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1));
+ CONTEXT_STORE(zbase, generated_vars.zbase);
+ }
+
+ // init texture coordinates
+ init_textures(parts.coords, reg_t(Rx), reg_t(Ry));
+ scratches.recycle(Ry);
+
+ // iterated color
+ init_iterated_color(parts, reg_t(Rx));
+
+ // init coverage factor application (anti-aliasing)
+ if (mAA) {
+ parts.covPtr.setTo(obtainReg(), 16);
+ CONTEXT_LOAD(parts.covPtr.reg, state.buffers.coverage);
+ ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1));
+ }
+}
+
+// ---------------------------------------------------------------------------
+
+void GGLAssembler::build_component( pixel_t& pixel,
+ const fragment_parts_t& parts,
+ int component,
+ Scratch& regs)
+{
+ static char const * comments[] = {"alpha", "red", "green", "blue"};
+ comment(comments[component]);
+
+ // local register file
+ Scratch scratches(registerFile());
+ const int dst_component_size = pixel.component_size(component);
+
+ component_t temp(-1);
+ build_incoming_component( temp, dst_component_size,
+ parts, component, scratches, regs);
+
+ if (mInfo[component].inDest) {
+
+ // blending...
+ build_blending( temp, mDstPixel, component, scratches );
+
+ // downshift component and rebuild pixel...
+ downshift(pixel, component, temp, parts.dither);
+ }
+}
+
+void GGLAssembler::build_incoming_component(
+ component_t& temp,
+ int dst_size,
+ const fragment_parts_t& parts,
+ int component,
+ Scratch& scratches,
+ Scratch& global_regs)
+{
+ const uint32_t component_mask = 1<<component;
+
+ // Figure out what we need for the blending stage...
+ int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
+ int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
+ if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) {
+ fs = GGL_ONE;
+ }
+
+ // Figure out what we need to extract and for what reason
+ const int blending = blending_codes(fs, fd);
+
+ // Are we actually going to blend?
+ const int need_blending = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));
+
+ // expand the source if the destination has more bits
+ bool need_expander = false;
+ for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT-1 ; i++) {
+ texture_unit_t& tmu = mTextureMachine.tmu[i];
+ if ((tmu.format_idx) &&
+ (parts.texel[i].component_size(component) < dst_size)) {
+ need_expander = true;
+ }
+ }
+
+ // do we need to extract this component?
+ const bool multiTexture = mTextureMachine.activeUnits > 1;
+ const int blend_needs_alpha_source = (component==GGLFormat::ALPHA) &&
+ (isAlphaSourceNeeded());
+ int need_extract = mInfo[component].needed;
+ if (mInfo[component].inDest)
+ {
+ need_extract |= ((need_blending ?
+ (blending & (BLEND_SRC|FACTOR_SRC)) : need_expander));
+ need_extract |= (mTextureMachine.mask != mTextureMachine.replaced);
+ need_extract |= mInfo[component].smooth;
+ need_extract |= mInfo[component].fog;
+ need_extract |= mDithering;
+ need_extract |= multiTexture;
+ }
+
+ if (need_extract) {
+ Scratch& regs = blend_needs_alpha_source ? global_regs : scratches;
+ component_t fragment;
+
+ // iterated color
+ build_iterated_color(fragment, parts, component, regs);
+
+ // texture environment (decal, modulate, replace)
+ build_texture_environment(fragment, parts, component, regs);
+
+ // expand the source if the destination has more bits
+ if (need_expander && (fragment.size() < dst_size)) {
+ // we're here only if we fetched a texel
+ // (so we know for sure fragment is CORRUPTIBLE)
+ expand(fragment, fragment, dst_size);
+ }
+
+ // We have a few specific things to do for the alpha-channel
+ if ((component==GGLFormat::ALPHA) &&
+ (mInfo[component].needed || fragment.size()<dst_size))
+ {
+ // convert to integer_t first and make sure
+ // we don't corrupt a needed register
+ if (fragment.l) {
+ component_t incoming(fragment);
+ modify(fragment, regs);
+ MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSR, incoming.l));
+ fragment.h -= fragment.l;
+ fragment.l = 0;
+ }
+
+ // coverage factor application
+ build_coverage_application(fragment, parts, regs);
+
+ // alpha-test
+ build_alpha_test(fragment, parts);
+
+ if (blend_needs_alpha_source) {
+ // We keep only 8 bits for the blending stage
+ const int shift = fragment.h <= 8 ? 0 : fragment.h-8;
+ if (fragment.flags & CORRUPTIBLE) {
+ fragment.flags &= ~CORRUPTIBLE;
+ mAlphaSource.setTo(fragment.reg,
+ fragment.size(), fragment.flags);
+ if (shift) {
+ MOV(AL, 0, mAlphaSource.reg,
+ reg_imm(mAlphaSource.reg, LSR, shift));
+ }
+ } else {
+ // XXX: it would be better to do this in build_blend_factor()
+ // so we can avoid the extra MOV below.
+ mAlphaSource.setTo(regs.obtain(),
+ fragment.size(), CORRUPTIBLE);
+ if (shift) {
+ MOV(AL, 0, mAlphaSource.reg,
+ reg_imm(fragment.reg, LSR, shift));
+ } else {
+ MOV(AL, 0, mAlphaSource.reg, fragment.reg);
+ }
+ }
+ mAlphaSource.s -= shift;
+ }
+ }
+
+ // fog...
+ build_fog( fragment, component, regs );
+
+ temp = fragment;
+ } else {
+ if (mInfo[component].inDest) {
+ // extraction not needed and replace
+ // we just select the right component
+ if ((mTextureMachine.replaced & component_mask) == 0) {
+ // component wasn't replaced, so use it!
+ temp = component_t(parts.iterated, component);
+ }
+ for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
+ const texture_unit_t& tmu = mTextureMachine.tmu[i];
+ if ((tmu.mask & component_mask) &&
+ ((tmu.replaced & component_mask) == 0)) {
+ temp = component_t(parts.texel[i], component);
+ }
+ }
+ }
+ }
+}
+
+bool GGLAssembler::isAlphaSourceNeeded() const
+{
+ // XXX: also needed for alpha-test
+ const int bs = mBlendSrc;
+ const int bd = mBlendDst;
+ return bs==GGL_SRC_ALPHA_SATURATE ||
+ bs==GGL_SRC_ALPHA || bs==GGL_ONE_MINUS_SRC_ALPHA ||
+ bd==GGL_SRC_ALPHA || bd==GGL_ONE_MINUS_SRC_ALPHA ;
+}
+
+// ---------------------------------------------------------------------------
+
+void GGLAssembler::build_smooth_shade(const fragment_parts_t& parts)
+{
+ if (mSmooth && !parts.iterated_packed) {
+ // update the iterated color in a pipelined way...
+ comment("update iterated color");
+ Scratch scratches(registerFile());
+
+ const int reload = parts.reload;
+ for (int i=0 ; i<4 ; i++) {
+ if (!mInfo[i].iterated)
+ continue;
+
+ int c = parts.argb[i].reg;
+ int dx = parts.argb_dx[i].reg;
+
+ if (reload & 1) {
+ c = scratches.obtain();
+ CONTEXT_LOAD(c, generated_vars.argb[i].c);
+ }
+ if (reload & 2) {
+ dx = scratches.obtain();
+ CONTEXT_LOAD(dx, generated_vars.argb[i].dx);
+ }
+
+ ADD(AL, 0, c, c, dx);
+
+ if (reload & 1) {
+ CONTEXT_STORE(c, generated_vars.argb[i].c);
+ scratches.recycle(c);
+ }
+ if (reload & 2) {
+ scratches.recycle(dx);
+ }
+ }
+ }
+}
+
+// ---------------------------------------------------------------------------
+
+void GGLAssembler::build_coverage_application(component_t& fragment,
+ const fragment_parts_t& parts, Scratch& regs)
+{
+ // here fragment.l is guaranteed to be 0
+ if (mAA) {
+ // coverages are 1.15 fixed-point numbers
+ comment("coverage application");
+
+ component_t incoming(fragment);
+ modify(fragment, regs);
+
+ Scratch scratches(registerFile());
+ int cf = scratches.obtain();
+ LDRH(AL, cf, parts.covPtr.reg, immed8_post(2));
+ if (fragment.h > 31) {
+ fragment.h--;
+ SMULWB(AL, fragment.reg, incoming.reg, cf);
+ } else {
+ MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSL, 1));
+ SMULWB(AL, fragment.reg, fragment.reg, cf);
+ }
+ }
+}
+
+// ---------------------------------------------------------------------------
+
+void GGLAssembler::build_alpha_test(component_t& fragment,
+ const fragment_parts_t& parts)
+{
+ if (mAlphaTest != GGL_ALWAYS) {
+ comment("Alpha Test");
+ Scratch scratches(registerFile());
+ int ref = scratches.obtain();
+ const int shift = GGL_COLOR_BITS-fragment.size();
+ CONTEXT_LOAD(ref, state.alpha_test.ref);
+ if (shift) CMP(AL, fragment.reg, reg_imm(ref, LSR, shift));
+ else CMP(AL, fragment.reg, ref);
+ int cc = NV;
+ switch (mAlphaTest) {
+ case GGL_NEVER: cc = NV; break;
+ case GGL_LESS: cc = LT; break;
+ case GGL_EQUAL: cc = EQ; break;
+ case GGL_LEQUAL: cc = LS; break;
+ case GGL_GREATER: cc = HI; break;
+ case GGL_NOTEQUAL: cc = NE; break;
+ case GGL_GEQUAL: cc = HS; break;
+ }
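+ // ARM condition codes come in true/false pairs differing only in
+ // bit 0, so cc^1 selects the opposite condition: branch to the
+ // discard path when the alpha test fails.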
+ B(cc^1, "discard_after_textures");
+ }
+}
+
+// ---------------------------------------------------------------------------
+
+void GGLAssembler::build_depth_test(
+ const fragment_parts_t& parts, uint32_t mask)
+{
+ mask &= Z_TEST|Z_WRITE;
+ const needs_t& needs = mBuilderContext.needs;
+ const int zmask = GGL_READ_NEEDS(P_MASK_Z, needs.p);
+ Scratch scratches(registerFile());
+
+ if (mDepthTest != GGL_ALWAYS || zmask) {
+ int cc=AL, ic=AL;
+ switch (mDepthTest) {
+ case GGL_LESS: ic = HI; break;
+ case GGL_EQUAL: ic = EQ; break;
+ case GGL_LEQUAL: ic = HS; break;
+ case GGL_GREATER: ic = LT; break;
+ case GGL_NOTEQUAL: ic = NE; break;
+ case GGL_GEQUAL: ic = LS; break;
+ case GGL_NEVER:
+ // this never happens, because it's taken care of when
+ // computing the needs. But we keep it for completeness.
+ comment("Depth Test (NEVER)");
+ B(AL, "discard_before_textures");
+ return;
+ case GGL_ALWAYS:
+ // we're here because zmask is enabled
+ mask &= ~Z_TEST; // test always passes.
+ break;
+ }
+
+ // invert the condition (ARM condition codes come in pairs that differ in bit 0)
+ cc = ic^1;
+
+ if ((mask & Z_WRITE) && !zmask) {
+ mask &= ~Z_WRITE;
+ }
+
+ if (!mask)
+ return;
+
+ comment("Depth Test");
+
+ int zbase = scratches.obtain();
+ int depth = scratches.obtain();
+ int z = parts.z.reg;
+
+ CONTEXT_LOAD(zbase, generated_vars.zbase); // stall
+ SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15));
+ // above does zbase = zbase - ((count >> 16) << 1), i.e. it recovers
+ // the address of the current pixel's depth value (count counts down)
+
+ if (mask & Z_TEST) {
+ LDRH(AL, depth, zbase); // stall
+ CMP(AL, depth, reg_imm(z, LSR, 16));
+ B(cc, "discard_before_textures");
+ }
+ if (mask & Z_WRITE) {
+ if (mask == Z_WRITE) {
+ // only z-write asked, cc is meaningless
+ ic = AL;
+ }
+ MOV(AL, 0, depth, reg_imm(z, LSR, 16));
+ STRH(ic, depth, zbase);
+ }
+ }
+}
+
+void GGLAssembler::build_iterate_z(const fragment_parts_t& parts)
+{
+ const needs_t& needs = mBuilderContext.needs;
+ if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
+ Scratch scratches(registerFile());
+ int dzdx = scratches.obtain();
+ CONTEXT_LOAD(dzdx, generated_vars.dzdx); // stall
+ ADD(AL, 0, parts.z.reg, parts.z.reg, dzdx);
+ }
+}
+
+void GGLAssembler::build_iterate_f(const fragment_parts_t& parts)
+{
+ const needs_t& needs = mBuilderContext.needs;
+ if (GGL_READ_NEEDS(P_FOG, needs.p)) {
+ Scratch scratches(registerFile());
+ int dfdx = scratches.obtain();
+ int f = scratches.obtain();
+ CONTEXT_LOAD(f, generated_vars.f);
+ CONTEXT_LOAD(dfdx, generated_vars.dfdx); // stall
+ ADD(AL, 0, f, f, dfdx);
+ CONTEXT_STORE(f, generated_vars.f);
+ }
+}
+
+// ---------------------------------------------------------------------------
+
+void GGLAssembler::build_logic_op(pixel_t& pixel, Scratch& regs)
+{
+ const needs_t& needs = mBuilderContext.needs;
+ const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
+ if (opcode == GGL_COPY)
+ return;
+
+ comment("logic operation");
+
+ pixel_t s(pixel);
+ if (!(pixel.flags & CORRUPTIBLE)) {
+ pixel.reg = regs.obtain();
+ pixel.flags |= CORRUPTIBLE;
+ }
+
+ pixel_t d(mDstPixel);
+ switch(opcode) {
+ case GGL_CLEAR: MOV(AL, 0, pixel.reg, imm(0)); break;
+ case GGL_AND: AND(AL, 0, pixel.reg, s.reg, d.reg); break;
+ case GGL_AND_REVERSE: BIC(AL, 0, pixel.reg, s.reg, d.reg); break;
+ case GGL_COPY: break;
+ case GGL_AND_INVERTED: BIC(AL, 0, pixel.reg, d.reg, s.reg); break;
+ case GGL_NOOP: MOV(AL, 0, pixel.reg, d.reg); break;
+ case GGL_XOR: EOR(AL, 0, pixel.reg, s.reg, d.reg); break;
+ case GGL_OR: ORR(AL, 0, pixel.reg, s.reg, d.reg); break;
+ case GGL_NOR: ORR(AL, 0, pixel.reg, s.reg, d.reg);
+ MVN(AL, 0, pixel.reg, pixel.reg); break;
+ case GGL_EQUIV: EOR(AL, 0, pixel.reg, s.reg, d.reg);
+ MVN(AL, 0, pixel.reg, pixel.reg); break;
+ case GGL_INVERT: MVN(AL, 0, pixel.reg, d.reg); break;
+ case GGL_OR_REVERSE: // s | ~d == ~(~s & d)
+ BIC(AL, 0, pixel.reg, d.reg, s.reg);
+ MVN(AL, 0, pixel.reg, pixel.reg); break;
+ case GGL_COPY_INVERTED: MVN(AL, 0, pixel.reg, s.reg); break;
+ case GGL_OR_INVERTED: // ~s | d == ~(s & ~d)
+ BIC(AL, 0, pixel.reg, s.reg, d.reg);
+ MVN(AL, 0, pixel.reg, pixel.reg); break;
+ case GGL_NAND: AND(AL, 0, pixel.reg, s.reg, d.reg);
+ MVN(AL, 0, pixel.reg, pixel.reg); break;
+ case GGL_SET: MVN(AL, 0, pixel.reg, imm(0)); break;
+ };
+}
+
+// ---------------------------------------------------------------------------
+
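+// ARM data-processing immediates are an 8-bit value rotated right by an
+// even amount. find_bottom() returns the (even) position of the lowest
+// set bit pair, and normalize() rotates a mask (by an even amount,
+// recorded in 'rot') so that its lowest set bits sit at bit 0.
+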
+static uint32_t find_bottom(uint32_t val)
+{
+ uint32_t i = 0;
+ while (!(val & (3<<i)))
+ i+= 2;
+ return i;
+}
+
+static void normalize(uint32_t& val, uint32_t& rot)
+{
+ rot = 0;
+ while (!(val&3) || (val & 0xFC000000)) {
+ uint32_t newval;
+ newval = val >> 2;
+ newval |= (val&3) << 30;
+ val = newval;
+ rot += 2;
+ if (rot == 32) {
+ rot = 0;
+ break;
+ }
+ }
+}
+
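+// build_and_immediate() emits "d = s & mask" as a short sequence of AND
+// or BIC immediates, one per 8-bit chunk of the (possibly complemented)
+// mask. For example, mask 0x0F0F with bits=16 is not encodable as a
+// single ARM immediate, so its complement 0xF0F0 is applied with two
+// BICs -- BIC d, s, #0x00F0 then BIC d, d, #0xF000 -- which is
+// equivalent to one AND with 0x0F0F.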
+void GGLAssembler::build_and_immediate(int d, int s, uint32_t mask, int bits)
+{
+ uint32_t rot;
+ uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
+ mask &= size;
+
+ if (mask == size) {
+ if (d != s)
+ MOV( AL, 0, d, s);
+ return;
+ }
+
+ int negative_logic = !isValidImmediate(mask);
+ if (negative_logic) {
+ mask = ~mask & size;
+ }
+ normalize(mask, rot);
+
+ if (mask) {
+ while (mask) {
+ uint32_t bitpos = find_bottom(mask);
+ int shift = (rot + bitpos) & 31; // rotate amount, mod 32
+ uint32_t m = mask & (0xff << bitpos);
+ mask &= ~m;
+ m >>= bitpos;
+ // guard shift==0: m >> 32 would be undefined behavior
+ int32_t newMask = shift ? int32_t((m<<shift) | (m>>(32-shift))) : int32_t(m);
+ if (!negative_logic) {
+ AND( AL, 0, d, s, imm(newMask) );
+ } else {
+ BIC( AL, 0, d, s, imm(newMask) );
+ }
+ s = d;
+ }
+ } else {
+ MOV( AL, 0, d, imm(0));
+ }
+}
+
+void GGLAssembler::build_masking(pixel_t& pixel, Scratch& regs)
+{
+ if (!mMasking || mAllMasked) {
+ return;
+ }
+
+ comment("color mask");
+
+ pixel_t fb(mDstPixel);
+ pixel_t s(pixel);
+ if (!(pixel.flags & CORRUPTIBLE)) {
+ pixel.reg = regs.obtain();
+ pixel.flags |= CORRUPTIBLE;
+ }
+
+ int mask = 0;
+ for (int i=0 ; i<4 ; i++) {
+ const int component_mask = 1<<i;
+ const int h = fb.format.c[i].h;
+ const int l = fb.format.c[i].l;
+ if (h && (!(mMasking & component_mask))) {
+ mask |= ((1<<(h-l))-1) << l;
+ }
+ }
+
+ // There is no need to clear the masked components of the source
+ // (unless we applied a logic op), because they're already zeroed
+ // by construction (masked components are not computed)
+
+ if (mLogicOp) {
+ const needs_t& needs = mBuilderContext.needs;
+ const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
+ if (opcode != GGL_CLEAR) {
+ // clear masked component of source
+ build_and_immediate(pixel.reg, s.reg, mask, fb.size());
+ s = pixel;
+ }
+ }
+
+ // clear non masked components of destination
+ build_and_immediate(fb.reg, fb.reg, ~mask, fb.size());
+
+ // or back the channels that were masked
+ if (s.reg == fb.reg) {
+ // this is in fact a MOV
+ if (s.reg == pixel.reg) {
+ // ugh. this is in fact a nop
+ } else {
+ MOV(AL, 0, pixel.reg, fb.reg);
+ }
+ } else {
+ ORR(AL, 0, pixel.reg, s.reg, fb.reg);
+ }
+}
+
+// ---------------------------------------------------------------------------
+
+void GGLAssembler::base_offset(
+ const pointer_t& d, const pointer_t& b, const reg_t& o)
+{
+ switch (b.size) {
+ case 32:
+ ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2));
+ break;
+ case 24:
+ if (d.reg == b.reg) {
+ ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
+ ADD(AL, 0, d.reg, d.reg, o.reg);
+ } else {
+ ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1));
+ ADD(AL, 0, d.reg, d.reg, b.reg);
+ }
+ break;
+ case 16:
+ ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
+ break;
+ case 8:
+ ADD(AL, 0, d.reg, b.reg, o.reg);
+ break;
+ }
+}
+
+// ----------------------------------------------------------------------------
+// cheezy register allocator...
+// ----------------------------------------------------------------------------
+
+void RegisterAllocator::reset()
+{
+ mRegs.reset();
+}
+
+int RegisterAllocator::reserveReg(int reg)
+{
+ return mRegs.reserve(reg);
+}
+
+int RegisterAllocator::obtainReg()
+{
+ return mRegs.obtain();
+}
+
+void RegisterAllocator::recycleReg(int reg)
+{
+ mRegs.recycle(reg);
+}
+
+RegisterAllocator::RegisterFile& RegisterAllocator::registerFile()
+{
+ return mRegs;
+}
+
+// ----------------------------------------------------------------------------
+
+RegisterAllocator::RegisterFile::RegisterFile()
+ : mRegs(0), mTouched(0), mStatus(0)
+{
+ reserve(ARMAssemblerInterface::SP);
+ reserve(ARMAssemblerInterface::PC);
+}
+
+RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs)
+ : mRegs(rhs.mRegs), mTouched(rhs.mTouched), mStatus(rhs.mStatus)
+{
+}
+
+RegisterAllocator::RegisterFile::~RegisterFile()
+{
+}
+
+bool RegisterAllocator::RegisterFile::operator == (const RegisterFile& rhs) const
+{
+ return (mRegs == rhs.mRegs);
+}
+
+void RegisterAllocator::RegisterFile::reset()
+{
+ mRegs = mTouched = mStatus = 0;
+ reserve(ARMAssemblerInterface::SP);
+ reserve(ARMAssemblerInterface::PC);
+}
+
+int RegisterAllocator::RegisterFile::reserve(int reg)
+{
+ LOG_ALWAYS_FATAL_IF(isUsed(reg),
+ "reserving register %d, but already in use",
+ reg);
+ mRegs |= (1<<reg);
+ mTouched |= mRegs;
+ return reg;
+}
+
+void RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask)
+{
+ mRegs |= regMask;
+ mTouched |= regMask;
+}
+
+int RegisterAllocator::RegisterFile::isUsed(int reg) const
+{
+ LOG_ALWAYS_FATAL_IF(reg>=16, "invalid register %d", reg);
+ return mRegs & (1<<reg);
+}
+
+int RegisterAllocator::RegisterFile::obtain()
+{
+ const char priorityList[14] = { 0, 1, 2, 3,
+ 12, 14, 4, 5,
+ 6, 7, 8, 9,
+ 10, 11 };
+ const int nbreg = sizeof(priorityList);
+ int i, r;
+ for (i=0 ; i<nbreg ; i++) {
+ r = priorityList[i];
+ if (!isUsed(r)) {
+ break;
+ }
+ }
+ // this is not an error anymore, because we'll try again with
+ // a lower optimization level.
+ //LOGE_IF(i >= nbreg, "pixelflinger ran out of registers\n");
+ if (i >= nbreg) {
+ mStatus |= OUT_OF_REGISTERS;
+ // we return SP so we can more easily debug things;
+ // the code will never be run anyway.
+ return ARMAssemblerInterface::SP;
+ }
+ reserve(r);
+ return r;
+}
+
+bool RegisterAllocator::RegisterFile::hasFreeRegs() const
+{
+ return (mRegs & 0xFFFF) != 0xFFFF;
+}
+
+int RegisterAllocator::RegisterFile::countFreeRegs() const
+{
+ int f = ~mRegs & 0xFFFF;
+ // now count the number of set bits (SWAR popcount: sum 1-, 2-, 4- and 8-bit fields in parallel)
+ f = (f & 0x5555) + ((f>>1) & 0x5555);
+ f = (f & 0x3333) + ((f>>2) & 0x3333);
+ f = (f & 0x0F0F) + ((f>>4) & 0x0F0F);
+ f = (f & 0x00FF) + ((f>>8) & 0x00FF);
+ return f;
+}
+
+void RegisterAllocator::RegisterFile::recycle(int reg)
+{
+ LOG_FATAL_IF(!isUsed(reg),
+ "recycling unallocated register %d",
+ reg);
+ mRegs &= ~(1<<reg);
+}
+
+void RegisterAllocator::RegisterFile::recycleSeveral(uint32_t regMask)
+{
+ LOG_FATAL_IF((mRegs & regMask)!=regMask,
+ "recycling unallocated registers "
+ "(recycle=%08x, allocated=%08x, unallocated=%08x)",
+ regMask, mRegs, regMask & ~mRegs);
+ mRegs &= ~regMask;
+}
+
+uint32_t RegisterAllocator::RegisterFile::touched() const
+{
+ return mTouched;
+}
+
+// ----------------------------------------------------------------------------
+
+}; // namespace android
+
diff --git a/libpixelflinger/codeflinger/GGLAssembler.h b/libpixelflinger/codeflinger/GGLAssembler.h
new file mode 100644
index 0000000..d1d29f0
--- /dev/null
+++ b/libpixelflinger/codeflinger/GGLAssembler.h
@@ -0,0 +1,554 @@
+/* libs/pixelflinger/codeflinger/GGLAssembler.h
+**
+** Copyright 2006, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+** http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+
+#ifndef ANDROID_GGLASSEMBLER_H
+#define ANDROID_GGLASSEMBLER_H
+
+#include <stdint.h>
+#include <string.h>   // for memset() in pixel_t
+#include <sys/types.h>
+
+#include <private/pixelflinger/ggl_context.h>
+
+#include "codeflinger/ARMAssemblerProxy.h"
+
+
+namespace android {
+
+// ----------------------------------------------------------------------------
+
+#define CONTEXT_LOAD(REG, FIELD) \
+ LDR(AL, REG, mBuilderContext.Rctx, immed12_pre(GGL_OFFSETOF(FIELD)))
+
+#define CONTEXT_STORE(REG, FIELD) \
+ STR(AL, REG, mBuilderContext.Rctx, immed12_pre(GGL_OFFSETOF(FIELD)))
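+
+// For instance, CONTEXT_LOAD(Rx, iterators.xl) expands (roughly) to
+// LDR(AL, Rx, Rctx, immed12_pre(offsetof(context_t, iterators.xl))):
+// a single load from the context structure that R0 points to.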
+
+
+class RegisterAllocator
+{
+public:
+ class RegisterFile;
+
+ RegisterFile& registerFile();
+ int reserveReg(int reg);
+ int obtainReg();
+ void recycleReg(int reg);
+ void reset();
+
+ class RegisterFile
+ {
+ public:
+ RegisterFile();
+ RegisterFile(const RegisterFile& rhs);
+ ~RegisterFile();
+
+ void reset();
+
+ bool operator == (const RegisterFile& rhs) const;
+ bool operator != (const RegisterFile& rhs) const {
+ return !operator == (rhs);
+ }
+
+ int reserve(int reg);
+ void reserveSeveral(uint32_t regMask);
+
+ void recycle(int reg);
+ void recycleSeveral(uint32_t regMask);
+
+ int obtain();
+ inline int isUsed(int reg) const;
+
+ bool hasFreeRegs() const;
+ int countFreeRegs() const;
+
+ uint32_t touched() const;
+ inline uint32_t status() const { return mStatus; }
+
+ enum {
+ OUT_OF_REGISTERS = 0x1
+ };
+
+ private:
+ uint32_t mRegs;
+ uint32_t mTouched;
+ uint32_t mStatus;
+ };
+
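+ // Scratch is a small RAII helper: registers obtained through it are
+ // automatically recycled when it goes out of scope. Typical use:
+ //
+ //     { Scratch scratches(registerFile());
+ //       int r = scratches.obtain();
+ //       ... emit code using r ...
+ //     } // r is recycled here
+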
+ class Scratch
+ {
+ public:
+ Scratch(RegisterFile& regFile)
+ : mRegFile(regFile), mScratch(0) {
+ }
+ ~Scratch() {
+ mRegFile.recycleSeveral(mScratch);
+ }
+ int obtain() {
+ int reg = mRegFile.obtain();
+ mScratch |= 1<<reg;
+ return reg;
+ }
+ void recycle(int reg) {
+ mRegFile.recycle(reg);
+ mScratch &= ~(1<<reg);
+ }
+ bool isUsed(int reg) {
+ return (mScratch & (1<<reg));
+ }
+ int countFreeRegs() {
+ return mRegFile.countFreeRegs();
+ }
+ private:
+ RegisterFile& mRegFile;
+ uint32_t mScratch;
+ };
+
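+ // Spill saves a set of live registers to the stack (STM/STR) on
+ // construction and restores them (LDM/LDR) on destruction, recycling
+ // them in between -- a stack-scoped spill/reload of the register file.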
+ class Spill
+ {
+ public:
+ Spill(RegisterFile& regFile, ARMAssemblerInterface& gen, uint32_t reglist)
+ : mRegFile(regFile), mGen(gen), mRegList(reglist), mCount(0)
+ {
+ if (reglist) {
+ int count = 0;
+ while (reglist) {
+ count++;
+ reglist &= ~(1 << (31 - __builtin_clz(reglist)));
+ }
+ if (count == 1) {
+ int reg = 31 - __builtin_clz(mRegList);
+ mGen.STR(mGen.AL, reg, mGen.SP, mGen.immed12_pre(-4, 1));
+ } else {
+ mGen.STM(mGen.AL, mGen.DB, mGen.SP, 1, mRegList);
+ }
+ mRegFile.recycleSeveral(mRegList);
+ mCount = count;
+ }
+ }
+ ~Spill() {
+ if (mRegList) {
+ if (mCount == 1) {
+ int reg = 31 - __builtin_clz(mRegList);
+ mGen.LDR(mGen.AL, reg, mGen.SP, mGen.immed12_post(4));
+ } else {
+ mGen.LDM(mGen.AL, mGen.IA, mGen.SP, 1, mRegList);
+ }
+ mRegFile.reserveSeveral(mRegList);
+ }
+ }
+ private:
+ RegisterFile& mRegFile;
+ ARMAssemblerInterface& mGen;
+ uint32_t mRegList;
+ int mCount;
+ };
+
+private:
+ RegisterFile mRegs;
+};
+
+// ----------------------------------------------------------------------------
+
+class GGLAssembler : public ARMAssemblerProxy, public RegisterAllocator
+{
+public:
+
+ GGLAssembler(ARMAssemblerInterface* target);
+ virtual ~GGLAssembler();
+
+ uint32_t* base() const { return 0; } // XXX
+ uint32_t* pc() const { return 0; } // XXX
+
+ void reset(int opt_level);
+
+ virtual void prolog();
+ virtual void epilog(uint32_t touched);
+
+ // generate scanline code for given needs
+ int scanline(const needs_t& needs, context_t const* c);
+ int scanline_core(const needs_t& needs, context_t const* c);
+
+ enum {
+ CLEAR_LO = 0x0001,
+ CLEAR_HI = 0x0002,
+ CORRUPTIBLE = 0x0004,
+ FIRST = 0x0008
+ };
+
+ enum { //load/store flags
+ WRITE_BACK = 0x0001
+ };
+
+ struct reg_t {
+ reg_t() : reg(-1), flags(0) {
+ }
+ reg_t(int r, int f=0)
+ : reg(r), flags(f) {
+ }
+ void setTo(int r, int f=0) {
+ reg=r; flags=f;
+ }
+ int reg;
+ uint16_t flags;
+ };
+
+ struct integer_t : public reg_t {
+ integer_t() : reg_t(), s(0) {
+ }
+ integer_t(int r, int sz=32, int f=0)
+ : reg_t(r, f), s(sz) {
+ }
+ void setTo(int r, int sz=32, int f=0) {
+ reg_t::setTo(r, f); s=sz;
+ }
+ int8_t s;
+ inline int size() const { return s; }
+ };
+
+ struct pixel_t : public reg_t {
+ pixel_t() : reg_t() {
+ memset(&format, 0, sizeof(GGLFormat));
+ }
+ pixel_t(int r, const GGLFormat* fmt, int f=0)
+ : reg_t(r, f), format(*fmt) {
+ }
+ void setTo(int r, const GGLFormat* fmt, int f=0) {
+ reg_t::setTo(r, f); format = *fmt;
+ }
+ GGLFormat format;
+ inline int hi(int c) const { return format.c[c].h; }
+ inline int low(int c) const { return format.c[c].l; }
+ inline int mask(int c) const { return ((1<<size(c))-1) << low(c); }
+ inline int size() const { return format.size*8; }
+ inline int size(int c) const { return component_size(c); }
+ inline int component_size(int c) const { return hi(c) - low(c); }
+ };
+
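+ // A component_t names a bit-field [l, h) inside a register: h-l is
+ // the component's size in bits and l its offset, which lets the code
+ // generator track values that are not right-aligned without emitting
+ // extra shift instructions.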
+ struct component_t : public reg_t {
+ component_t() : reg_t(), h(0), l(0) {
+ }
+ component_t(int r, int f=0)
+ : reg_t(r, f), h(0), l(0) {
+ }
+ component_t(int r, int lo, int hi, int f=0)
+ : reg_t(r, f), h(hi), l(lo) {
+ }
+ explicit component_t(const integer_t& rhs)
+ : reg_t(rhs.reg, rhs.flags), h(rhs.s), l(0) {
+ }
+ explicit component_t(const pixel_t& rhs, int component) {
+ setTo( rhs.reg,
+ rhs.format.c[component].l,
+ rhs.format.c[component].h,
+ rhs.flags|CLEAR_LO|CLEAR_HI);
+ }
+ void setTo(int r, int lo=0, int hi=0, int f=0) {
+ reg_t::setTo(r, f); h=hi; l=lo;
+ }
+ int8_t h;
+ int8_t l;
+ inline int size() const { return h-l; }
+ };
+
+ struct pointer_t : public reg_t {
+ pointer_t() : reg_t(), size(0) {
+ }
+ pointer_t(int r, int s, int f=0)
+ : reg_t(r, f), size(s) {
+ }
+ void setTo(int r, int s, int f=0) {
+ reg_t::setTo(r, f); size=s;
+ }
+ int8_t size;
+ };
+
+
+private:
+ struct tex_coord_t {
+ reg_t s;
+ reg_t t;
+ pointer_t ptr;
+ };
+
+ struct fragment_parts_t {
+ uint32_t packed : 1;
+ uint32_t reload : 2;
+ uint32_t iterated_packed : 1;
+ pixel_t iterated;
+ pointer_t cbPtr;
+ pointer_t covPtr;
+ reg_t count;
+ reg_t argb[4];
+ reg_t argb_dx[4];
+ reg_t z;
+ reg_t dither;
+ pixel_t texel[GGL_TEXTURE_UNIT_COUNT];
+ tex_coord_t coords[GGL_TEXTURE_UNIT_COUNT];
+ };
+
+ struct texture_unit_t {
+ int format_idx;
+ GGLFormat format;
+ int bits;
+ int swrap;
+ int twrap;
+ int env;
+ int pot;
+ int linear;
+ uint8_t mask;
+ uint8_t replaced;
+ };
+
+ struct texture_machine_t {
+ texture_unit_t tmu[GGL_TEXTURE_UNIT_COUNT];
+ uint8_t mask;
+ uint8_t replaced;
+ uint8_t directTexture;
+ uint8_t activeUnits;
+ };
+
+ struct component_info_t {
+ bool masked : 1;
+ bool inDest : 1;
+ bool needed : 1;
+ bool replaced : 1;
+ bool iterated : 1;
+ bool smooth : 1;
+ bool blend : 1;
+ bool fog : 1;
+ };
+
+ struct builder_context_t {
+ context_t const* c;
+ needs_t needs;
+ int Rctx;
+ };
+
+ template <typename T>
+ void modify(T& r, Scratch& regs)
+ {
+ if (!(r.flags & CORRUPTIBLE)) {
+ r.reg = regs.obtain();
+ r.flags |= CORRUPTIBLE;
+ }
+ }
+
+ // helpers
+ void base_offset(const pointer_t& d, const pointer_t& b, const reg_t& o);
+
+ // texture environement
+ void modulate( component_t& dest,
+ const component_t& incoming,
+ const pixel_t& texel, int component);
+
+ void decal( component_t& dest,
+ const component_t& incoming,
+ const pixel_t& texel, int component);
+
+ void blend( component_t& dest,
+ const component_t& incoming,
+ const pixel_t& texel, int component, int tmu);
+
+ void add( component_t& dest,
+ const component_t& incoming,
+ const pixel_t& texel, int component);
+
+ // load/store stuff
+ void store(const pointer_t& addr, const pixel_t& src, uint32_t flags=0);
+ void load(const pointer_t& addr, const pixel_t& dest, uint32_t flags=0);
+ void extract(integer_t& d, const pixel_t& s, int component);
+ void extract(component_t& d, const pixel_t& s, int component);
+ void extract(integer_t& d, int s, int h, int l, int bits=32);
+ void expand(integer_t& d, const integer_t& s, int dbits);
+ void expand(integer_t& d, const component_t& s, int dbits);
+ void expand(component_t& d, const component_t& s, int dbits);
+ void downshift(pixel_t& d, int component, component_t s, const reg_t& dither);
+
+
+ void mul_factor( component_t& d,
+ const integer_t& v,
+ const integer_t& f);
+
+ void mul_factor_add( component_t& d,
+ const integer_t& v,
+ const integer_t& f,
+ const component_t& a);
+
+ void component_add( component_t& d,
+ const integer_t& dst,
+ const integer_t& src);
+
+ void component_sat( const component_t& v);
+
+
+ void build_scanline_prolog( fragment_parts_t& parts,
+ const needs_t& needs);
+
+ void build_smooth_shade(const fragment_parts_t& parts);
+
+ void build_component( pixel_t& pixel,
+ const fragment_parts_t& parts,
+ int component,
+ Scratch& global_scratches);
+
+ void build_incoming_component(
+ component_t& temp,
+ int dst_size,
+ const fragment_parts_t& parts,
+ int component,
+ Scratch& scratches,
+ Scratch& global_scratches);
+
+ void init_iterated_color(fragment_parts_t& parts, const reg_t& x);
+
+ void build_iterated_color( component_t& fragment,
+ const fragment_parts_t& parts,
+ int component,
+ Scratch& regs);
+
+ void decodeLogicOpNeeds(const needs_t& needs);
+
+ void decodeTMUNeeds(const needs_t& needs, context_t const* c);
+
+ void init_textures( tex_coord_t* coords,
+ const reg_t& x,
+ const reg_t& y);
+
+ void build_textures( fragment_parts_t& parts,
+ Scratch& regs);
+
+ void filter8( const fragment_parts_t& parts,
+ pixel_t& texel, const texture_unit_t& tmu,
+ int U, int V, pointer_t& txPtr,
+ int FRAC_BITS);
+
+ void filter16( const fragment_parts_t& parts,
+ pixel_t& texel, const texture_unit_t& tmu,
+ int U, int V, pointer_t& txPtr,
+ int FRAC_BITS);
+
+ void filter24( const fragment_parts_t& parts,
+ pixel_t& texel, const texture_unit_t& tmu,
+ int U, int V, pointer_t& txPtr,
+ int FRAC_BITS);
+
+ void filter32( const fragment_parts_t& parts,
+ pixel_t& texel, const texture_unit_t& tmu,
+ int U, int V, pointer_t& txPtr,
+ int FRAC_BITS);
+
+ void build_texture_environment( component_t& fragment,
+ const fragment_parts_t& parts,
+ int component,
+ Scratch& regs);
+
+ void wrapping( int d,
+ int coord, int size,
+ int tx_wrap, int tx_linear);
+
+ void build_fog( component_t& temp,
+ int component,
+ Scratch& parent_scratches);
+
+ void build_blending( component_t& in_out,
+ const pixel_t& pixel,
+ int component,
+ Scratch& parent_scratches);
+
+ void build_blend_factor(
+ integer_t& factor, int f, int component,
+ const pixel_t& dst_pixel,
+ integer_t& fragment,
+ integer_t& fb,
+ Scratch& scratches);
+
+ void build_blendFOneMinusF( component_t& temp,
+ const integer_t& factor,
+ const integer_t& fragment,
+ const integer_t& fb);
+
+ void build_blendOneMinusFF( component_t& temp,
+ const integer_t& factor,
+ const integer_t& fragment,
+ const integer_t& fb);
+
+ void build_coverage_application(component_t& fragment,
+ const fragment_parts_t& parts,
+ Scratch& regs);
+
+ void build_alpha_test(component_t& fragment, const fragment_parts_t& parts);
+
+ enum { Z_TEST=1, Z_WRITE=2 };
+ void build_depth_test(const fragment_parts_t& parts, uint32_t mask);
+ void build_iterate_z(const fragment_parts_t& parts);
+ void build_iterate_f(const fragment_parts_t& parts);
+ void build_iterate_texture_coordinates(const fragment_parts_t& parts);
+
+ void build_logic_op(pixel_t& pixel, Scratch& regs);
+
+ void build_masking(pixel_t& pixel, Scratch& regs);
+
+ void build_and_immediate(int d, int s, uint32_t mask, int bits);
+
+ bool isAlphaSourceNeeded() const;
+
+ enum {
+ FACTOR_SRC=1, FACTOR_DST=2, BLEND_SRC=4, BLEND_DST=8
+ };
+
+ enum {
+ LOGIC_OP=1, LOGIC_OP_SRC=2, LOGIC_OP_DST=4
+ };
+
+ static int blending_codes(int fs, int fd);
+
+ builder_context_t mBuilderContext;
+ texture_machine_t mTextureMachine;
+ component_info_t mInfo[4];
+ int mBlending;
+ int mMasking;
+ int mAllMasked;
+ int mLogicOp;
+ int mAlphaTest;
+ int mAA;
+ int mDithering;
+ int mDepthTest;
+
+ int mSmooth;
+ int mFog;
+ pixel_t mDstPixel;
+
+ GGLFormat mCbFormat;
+
+ int mBlendFactorCached;
+ integer_t mAlphaSource;
+
+ int mBaseRegister;
+
+ int mBlendSrc;
+ int mBlendDst;
+ int mBlendSrcA;
+ int mBlendDstA;
+
+ int mOptLevel;
+};
+
+// ----------------------------------------------------------------------------
+
+}; // namespace android
+
+#endif // ANDROID_GGLASSEMBLER_H
diff --git a/libpixelflinger/codeflinger/armreg.h b/libpixelflinger/codeflinger/armreg.h
new file mode 100644
index 0000000..fde81ba
--- /dev/null
+++ b/libpixelflinger/codeflinger/armreg.h
@@ -0,0 +1,300 @@
+/* $NetBSD: armreg.h,v 1.28 2003/10/31 16:30:15 scw Exp $ */
+
+/*-
+ * Copyright (c) 1998, 2001 Ben Harris
+ * Copyright (c) 1994-1996 Mark Brinicombe.
+ * Copyright (c) 1994 Brini.
+ * All rights reserved.
+ *
+ * This code is derived from software written for Brini by Mark Brinicombe
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Brini.
+ * 4. The name of the company nor the name of the author may be used to
+ * endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: /repoman/r/ncvs/src/sys/arm/include/armreg.h,v 1.3 2005/11/21 19:06:25 cognet Exp $
+ */
+
+#ifndef MACHINE_ARMREG_H
+#define MACHINE_ARMREG_H
+#define INSN_SIZE 4
+#define INSN_COND_MASK 0xf0000000 /* Condition mask */
+#define PSR_MODE 0x0000001f /* mode mask */
+#define PSR_USR26_MODE 0x00000000
+#define PSR_FIQ26_MODE 0x00000001
+#define PSR_IRQ26_MODE 0x00000002
+#define PSR_SVC26_MODE 0x00000003
+#define PSR_USR32_MODE 0x00000010
+#define PSR_FIQ32_MODE 0x00000011
+#define PSR_IRQ32_MODE 0x00000012
+#define PSR_SVC32_MODE 0x00000013
+#define PSR_ABT32_MODE 0x00000017
+#define PSR_UND32_MODE 0x0000001b
+#define PSR_SYS32_MODE 0x0000001f
+#define PSR_32_MODE 0x00000010
+#define PSR_FLAGS 0xf0000000 /* flags */
+
+#define PSR_C_bit (1 << 29) /* carry */
+
+/* The high-order byte is always the implementor */
+#define CPU_ID_IMPLEMENTOR_MASK 0xff000000
+#define CPU_ID_ARM_LTD 0x41000000 /* 'A' */
+#define CPU_ID_DEC 0x44000000 /* 'D' */
+#define CPU_ID_INTEL 0x69000000 /* 'i' */
+#define CPU_ID_TI 0x54000000 /* 'T' */
+
+/* How to decide what format the CPUID is in. */
+#define CPU_ID_ISOLD(x) (((x) & 0x0000f000) == 0x00000000)
+#define CPU_ID_IS7(x) (((x) & 0x0000f000) == 0x00007000)
+#define CPU_ID_ISNEW(x) (!CPU_ID_ISOLD(x) && !CPU_ID_IS7(x))
+
+/* On ARM3 and ARM6, this byte holds the foundry ID. */
+#define CPU_ID_FOUNDRY_MASK 0x00ff0000
+#define CPU_ID_FOUNDRY_VLSI 0x00560000
+
+/* On ARM7 it holds the architecture and variant (sub-model) */
+#define CPU_ID_7ARCH_MASK 0x00800000
+#define CPU_ID_7ARCH_V3 0x00000000
+#define CPU_ID_7ARCH_V4T 0x00800000
+#define CPU_ID_7VARIANT_MASK 0x007f0000
+
+/* On more recent ARMs, it does the same, but in a different format */
+#define CPU_ID_ARCH_MASK 0x000f0000
+#define CPU_ID_ARCH_V3 0x00000000
+#define CPU_ID_ARCH_V4 0x00010000
+#define CPU_ID_ARCH_V4T 0x00020000
+#define CPU_ID_ARCH_V5 0x00030000
+#define CPU_ID_ARCH_V5T 0x00040000
+#define CPU_ID_ARCH_V5TE 0x00050000
+#define CPU_ID_VARIANT_MASK 0x00f00000
+
+/* Next three nybbles are part number */
+#define CPU_ID_PARTNO_MASK 0x0000fff0
+
+/* Intel XScale has sub fields in part number */
+#define CPU_ID_XSCALE_COREGEN_MASK 0x0000e000 /* core generation */
+#define CPU_ID_XSCALE_COREREV_MASK 0x00001c00 /* core revision */
+#define CPU_ID_XSCALE_PRODUCT_MASK 0x000003f0 /* product number */
+
+/* And finally, the revision number. */
+#define CPU_ID_REVISION_MASK 0x0000000f
+
+/* Individual CPUs are probably best IDed by everything but the revision. */
+#define CPU_ID_CPU_MASK 0xfffffff0
+
+/* Fake CPU IDs for ARMs without CP15 */
+#define CPU_ID_ARM2 0x41560200
+#define CPU_ID_ARM250 0x41560250
+
+/* Pre-ARM7 CPUs -- [15:12] == 0 */
+#define CPU_ID_ARM3 0x41560300
+#define CPU_ID_ARM600 0x41560600
+#define CPU_ID_ARM610 0x41560610
+#define CPU_ID_ARM620 0x41560620
+
+/* ARM7 CPUs -- [15:12] == 7 */
+#define CPU_ID_ARM700 0x41007000 /* XXX This is a guess. */
+#define CPU_ID_ARM710 0x41007100
+#define CPU_ID_ARM7500 0x41027100 /* XXX This is a guess. */
+#define CPU_ID_ARM710A 0x41047100 /* inc ARM7100 */
+#define CPU_ID_ARM7500FE 0x41077100
+#define CPU_ID_ARM710T 0x41807100
+#define CPU_ID_ARM720T 0x41807200
+#define CPU_ID_ARM740T8K 0x41807400 /* XXX no MMU, 8KB cache */
+#define CPU_ID_ARM740T4K 0x41817400 /* XXX no MMU, 4KB cache */
+
+/* Post-ARM7 CPUs */
+#define CPU_ID_ARM810 0x41018100
+#define CPU_ID_ARM920T 0x41129200
+#define CPU_ID_ARM920T_ALT 0x41009200
+#define CPU_ID_ARM922T 0x41029220
+#define CPU_ID_ARM940T 0x41029400 /* XXX no MMU */
+#define CPU_ID_ARM946ES 0x41049460 /* XXX no MMU */
+#define CPU_ID_ARM966ES 0x41049660 /* XXX no MMU */
+#define CPU_ID_ARM966ESR1 0x41059660 /* XXX no MMU */
+#define CPU_ID_ARM1020E 0x4115a200 /* (AKA arm10 rev 1) */
+#define CPU_ID_ARM1022ES 0x4105a220
+#define CPU_ID_SA110 0x4401a100
+#define CPU_ID_SA1100 0x4401a110
+#define CPU_ID_TI925T 0x54029250
+#define CPU_ID_SA1110 0x6901b110
+#define CPU_ID_IXP1200 0x6901c120
+#define CPU_ID_80200 0x69052000
+#define CPU_ID_PXA250 0x69052100 /* sans core revision */
+#define CPU_ID_PXA210 0x69052120
+#define CPU_ID_PXA250A 0x69052100 /* 1st version Core */
+#define CPU_ID_PXA210A 0x69052120 /* 1st version Core */
+#define CPU_ID_PXA250B 0x69052900 /* 3rd version Core */
+#define CPU_ID_PXA210B 0x69052920 /* 3rd version Core */
+#define CPU_ID_PXA250C 0x69052d00 /* 4th version Core */
+#define CPU_ID_PXA210C 0x69052d20 /* 4th version Core */
+#define CPU_ID_80321_400 0x69052420
+#define CPU_ID_80321_600 0x69052430
+#define CPU_ID_80321_400_B0 0x69052c20
+#define CPU_ID_80321_600_B0 0x69052c30
+#define CPU_ID_IXP425_533 0x690541c0
+#define CPU_ID_IXP425_400 0x690541d0
+#define CPU_ID_IXP425_266 0x690541f0
+
+/* ARM3-specific coprocessor 15 registers */
+#define ARM3_CP15_FLUSH 1
+#define ARM3_CP15_CONTROL 2
+#define ARM3_CP15_CACHEABLE 3
+#define ARM3_CP15_UPDATEABLE 4
+#define ARM3_CP15_DISRUPTIVE 5
+
+/* ARM3 Control register bits */
+#define ARM3_CTL_CACHE_ON 0x00000001
+#define ARM3_CTL_SHARED 0x00000002
+#define ARM3_CTL_MONITOR 0x00000004
+
+/*
+ * Post-ARM3 CP15 registers:
+ *
+ * 1 Control register
+ *
+ * 2 Translation Table Base
+ *
+ * 3 Domain Access Control
+ *
+ * 4 Reserved
+ *
+ * 5 Fault Status
+ *
+ * 6 Fault Address
+ *
+ * 7 Cache/write-buffer Control
+ *
+ * 8 TLB Control
+ *
+ * 9 Cache Lockdown
+ *
+ * 10 TLB Lockdown
+ *
+ * 11 Reserved
+ *
+ * 12 Reserved
+ *
+ * 13 Process ID (for FCSE)
+ *
+ * 14 Reserved
+ *
+ * 15 Implementation Dependent
+ */
+
+/* Some of the definitions below need cleaning up for V3/V4 architectures */
+
+/* CPU control register (CP15 register 1) */
+#define CPU_CONTROL_MMU_ENABLE 0x00000001 /* M: MMU/Protection unit enable */
+#define CPU_CONTROL_AFLT_ENABLE 0x00000002 /* A: Alignment fault enable */
+#define CPU_CONTROL_DC_ENABLE 0x00000004 /* C: IDC/DC enable */
+#define CPU_CONTROL_WBUF_ENABLE 0x00000008 /* W: Write buffer enable */
+#define CPU_CONTROL_32BP_ENABLE 0x00000010 /* P: 32-bit exception handlers */
+#define CPU_CONTROL_32BD_ENABLE 0x00000020 /* D: 32-bit addressing */
+#define CPU_CONTROL_LABT_ENABLE 0x00000040 /* L: Late abort enable */
+#define CPU_CONTROL_BEND_ENABLE 0x00000080 /* B: Big-endian mode */
+#define CPU_CONTROL_SYST_ENABLE 0x00000100 /* S: System protection bit */
+#define CPU_CONTROL_ROM_ENABLE 0x00000200 /* R: ROM protection bit */
+#define CPU_CONTROL_CPCLK 0x00000400 /* F: Implementation defined */
+#define CPU_CONTROL_BPRD_ENABLE 0x00000800 /* Z: Branch prediction enable */
+#define CPU_CONTROL_IC_ENABLE 0x00001000 /* I: IC enable */
+#define CPU_CONTROL_VECRELOC 0x00002000 /* V: Vector relocation */
+#define CPU_CONTROL_ROUNDROBIN 0x00004000 /* RR: Predictable replacement */
+#define CPU_CONTROL_V4COMPAT 0x00008000 /* L4: ARMv4 compat LDR R15 etc */
+
+#define CPU_CONTROL_IDC_ENABLE CPU_CONTROL_DC_ENABLE
+
+/* XScale Auxiliary Control Register (CP15 register 1, opcode2 1) */
+#define XSCALE_AUXCTL_K 0x00000001 /* dis. write buffer coalescing */
+#define XSCALE_AUXCTL_P 0x00000002 /* ECC protect page table access */
+#define XSCALE_AUXCTL_MD_WB_RA 0x00000000 /* mini-D$ wb, read-allocate */
+#define XSCALE_AUXCTL_MD_WB_RWA 0x00000010 /* mini-D$ wb, read/write-allocate */
+#define XSCALE_AUXCTL_MD_WT 0x00000020 /* mini-D$ wt, read-allocate */
+#define XSCALE_AUXCTL_MD_MASK 0x00000030
+
+/* Cache type register definitions */
+#define CPU_CT_ISIZE(x) ((x) & 0xfff) /* I$ info */
+#define CPU_CT_DSIZE(x) (((x) >> 12) & 0xfff) /* D$ info */
+#define CPU_CT_S (1U << 24) /* split cache */
+#define CPU_CT_CTYPE(x) (((x) >> 25) & 0xf) /* cache type */
+
+#define CPU_CT_CTYPE_WT 0 /* write-through */
+#define CPU_CT_CTYPE_WB1 1 /* write-back, clean w/ read */
+#define CPU_CT_CTYPE_WB2 2 /* w/b, clean w/ cp15,7 */
+#define CPU_CT_CTYPE_WB6 6 /* w/b, cp15,7, lockdown fmt A */
+#define CPU_CT_CTYPE_WB7 7 /* w/b, cp15,7, lockdown fmt B */
+
+#define CPU_CT_xSIZE_LEN(x) ((x) & 0x3) /* line size */
+#define CPU_CT_xSIZE_M (1U << 2) /* multiplier */
+#define CPU_CT_xSIZE_ASSOC(x) (((x) >> 3) & 0x7) /* associativity */
+#define CPU_CT_xSIZE_SIZE(x) (((x) >> 6) & 0x7) /* size */
+
+/* Fault status register definitions */
+
+#define FAULT_TYPE_MASK 0x0f
+#define FAULT_USER 0x10
+
+#define FAULT_WRTBUF_0 0x00 /* Vector Exception */
+#define FAULT_WRTBUF_1 0x02 /* Terminal Exception */
+#define FAULT_BUSERR_0 0x04 /* External Abort on Linefetch -- Section */
+#define FAULT_BUSERR_1 0x06 /* External Abort on Linefetch -- Page */
+#define FAULT_BUSERR_2 0x08 /* External Abort on Non-linefetch -- Section */
+#define FAULT_BUSERR_3 0x0a /* External Abort on Non-linefetch -- Page */
+#define FAULT_BUSTRNL1 0x0c /* External abort on Translation -- Level 1 */
+#define FAULT_BUSTRNL2 0x0e /* External abort on Translation -- Level 2 */
+#define FAULT_ALIGN_0 0x01 /* Alignment */
+#define FAULT_ALIGN_1 0x03 /* Alignment */
+#define FAULT_TRANS_S 0x05 /* Translation -- Section */
+#define FAULT_TRANS_P 0x07 /* Translation -- Page */
+#define FAULT_DOMAIN_S 0x09 /* Domain -- Section */
+#define FAULT_DOMAIN_P 0x0b /* Domain -- Page */
+#define FAULT_PERM_S 0x0d /* Permission -- Section */
+#define FAULT_PERM_P 0x0f /* Permission -- Page */
+
+#define FAULT_IMPRECISE 0x400 /* Imprecise exception (XSCALE) */
+
+/*
+ * Address of the vector page, low and high versions.
+ */
+#define ARM_VECTORS_LOW 0x00000000U
+#define ARM_VECTORS_HIGH 0xffff0000U
+
+/*
+ * ARM Instructions
+ *
+ * 3 3 2 2 2
+ * 1 0 9 8 7 0
+ * +-------+-------------------------------------------------------+
+ * | cond | instruction dependent |
+ * |c c c c| |
+ * +-------+-------------------------------------------------------+
+ */
+
+#define INSN_SIZE 4 /* Always 4 bytes */
+#define INSN_COND_MASK 0xf0000000 /* Condition mask */
+#define INSN_COND_AL 0xe0000000 /* Always condition */
+
+#endif /* !MACHINE_ARMREG_H */
diff --git a/libpixelflinger/codeflinger/blending.cpp b/libpixelflinger/codeflinger/blending.cpp
new file mode 100644
index 0000000..f10217b
--- /dev/null
+++ b/libpixelflinger/codeflinger/blending.cpp
@@ -0,0 +1,682 @@
+/* libs/pixelflinger/codeflinger/blending.cpp
+**
+** Copyright 2006, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+** http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include <cutils/log.h>
+
+#include "codeflinger/GGLAssembler.h"
+
+
+namespace android {
+
+void GGLAssembler::build_fog(
+ component_t& temp, // incoming fragment / output
+ int component,
+ Scratch& regs)
+{
+ if (mInfo[component].fog) {
+ Scratch scratches(registerFile());
+ comment("fog");
+
+ integer_t fragment(temp.reg, temp.h, temp.flags);
+ if (!(temp.flags & CORRUPTIBLE)) {
+ temp.reg = regs.obtain();
+ temp.flags |= CORRUPTIBLE;
+ }
+
+ integer_t fogColor(scratches.obtain(), 8, CORRUPTIBLE);
+ LDRB(AL, fogColor.reg, mBuilderContext.Rctx,
+ immed12_pre(GGL_OFFSETOF(state.fog.color[component])));
+
+ integer_t factor(scratches.obtain(), 16, CORRUPTIBLE);
+ CONTEXT_LOAD(factor.reg, generated_vars.f);
+
+ // clamp the fog factor (TODO: see if there is a way to guarantee
+ // we won't overflow when setting up the iterators)
+ BIC(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, ASR, 31));
+ CMP(AL, factor.reg, imm( 0x10000 ));
+ MOV(HS, 0, factor.reg, imm( 0x10000 ));
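+ // (factor.reg ASR 31 is all-ones when the factor is negative, so the
+ // BIC clamps negative values to zero; CMP/MOV then clamps the result
+ // to 1.0 in 16.16 fixed point, i.e. 0x10000.)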
+
+ build_blendFOneMinusF(temp, factor, fragment, fogColor);
+ }
+}
+
+void GGLAssembler::build_blending(
+ component_t& temp, // incoming fragment / output
+ const pixel_t& pixel, // framebuffer
+ int component,
+ Scratch& regs)
+{
+ if (!mInfo[component].blend)
+ return;
+
+ int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
+ int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
+ if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA)
+ fs = GGL_ONE;
+ const int blending = blending_codes(fs, fd);
+ if (!temp.size()) {
+ // here, blending will produce something which doesn't depend on
+ // that component (e.g. GGL_ZERO:GGL_*), so the register has not been
+ // allocated yet. It will never be used as a source.
+ temp = component_t(regs.obtain(), CORRUPTIBLE);
+ }
+
+ // we are doing real blending...
+ // fb: extracted dst
+ // fragment: extracted src
+ // temp: component_t(fragment) and result
+
+ // scoped register allocator
+ Scratch scratches(registerFile());
+ comment("blending");
+
+ // we can optimize these cases a bit...
+ // (1) saturation is not needed
+ // (2) we can use only one multiply instead of 2
+ // (3) we can reduce the register pressure
+ // R = S*f + D*(1-f) = (S-D)*f + D
+ // R = S*(1-f) + D*f = (D-S)*f + S
+
+ const bool same_factor_opt1 =
+ (fs==GGL_DST_COLOR && fd==GGL_ONE_MINUS_DST_COLOR) ||
+ (fs==GGL_SRC_COLOR && fd==GGL_ONE_MINUS_SRC_COLOR) ||
+ (fs==GGL_DST_ALPHA && fd==GGL_ONE_MINUS_DST_ALPHA) ||
+ (fs==GGL_SRC_ALPHA && fd==GGL_ONE_MINUS_SRC_ALPHA);
+
+ const bool same_factor_opt2 =
+ (fs==GGL_ONE_MINUS_DST_COLOR && fd==GGL_DST_COLOR) ||
+ (fs==GGL_ONE_MINUS_SRC_COLOR && fd==GGL_SRC_COLOR) ||
+ (fs==GGL_ONE_MINUS_DST_ALPHA && fd==GGL_DST_ALPHA) ||
+ (fs==GGL_ONE_MINUS_SRC_ALPHA && fd==GGL_SRC_ALPHA);
+
+
+ // XXX: we could also optimize these cases:
+ // R = S*f + D*f = (S+D)*f
+ // R = S*(1-f) + D*(1-f) = (S+D)*(1-f)
+ // R = S*D + D*S = 2*S*D
+
+
+ // see if we need to extract 'component' from the destination (fb)
+ integer_t fb;
+ if (blending & (BLEND_DST|FACTOR_DST)) {
+ fb.setTo(scratches.obtain(), 32);
+ extract(fb, pixel, component);
+ if (mDithering) {
+ // XXX: maybe what we should do instead, is simply
+ // expand fb -or- fragment to the larger of the two
+ if (fb.size() < temp.size()) {
+ // for now we expand 'fb' to min(fragment, 8)
+ int new_size = temp.size() < 8 ? temp.size() : 8;
+ expand(fb, fb, new_size);
+ }
+ }
+ }
+
+
+ // convert input fragment to integer_t
+ if (temp.l && (temp.flags & CORRUPTIBLE)) {
+ MOV(AL, 0, temp.reg, reg_imm(temp.reg, LSR, temp.l));
+ temp.h -= temp.l;
+ temp.l = 0;
+ }
+ integer_t fragment(temp.reg, temp.size(), temp.flags);
+
+ // if not done yet, convert input fragment to integer_t
+ if (temp.l) {
+ // here we know temp is not CORRUPTIBLE
+ fragment.reg = scratches.obtain();
+ MOV(AL, 0, fragment.reg, reg_imm(temp.reg, LSR, temp.l));
+ fragment.flags |= CORRUPTIBLE;
+ }
+
+ if (!(temp.flags & CORRUPTIBLE)) {
+ // temp is not corruptible, but since it's the destination it
+ // will be modified, so we need to allocate a new register.
+ temp.reg = regs.obtain();
+ temp.flags &= ~CORRUPTIBLE;
+ fragment.flags &= ~CORRUPTIBLE;
+ }
+
+ if ((blending & BLEND_SRC) && !same_factor_opt1) {
+ // source (fragment) is needed for the blending stage
+ // so it's not CORRUPTIBLE (unless we're doing same_factor_opt1)
+ fragment.flags &= ~CORRUPTIBLE;
+ }
+
+
+ if (same_factor_opt1) {
+ // R = S*f + D*(1-f) = (S-D)*f + D
+ integer_t factor;
+ build_blend_factor(factor, fs,
+ component, pixel, fragment, fb, scratches);
+ // fb is always corruptible from this point
+ fb.flags |= CORRUPTIBLE;
+ build_blendFOneMinusF(temp, factor, fragment, fb);
+ } else if (same_factor_opt2) {
+ // R = S*(1-f) + D*f = (D-S)*f + S
+ integer_t factor;
+ // fb is always corruptible here
+ fb.flags |= CORRUPTIBLE;
+ build_blend_factor(factor, fd,
+ component, pixel, fragment, fb, scratches);
+ build_blendOneMinusFF(temp, factor, fragment, fb);
+ } else {
+ integer_t src_factor;
+ integer_t dst_factor;
+
+ // if destination (fb) is not needed for the blending stage,
+ // then it can be marked as CORRUPTIBLE
+ if (!(blending & BLEND_DST)) {
+ fb.flags |= CORRUPTIBLE;
+ }
+
+ // XXX: try to mark some registers as CORRUPTIBLE.
+ // In most cases we could make those corruptible
+ // when we're processing the last component,
+ // but not always; for instance:
+ // - when fragment is constant and not reloaded
+ // - when fb is needed for logic-ops or masking
+ // - when a register is aliased (for instance with mAlphaSource)
+
+ // blend away...
+ if (fs==GGL_ZERO) {
+ if (fd==GGL_ZERO) { // R = 0
+ // already taken care of
+ } else if (fd==GGL_ONE) { // R = D
+ // already taken care of
+ } else { // R = D*fd
+ // compute fd
+ build_blend_factor(dst_factor, fd,
+ component, pixel, fragment, fb, scratches);
+ mul_factor(temp, fb, dst_factor);
+ }
+ } else if (fs==GGL_ONE) {
+ if (fd==GGL_ZERO) { // R = S
+ // NOP, taken care of
+ } else if (fd==GGL_ONE) { // R = S + D
+ component_add(temp, fb, fragment); // args order matters
+ component_sat(temp);
+ } else { // R = S + D*fd
+ // compute fd
+ build_blend_factor(dst_factor, fd,
+ component, pixel, fragment, fb, scratches);
+ mul_factor_add(temp, fb, dst_factor, component_t(fragment));
+ if (fd==GGL_ONE_MINUS_SRC_ALPHA) {
+ // XXX: in theory this is not correct and we should
+ // saturate here. However, this mode is often
+ // used for displaying alpha-premultiplied graphics,
+ // in which case saturation is not necessary.
+ // Unfortunately, we have no way to know. This is a
+ // case where we sacrifice correctness for performance;
+ // we should probably have some heuristics.
+ } else {
+ component_sat(temp);
+ }
+ }
+ } else {
+ // compute fs
+ build_blend_factor(src_factor, fs,
+ component, pixel, fragment, fb, scratches);
+ if (fd==GGL_ZERO) { // R = S*fs
+ mul_factor(temp, fragment, src_factor);
+ } else if (fd==GGL_ONE) { // R = S*fs + D
+ mul_factor_add(temp, fragment, src_factor, component_t(fb));
+ component_sat(temp);
+ } else { // R = S*fs + D*fd
+ mul_factor(temp, fragment, src_factor);
+ if (scratches.isUsed(src_factor.reg))
+ scratches.recycle(src_factor.reg);
+ // compute fd
+ build_blend_factor(dst_factor, fd,
+ component, pixel, fragment, fb, scratches);
+ mul_factor_add(temp, fb, dst_factor, temp);
+ if (!same_factor_opt1 && !same_factor_opt2) {
+ component_sat(temp);
+ }
+ }
+ }
+ }
+
+ // now we can be corrupted (it's the dest)
+ temp.flags |= CORRUPTIBLE;
+}
+
+void GGLAssembler::build_blend_factor(
+ integer_t& factor, int f, int component,
+ const pixel_t& dst_pixel,
+ integer_t& fragment,
+ integer_t& fb,
+ Scratch& scratches)
+{
+ integer_t src_alpha(fragment);
+
+ // src_factor/dst_factor won't be used after blending,
+ // so it's fine to mark them as CORRUPTIBLE (if not aliased)
+ factor.flags |= CORRUPTIBLE;
+
+ switch(f) {
+ case GGL_ONE_MINUS_SRC_ALPHA:
+ case GGL_SRC_ALPHA:
+ if (component==GGLFormat::ALPHA && !isAlphaSourceNeeded()) {
+ // we're processing alpha, so we already have
+ // src-alpha in fragment, and we need src-alpha just this once.
+ } else {
+ // alpha-src will be needed for other components
+ if (!mBlendFactorCached || mBlendFactorCached==f) {
+ src_alpha = mAlphaSource;
+ factor = mAlphaSource;
+ factor.flags &= ~CORRUPTIBLE;
+ // we already computed the blend factor before, nothing to do.
+ if (mBlendFactorCached)
+ return;
+ // this is the first time, make sure to compute the blend
+ // factor properly.
+ mBlendFactorCached = f;
+ break;
+ } else {
+ // we have a cached alpha blend factor, but we want another
+ // one. This should really not happen because, by construction,
+ // we cannot have BOTH source and destination
+ // blend factors use ALPHA *and* ONE_MINUS_ALPHA (because
+ // the blending stage uses the f/(1-f) optimization).
+
+ // For completeness, we handle this case though. Since there
+ // are only 2 choices, this means we want "the other one"
+ // (1-factor)
+ factor = mAlphaSource;
+ factor.flags &= ~CORRUPTIBLE;
+ RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
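+ // (RSB computes imm - reg, i.e. (1<<s) - factor; e.g. with a
+ // hypothetical factor.s of 16, a factor of 0x4000 becomes 0xC000.)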
+ mBlendFactorCached = f;
+ return;
+ }
+ }
+ // fall-through...
+ case GGL_ONE_MINUS_DST_COLOR:
+ case GGL_DST_COLOR:
+ case GGL_ONE_MINUS_SRC_COLOR:
+ case GGL_SRC_COLOR:
+ case GGL_ONE_MINUS_DST_ALPHA:
+ case GGL_DST_ALPHA:
+ case GGL_SRC_ALPHA_SATURATE:
+ // help us find out what register we can use for the blend-factor
+ // CORRUPTIBLE registers are chosen first, or a new one is allocated.
+ if (fragment.flags & CORRUPTIBLE) {
+ factor.setTo(fragment.reg, 32, CORRUPTIBLE);
+ fragment.flags &= ~CORRUPTIBLE;
+ } else if (fb.flags & CORRUPTIBLE) {
+ factor.setTo(fb.reg, 32, CORRUPTIBLE);
+ fb.flags &= ~CORRUPTIBLE;
+ } else {
+ factor.setTo(scratches.obtain(), 32, CORRUPTIBLE);
+ }
+ break;
+ }
+
+ // XXX: doesn't work if size==1
+
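+ // The ADDs below expand an s-bit value v into a factor in
+ // [0, 1<<s] by computing v + (v>>(s-1)), so that all-ones maps
+ // exactly to 1.0: e.g. with s=8 (illustrative), 0xFF + (0xFF>>7)
+ // = 0x100, while 0 stays 0.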
+ switch(f) {
+ case GGL_ONE_MINUS_DST_COLOR:
+ case GGL_DST_COLOR:
+ factor.s = fb.s;
+ ADD(AL, 0, factor.reg, fb.reg, reg_imm(fb.reg, LSR, fb.s-1));
+ break;
+ case GGL_ONE_MINUS_SRC_COLOR:
+ case GGL_SRC_COLOR:
+ factor.s = fragment.s;
+ ADD(AL, 0, factor.reg, fragment.reg,
+ reg_imm(fragment.reg, LSR, fragment.s-1));
+ break;
+ case GGL_ONE_MINUS_SRC_ALPHA:
+ case GGL_SRC_ALPHA:
+ factor.s = src_alpha.s;
+ ADD(AL, 0, factor.reg, src_alpha.reg,
+ reg_imm(src_alpha.reg, LSR, src_alpha.s-1));
+ break;
+ case GGL_ONE_MINUS_DST_ALPHA:
+ case GGL_DST_ALPHA:
+ // XXX: should be precomputed
+ extract(factor, dst_pixel, GGLFormat::ALPHA);
+ ADD(AL, 0, factor.reg, factor.reg,
+ reg_imm(factor.reg, LSR, factor.s-1));
+ break;
+ case GGL_SRC_ALPHA_SATURATE:
+ // XXX: should be precomputed
+ // XXX: f = min(As, 1-Ad)
+ // btw, we're guaranteed that Ad's size is <= 8, because
+ // it's extracted from the framebuffer
+ break;
+ }
+
+ switch(f) {
+ case GGL_ONE_MINUS_DST_COLOR:
+ case GGL_ONE_MINUS_SRC_COLOR:
+ case GGL_ONE_MINUS_DST_ALPHA:
+ case GGL_ONE_MINUS_SRC_ALPHA:
+ RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
+ }
+
+ // don't need more than 8-bits for the blend factor
+ // and this will prevent overflows in the multiplies later
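+ // (for instance, a hypothetical 16-bit factor is shifted down to
+ // 8 bits so that a later multiply against an up-to-16-bit
+ // component produces at most a 24-bit product, which fits
+ // comfortably in a 32-bit register)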
+ if (factor.s > 8) {
+ MOV(AL, 0, factor.reg, reg_imm(factor.reg, LSR, factor.s-8));
+ factor.s = 8;
+ }
+}
+
+int GGLAssembler::blending_codes(int fs, int fd)
+{
+ int blending = 0;
+ switch(fs) {
+ case GGL_ONE:
+ blending |= BLEND_SRC;
+ break;
+
+ case GGL_ONE_MINUS_DST_COLOR:
+ case GGL_DST_COLOR:
+ blending |= FACTOR_DST|BLEND_SRC;
+ break;
+ case GGL_ONE_MINUS_DST_ALPHA:
+ case GGL_DST_ALPHA:
+ // no need to extract 'component' from the destination
+ // for the blend factor, because we need ALPHA only.
+ blending |= BLEND_SRC;
+ break;
+
+ case GGL_ONE_MINUS_SRC_COLOR:
+ case GGL_SRC_COLOR:
+ blending |= FACTOR_SRC|BLEND_SRC;
+ break;
+ case GGL_ONE_MINUS_SRC_ALPHA:
+ case GGL_SRC_ALPHA:
+ case GGL_SRC_ALPHA_SATURATE:
+ blending |= FACTOR_SRC|BLEND_SRC;
+ break;
+ }
+ switch(fd) {
+ case GGL_ONE:
+ blending |= BLEND_DST;
+ break;
+
+ case GGL_ONE_MINUS_DST_COLOR:
+ case GGL_DST_COLOR:
+ blending |= FACTOR_DST|BLEND_DST;
+ break;
+ case GGL_ONE_MINUS_DST_ALPHA:
+ case GGL_DST_ALPHA:
+ blending |= FACTOR_DST|BLEND_DST;
+ break;
+
+ case GGL_ONE_MINUS_SRC_COLOR:
+ case GGL_SRC_COLOR:
+ blending |= FACTOR_SRC|BLEND_DST;
+ break;
+ case GGL_ONE_MINUS_SRC_ALPHA:
+ case GGL_SRC_ALPHA:
+ // no need to extract 'component' from the source
+ // for the blend factor, because we need ALPHA only.
+ blending |= BLEND_DST;
+ break;
+ }
+ return blending;
+}
+
+// ---------------------------------------------------------------------------
+
+void GGLAssembler::build_blendFOneMinusF(
+ component_t& temp,
+ const integer_t& factor,
+ const integer_t& fragment,
+ const integer_t& fb)
+{
+ // R = S*f + D*(1-f) = (S-D)*f + D
+ Scratch scratches(registerFile());
+ // compute S-D
+ integer_t diff(fragment.flags & CORRUPTIBLE ?
+ fragment.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
+ const int shift = fragment.size() - fb.size();
+ if (shift>0) RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
+ else if (shift<0) RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
+ else RSB(AL, 0, diff.reg, fb.reg, fragment.reg);
+ mul_factor_add(temp, diff, factor, component_t(fb));
+}
+
+void GGLAssembler::build_blendOneMinusFF(
+ component_t& temp,
+ const integer_t& factor,
+ const integer_t& fragment,
+ const integer_t& fb)
+{
+ // R = S*(1-f) + D*f = (D-S)*f + S
+ Scratch scratches(registerFile());
+ // compute D-S
+ integer_t diff(fb.flags & CORRUPTIBLE ?
+ fb.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
+ const int shift = fragment.size() - fb.size();
+ if (shift>0) SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
+ else if (shift<0) SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
+ else SUB(AL, 0, diff.reg, fb.reg, fragment.reg);
+ mul_factor_add(temp, diff, factor, component_t(fragment));
+}
+
+// ---------------------------------------------------------------------------
+
+void GGLAssembler::mul_factor( component_t& d,
+ const integer_t& v,
+ const integer_t& f)
+{
+ int vs = v.size();
+ int fs = f.size();
+ int ms = vs+fs;
+
+ // XXX: we could have special cases for 1 bit mul
+
+ // All of the code below selects the best multiply instruction
+ // with respect to the operand sizes. We take advantage of the fact
+ // that the 16-bit multiplies allow a 16-bit shift.
+ // The trick is that we just make sure that we keep at least 8 bits
+ // per component (which is enough for an 8-bit display).
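+ //
+ // For instance (illustrative sizes only): vs=18, fs=18 takes the
+ // last branch below: the factor is shifted down to 15 bits and
+ // SMULWB keeps the top 32 bits of the 48-bit product, giving
+ // ms = 18+15-16 = 17 significant bits.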
+
+ int xy;
+ int vshift = 0;
+ int fshift = 0;
+ int smulw = 0;
+
+ if (vs<16) {
+ if (fs<16) {
+ xy = xyBB;
+ } else if (GGL_BETWEEN(fs, 24, 31)) {
+ ms -= 16;
+ xy = xyTB;
+ } else {
+ // eg: 15 * 18 -> 15 * 15
+ fshift = fs - 15;
+ ms -= fshift;
+ xy = xyBB;
+ }
+ } else if (GGL_BETWEEN(vs, 24, 31)) {
+ if (fs<16) {
+ ms -= 16;
+ xy = xyTB;
+ } else if (GGL_BETWEEN(fs, 24, 31)) {
+ ms -= 32;
+ xy = xyTT;
+ } else {
+ // eg: 24 * 18 -> 8 * 18
+ fshift = fs - 15;
+ ms -= 16 + fshift;
+ xy = xyTB;
+ }
+ } else {
+ if (fs<16) {
+ // eg: 18 * 15 -> 15 * 15
+ vshift = vs - 15;
+ ms -= vshift;
+ xy = xyBB;
+ } else if (GGL_BETWEEN(fs, 24, 31)) {
+ // eg: 18 * 24 -> 15 * 8
+ vshift = vs - 15;
+ ms -= 16 + vshift;
+ xy = xyBT;
+ } else {
+ // eg: 18 * 18 -> (15 * 18)>>16
+ fshift = fs - 15;
+ ms -= 16 + fshift;
+ xy = yB; //XXX SMULWB
+ smulw = 1;
+ }
+ }
+
+ LOGE_IF(ms>=32, "mul_factor overflow vs=%d, fs=%d", vs, fs);
+
+ int vreg = v.reg;
+ int freg = f.reg;
+ if (vshift) {
+ MOV(AL, 0, d.reg, reg_imm(vreg, LSR, vshift));
+ vreg = d.reg;
+ }
+ if (fshift) {
+ MOV(AL, 0, d.reg, reg_imm(vreg, LSR, fshift));
+ freg = d.reg;
+ }
+ if (smulw) SMULW(AL, xy, d.reg, vreg, freg);
+ else SMUL(AL, xy, d.reg, vreg, freg);
+
+
+ d.h = ms;
+ if (mDithering) {
+ d.l = 0;
+ } else {
+ d.l = fs;
+ d.flags |= CLEAR_LO;
+ }
+}
+
+void GGLAssembler::mul_factor_add( component_t& d,
+ const integer_t& v,
+ const integer_t& f,
+ const component_t& a)
+{
+ // XXX: we could have special cases for 1 bit mul
+ Scratch scratches(registerFile());
+
+ int vs = v.size();
+ int fs = f.size();
+ int as = a.h;
+ int ms = vs+fs;
+
+ LOGE_IF(ms>=32, "mul_factor_add overflow vs=%d, fs=%d, as=%d", vs, fs, as);
+
+ integer_t add(a.reg, a.h, a.flags);
+
+ // 'a' is a component_t but it is guaranteed to have
+ // its high bits set to 0. However, in the dithering case,
+ // we can't get away with truncating the potentially bad bits,
+ // so extraction is needed.
+
+ if ((mDithering) && (a.size() < ms)) {
+ // we need to expand a
+ if (!(a.flags & CORRUPTIBLE)) {
+ // ... but it's not corruptible, so we need to pick a
+ // temporary register.
+ // Try to use the destination register first (it's likely
+ // to be usable, unless it aliases an input).
+ if (d.reg!=a.reg && d.reg!=v.reg && d.reg!=f.reg) {
+ add.reg = d.reg;
+ } else {
+ add.reg = scratches.obtain();
+ }
+ }
+ expand(add, a, ms); // extracts and expands
+ as = ms;
+ }
+
+ if (ms == as) {
+ if (vs<16 && fs<16) SMLABB(AL, d.reg, v.reg, f.reg, add.reg);
+ else MLA(AL, 0, d.reg, v.reg, f.reg, add.reg);
+ } else {
+ int temp = d.reg;
+ if (temp == add.reg) {
+ // the mul will modify add.reg, we need an intermediary reg
+ if (v.flags & CORRUPTIBLE) temp = v.reg;
+ else if (f.flags & CORRUPTIBLE) temp = f.reg;
+ else temp = scratches.obtain();
+ }
+
+ if (vs<16 && fs<16) SMULBB(AL, temp, v.reg, f.reg);
+ else MUL(AL, 0, temp, v.reg, f.reg);
+
+ if (ms>as) {
+ ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSL, ms-as));
+ } else if (ms<as) {
+ // not sure if we should expand the mul instead?
+ ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSR, as-ms));
+ }
+ }
+
+ d.h = ms;
+ if (mDithering) {
+ d.l = a.l;
+ } else {
+ d.l = fs>a.l ? fs : a.l;
+ d.flags |= CLEAR_LO;
+ }
+}
+
+void GGLAssembler::component_add(component_t& d,
+ const integer_t& dst, const integer_t& src)
+{
+ // here we're guaranteed that fragment.size() >= fb.size()
+ const int shift = src.size() - dst.size();
+ if (!shift) {
+ ADD(AL, 0, d.reg, src.reg, dst.reg);
+ } else {
+ ADD(AL, 0, d.reg, src.reg, reg_imm(dst.reg, LSL, shift));
+ }
+
+ d.h = src.size();
+ if (mDithering) {
+ d.l = 0;
+ } else {
+ d.l = shift;
+ d.flags |= CLEAR_LO;
+ }
+}
+
+void GGLAssembler::component_sat(const component_t& v)
+{
+ const int one = ((1<<v.size())-1)<<v.l;
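+ // (saturation example, hypothetical v.h=16, v.l=8: any value
+ // >= 0x10000 is replaced by "one" = 0xFF00, the largest
+ // representable component)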
+ CMP(AL, v.reg, imm( 1<<v.h ));
+ if (isValidImmediate(one)) {
+ MOV(HS, 0, v.reg, imm( one ));
+ } else if (isValidImmediate(~one)) {
+ MVN(HS, 0, v.reg, imm( ~one ));
+ } else {
+ MOV(HS, 0, v.reg, imm( 1<<v.h ));
+ SUB(HS, 0, v.reg, v.reg, imm( 1<<v.l ));
+ }
+}
+
+// ----------------------------------------------------------------------------
+
+}; // namespace android
+
diff --git a/libpixelflinger/codeflinger/disassem.c b/libpixelflinger/codeflinger/disassem.c
new file mode 100644
index 0000000..4676da0
--- /dev/null
+++ b/libpixelflinger/codeflinger/disassem.c
@@ -0,0 +1,702 @@
+/* $NetBSD: disassem.c,v 1.14 2003/03/27 16:58:36 mycroft Exp $ */
+
+/*-
+ * Copyright (c) 1996 Mark Brinicombe.
+ * Copyright (c) 1996 Brini.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Brini.
+ * 4. The name of the company nor the name of the author may be used to
+ * endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * RiscBSD kernel project
+ *
+ * db_disasm.c
+ *
+ * Kernel disassembler
+ *
+ * Created : 10/02/96
+ *
+ * Structured after the sparc/sparc/db_disasm.c by David S. Miller &
+ * Paul Kranenburg
+ *
+ * This code is not complete. Not all instructions are disassembled.
+ */
+
+#include <sys/cdefs.h>
+//__FBSDID("$FreeBSD: /repoman/r/ncvs/src/sys/arm/arm/disassem.c,v 1.2 2005/01/05 21:58:47 imp Exp $");
+#include <sys/param.h>
+#include <stdio.h>
+
+#include "disassem.h"
+#include "armreg.h"
+//#include <ddb/ddb.h>
+
+/*
+ * General instruction format
+ *
+ * insn[cc][mod] [operands]
+ *
+ * Those fields with an uppercase format code indicate that the field
+ * follows directly after the instruction, before the separator, i.e.
+ * they modify the instruction rather than just being an operand to
+ * the instruction. The only exception is the writeback flag, which
+ * follows an operand.
+ *
+ *
+ * 2 - print Operand 2 of a data processing instruction
+ * d - destination register (bits 12-15)
+ * n - n register (bits 16-19)
+ * s - s register (bits 8-11)
+ * o - indirect register rn (bits 16-19) (used by swap)
+ * m - m register (bits 0-3)
+ * a - address operand of ldr/str instruction
+ * e - address operand of ldrh/strh instruction
+ * l - register list for ldm/stm instruction
+ * f - 1st fp operand (register) (bits 12-14)
+ * g - 2nd fp operand (register) (bits 16-18)
+ * h - 3rd fp operand (register/immediate) (bits 0-4)
+ * b - branch address
+ * t - thumb branch address (bits 24, 0-23)
+ * k - breakpoint comment (bits 0-3, 8-19)
+ * X - block transfer type
+ * Y - block transfer type (r13 base)
+ * c - comment field bits(0-23)
+ * p - saved or current status register
+ * F - PSR transfer fields
+ * D - destination-is-r15 (P) flag on TST, TEQ, CMP, CMN
+ * L - co-processor transfer size
+ * S - set status flag
+ * P - fp precision
+ * Q - fp precision (for ldf/stf)
+ * R - fp rounding
+ * v - co-processor data transfer registers + addressing mode
+ * W - writeback flag
+ * x - instruction in hex
+ * # - co-processor number
+ * y - co-processor data processing registers
+ * z - co-processor register transfer registers
+ */
+
+struct arm32_insn {
+ u_int mask;
+ u_int pattern;
+ char* name;
+ char* format;
+};
+
+static const struct arm32_insn arm32_i[] = {
+ { 0x0fffffff, 0x0ff00000, "imb", "c" }, /* Before swi */
+ { 0x0fffffff, 0x0ff00001, "imbrange", "c" }, /* Before swi */
+ { 0x0f000000, 0x0f000000, "swi", "c" },
+ { 0xfe000000, 0xfa000000, "blx", "t" }, /* Before b and bl */
+ { 0x0f000000, 0x0a000000, "b", "b" },
+ { 0x0f000000, 0x0b000000, "bl", "b" },
+ { 0x0fe000f0, 0x00000090, "mul", "Snms" },
+ { 0x0fe000f0, 0x00200090, "mla", "Snmsd" },
+ { 0x0fe000f0, 0x00800090, "umull", "Sdnms" },
+ { 0x0fe000f0, 0x00c00090, "smull", "Sdnms" },
+ { 0x0fe000f0, 0x00a00090, "umlal", "Sdnms" },
+ { 0x0fe000f0, 0x00e00090, "smlal", "Sdnms" },
+ { 0x0d700000, 0x04200000, "strt", "daW" },
+ { 0x0d700000, 0x04300000, "ldrt", "daW" },
+ { 0x0d700000, 0x04600000, "strbt", "daW" },
+ { 0x0d700000, 0x04700000, "ldrbt", "daW" },
+ { 0x0c500000, 0x04000000, "str", "daW" },
+ { 0x0c500000, 0x04100000, "ldr", "daW" },
+ { 0x0c500000, 0x04400000, "strb", "daW" },
+ { 0x0c500000, 0x04500000, "ldrb", "daW" },
+ { 0x0e1f0000, 0x080d0000, "stm", "YnWl" },/* separate out r13 base */
+ { 0x0e1f0000, 0x081d0000, "ldm", "YnWl" },/* separate out r13 base */
+ { 0x0e100000, 0x08000000, "stm", "XnWl" },
+ { 0x0e100000, 0x08100000, "ldm", "XnWl" },
+ { 0x0e1000f0, 0x00100090, "ldrb", "deW" },
+ { 0x0e1000f0, 0x00000090, "strb", "deW" },
+ { 0x0e1000f0, 0x001000d0, "ldrsb", "deW" },
+ { 0x0e1000f0, 0x001000b0, "ldrh", "deW" },
+ { 0x0e1000f0, 0x000000b0, "strh", "deW" },
+ { 0x0e1000f0, 0x001000f0, "ldrsh", "deW" },
+ { 0x0f200090, 0x00200090, "und", "x" }, /* Before data processing */
+ { 0x0e1000d0, 0x000000d0, "und", "x" }, /* Before data processing */
+ { 0x0ff00ff0, 0x01000090, "swp", "dmo" },
+ { 0x0ff00ff0, 0x01400090, "swpb", "dmo" },
+ { 0x0fbf0fff, 0x010f0000, "mrs", "dp" }, /* Before data processing */
+ { 0x0fb0fff0, 0x0120f000, "msr", "pFm" },/* Before data processing */
+ { 0x0fb0f000, 0x0320f000, "msr", "pF2" },/* Before data processing */
+ { 0x0ffffff0, 0x012fff10, "bx", "m" },
+ { 0x0fff0ff0, 0x016f0f10, "clz", "dm" },
+ { 0x0ffffff0, 0x012fff30, "blx", "m" },
+ { 0xfff000f0, 0xe1200070, "bkpt", "k" },
+ { 0x0de00000, 0x00000000, "and", "Sdn2" },
+ { 0x0de00000, 0x00200000, "eor", "Sdn2" },
+ { 0x0de00000, 0x00400000, "sub", "Sdn2" },
+ { 0x0de00000, 0x00600000, "rsb", "Sdn2" },
+ { 0x0de00000, 0x00800000, "add", "Sdn2" },
+ { 0x0de00000, 0x00a00000, "adc", "Sdn2" },
+ { 0x0de00000, 0x00c00000, "sbc", "Sdn2" },
+ { 0x0de00000, 0x00e00000, "rsc", "Sdn2" },
+ { 0x0df00000, 0x01100000, "tst", "Dn2" },
+ { 0x0df00000, 0x01300000, "teq", "Dn2" },
+ { 0x0df00000, 0x01500000, "cmp", "Dn2" },
+ { 0x0df00000, 0x01700000, "cmn", "Dn2" },
+ { 0x0de00000, 0x01800000, "orr", "Sdn2" },
+ { 0x0de00000, 0x01a00000, "mov", "Sd2" },
+ { 0x0de00000, 0x01c00000, "bic", "Sdn2" },
+ { 0x0de00000, 0x01e00000, "mvn", "Sd2" },
+ { 0x0ff08f10, 0x0e000100, "adf", "PRfgh" },
+ { 0x0ff08f10, 0x0e100100, "muf", "PRfgh" },
+ { 0x0ff08f10, 0x0e200100, "suf", "PRfgh" },
+ { 0x0ff08f10, 0x0e300100, "rsf", "PRfgh" },
+ { 0x0ff08f10, 0x0e400100, "dvf", "PRfgh" },
+ { 0x0ff08f10, 0x0e500100, "rdf", "PRfgh" },
+ { 0x0ff08f10, 0x0e600100, "pow", "PRfgh" },
+ { 0x0ff08f10, 0x0e700100, "rpw", "PRfgh" },
+ { 0x0ff08f10, 0x0e800100, "rmf", "PRfgh" },
+ { 0x0ff08f10, 0x0e900100, "fml", "PRfgh" },
+ { 0x0ff08f10, 0x0ea00100, "fdv", "PRfgh" },
+ { 0x0ff08f10, 0x0eb00100, "frd", "PRfgh" },
+ { 0x0ff08f10, 0x0ec00100, "pol", "PRfgh" },
+ { 0x0f008f10, 0x0e000100, "fpbop", "PRfgh" },
+ { 0x0ff08f10, 0x0e008100, "mvf", "PRfh" },
+ { 0x0ff08f10, 0x0e108100, "mnf", "PRfh" },
+ { 0x0ff08f10, 0x0e208100, "abs", "PRfh" },
+ { 0x0ff08f10, 0x0e308100, "rnd", "PRfh" },
+ { 0x0ff08f10, 0x0e408100, "sqt", "PRfh" },
+ { 0x0ff08f10, 0x0e508100, "log", "PRfh" },
+ { 0x0ff08f10, 0x0e608100, "lgn", "PRfh" },
+ { 0x0ff08f10, 0x0e708100, "exp", "PRfh" },
+ { 0x0ff08f10, 0x0e808100, "sin", "PRfh" },
+ { 0x0ff08f10, 0x0e908100, "cos", "PRfh" },
+ { 0x0ff08f10, 0x0ea08100, "tan", "PRfh" },
+ { 0x0ff08f10, 0x0eb08100, "asn", "PRfh" },
+ { 0x0ff08f10, 0x0ec08100, "acs", "PRfh" },
+ { 0x0ff08f10, 0x0ed08100, "atn", "PRfh" },
+ { 0x0f008f10, 0x0e008100, "fpuop", "PRfh" },
+ { 0x0e100f00, 0x0c000100, "stf", "QLv" },
+ { 0x0e100f00, 0x0c100100, "ldf", "QLv" },
+ { 0x0ff00f10, 0x0e000110, "flt", "PRgd" },
+ { 0x0ff00f10, 0x0e100110, "fix", "PRdh" },
+ { 0x0ff00f10, 0x0e200110, "wfs", "d" },
+ { 0x0ff00f10, 0x0e300110, "rfs", "d" },
+ { 0x0ff00f10, 0x0e400110, "wfc", "d" },
+ { 0x0ff00f10, 0x0e500110, "rfc", "d" },
+ { 0x0ff0ff10, 0x0e90f110, "cmf", "PRgh" },
+ { 0x0ff0ff10, 0x0eb0f110, "cnf", "PRgh" },
+ { 0x0ff0ff10, 0x0ed0f110, "cmfe", "PRgh" },
+ { 0x0ff0ff10, 0x0ef0f110, "cnfe", "PRgh" },
+ { 0xff100010, 0xfe000010, "mcr2", "#z" },
+ { 0x0f100010, 0x0e000010, "mcr", "#z" },
+ { 0xff100010, 0xfe100010, "mrc2", "#z" },
+ { 0x0f100010, 0x0e100010, "mrc", "#z" },
+ { 0xff000010, 0xfe000000, "cdp2", "#y" },
+ { 0x0f000010, 0x0e000000, "cdp", "#y" },
+ { 0xfe100090, 0xfc100000, "ldc2", "L#v" },
+ { 0x0e100090, 0x0c100000, "ldc", "L#v" },
+ { 0xfe100090, 0xfc000000, "stc2", "L#v" },
+ { 0x0e100090, 0x0c000000, "stc", "L#v" },
+ { 0xf550f000, 0xf550f000, "pld", "ne" },
+ { 0x0ff00ff0, 0x01000050, "qadd", "dmn" },
+ { 0x0ff00ff0, 0x01400050, "qdadd", "dmn" },
+ { 0x0ff00ff0, 0x01600050, "qdsub", "dmn" },
+ { 0x0ff00ff0, 0x01200050, "qsub", "dmn" },
+ { 0x0ff000f0, 0x01000080, "smlabb", "nmsd" }, // d & n inverted!!
+ { 0x0ff000f0, 0x010000a0, "smlatb", "nmsd" }, // d & n inverted!!
+ { 0x0ff000f0, 0x010000c0, "smlabt", "nmsd" }, // d & n inverted!!
+ { 0x0ff000f0, 0x010000e0, "smlatt", "nmsd" }, // d & n inverted!!
+ { 0x0ff000f0, 0x01400080, "smlalbb","ndms" }, // d & n inverted!!
+ { 0x0ff000f0, 0x014000a0, "smlaltb","ndms" }, // d & n inverted!!
+ { 0x0ff000f0, 0x014000c0, "smlalbt","ndms" }, // d & n inverted!!
+ { 0x0ff000f0, 0x014000e0, "smlaltt","ndms" }, // d & n inverted!!
+ { 0x0ff000f0, 0x01200080, "smlawb", "nmsd" }, // d & n inverted!!
+ { 0x0ff0f0f0, 0x012000a0, "smulwb","nms" }, // d & n inverted!!
+ { 0x0ff000f0, 0x012000c0, "smlawt", "nmsd" }, // d & n inverted!!
+ { 0x0ff0f0f0, 0x012000e0, "smulwt","nms" }, // d & n inverted!!
+ { 0x0ff0f0f0, 0x01600080, "smulbb","nms" }, // d & n inverted!!
+ { 0x0ff0f0f0, 0x016000a0, "smultb","nms" }, // d & n inverted!!
+ { 0x0ff0f0f0, 0x016000c0, "smulbt","nms" }, // d & n inverted!!
+ { 0x0ff0f0f0, 0x016000e0, "smultt","nms" }, // d & n inverted!!
+ { 0x00000000, 0x00000000, NULL, NULL }
+};
+
+static char const arm32_insn_conditions[][4] = {
+ "eq", "ne", "cs", "cc",
+ "mi", "pl", "vs", "vc",
+ "hi", "ls", "ge", "lt",
+ "gt", "le", "", "nv"
+};
+
+static char const insn_block_transfers[][4] = {
+ "da", "ia", "db", "ib"
+};
+
+static char const insn_stack_block_transfers[][4] = {
+ "ed", "ea", "fd", "fa"
+};
+
+static char const op_shifts[][4] = {
+ "lsl", "lsr", "asr", "ror"
+};
+
+static char const insn_fpa_rounding[][2] = {
+ "", "p", "m", "z"
+};
+
+static char const insn_fpa_precision[][2] = {
+ "s", "d", "e", "p"
+};
+
+static char const insn_fpaconstants[][8] = {
+ "0.0", "1.0", "2.0", "3.0",
+ "4.0", "5.0", "0.5", "10.0"
+};
+
+#define insn_condition(x) arm32_insn_conditions[(x >> 28) & 0x0f]
+#define insn_blktrans(x) insn_block_transfers[(x >> 23) & 3]
+#define insn_stkblktrans(x) insn_stack_block_transfers[(x >> 23) & 3]
+#define op2_shift(x) op_shifts[(x >> 5) & 3]
+#define insn_fparnd(x) insn_fpa_rounding[(x >> 5) & 0x03]
+#define insn_fpaprec(x) insn_fpa_precision[(((x >> 18) & 2)|(x >> 7)) & 1]
+#define insn_fpaprect(x) insn_fpa_precision[(((x >> 21) & 2)|(x >> 15)) & 1]
+#define insn_fpaimm(x) insn_fpaconstants[x & 0x07]
+
+/* Local prototypes */
+static void disasm_register_shift(const disasm_interface_t *di, u_int insn);
+static void disasm_print_reglist(const disasm_interface_t *di, u_int insn);
+static void disasm_insn_ldrstr(const disasm_interface_t *di, u_int insn,
+ u_int loc);
+static void disasm_insn_ldrhstrh(const disasm_interface_t *di, u_int insn,
+ u_int loc);
+static void disasm_insn_ldcstc(const disasm_interface_t *di, u_int insn,
+ u_int loc);
+static u_int disassemble_readword(u_int address);
+static void disassemble_printaddr(u_int address);
+
+u_int
+disasm(const disasm_interface_t *di, u_int loc, int altfmt)
+{
+ const struct arm32_insn *i_ptr = &arm32_i[0];
+
+ u_int insn;
+ int matchp;
+ int branch;
+ char* f_ptr;
+ int fmt;
+
+ fmt = 0;
+ matchp = 0;
+ insn = di->di_readword(loc);
+
+/* di->di_printf("loc=%08x insn=%08x : ", loc, insn);*/
+
+ while (i_ptr->name) {
+ if ((insn & i_ptr->mask) == i_ptr->pattern) {
+ matchp = 1;
+ break;
+ }
+ i_ptr++;
+ }
+
+ if (!matchp) {
+ di->di_printf("und%s\t%08x\n", insn_condition(insn), insn);
+ return(loc + INSN_SIZE);
+ }
+
+ /* If instruction forces condition code, don't print it. */
+ if ((i_ptr->mask & 0xf0000000) == 0xf0000000)
+ di->di_printf("%s", i_ptr->name);
+ else
+ di->di_printf("%s%s", i_ptr->name, insn_condition(insn));
+
+ f_ptr = i_ptr->format;
+
+ /* Insert tab if there are no instruction modifiers */
+
+ if (*(f_ptr) < 'A' || *(f_ptr) > 'Z') {
+ ++fmt;
+ di->di_printf("\t");
+ }
+
+ while (*f_ptr) {
+ switch (*f_ptr) {
+ /* 2 - print Operand 2 of a data processing instruction */
+ case '2':
+ if (insn & 0x02000000) {
+ int rotate= ((insn >> 7) & 0x1e);
+
+ di->di_printf("#0x%08x",
+ (insn & 0xff) << (32 - rotate) |
+ (insn & 0xff) >> rotate);
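+ /* e.g. low 12 bits = 0x2ff: imm8=0xff, rotate=4,
+ printed as #0xf000000f (0xff rotated right by 4) */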
+ } else {
+ disasm_register_shift(di, insn);
+ }
+ break;
+ /* d - destination register (bits 12-15) */
+ case 'd':
+ di->di_printf("r%d", ((insn >> 12) & 0x0f));
+ break;
+ /* D - insert 'p' if Rd is R15 */
+ case 'D':
+ if (((insn >> 12) & 0x0f) == 15)
+ di->di_printf("p");
+ break;
+ /* n - n register (bits 16-19) */
+ case 'n':
+ di->di_printf("r%d", ((insn >> 16) & 0x0f));
+ break;
+ /* s - s register (bits 8-11) */
+ case 's':
+ di->di_printf("r%d", ((insn >> 8) & 0x0f));
+ break;
+ /* o - indirect register rn (bits 16-19) (used by swap) */
+ case 'o':
+ di->di_printf("[r%d]", ((insn >> 16) & 0x0f));
+ break;
+ /* m - m register (bits 0-4) */
+ case 'm':
+ di->di_printf("r%d", ((insn >> 0) & 0x0f));
+ break;
+ /* a - address operand of ldr/str instruction */
+ case 'a':
+ disasm_insn_ldrstr(di, insn, loc);
+ break;
+ /* e - address operand of ldrh/strh instruction */
+ case 'e':
+ disasm_insn_ldrhstrh(di, insn, loc);
+ break;
+ /* l - register list for ldm/stm instruction */
+ case 'l':
+ disasm_print_reglist(di, insn);
+ break;
+ /* f - 1st fp operand (register) (bits 12-14) */
+ case 'f':
+ di->di_printf("f%d", (insn >> 12) & 7);
+ break;
+ /* g - 2nd fp operand (register) (bits 16-18) */
+ case 'g':
+ di->di_printf("f%d", (insn >> 16) & 7);
+ break;
+ /* h - 3rd fp operand (register/immediate) (bits 0-4) */
+ case 'h':
+ if (insn & (1 << 3))
+ di->di_printf("#%s", insn_fpaimm(insn));
+ else
+ di->di_printf("f%d", insn & 7);
+ break;
+ /* b - branch address */
+ case 'b':
+ branch = ((insn << 2) & 0x03ffffff);
+ if (branch & 0x02000000)
+ branch |= 0xfc000000;
+ di->di_printaddr(loc + 8 + branch);
+ break;
+ /* t - blx address */
+ case 't':
+ branch = ((insn << 2) & 0x03ffffff) |
+ (insn >> 23 & 0x00000002);
+ if (branch & 0x02000000)
+ branch |= 0xfc000000;
+ di->di_printaddr(loc + 8 + branch);
+ break;
+ /* X - block transfer type */
+ case 'X':
+ di->di_printf("%s", insn_blktrans(insn));
+ break;
+ /* Y - block transfer type (r13 base) */
+ case 'Y':
+ di->di_printf("%s", insn_stkblktrans(insn));
+ break;
+ /* c - comment field bits(0-23) */
+ case 'c':
+ di->di_printf("0x%08x", (insn & 0x00ffffff));
+ break;
+ /* k - breakpoint comment (bits 0-3, 8-19) */
+ case 'k':
+ di->di_printf("0x%04x",
+ (insn & 0x000fff00) >> 4 | (insn & 0x0000000f));
+ break;
+ /* p - saved or current status register */
+ case 'p':
+ if (insn & 0x00400000)
+ di->di_printf("spsr");
+ else
+ di->di_printf("cpsr");
+ break;
+ /* F - PSR transfer fields */
+ case 'F':
+ di->di_printf("_");
+ if (insn & (1 << 16))
+ di->di_printf("c");
+ if (insn & (1 << 17))
+ di->di_printf("x");
+ if (insn & (1 << 18))
+ di->di_printf("s");
+ if (insn & (1 << 19))
+ di->di_printf("f");
+ break;
+ /* B - byte transfer flag */
+ case 'B':
+ if (insn & 0x00400000)
+ di->di_printf("b");
+ break;
+ /* L - co-processor transfer size */
+ case 'L':
+ if (insn & (1 << 22))
+ di->di_printf("l");
+ break;
+ /* S - set status flag */
+ case 'S':
+ if (insn & 0x00100000)
+ di->di_printf("s");
+ break;
+ /* P - fp precision */
+ case 'P':
+ di->di_printf("%s", insn_fpaprec(insn));
+ break;
+ /* Q - fp precision (for ldf/stf) */
+ case 'Q':
+ break;
+ /* R - fp rounding */
+ case 'R':
+ di->di_printf("%s", insn_fparnd(insn));
+ break;
+ /* W - writeback flag */
+ case 'W':
+ if (insn & (1 << 21))
+ di->di_printf("!");
+ break;
+ /* # - co-processor number */
+ case '#':
+ di->di_printf("p%d", (insn >> 8) & 0x0f);
+ break;
+ /* v - co-processor data transfer registers+addressing mode */
+ case 'v':
+ disasm_insn_ldcstc(di, insn, loc);
+ break;
+ /* x - instruction in hex */
+ case 'x':
+ di->di_printf("0x%08x", insn);
+ break;
+ /* y - co-processor data processing registers */
+ case 'y':
+ di->di_printf("%d, ", (insn >> 20) & 0x0f);
+
+ di->di_printf("c%d, c%d, c%d", (insn >> 12) & 0x0f,
+ (insn >> 16) & 0x0f, insn & 0x0f);
+
+ di->di_printf(", %d", (insn >> 5) & 0x07);
+ break;
+ /* z - co-processor register transfer registers */
+ case 'z':
+ di->di_printf("%d, ", (insn >> 21) & 0x07);
+ di->di_printf("r%d, c%d, c%d, %d",
+ (insn >> 12) & 0x0f, (insn >> 16) & 0x0f,
+ insn & 0x0f, (insn >> 5) & 0x07);
+
+/* if (((insn >> 5) & 0x07) != 0)
+ di->di_printf(", %d", (insn >> 5) & 0x07);*/
+ break;
+ default:
+ di->di_printf("[%c - unknown]", *f_ptr);
+ break;
+ }
+ if (*(f_ptr+1) >= 'A' && *(f_ptr+1) <= 'Z')
+ ++f_ptr;
+ else if (*(++f_ptr)) {
+ ++fmt;
+ if (fmt == 1)
+ di->di_printf("\t");
+ else
+ di->di_printf(", ");
+ }
+ };
+
+ di->di_printf("\n");
+
+ return(loc + INSN_SIZE);
+}
+
+
+static void
+disasm_register_shift(const disasm_interface_t *di, u_int insn)
+{
+ di->di_printf("r%d", (insn & 0x0f));
+ if ((insn & 0x00000ff0) == 0)
+ ;
+ else if ((insn & 0x00000ff0) == 0x00000060)
+ di->di_printf(", rrx");
+ else {
+ if (insn & 0x10)
+ di->di_printf(", %s r%d", op2_shift(insn),
+ (insn >> 8) & 0x0f);
+ else
+ di->di_printf(", %s #%d", op2_shift(insn),
+ (insn >> 7) & 0x1f);
+ }
+}
+
+
+static void
+disasm_print_reglist(const disasm_interface_t *di, u_int insn)
+{
+ int loop;
+ int start;
+ int comma;
+
+ di->di_printf("{");
+ start = -1;
+ comma = 0;
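+ /* e.g. a register mask of 0x800f prints "{r0-r3, r15}" */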
+
+ for (loop = 0; loop < 17; ++loop) {
+ if (start != -1) {
+ if (loop == 16 || !(insn & (1 << loop))) {
+ if (comma)
+ di->di_printf(", ");
+ else
+ comma = 1;
+ if (start == loop - 1)
+ di->di_printf("r%d", start);
+ else
+ di->di_printf("r%d-r%d", start, loop - 1);
+ start = -1;
+ }
+ } else {
+ if (insn & (1 << loop))
+ start = loop;
+ }
+ }
+ di->di_printf("}");
+
+ if (insn & (1 << 22))
+ di->di_printf("^");
+}
+
+static void
+disasm_insn_ldrstr(const disasm_interface_t *di, u_int insn, u_int loc)
+{
+ int offset;
+
+ offset = insn & 0xfff;
+ if ((insn & 0x032f0000) == 0x010f0000) {
+ /* rA = pc, immediate index */
+ if (insn & 0x00800000)
+ loc += offset;
+ else
+ loc -= offset;
+ di->di_printaddr(loc + 8);
+ } else {
+ di->di_printf("[r%d", (insn >> 16) & 0x0f);
+ if ((insn & 0x03000fff) != 0x01000000) {
+ di->di_printf("%s, ", (insn & (1 << 24)) ? "" : "]");
+ if (!(insn & 0x00800000))
+ di->di_printf("-");
+ if (insn & (1 << 25))
+ disasm_register_shift(di, insn);
+ else
+ di->di_printf("#0x%03x", offset);
+ }
+ if (insn & (1 << 24))
+ di->di_printf("]");
+ }
+}
+
+static void
+disasm_insn_ldrhstrh(const disasm_interface_t *di, u_int insn, u_int loc)
+{
+ int offset;
+
+ offset = ((insn & 0xf00) >> 4) | (insn & 0xf);
+ if ((insn & 0x004f0000) == 0x004f0000) {
+ /* rA = pc, immediate index */
+ if (insn & 0x00800000)
+ loc += offset;
+ else
+ loc -= offset;
+ di->di_printaddr(loc + 8);
+ } else {
+ di->di_printf("[r%d", (insn >> 16) & 0x0f);
+ if ((insn & 0x01400f0f) != 0x01400000) {
+ di->di_printf("%s, ", (insn & (1 << 24)) ? "" : "]");
+ if (!(insn & 0x00800000))
+ di->di_printf("-");
+ if (insn & (1 << 22))
+ di->di_printf("#0x%02x", offset);
+ else
+ di->di_printf("r%d", (insn & 0x0f));
+ }
+ if (insn & (1 << 24))
+ di->di_printf("]");
+ }
+}
+
+static void
+disasm_insn_ldcstc(const disasm_interface_t *di, u_int insn, u_int loc)
+{
+ if (((insn >> 8) & 0xf) == 1)
+ di->di_printf("f%d, ", (insn >> 12) & 0x07);
+ else
+ di->di_printf("c%d, ", (insn >> 12) & 0x0f);
+
+ di->di_printf("[r%d", (insn >> 16) & 0x0f);
+
+ di->di_printf("%s, ", (insn & (1 << 24)) ? "" : "]");
+
+ if (!(insn & (1 << 23)))
+ di->di_printf("-");
+
+ di->di_printf("#0x%03x", (insn & 0xff) << 2);
+
+ if (insn & (1 << 24))
+ di->di_printf("]");
+
+ if (insn & (1 << 21))
+ di->di_printf("!");
+}
+
+static u_int
+disassemble_readword(u_int address)
+{
+ return(*((u_int *)address));
+}
+
+static void
+disassemble_printaddr(u_int address)
+{
+ printf("0x%08x", address);
+}
+
+static const disasm_interface_t disassemble_di = {
+ disassemble_readword, disassemble_printaddr, printf
+};
+
+void
+disassemble(u_int address)
+{
+
+ (void)disasm(&disassemble_di, address, 0);
+}
+
+/* End of disassem.c */
diff --git a/libpixelflinger/codeflinger/disassem.h b/libpixelflinger/codeflinger/disassem.h
new file mode 100644
index 0000000..02747cd
--- /dev/null
+++ b/libpixelflinger/codeflinger/disassem.h
@@ -0,0 +1,65 @@
+/* $NetBSD: disassem.h,v 1.4 2001/03/04 04:15:58 matt Exp $ */
+
+/*-
+ * Copyright (c) 1997 Mark Brinicombe.
+ * Copyright (c) 1997 Causality Limited.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Mark Brinicombe.
+ * 4. The name of the company nor the name of the author may be used to
+ * endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Define the interface structure required by the disassembler.
+ *
+ * $FreeBSD: /repoman/r/ncvs/src/sys/arm/include/disassem.h,v 1.2 2005/01/05 21:58:48 imp Exp $
+ */
+
+#ifndef ANDROID_MACHINE_DISASSEM_H
+#define ANDROID_MACHINE_DISASSEM_H
+
+#include <sys/types.h>
+
+#if __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+ u_int (*di_readword)(u_int);
+ void (*di_printaddr)(u_int);
+ void (*di_printf)(const char *, ...);
+} disasm_interface_t;
+
+/* Prototypes for callable functions */
+
+u_int disasm(const disasm_interface_t *, u_int, int);
+void disassemble(u_int);
+
+#if __cplusplus
+}
+#endif
+
+#endif /* !ANDROID_MACHINE_DISASSEM_H */
diff --git a/libpixelflinger/codeflinger/load_store.cpp b/libpixelflinger/codeflinger/load_store.cpp
new file mode 100644
index 0000000..93c5825
--- /dev/null
+++ b/libpixelflinger/codeflinger/load_store.cpp
@@ -0,0 +1,378 @@
+/* libs/pixelflinger/codeflinger/load_store.cpp
+**
+** Copyright 2006, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+** http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+#include <assert.h>
+#include <stdio.h>
+#include <cutils/log.h>
+
+#include "codeflinger/GGLAssembler.h"
+
+namespace android {
+
+// ----------------------------------------------------------------------------
+
+void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
+{
+ const int bits = addr.size;
+ const int inc = (flags & WRITE_BACK)?1:0;
+ switch (bits) {
+ case 32:
+ if (inc) STR(AL, s.reg, addr.reg, immed12_post(4));
+ else STR(AL, s.reg, addr.reg);
+ break;
+ case 24:
+ // 24-bit formats are a little special; they're used only for RGB.
+ // 0x00BBGGRR is unpacked as R,G,B
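+ // (e.g. with a hypothetical s = 0x00BBGGRR: STRB stores RR, each
+ // ROR by 8 brings the next component into the low byte for the
+ // following STRB, and the final ROR by 16 restores the register
+ // when it is not corruptible)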
+ STRB(AL, s.reg, addr.reg, immed12_pre(0));
+ MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
+ STRB(AL, s.reg, addr.reg, immed12_pre(1));
+ MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
+ STRB(AL, s.reg, addr.reg, immed12_pre(2));
+ if (!(s.flags & CORRUPTIBLE)) {
+ MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
+ }
+ if (inc)
+ ADD(AL, 0, addr.reg, addr.reg, imm(3));
+ break;
+ case 16:
+ if (inc) STRH(AL, s.reg, addr.reg, immed8_post(2));
+ else STRH(AL, s.reg, addr.reg);
+ break;
+ case 8:
+ if (inc) STRB(AL, s.reg, addr.reg, immed12_post(1));
+ else STRB(AL, s.reg, addr.reg);
+ break;
+ }
+}
+
+void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
+{
+ Scratch scratches(registerFile());
+ int s0;
+
+ const int bits = addr.size;
+ const int inc = (flags & WRITE_BACK)?1:0;
+ switch (bits) {
+ case 32:
+ if (inc) LDR(AL, s.reg, addr.reg, immed12_post(4));
+ else LDR(AL, s.reg, addr.reg);
+ break;
+ case 24:
+ // 24-bit formats are a little special; they're used only for RGB.
+ // R,G,B is packed as 0x00BBGGRR
+ s0 = scratches.obtain();
+ if (s.reg != addr.reg) {
+ LDRB(AL, s.reg, addr.reg, immed12_pre(0)); // R
+ LDRB(AL, s0, addr.reg, immed12_pre(1)); // G
+ ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
+ LDRB(AL, s0, addr.reg, immed12_pre(2)); // B
+ ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
+ } else {
+ int s1 = scratches.obtain();
+ LDRB(AL, s1, addr.reg, immed12_pre(0)); // R
+ LDRB(AL, s0, addr.reg, immed12_pre(1)); // G
+ ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
+ LDRB(AL, s0, addr.reg, immed12_pre(2)); // B
+ ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
+ }
+ if (inc)
+ ADD(AL, 0, addr.reg, addr.reg, imm(3));
+ break;
+ case 16:
+ if (inc) LDRH(AL, s.reg, addr.reg, immed8_post(2));
+ else LDRH(AL, s.reg, addr.reg);
+ break;
+ case 8:
+ if (inc) LDRB(AL, s.reg, addr.reg, immed12_post(1));
+ else LDRB(AL, s.reg, addr.reg);
+ break;
+ }
+}
+
+void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
+{
+ const int maskLen = h-l;
+
+ assert(maskLen<=8);
+ assert(h);
+
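+ // Extracts bits [l, h) of the packed value in 's' into d. For
+ // example, with a hypothetical 565 source, green has h=11, l=5:
+ // we AND with 0x07E0 and shift right by 5, leaving a 6-bit
+ // component.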
+ if (h != bits) {
+ const int mask = ((1<<maskLen)-1) << l;
+ if (isValidImmediate(mask)) {
+ AND(AL, 0, d.reg, s, imm(mask)); // component = packed & mask;
+ } else if (isValidImmediate(~mask)) {
+ BIC(AL, 0, d.reg, s, imm(~mask)); // component = packed & mask;
+ } else {
+ MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
+ l += 32-h;
+ h = 32;
+ }
+ s = d.reg;
+ }
+
+ if (l) {
+ MOV(AL, 0, d.reg, reg_imm(s, LSR, l)); // component = packed >> l;
+ s = d.reg;
+ }
+
+ if (s != d.reg) {
+ MOV(AL, 0, d.reg, s);
+ }
+
+ d.s = maskLen;
+}
+
+void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
+{
+ extract(d, s.reg,
+ s.format.c[component].h,
+ s.format.c[component].l,
+ s.size());
+}
+
+void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
+{
+ integer_t r(d.reg, 32, d.flags);
+ extract(r, s.reg,
+ s.format.c[component].h,
+ s.format.c[component].l,
+ s.size());
+ d = component_t(r);
+}
+
+
+void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
+{
+ if (s.l || (s.flags & CLEAR_HI)) {
+ extract(d, s.reg, s.h, s.l, 32);
+ expand(d, d, dbits);
+ } else {
+ expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
+ }
+}
+
+void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
+{
+ integer_t r(d.reg, 32, d.flags);
+ expand(r, s, dbits);
+ d = component_t(r);
+}
+
+void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
+{
+ assert(src.size());
+
+ int sbits = src.size();
+ int s = src.reg;
+ int d = dst.reg;
+
+ // be sure to set 'dst' after we read 'src' as they may be identical
+ dst.s = dbits;
+ dst.flags = 0;
+
+ if (dbits<=sbits) {
+ if (s != d) {
+ MOV(AL, 0, d, s);
+ }
+ return;
+ }
+
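+ // The replication below makes the scaling exact: e.g. expanding
+ // 5 bits to 10 (illustrative sizes), d = (s<<5)|s maps 0b11111
+ // to 0b1111111111, i.e. multiplies by (2^10-1)/(2^5-1) = 33.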
+ if (sbits == 1) {
+ RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
+ // d = (s<<dbits) - s;
+ return;
+ }
+
+ if (dbits % sbits) {
+ MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
+ // d = s << (dbits-sbits);
+ dbits -= sbits;
+ do {
+ ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
+ // d |= d >> sbits;
+ dbits -= sbits;
+ sbits *= 2;
+ } while(dbits>0);
+ return;
+ }
+
+ dbits -= sbits;
+ do {
+ ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
+ // d |= d<<sbits;
+ s = d;
+ dbits -= sbits;
+ if (sbits*2 < dbits) {
+ sbits *= 2;
+ }
+ } while(dbits>0);
+}
+
+void GGLAssembler::downshift(
+ pixel_t& d, int component, component_t s, const reg_t& dither)
+{
+ const needs_t& needs = mBuilderContext.needs;
+ Scratch scratches(registerFile());
+
+ int sh = s.h;
+ int sl = s.l;
+ int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
+ int maskLoBits = (sl!=0) ? ((s.flags & CLEAR_LO)?1:0) : 0;
+ int sbits = sh - sl;
+
+ int dh = d.format.c[component].h;
+ int dl = d.format.c[component].l;
+ int dbits = dh - dl;
+ int dithering = 0;
+
+ LOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);
+
+ if (sbits>dbits) {
+ // see if we need to dither
+ dithering = mDithering;
+ }
+
+ int ireg = d.reg;
+ if (!(d.flags & FIRST)) {
+ if (s.flags & CORRUPTIBLE) {
+ ireg = s.reg;
+ } else {
+ ireg = scratches.obtain();
+ }
+ }
+ d.flags &= ~FIRST;
+
+ if (maskHiBits) {
+ // we need to mask the high bits (and possibly the low bits too),
+ // and we might be able to use an immediate mask.
+ if (!dithering) {
+ // we don't do this if we only have maskLoBits because we can
+ // do it more efficiently below (in the case where dl=0)
+ const int offset = sh - dbits;
+ if (dbits<=8 && offset >= 0) {
+ const uint32_t mask = ((1<<dbits)-1) << offset;
+ if (isValidImmediate(mask) || isValidImmediate(~mask)) {
+ build_and_immediate(ireg, s.reg, mask, 32);
+ sl = offset;
+ s.reg = ireg;
+ sbits = dbits;
+ maskLoBits = maskHiBits = 0;
+ }
+ }
+ } else {
+ // in the dithering case though, we need to preserve the lower bits
+ const uint32_t mask = ((1<<sbits)-1) << sl;
+ if (isValidImmediate(mask) || isValidImmediate(~mask)) {
+ build_and_immediate(ireg, s.reg, mask, 32);
+ s.reg = ireg;
+ maskLoBits = maskHiBits = 0;
+ }
+ }
+ }
+
+ // XXX: we could special case (maskHiBits & !maskLoBits)
+ // like we do for maskLoBits below, but it is very rare to have
+ // maskHiBits only together with the conditions necessary to lead
+ // to better code (like doing d |= s << 24)
+
+ if (maskHiBits) {
+ MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
+ sl += 32-sh;
+ sh = 32;
+ s.reg = ireg;
+ maskHiBits = 0;
+ }
+
+ // Downsampling should be performed as follows:
+ // V * ((1<<dbits)-1) / ((1<<sbits)-1)
+ // V * [(1<<dbits)/((1<<sbits)-1) - 1/((1<<sbits)-1)]
+ // V * [1/((1<<sbits)-1)>>dbits - 1/((1<<sbits)-1)]
+ // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits
+ // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits))
+ //
+ // By approximating (1>>dbits) and (1>>sbits) to 0:
+ //
+ // V>>(sbits-dbits) - V>>sbits
+ //
+ // A good approximation is V>>(sbits-dbits),
+ // but better one (needed for dithering) is:
+ //
+ // (V>>(sbits-dbits)<<sbits - V)>>sbits
+ // (V<<dbits - V)>>sbits
+ // (V - V>>dbits)>>(sbits-dbits)
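+ //
+ // e.g. (illustrative sizes) sbits=8, dbits=5, V=0xFF:
+ // (V - (V>>5))>>3 = (255-7)>>3 = 31, the exact result of
+ // 255*31/255; the fraction bits discarded by the final shift
+ // are what the dither value gets added to.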
+
+ // Dithering is done here
+ if (dithering) {
+ comment("dithering");
+ if (sl) {
+ MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
+ sh -= sl;
+ sl = 0;
+ s.reg = ireg;
+ }
+ // scaling (V-V>>dbits)
+ SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
+ const int shift = (GGL_DITHER_BITS - (sbits-dbits));
+ if (shift>0) ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
+ else if (shift<0) ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
+ else ADD(AL, 0, ireg, ireg, dither.reg);
+ s.reg = ireg;
+ }
+
+ if ((maskLoBits|dithering) && (sh > dbits)) {
+ int shift = sh-dbits;
+ if (dl) {
+ MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
+ if (ireg == d.reg) {
+ MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
+ } else {
+ ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
+ }
+ } else {
+ if (ireg == d.reg) {
+ MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
+ } else {
+ ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
+ }
+ }
+ } else {
+ int shift = sh-dh;
+ if (shift>0) {
+ if (ireg == d.reg) {
+ MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
+ } else {
+ ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
+ }
+ } else if (shift<0) {
+ if (ireg == d.reg) {
+ MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
+ } else {
+ ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
+ }
+ } else {
+ if (ireg == d.reg) {
+ if (s.reg != d.reg) {
+ MOV(AL, 0, d.reg, s.reg);
+ }
+ } else {
+ ORR(AL, 0, d.reg, d.reg, s.reg);
+ }
+ }
+ }
+}
+
+}; // namespace android
diff --git a/libpixelflinger/codeflinger/texturing.cpp b/libpixelflinger/codeflinger/texturing.cpp
new file mode 100644
index 0000000..90e6584
--- /dev/null
+++ b/libpixelflinger/codeflinger/texturing.cpp
@@ -0,0 +1,1251 @@
+/* libs/pixelflinger/codeflinger/texturing.cpp
+**
+** Copyright 2006, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+** http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include <cutils/log.h>
+
+#include "codeflinger/GGLAssembler.h"
+
+
+namespace android {
+
+// ---------------------------------------------------------------------------
+
+// iterators are initialized like this:
+// (intToFixedCenter(x) * dx)>>16 + x0
+// ((x<<16 + 0x8000) * dx)>>16 + x0
+// ((x<<16)*dx + (0x8000*dx))>>16 + x0
+// ( (x*dx) + dx>>1 ) + x0
+// (x*dx) + (dx>>1 + x0)
+
+void GGLAssembler::init_iterated_color(fragment_parts_t& parts, const reg_t& x)
+{
+ context_t const* c = mBuilderContext.c;
+ const needs_t& needs = mBuilderContext.needs;
+
+ if (mSmooth) {
+ // NOTE: we could take this path in the mDithering + !mSmooth case
+ // as well, but this would use up to 4 more registers for the color
+ // components for only a little added quality.
+ // Currently, this causes the system to run out of registers in
+ // some cases (see issue #719496)
+
+ comment("compute initial iterated color (smooth and/or dither case)");
+
+ parts.iterated_packed = 0;
+ parts.packed = 0;
+
+ // 0x1: color component
+ // 0x2: iterators
+ const int optReload = mOptLevel >> 1;
+ if (optReload >= 3) parts.reload = 0; // reload nothing
+ else if (optReload == 2) parts.reload = 2; // reload iterators
+ else if (optReload == 1) parts.reload = 1; // reload colors
+ else if (optReload <= 0) parts.reload = 3; // reload both
+
+ if (!mSmooth) {
+ // we're not smoothing (just dithering), we never have to
+ // reload the iterators
+ parts.reload &= ~2;
+ }
+
+ Scratch scratches(registerFile());
+ const int t0 = (parts.reload & 1) ? scratches.obtain() : 0;
+ const int t1 = (parts.reload & 2) ? scratches.obtain() : 0;
+ for (int i=0 ; i<4 ; i++) {
+ if (!mInfo[i].iterated)
+ continue;
+
+ // this component exists in the destination and is not replaced
+ // by a texture unit.
+ const int c = (parts.reload & 1) ? t0 : obtainReg();
+ if (i==0) CONTEXT_LOAD(c, iterators.ydady);
+ if (i==1) CONTEXT_LOAD(c, iterators.ydrdy);
+ if (i==2) CONTEXT_LOAD(c, iterators.ydgdy);
+ if (i==3) CONTEXT_LOAD(c, iterators.ydbdy);
+ parts.argb[i].reg = c;
+
+ if (mInfo[i].smooth) {
+ parts.argb_dx[i].reg = (parts.reload & 2) ? t1 : obtainReg();
+ const int dvdx = parts.argb_dx[i].reg;
+ CONTEXT_LOAD(dvdx, generated_vars.argb[i].dx);
+ MLA(AL, 0, c, x.reg, dvdx, c);
+
+ // adjust the color iterator to make sure it won't overflow
+ if (!mAA) {
+ // this is not needed when we're using anti-aliasing
+ // because we will (have to) clamp the components
+ // anyway.
+ int end = scratches.obtain();
+ MOV(AL, 0, end, reg_imm(parts.count.reg, LSR, 16));
+ MLA(AL, 1, end, dvdx, end, c);
+ SUB(MI, 0, c, c, end);
+ BIC(AL, 0, c, c, reg_imm(c, ASR, 31));
+ scratches.recycle(end);
+ }
+ }
+
+ if (parts.reload & 1) {
+ CONTEXT_STORE(c, generated_vars.argb[i].c);
+ }
+ }
+ } else {
+ // We're not smoothing, so we can
+ // just use a packed version of the color and extract the
+ // components as needed (or not at all if we don't blend)
+
+ // figure out if we need the iterated color
+ int load = 0;
+ for (int i=0 ; i<4 ; i++) {
+ component_info_t& info = mInfo[i];
+ if ((info.inDest || info.needed) && !info.replaced)
+ load |= 1;
+ }
+
+ parts.iterated_packed = 1;
+ parts.packed = (!mTextureMachine.mask && !mBlending
+ && !mFog && !mDithering);
+ parts.reload = 0;
+ if (load || parts.packed) {
+ if (mBlending || mDithering || mInfo[GGLFormat::ALPHA].needed) {
+ comment("load initial iterated color (8888 packed)");
+ parts.iterated.setTo(obtainReg(),
+ &(c->formats[GGL_PIXEL_FORMAT_RGBA_8888]));
+ CONTEXT_LOAD(parts.iterated.reg, packed8888);
+ } else {
+ comment("load initial iterated color (dest format packed)");
+
+ parts.iterated.setTo(obtainReg(), &mCbFormat);
+
+ // pre-mask the iterated color
+ const int bits = parts.iterated.size();
+ const uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
+ uint32_t mask = 0;
+ if (mMasking) {
+ for (int i=0 ; i<4 ; i++) {
+ const int component_mask = 1<<i;
+ const int h = parts.iterated.format.c[i].h;
+ const int l = parts.iterated.format.c[i].l;
+ if (h && (!(mMasking & component_mask))) {
+ mask |= ((1<<(h-l))-1) << l;
+ }
+ }
+ }
+
+ if (mMasking && ((mask & size)==0)) {
+ // none of the components are present in the mask
+ } else {
+ CONTEXT_LOAD(parts.iterated.reg, packed);
+ if (mCbFormat.size == 1) {
+ AND(AL, 0, parts.iterated.reg,
+ parts.iterated.reg, imm(0xFF));
+ } else if (mCbFormat.size == 2) {
+ MOV(AL, 0, parts.iterated.reg,
+ reg_imm(parts.iterated.reg, LSR, 16));
+ }
+ }
+
+ // pre-mask the iterated color
+ if (mMasking) {
+ build_and_immediate(parts.iterated.reg, parts.iterated.reg,
+ mask, bits);
+ }
+ }
+ }
+ }
+}
+
+void GGLAssembler::build_iterated_color(
+ component_t& fragment,
+ const fragment_parts_t& parts,
+ int component,
+ Scratch& regs)
+{
+ fragment.setTo( regs.obtain(), 0, 32, CORRUPTIBLE);
+
+ if (!mInfo[component].iterated)
+ return;
+
+ if (parts.iterated_packed) {
+ // iterated colors are packed, extract the one we need
+ extract(fragment, parts.iterated, component);
+ } else {
+ fragment.h = GGL_COLOR_BITS;
+ fragment.l = GGL_COLOR_BITS - 8;
+ fragment.flags |= CLEAR_LO;
+ // iterated colors are held in their own register,
+ // (smooth and/or dithering case)
+ if (parts.reload==3) {
+ // this implies mSmooth
+ Scratch scratches(registerFile());
+ int dx = scratches.obtain();
+ CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c);
+ CONTEXT_LOAD(dx, generated_vars.argb[component].dx);
+ ADD(AL, 0, dx, fragment.reg, dx);
+ CONTEXT_STORE(dx, generated_vars.argb[component].c);
+ } else if (parts.reload & 1) {
+ CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c);
+ } else {
+ // we don't reload, so simply rename the register and mark it
+ // as non-CORRUPTIBLE so that the texture env or blending code
+ // won't modify this (renamed) register
+ regs.recycle(fragment.reg);
+ fragment.reg = parts.argb[component].reg;
+ fragment.flags &= ~CORRUPTIBLE;
+ }
+ if (mInfo[component].smooth && mAA) {
+ // when using smooth shading AND anti-aliasing, we need to clamp
+ // the iterators because there is always an extra pixel on the
+ // edges, which most of the time will cause an overflow
+ // (since technically it's outside of the domain).
+ BIC(AL, 0, fragment.reg, fragment.reg,
+ reg_imm(fragment.reg, ASR, 31));
+ component_sat(fragment);
+ }
+ }
+}
+
+// ---------------------------------------------------------------------------
+
+void GGLAssembler::decodeLogicOpNeeds(const needs_t& needs)
+{
+ // gather some information about the components we need to process...
+ const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
+ switch(opcode) {
+ case GGL_COPY:
+ mLogicOp = 0;
+ break;
+ case GGL_CLEAR:
+ case GGL_SET:
+ mLogicOp = LOGIC_OP;
+ break;
+ case GGL_AND:
+ case GGL_AND_REVERSE:
+ case GGL_AND_INVERTED:
+ case GGL_XOR:
+ case GGL_OR:
+ case GGL_NOR:
+ case GGL_EQUIV:
+ case GGL_OR_REVERSE:
+ case GGL_OR_INVERTED:
+ case GGL_NAND:
+ mLogicOp = LOGIC_OP|LOGIC_OP_SRC|LOGIC_OP_DST;
+ break;
+ case GGL_NOOP:
+ case GGL_INVERT:
+ mLogicOp = LOGIC_OP|LOGIC_OP_DST;
+ break;
+ case GGL_COPY_INVERTED:
+ mLogicOp = LOGIC_OP|LOGIC_OP_SRC;
+ break;
+ };
+}
+
+void GGLAssembler::decodeTMUNeeds(const needs_t& needs, context_t const* c)
+{
+ uint8_t replaced=0;
+ mTextureMachine.mask = 0;
+ mTextureMachine.activeUnits = 0;
+ for (int i=GGL_TEXTURE_UNIT_COUNT-1 ; i>=0 ; i--) {
+ texture_unit_t& tmu = mTextureMachine.tmu[i];
+ if (replaced == 0xF) {
+ // all components are replaced, skip this TMU.
+ tmu.format_idx = 0;
+ tmu.mask = 0;
+ tmu.replaced = replaced;
+ continue;
+ }
+ tmu.format_idx = GGL_READ_NEEDS(T_FORMAT, needs.t[i]);
+ tmu.format = c->formats[tmu.format_idx];
+ tmu.bits = tmu.format.size*8;
+ tmu.swrap = GGL_READ_NEEDS(T_S_WRAP, needs.t[i]);
+ tmu.twrap = GGL_READ_NEEDS(T_T_WRAP, needs.t[i]);
+ tmu.env = ggl_needs_to_env(GGL_READ_NEEDS(T_ENV, needs.t[i]));
+ tmu.pot = GGL_READ_NEEDS(T_POT, needs.t[i]);
+ tmu.linear = GGL_READ_NEEDS(T_LINEAR, needs.t[i])
+ && tmu.format.size!=3; // XXX: only 8, 16 and 32 modes for now
+
+ // 5551 linear filtering is not supported
+ if (tmu.format_idx == GGL_PIXEL_FORMAT_RGBA_5551)
+ tmu.linear = 0;
+
+ tmu.mask = 0;
+ tmu.replaced = replaced;
+
+ if (tmu.format_idx) {
+ mTextureMachine.activeUnits++;
+ if (tmu.format.c[0].h) tmu.mask |= 0x1;
+ if (tmu.format.c[1].h) tmu.mask |= 0x2;
+ if (tmu.format.c[2].h) tmu.mask |= 0x4;
+ if (tmu.format.c[3].h) tmu.mask |= 0x8;
+ if (tmu.env == GGL_REPLACE) {
+ replaced |= tmu.mask;
+ } else if (tmu.env == GGL_DECAL) {
+ if (!tmu.format.c[GGLFormat::ALPHA].h) {
+ // if we don't have alpha, decal does nothing
+ tmu.mask = 0;
+ } else {
+ // decal always ignores At
+ tmu.mask &= ~(1<<GGLFormat::ALPHA);
+ }
+ }
+ }
+ mTextureMachine.mask |= tmu.mask;
+ //printf("%d: mask=%08lx, replaced=%08lx\n",
+ // i, int(tmu.mask), int(tmu.replaced));
+ }
+ mTextureMachine.replaced = replaced;
+ mTextureMachine.directTexture = 0;
+ //printf("replaced=%08lx\n", mTextureMachine.replaced);
+}
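+
+// Note: the loop above walks the TMUs from last to first so that
+// `replaced` accumulates the components overwritten by a later GGL_REPLACE
+// unit; once all four components are replaced (replaced == 0xF), any
+// earlier unit is skipped entirely.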
+
+
+void GGLAssembler::init_textures(
+ tex_coord_t* coords,
+ const reg_t& x, const reg_t& y)
+{
+ context_t const* c = mBuilderContext.c;
+ const needs_t& needs = mBuilderContext.needs;
+ int Rctx = mBuilderContext.Rctx;
+ int Rx = x.reg;
+ int Ry = y.reg;
+
+ if (mTextureMachine.mask) {
+ comment("compute texture coordinates");
+ }
+
+ // init texture coordinates for each tmu
+ const int cb_format_idx = GGL_READ_NEEDS(CB_FORMAT, needs.n);
+ const bool multiTexture = mTextureMachine.activeUnits > 1;
+ for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
+ const texture_unit_t& tmu = mTextureMachine.tmu[i];
+ if (tmu.format_idx == 0)
+ continue;
+ if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
+ (tmu.twrap == GGL_NEEDS_WRAP_11))
+ {
+ // 1:1 texture
+ pointer_t& txPtr = coords[i].ptr;
+ txPtr.setTo(obtainReg(), tmu.bits);
+ CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydsdy);
+ ADD(AL, 0, Rx, Rx, reg_imm(txPtr.reg, ASR, 16)); // x += (s>>16)
+ CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydtdy);
+ ADD(AL, 0, Ry, Ry, reg_imm(txPtr.reg, ASR, 16)); // y += (t>>16)
+ // merge base & offset
+ CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].stride);
+ SMLABB(AL, Rx, Ry, txPtr.reg, Rx); // x+y*stride
+ CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].data);
+ base_offset(txPtr, txPtr, Rx);
+ } else {
+ Scratch scratches(registerFile());
+ reg_t& s = coords[i].s;
+ reg_t& t = coords[i].t;
+ // s = (x * dsdx)>>16 + ydsdy
+ // s = (x * dsdx)>>16 + (y*dsdy)>>16 + s0
+ // t = (x * dtdx)>>16 + ydtdy
+ // t = (x * dtdx)>>16 + (y*dtdy)>>16 + t0
+ s.setTo(obtainReg());
+ t.setTo(obtainReg());
+ const int need_w = GGL_READ_NEEDS(W, needs.n);
+ if (need_w) {
+ CONTEXT_LOAD(s.reg, state.texture[i].iterators.ydsdy);
+ CONTEXT_LOAD(t.reg, state.texture[i].iterators.ydtdy);
+ } else {
+ int ydsdy = scratches.obtain();
+ int ydtdy = scratches.obtain();
+ CONTEXT_LOAD(s.reg, generated_vars.texture[i].dsdx);
+ CONTEXT_LOAD(ydsdy, state.texture[i].iterators.ydsdy);
+ CONTEXT_LOAD(t.reg, generated_vars.texture[i].dtdx);
+ CONTEXT_LOAD(ydtdy, state.texture[i].iterators.ydtdy);
+ MLA(AL, 0, s.reg, Rx, s.reg, ydsdy);
+ MLA(AL, 0, t.reg, Rx, t.reg, ydtdy);
+ }
+
+ if ((mOptLevel&1)==0) {
+ CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]);
+ CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]);
+ recycleReg(s.reg);
+ recycleReg(t.reg);
+ }
+ }
+
+ // direct texture?
+ if (!multiTexture && !mBlending && !mDithering && !mFog &&
+ cb_format_idx == tmu.format_idx && !tmu.linear &&
+ mTextureMachine.replaced == tmu.mask)
+ {
+ mTextureMachine.directTexture = i + 1;
+ }
+ }
+}
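+
+// Reference sketch: in the non-1:1 path above, the two MLAs compute
+//     s.reg = x*dsdx + ydsdy      // ydsdy folds in y*dsdy + s0
+//     t.reg = x*dtdx + ydtdy      // ydtdy folds in y*dtdy + t0
+// in 16.16 fixed point; the >>16 shown in the comments inside the loop is
+// applied later, when the coordinate is consumed (e.g. by wrapping()).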
+
+void GGLAssembler::build_textures( fragment_parts_t& parts,
+ Scratch& regs)
+{
+ context_t const* c = mBuilderContext.c;
+ const needs_t& needs = mBuilderContext.needs;
+ int Rctx = mBuilderContext.Rctx;
+
+    // We don't have a way to spill registers automatically, so we
+    // spill the depth and AA registers ahead of time, when we know
+    // we may have to. Build the spill list...
+ uint32_t spill_list = 0;
+ for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
+ const texture_unit_t& tmu = mTextureMachine.tmu[i];
+ if (tmu.format_idx == 0)
+ continue;
+ if (tmu.linear) {
+            // we may run out of registers if we have linear filtering
+            // at 1 or 4 bytes / pixel on any texture unit.
+ if (tmu.format.size == 1) {
+                // if depth and AA are enabled, we'll be one register short
+ if (parts.z.reg > 0 && parts.covPtr.reg > 0)
+ spill_list |= 1<<parts.covPtr.reg;
+ }
+ if (tmu.format.size == 4) {
+                // if depth or AA is enabled, we'll be one or two registers short
+ if (parts.z.reg > 0)
+ spill_list |= 1<<parts.z.reg;
+ if (parts.covPtr.reg > 0)
+ spill_list |= 1<<parts.covPtr.reg;
+ }
+ }
+ }
+
+ Spill spill(registerFile(), *this, spill_list);
+
+ const bool multiTexture = mTextureMachine.activeUnits > 1;
+ for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
+ const texture_unit_t& tmu = mTextureMachine.tmu[i];
+ if (tmu.format_idx == 0)
+ continue;
+
+ pointer_t& txPtr = parts.coords[i].ptr;
+ pixel_t& texel = parts.texel[i];
+
+ // repeat...
+ if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
+ (tmu.twrap == GGL_NEEDS_WRAP_11))
+ { // 1:1 textures
+ comment("fetch texel");
+ texel.setTo(regs.obtain(), &tmu.format);
+ load(txPtr, texel, WRITE_BACK);
+ } else {
+ Scratch scratches(registerFile());
+ reg_t& s = parts.coords[i].s;
+ reg_t& t = parts.coords[i].t;
+ if ((mOptLevel&1)==0) {
+ comment("reload s/t (multitexture or linear filtering)");
+ s.reg = scratches.obtain();
+ t.reg = scratches.obtain();
+ CONTEXT_LOAD(s.reg, generated_vars.texture[i].spill[0]);
+ CONTEXT_LOAD(t.reg, generated_vars.texture[i].spill[1]);
+ }
+
+ comment("compute repeat/clamp");
+ int u = scratches.obtain();
+ int v = scratches.obtain();
+ int width = scratches.obtain();
+ int height = scratches.obtain();
+ int U = 0;
+ int V = 0;
+
+ CONTEXT_LOAD(width, generated_vars.texture[i].width);
+ CONTEXT_LOAD(height, generated_vars.texture[i].height);
+
+ int FRAC_BITS = 0;
+ if (tmu.linear) {
+ // linear interpolation
+ if (tmu.format.size == 1) {
+                    // for 8-bit textures, we can afford
+                    // 7 bits of fractional precision at no
+                    // additional cost (we can't do 8 bits
+                    // because filter8 uses signed 16-bit muls)
+ FRAC_BITS = 7;
+ } else if (tmu.format.size == 2) {
+ // filter16() is internally limited to 4 bits, so:
+                    // FRAC_BITS=2 generates fewer instructions,
+                    // FRAC_BITS=3,4,5 create unpleasant artifacts,
+ // FRAC_BITS=6+ looks good
+ FRAC_BITS = 6;
+ } else if (tmu.format.size == 4) {
+ // filter32() is internally limited to 8 bits, so:
+                    // FRAC_BITS=4 looks good,
+                    // FRAC_BITS=5+ looks better but generates 3 extra
+                    // instructions per pixel
+ FRAC_BITS = 6;
+ } else {
+ // for all other cases we use 4 bits.
+ FRAC_BITS = 4;
+ }
+ }
+ wrapping(u, s.reg, width, tmu.swrap, FRAC_BITS);
+ wrapping(v, t.reg, height, tmu.twrap, FRAC_BITS);
+
+ if (tmu.linear) {
+ comment("compute linear filtering offsets");
+ // pixel size scale
+ const int shift = 31 - gglClz(tmu.format.size);
+ U = scratches.obtain();
+ V = scratches.obtain();
+
+ // sample the texel center
+ SUB(AL, 0, u, u, imm(1<<(FRAC_BITS-1)));
+ SUB(AL, 0, v, v, imm(1<<(FRAC_BITS-1)));
+
+            // get the fractional part of U,V
+ AND(AL, 0, U, u, imm((1<<FRAC_BITS)-1));
+ AND(AL, 0, V, v, imm((1<<FRAC_BITS)-1));
+
+ // compute width-1 and height-1
+ SUB(AL, 0, width, width, imm(1));
+ SUB(AL, 0, height, height, imm(1));
+
+ // get the integer part of U,V and clamp/wrap
+ // and compute offset to the next texel
+ if (tmu.swrap == GGL_NEEDS_WRAP_REPEAT) {
+ // u has already been REPEATed
+ MOV(AL, 1, u, reg_imm(u, ASR, FRAC_BITS));
+ MOV(MI, 0, u, width);
+ CMP(AL, u, width);
+ MOV(LT, 0, width, imm(1 << shift));
+ if (shift)
+ MOV(GE, 0, width, reg_imm(width, LSL, shift));
+ RSB(GE, 0, width, width, imm(0));
+ } else {
+                // u has not been CLAMPed yet
+                // algorithm:
+                //  if ((u>>FRAC_BITS) >= width)
+                //      u = width<<FRAC_BITS
+                //      width = 0
+                //  else
+                //      width = 1<<shift
+                //  u = u>>FRAC_BITS; // get integer part
+                //  if (u<0)
+                //      u = 0
+                //      width = 0
+                //  generated_vars.rt = width
+
+ CMP(AL, width, reg_imm(u, ASR, FRAC_BITS));
+ MOV(LE, 0, u, reg_imm(width, LSL, FRAC_BITS));
+ MOV(LE, 0, width, imm(0));
+ MOV(GT, 0, width, imm(1 << shift));
+ MOV(AL, 1, u, reg_imm(u, ASR, FRAC_BITS));
+ MOV(MI, 0, u, imm(0));
+ MOV(MI, 0, width, imm(0));
+ }
+ CONTEXT_STORE(width, generated_vars.rt);
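+
+            // Reference sketch for the REPEAT case above: once
+            // u >>= FRAC_BITS (with negative u wrapping to the last texel),
+            // the stored right-neighbour offset is effectively
+            //     rt = (u < width-1) ? 1<<shift : -((width-1)<<shift);
+            // i.e. one texel to the right, or wrap back to the start
+            // of the row.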
+
+ const int stride = width;
+ CONTEXT_LOAD(stride, generated_vars.texture[i].stride);
+ if (tmu.twrap == GGL_NEEDS_WRAP_REPEAT) {
+ // v has already been REPEATed
+ MOV(AL, 1, v, reg_imm(v, ASR, FRAC_BITS));
+ MOV(MI, 0, v, height);
+ CMP(AL, v, height);
+ MOV(LT, 0, height, imm(1 << shift));
+ if (shift)
+ MOV(GE, 0, height, reg_imm(height, LSL, shift));
+ RSB(GE, 0, height, height, imm(0));
+ MUL(AL, 0, height, stride, height);
+ } else {
+                // v has not been CLAMPed yet
+ CMP(AL, height, reg_imm(v, ASR, FRAC_BITS));
+ MOV(LE, 0, v, reg_imm(height, LSL, FRAC_BITS));
+ MOV(LE, 0, height, imm(0));
+ if (shift) {
+ MOV(GT, 0, height, reg_imm(stride, LSL, shift));
+ } else {
+ MOV(GT, 0, height, stride);
+ }
+ MOV(AL, 1, v, reg_imm(v, ASR, FRAC_BITS));
+ MOV(MI, 0, v, imm(0));
+ MOV(MI, 0, height, imm(0));
+ }
+ CONTEXT_STORE(height, generated_vars.lb);
+ }
+
+ scratches.recycle(width);
+ scratches.recycle(height);
+
+ // iterate texture coordinates...
+ comment("iterate s,t");
+ int dsdx = scratches.obtain();
+ int dtdx = scratches.obtain();
+ CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx);
+ CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx);
+ ADD(AL, 0, s.reg, s.reg, dsdx);
+ ADD(AL, 0, t.reg, t.reg, dtdx);
+ if ((mOptLevel&1)==0) {
+ CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]);
+ CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]);
+ scratches.recycle(s.reg);
+ scratches.recycle(t.reg);
+ }
+ scratches.recycle(dsdx);
+ scratches.recycle(dtdx);
+
+ // merge base & offset...
+ comment("merge base & offset");
+ texel.setTo(regs.obtain(), &tmu.format);
+ txPtr.setTo(texel.reg, tmu.bits);
+ int stride = scratches.obtain();
+ CONTEXT_LOAD(stride, generated_vars.texture[i].stride);
+ CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].data);
+ SMLABB(AL, u, v, stride, u); // u+v*stride
+ base_offset(txPtr, txPtr, u);
+
+ // load texel
+ if (!tmu.linear) {
+ comment("fetch texel");
+ load(txPtr, texel, 0);
+ } else {
+ // recycle registers we don't need anymore
+ scratches.recycle(u);
+ scratches.recycle(v);
+ scratches.recycle(stride);
+
+ comment("fetch texel, bilinear");
+ switch (tmu.format.size) {
+ case 1: filter8(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
+ case 2: filter16(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
+ case 3: filter24(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
+ case 4: filter32(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
+ }
+ }
+ }
+ }
+}
+
+void GGLAssembler::build_iterate_texture_coordinates(
+ const fragment_parts_t& parts)
+{
+ const bool multiTexture = mTextureMachine.activeUnits > 1;
+ for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
+ const texture_unit_t& tmu = mTextureMachine.tmu[i];
+ if (tmu.format_idx == 0)
+ continue;
+
+ if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
+ (tmu.twrap == GGL_NEEDS_WRAP_11))
+ { // 1:1 textures
+ const pointer_t& txPtr = parts.coords[i].ptr;
+ ADD(AL, 0, txPtr.reg, txPtr.reg, imm(txPtr.size>>3));
+ } else {
+ Scratch scratches(registerFile());
+ int s = parts.coords[i].s.reg;
+ int t = parts.coords[i].t.reg;
+ if ((mOptLevel&1)==0) {
+ s = scratches.obtain();
+ t = scratches.obtain();
+ CONTEXT_LOAD(s, generated_vars.texture[i].spill[0]);
+ CONTEXT_LOAD(t, generated_vars.texture[i].spill[1]);
+ }
+ int dsdx = scratches.obtain();
+ int dtdx = scratches.obtain();
+ CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx);
+ CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx);
+ ADD(AL, 0, s, s, dsdx);
+ ADD(AL, 0, t, t, dtdx);
+ if ((mOptLevel&1)==0) {
+ CONTEXT_STORE(s, generated_vars.texture[i].spill[0]);
+ CONTEXT_STORE(t, generated_vars.texture[i].spill[1]);
+ }
+ }
+ }
+}
+
+void GGLAssembler::filter8(
+ const fragment_parts_t& parts,
+ pixel_t& texel, const texture_unit_t& tmu,
+ int U, int V, pointer_t& txPtr,
+ int FRAC_BITS)
+{
+ if (tmu.format.components != GGL_ALPHA &&
+ tmu.format.components != GGL_LUMINANCE)
+ {
+        // this is a packed format and we don't support
+        // linear filtering (it's probably RGB 332).
+        // This should not happen with OpenGL|ES.
+ LDRB(AL, texel.reg, txPtr.reg);
+ return;
+ }
+
+ // ------------------------
+ // about ~22 cycles / pixel
+ Scratch scratches(registerFile());
+
+ int pixel= scratches.obtain();
+ int d = scratches.obtain();
+ int u = scratches.obtain();
+ int k = scratches.obtain();
+ int rt = scratches.obtain();
+ int lb = scratches.obtain();
+
+ // RB -> U * V
+
+ CONTEXT_LOAD(rt, generated_vars.rt);
+ CONTEXT_LOAD(lb, generated_vars.lb);
+
+ int offset = pixel;
+ ADD(AL, 0, offset, lb, rt);
+ LDRB(AL, pixel, txPtr.reg, reg_scale_pre(offset));
+ SMULBB(AL, u, U, V);
+ SMULBB(AL, d, pixel, u);
+ RSB(AL, 0, k, u, imm(1<<(FRAC_BITS*2)));
+
+ // LB -> (1-U) * V
+ RSB(AL, 0, U, U, imm(1<<FRAC_BITS));
+ LDRB(AL, pixel, txPtr.reg, reg_scale_pre(lb));
+ SMULBB(AL, u, U, V);
+ SMLABB(AL, d, pixel, u, d);
+ SUB(AL, 0, k, k, u);
+
+ // LT -> (1-U)*(1-V)
+ RSB(AL, 0, V, V, imm(1<<FRAC_BITS));
+ LDRB(AL, pixel, txPtr.reg);
+ SMULBB(AL, u, U, V);
+ SMLABB(AL, d, pixel, u, d);
+
+ // RT -> U*(1-V)
+ LDRB(AL, pixel, txPtr.reg, reg_scale_pre(rt));
+ SUB(AL, 0, u, k, u);
+ SMLABB(AL, texel.reg, pixel, u, d);
+
+ for (int i=0 ; i<4 ; i++) {
+ if (!texel.format.c[i].h) continue;
+ texel.format.c[i].h = FRAC_BITS*2+8;
+        texel.format.c[i].l = FRAC_BITS*2; // keeping 8 bits is enough
+ }
+ texel.format.size = 4;
+ texel.format.bitsPerPixel = 32;
+ texel.flags |= CLEAR_LO;
+}
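+
+// Reference sketch of the blend above, in scalar C: with the current texel
+// at offset 0, rt/lb the byte offsets to the right/bottom neighbours, and
+// one = 1<<FRAC_BITS,
+//
+//     d = p[lb+rt]*(U*V) + p[lb]*((one-U)*V)
+//       + p[0]*((one-U)*(one-V)) + p[rt]*(U*(one-V));
+//
+// k tracks one*one minus the weights used so far, so the last weight is
+// obtained with a SUB instead of a fourth multiply.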
+
+void GGLAssembler::filter16(
+ const fragment_parts_t& parts,
+ pixel_t& texel, const texture_unit_t& tmu,
+ int U, int V, pointer_t& txPtr,
+ int FRAC_BITS)
+{
+ // compute the mask
+ // XXX: it would be nice if the mask below could be computed
+ // automatically.
+ uint32_t mask = 0;
+ int shift = 0;
+ int prec = 0;
+ switch (tmu.format_idx) {
+ case GGL_PIXEL_FORMAT_RGB_565:
+ // source: 00000ggg.ggg00000 | rrrrr000.000bbbbb
+ // result: gggggggg.gggrrrrr | rrrrr0bb.bbbbbbbb
+ mask = 0x07E0F81F;
+ shift = 16;
+ prec = 5;
+ break;
+ case GGL_PIXEL_FORMAT_RGBA_4444:
+ // 0000,1111,0000,1111 | 0000,1111,0000,1111
+ mask = 0x0F0F0F0F;
+ shift = 12;
+ prec = 4;
+ break;
+ case GGL_PIXEL_FORMAT_LA_88:
+ // 0000,0000,1111,1111 | 0000,0000,1111,1111
+ // AALL -> 00AA | 00LL
+ mask = 0x00FF00FF;
+ shift = 8;
+ prec = 8;
+ break;
+ default:
+        // unsupported format, do something sensible...
+        LOGE("Unsupported 16-bit texture format (%d)", tmu.format_idx);
+ LDRH(AL, texel.reg, txPtr.reg);
+ return;
+ }
+
+ const int adjust = FRAC_BITS*2 - prec;
+ const int round = 0;
+
+ // update the texel format
+ texel.format.size = 4;
+ texel.format.bitsPerPixel = 32;
+ texel.flags |= CLEAR_HI|CLEAR_LO;
+ for (int i=0 ; i<4 ; i++) {
+ if (!texel.format.c[i].h) continue;
+ const uint32_t offset = (mask & tmu.format.mask(i)) ? 0 : shift;
+ texel.format.c[i].h = tmu.format.c[i].h + offset + prec;
+ texel.format.c[i].l = texel.format.c[i].h - (tmu.format.bits(i) + prec);
+ }
+
+ // ------------------------
+ // about ~40 cycles / pixel
+ Scratch scratches(registerFile());
+
+ int pixel= scratches.obtain();
+ int d = scratches.obtain();
+ int u = scratches.obtain();
+ int k = scratches.obtain();
+
+ // RB -> U * V
+ int offset = pixel;
+ CONTEXT_LOAD(offset, generated_vars.rt);
+ CONTEXT_LOAD(u, generated_vars.lb);
+ ADD(AL, 0, offset, offset, u);
+
+ LDRH(AL, pixel, txPtr.reg, reg_pre(offset));
+ SMULBB(AL, u, U, V);
+ ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
+ build_and_immediate(pixel, pixel, mask, 32);
+ if (adjust) {
+ if (round)
+ ADD(AL, 0, u, u, imm(1<<(adjust-1)));
+ MOV(AL, 0, u, reg_imm(u, LSR, adjust));
+ }
+ MUL(AL, 0, d, pixel, u);
+ RSB(AL, 0, k, u, imm(1<<prec));
+
+ // LB -> (1-U) * V
+ CONTEXT_LOAD(offset, generated_vars.lb);
+ RSB(AL, 0, U, U, imm(1<<FRAC_BITS));
+ LDRH(AL, pixel, txPtr.reg, reg_pre(offset));
+ SMULBB(AL, u, U, V);
+ ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
+ build_and_immediate(pixel, pixel, mask, 32);
+ if (adjust) {
+ if (round)
+ ADD(AL, 0, u, u, imm(1<<(adjust-1)));
+ MOV(AL, 0, u, reg_imm(u, LSR, adjust));
+ }
+ MLA(AL, 0, d, pixel, u, d);
+ SUB(AL, 0, k, k, u);
+
+ // LT -> (1-U)*(1-V)
+ RSB(AL, 0, V, V, imm(1<<FRAC_BITS));
+ LDRH(AL, pixel, txPtr.reg);
+ SMULBB(AL, u, U, V);
+ ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
+ build_and_immediate(pixel, pixel, mask, 32);
+ if (adjust) {
+ if (round)
+ ADD(AL, 0, u, u, imm(1<<(adjust-1)));
+ MOV(AL, 0, u, reg_imm(u, LSR, adjust));
+ }
+ MLA(AL, 0, d, pixel, u, d);
+
+ // RT -> U*(1-V)
+ CONTEXT_LOAD(offset, generated_vars.rt);
+ LDRH(AL, pixel, txPtr.reg, reg_pre(offset));
+ SUB(AL, 0, u, k, u);
+ ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
+ build_and_immediate(pixel, pixel, mask, 32);
+ MLA(AL, 0, texel.reg, pixel, u, d);
+}
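+
+// Reference sketch of the trick above: each 16-bit pixel is spread into two
+// sparse lanes of one 32-bit word, e.g. for 565
+//
+//     v = (pixel | (pixel << 16)) & 0x07E0F81F;  // g in the high half,
+//                                                // r and b in the low half
+//
+// Each lane then has enough zero headroom that multiplying the whole word
+// by a weight of at most `prec` bits cannot carry across components, so all
+// channels are filtered with one MUL/MLA per tap.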
+
+void GGLAssembler::filter24(
+ const fragment_parts_t& parts,
+ pixel_t& texel, const texture_unit_t& tmu,
+ int U, int V, pointer_t& txPtr,
+ int FRAC_BITS)
+{
+ // not supported yet (currently disabled)
+ load(txPtr, texel, 0);
+}
+
+void GGLAssembler::filter32(
+ const fragment_parts_t& parts,
+ pixel_t& texel, const texture_unit_t& tmu,
+ int U, int V, pointer_t& txPtr,
+ int FRAC_BITS)
+{
+ const int adjust = FRAC_BITS*2 - 8;
+ const int round = 0;
+
+ // ------------------------
+ // about ~38 cycles / pixel
+ Scratch scratches(registerFile());
+
+ int pixel= scratches.obtain();
+ int dh = scratches.obtain();
+ int u = scratches.obtain();
+ int k = scratches.obtain();
+
+ int temp = scratches.obtain();
+ int dl = scratches.obtain();
+ int mask = scratches.obtain();
+
+ MOV(AL, 0, mask, imm(0xFF));
+ ORR(AL, 0, mask, mask, imm(0xFF0000));
+
+ // RB -> U * V
+ int offset = pixel;
+ CONTEXT_LOAD(offset, generated_vars.rt);
+ CONTEXT_LOAD(u, generated_vars.lb);
+ ADD(AL, 0, offset, offset, u);
+
+ LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset));
+ SMULBB(AL, u, U, V);
+ AND(AL, 0, temp, mask, pixel);
+ if (adjust) {
+ if (round)
+ ADD(AL, 0, u, u, imm(1<<(adjust-1)));
+ MOV(AL, 0, u, reg_imm(u, LSR, adjust));
+ }
+ MUL(AL, 0, dh, temp, u);
+ AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
+ MUL(AL, 0, dl, temp, u);
+ RSB(AL, 0, k, u, imm(0x100));
+
+ // LB -> (1-U) * V
+ CONTEXT_LOAD(offset, generated_vars.lb);
+ RSB(AL, 0, U, U, imm(1<<FRAC_BITS));
+ LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset));
+ SMULBB(AL, u, U, V);
+ AND(AL, 0, temp, mask, pixel);
+ if (adjust) {
+ if (round)
+ ADD(AL, 0, u, u, imm(1<<(adjust-1)));
+ MOV(AL, 0, u, reg_imm(u, LSR, adjust));
+ }
+ MLA(AL, 0, dh, temp, u, dh);
+ AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
+ MLA(AL, 0, dl, temp, u, dl);
+ SUB(AL, 0, k, k, u);
+
+ // LT -> (1-U)*(1-V)
+ RSB(AL, 0, V, V, imm(1<<FRAC_BITS));
+ LDR(AL, pixel, txPtr.reg);
+ SMULBB(AL, u, U, V);
+ AND(AL, 0, temp, mask, pixel);
+ if (adjust) {
+ if (round)
+ ADD(AL, 0, u, u, imm(1<<(adjust-1)));
+ MOV(AL, 0, u, reg_imm(u, LSR, adjust));
+ }
+ MLA(AL, 0, dh, temp, u, dh);
+ AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
+ MLA(AL, 0, dl, temp, u, dl);
+
+ // RT -> U*(1-V)
+ CONTEXT_LOAD(offset, generated_vars.rt);
+ LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset));
+ SUB(AL, 0, u, k, u);
+ AND(AL, 0, temp, mask, pixel);
+ MLA(AL, 0, dh, temp, u, dh);
+ AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
+ MLA(AL, 0, dl, temp, u, dl);
+
+ AND(AL, 0, dh, mask, reg_imm(dh, LSR, 8));
+ AND(AL, 0, dl, dl, reg_imm(mask, LSL, 8));
+ ORR(AL, 0, texel.reg, dh, dl);
+}
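+
+// Reference sketch of the trick above: mask = 0x00FF00FF splits the 32-bit
+// pixel into even and odd byte lanes,
+//
+//     dh += (pixel & 0x00FF00FF) * u;          // bytes 0 and 2
+//     dl += ((pixel >> 8) & 0x00FF00FF) * u;   // bytes 1 and 3
+//
+// With 8-bit weights summing to 256, each 8-bit channel accumulates into
+// 16 bits of headroom without carrying into its neighbour; the final
+// AND/AND/ORR sequence recombines the two halves into one 32-bit texel.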
+
+void GGLAssembler::build_texture_environment(
+ component_t& fragment,
+ const fragment_parts_t& parts,
+ int component,
+ Scratch& regs)
+{
+ const uint32_t component_mask = 1<<component;
+ const bool multiTexture = mTextureMachine.activeUnits > 1;
+ for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
+ texture_unit_t& tmu = mTextureMachine.tmu[i];
+
+ if (tmu.mask & component_mask) {
+ // replace or modulate with this texture
+ if ((tmu.replaced & component_mask) == 0) {
+ // not replaced by a later tmu...
+
+ Scratch scratches(registerFile());
+ pixel_t texel(parts.texel[i]);
+ if (multiTexture &&
+ tmu.swrap == GGL_NEEDS_WRAP_11 &&
+ tmu.twrap == GGL_NEEDS_WRAP_11)
+ {
+ texel.reg = scratches.obtain();
+ texel.flags |= CORRUPTIBLE;
+ comment("fetch texel (multitexture 1:1)");
+ load(parts.coords[i].ptr, texel, WRITE_BACK);
+ }
+
+ component_t incoming(fragment);
+ modify(fragment, regs);
+
+ switch (tmu.env) {
+ case GGL_REPLACE:
+ extract(fragment, texel, component);
+ break;
+ case GGL_MODULATE:
+ modulate(fragment, incoming, texel, component);
+ break;
+ case GGL_DECAL:
+ decal(fragment, incoming, texel, component);
+ break;
+ case GGL_BLEND:
+ blend(fragment, incoming, texel, component, i);
+ break;
+ case GGL_ADD:
+ add(fragment, incoming, texel, component);
+ break;
+ }
+ }
+ }
+ }
+}
+
+// ---------------------------------------------------------------------------
+
+void GGLAssembler::wrapping(
+ int d,
+ int coord, int size,
+ int tx_wrap, int tx_linear)
+{
+ // notes:
+ // if tx_linear is set, we need 4 extra bits of precision on the result
+ // SMULL/UMULL is 3 cycles
+ Scratch scratches(registerFile());
+ int c = coord;
+ if (tx_wrap == GGL_NEEDS_WRAP_REPEAT) {
+ // UMULL takes 4 cycles (interlocked), and we can get away with
+        // 2 cycles using SMULWB, but we're losing 16 bits of precision
+ // out of 32 (this is not a problem because the iterator keeps
+ // its full precision)
+ // UMULL(AL, 0, size, d, c, size);
+ // note: we can't use SMULTB because it's signed.
+ MOV(AL, 0, d, reg_imm(c, LSR, 16-tx_linear));
+ SMULWB(AL, d, d, size);
+ } else if (tx_wrap == GGL_NEEDS_WRAP_CLAMP_TO_EDGE) {
+ if (tx_linear) {
+ // 1 cycle
+ MOV(AL, 0, d, reg_imm(coord, ASR, 16-tx_linear));
+ } else {
+ // 4 cycles (common case)
+ MOV(AL, 0, d, reg_imm(coord, ASR, 16));
+ BIC(AL, 0, d, d, reg_imm(d, ASR, 31));
+ CMP(AL, d, size);
+ SUB(GE, 0, d, size, imm(1));
+ }
+ }
+}
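+
+// Reference sketch, assuming 16.16 coordinates (tx_linear adds that many
+// extra fraction bits to the result):
+//
+//     // REPEAT: fractional part of the coordinate, scaled by size
+//     d = ((coord >> (16 - tx_linear)) * size) >> 16;
+//     // CLAMP_TO_EDGE (tx_linear == 0): clamp integer part to [0, size-1]
+//     d = min(max(coord >> 16, 0), size - 1);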
+
+// ---------------------------------------------------------------------------
+
+void GGLAssembler::modulate(
+ component_t& dest,
+ const component_t& incoming,
+ const pixel_t& incomingTexel, int component)
+{
+ Scratch locals(registerFile());
+ integer_t texel(locals.obtain(), 32, CORRUPTIBLE);
+ extract(texel, incomingTexel, component);
+
+ const int Nt = texel.size();
+    // Nt should never exceed 10 bits because it comes
+    // from the TMU.
+
+ int Ni = incoming.size();
+ // Ni could be big because it comes from previous MODULATEs
+
+ if (Nt == 1) {
+ // texel acts as a bit-mask
+ // dest = incoming & ((texel << incoming.h)-texel)
+ RSB(AL, 0, dest.reg, texel.reg, reg_imm(texel.reg, LSL, incoming.h));
+ AND(AL, 0, dest.reg, dest.reg, incoming.reg);
+ dest.l = incoming.l;
+ dest.h = incoming.h;
+ dest.flags |= (incoming.flags & CLEAR_LO);
+ } else if (Ni == 1) {
+ MOV(AL, 0, dest.reg, reg_imm(incoming.reg, LSL, 31-incoming.h));
+ AND(AL, 0, dest.reg, texel.reg, reg_imm(dest.reg, ASR, 31));
+ dest.l = 0;
+ dest.h = Nt;
+ } else {
+ int inReg = incoming.reg;
+ int shift = incoming.l;
+ if ((Nt + Ni) > 32) {
+ // we will overflow, reduce the precision of Ni to 8 bits
+            // (note that Nt cannot be more than 10 bits, which happens
+            // with 565 textures and GGL_LINEAR)
+ shift += Ni-8;
+ Ni = 8;
+ }
+
+ // modulate by the component with the lowest precision
+ if (Nt >= Ni) {
+ if (shift) {
+ // XXX: we should be able to avoid this shift
+                // when shift==16 && Nt<16 && Ni<16, in which case
+                // we could use SMULBT below.
+ MOV(AL, 0, dest.reg, reg_imm(inReg, LSR, shift));
+ inReg = dest.reg;
+ shift = 0;
+ }
+ // operation: (Cf*Ct)/((1<<Ni)-1)
+ // approximated with: Cf*(Ct + Ct>>(Ni-1))>>Ni
+ // this operation doesn't change texel's size
+ ADD(AL, 0, dest.reg, inReg, reg_imm(inReg, LSR, Ni-1));
+ if (Nt<16 && Ni<16) SMULBB(AL, dest.reg, texel.reg, dest.reg);
+ else MUL(AL, 0, dest.reg, texel.reg, dest.reg);
+ dest.l = Ni;
+ dest.h = Nt + Ni;
+ } else {
+ if (shift && (shift != 16)) {
+ // if shift==16, we can use 16-bits mul instructions later
+ MOV(AL, 0, dest.reg, reg_imm(inReg, LSR, shift));
+ inReg = dest.reg;
+ shift = 0;
+ }
+ // operation: (Cf*Ct)/((1<<Nt)-1)
+ // approximated with: Ct*(Cf + Cf>>(Nt-1))>>Nt
+ // this operation doesn't change incoming's size
+ Scratch scratches(registerFile());
+ int t = (texel.flags & CORRUPTIBLE) ? texel.reg : dest.reg;
+ if (t == inReg)
+ t = scratches.obtain();
+ ADD(AL, 0, t, texel.reg, reg_imm(texel.reg, LSR, Nt-1));
+ if (Nt<16 && Ni<16) {
+ if (shift==16) SMULBT(AL, dest.reg, t, inReg);
+ else SMULBB(AL, dest.reg, t, inReg);
+ } else MUL(AL, 0, dest.reg, t, inReg);
+ dest.l = Nt;
+ dest.h = Nt + Ni;
+ }
+
+ // low bits are not valid
+ dest.flags |= CLEAR_LO;
+
+ // no need to keep more than 8 bits/component
+ if (dest.size() > 8)
+ dest.l = dest.h-8;
+ }
+}
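+
+// Worked example for the approximation used above: the exact result of
+// modulation is (Cf*Ct)/((1<<N)-1). Expanding one operand X to
+// X + (X>>(N-1)) maps 255 to 256 for N=8, so the multiply-and-shift
+// returns the other operand unchanged at full intensity, which a plain
+// >>N would not achieve.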
+
+void GGLAssembler::decal(
+ component_t& dest,
+ const component_t& incoming,
+ const pixel_t& incomingTexel, int component)
+{
+ // RGBA:
+ // Cv = Cf*(1 - At) + Ct*At = Cf + (Ct - Cf)*At
+ // Av = Af
+ Scratch locals(registerFile());
+ integer_t texel(locals.obtain(), 32, CORRUPTIBLE);
+ integer_t factor(locals.obtain(), 32, CORRUPTIBLE);
+ extract(texel, incomingTexel, component);
+ extract(factor, incomingTexel, GGLFormat::ALPHA);
+
+    // no need to keep more than 8 bits for decal
+ int Ni = incoming.size();
+ int shift = incoming.l;
+ if (Ni > 8) {
+ shift += Ni-8;
+ Ni = 8;
+ }
+ integer_t incomingNorm(incoming.reg, Ni, incoming.flags);
+ if (shift) {
+ MOV(AL, 0, dest.reg, reg_imm(incomingNorm.reg, LSR, shift));
+ incomingNorm.reg = dest.reg;
+ incomingNorm.flags |= CORRUPTIBLE;
+ }
+ ADD(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, LSR, factor.s-1));
+ build_blendOneMinusFF(dest, factor, incomingNorm, texel);
+}
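+
+// Note: the ADD just above expands the alpha factor by
+// factor += factor >> (s-1), so full intensity maps to an exact power of
+// two (e.g. 255 -> 256); build_blendOneMinusFF can then realize the
+// Cf + (Ct - Cf)*At form above exactly at the endpoints, without a
+// divide by 255.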
+
+void GGLAssembler::blend(
+ component_t& dest,
+ const component_t& incoming,
+ const pixel_t& incomingTexel, int component, int tmu)
+{
+ // RGBA:
+ // Cv = (1 - Ct)*Cf + Ct*Cc = Cf + (Cc - Cf)*Ct
+ // Av = At*Af
+
+ if (component == GGLFormat::ALPHA) {
+ modulate(dest, incoming, incomingTexel, component);
+ return;
+ }
+
+ Scratch locals(registerFile());
+ integer_t color(locals.obtain(), 8, CORRUPTIBLE);
+ integer_t factor(locals.obtain(), 32, CORRUPTIBLE);
+ LDRB(AL, color.reg, mBuilderContext.Rctx,
+ immed12_pre(GGL_OFFSETOF(state.texture[tmu].env_color[component])));
+ extract(factor, incomingTexel, component);
+
+    // no need to keep more than 8 bits for blend
+ int Ni = incoming.size();
+ int shift = incoming.l;
+ if (Ni > 8) {
+ shift += Ni-8;
+ Ni = 8;
+ }
+ integer_t incomingNorm(incoming.reg, Ni, incoming.flags);
+ if (shift) {
+ MOV(AL, 0, dest.reg, reg_imm(incomingNorm.reg, LSR, shift));
+ incomingNorm.reg = dest.reg;
+ incomingNorm.flags |= CORRUPTIBLE;
+ }
+ ADD(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, LSR, factor.s-1));
+ build_blendOneMinusFF(dest, factor, incomingNorm, color);
+}
+
+void GGLAssembler::add(
+ component_t& dest,
+ const component_t& incoming,
+ const pixel_t& incomingTexel, int component)
+{
+ // RGBA:
+ // Cv = Cf + Ct;
+ Scratch locals(registerFile());
+
+ component_t incomingTemp(incoming);
+
+ // use "dest" as a temporary for extracting the texel, unless "dest"
+ // overlaps "incoming".
+ integer_t texel(dest.reg, 32, CORRUPTIBLE);
+ if (dest.reg == incomingTemp.reg)
+ texel.reg = locals.obtain();
+ extract(texel, incomingTexel, component);
+
+ if (texel.s < incomingTemp.size()) {
+ expand(texel, texel, incomingTemp.size());
+ } else if (texel.s > incomingTemp.size()) {
+ if (incomingTemp.flags & CORRUPTIBLE) {
+ expand(incomingTemp, incomingTemp, texel.s);
+ } else {
+ incomingTemp.reg = locals.obtain();
+ expand(incomingTemp, incoming, texel.s);
+ }
+ }
+
+ if (incomingTemp.l) {
+ ADD(AL, 0, dest.reg, texel.reg,
+ reg_imm(incomingTemp.reg, LSR, incomingTemp.l));
+ } else {
+ ADD(AL, 0, dest.reg, texel.reg, incomingTemp.reg);
+ }
+ dest.l = 0;
+ dest.h = texel.size();
+ component_sat(dest);
+}
+
+// ----------------------------------------------------------------------------
+
+} // namespace android
+