From 2bc2b792782b304b15d8c48b54916a9b3fa3a7ac Mon Sep 17 00:00:00 2001
From: Paul Lind <plind@mips.com>
Date: Wed, 1 Feb 2012 10:54:19 -0800
Subject: Add MIPS support to pixelflinger.

See the comment-block at the top of MIPSAssembler.cpp for
implementation overview.

Change-Id: Id492c10610574af8c89c38d19e12fafc3652c28a
---
 libpixelflinger/Android.mk                         |    2 +
 libpixelflinger/codeflinger/ARMAssembler.cpp       |  146 ++
 libpixelflinger/codeflinger/ARMAssembler.h         |   49 +-
 .../codeflinger/ARMAssemblerInterface.cpp          |  123 +-
 .../codeflinger/ARMAssemblerInterface.h            |   54 +-
 libpixelflinger/codeflinger/ARMAssemblerProxy.cpp  |   91 +
 libpixelflinger/codeflinger/ARMAssemblerProxy.h    |   45 +-
 libpixelflinger/codeflinger/CodeCache.cpp          |   12 +-
 libpixelflinger/codeflinger/GGLAssembler.cpp       |   80 +-
 libpixelflinger/codeflinger/GGLAssembler.h         |    8 +-
 libpixelflinger/codeflinger/MIPSAssembler.cpp      | 1957 ++++++++++++++++++++
 libpixelflinger/codeflinger/MIPSAssembler.h        |  555 ++++++
 libpixelflinger/codeflinger/load_store.cpp         |    4 +
 libpixelflinger/codeflinger/mips_disassem.c        |  590 ++++++
 libpixelflinger/codeflinger/mips_disassem.h        |   66 +
 libpixelflinger/codeflinger/mips_opcode.h          |  316 ++++
 libpixelflinger/codeflinger/texturing.cpp          |   18 +
 libpixelflinger/scanline.cpp                       |   19 +-
 libpixelflinger/tests/codegen/codegen.cpp          |   17 +-
 19 files changed, 3970 insertions(+), 182 deletions(-)
 create mode 100644 libpixelflinger/codeflinger/MIPSAssembler.cpp
 create mode 100644 libpixelflinger/codeflinger/MIPSAssembler.h
 create mode 100644 libpixelflinger/codeflinger/mips_disassem.c
 create mode 100644 libpixelflinger/codeflinger/mips_disassem.h
 create mode 100644 libpixelflinger/codeflinger/mips_opcode.h

(limited to 'libpixelflinger')

diff --git a/libpixelflinger/Android.mk b/libpixelflinger/Android.mk
index 1947c2d..488003f 100644
--- a/libpixelflinger/Android.mk
+++ b/libpixelflinger/Android.mk
@@ -44,6 +44,8 @@ PIXELFLINGER_CFLAGS += -fstrict-aliasing -fomit-frame-pointer
 endif
 
 ifeq ($(TARGET_ARCH),mips)
+PIXELFLINGER_SRC_FILES += codeflinger/MIPSAssembler.cpp
+PIXELFLINGER_SRC_FILES += codeflinger/mips_disassem.c
 PIXELFLINGER_SRC_FILES += arch-mips/t32cb16blend.S
 PIXELFLINGER_CFLAGS += -fstrict-aliasing -fomit-frame-pointer
 endif
diff --git a/libpixelflinger/codeflinger/ARMAssembler.cpp b/libpixelflinger/codeflinger/ARMAssembler.cpp
index 0dc5037..c4f42f5 100644
--- a/libpixelflinger/codeflinger/ARMAssembler.cpp
+++ b/libpixelflinger/codeflinger/ARMAssembler.cpp
@@ -76,6 +76,11 @@ void ARMAssembler::reset()
     mComments.clear();
 }
 
+int ARMAssembler::getCodegenArch()
+{
+    return CODEGEN_ARCH_ARM;
+}
+
 // ----------------------------------------------------------------------------
 
 void ARMAssembler::disassemble(const char* name)
@@ -444,5 +449,146 @@ void ARMAssembler::UBFX(int cc, int Rd, int Rn, int lsb, int width)
     *mPC++ = (cc<<28) | 0x7E00000 | ((width-1)<<16) | (Rd<<12) | (lsb<<7) | 0x50 | Rn;
 }
 
+#if 0
+#pragma mark -
+#pragma mark Addressing modes...
+#endif
+
+int ARMAssembler::buildImmediate(
+        uint32_t immediate, uint32_t& rot, uint32_t& imm)
+{
+    rot = 0;
+    imm = immediate;
+    if (imm > 0x7F) { // skip the easy cases
+        while (!(imm&3)  || (imm&0xFC000000)) {
+            uint32_t newval;
+            newval = imm >> 2;
+            newval |= (imm&3) << 30;
+            imm = newval;
+            rot += 2;
+            if (rot == 32) {
+                rot = 0;
+                break;
+            }
+        }
+    }
+    rot = (16 - (rot>>1)) & 0xF;
+
+    if (imm>=0x100)
+        return -EINVAL;
+
+    if (((imm>>(rot<<1)) | (imm<<(32-(rot<<1)))) != immediate)
+        return -1;
+
+    return 0;
+}
+
+// shifters...
+
+bool ARMAssembler::isValidImmediate(uint32_t immediate)
+{
+    uint32_t rot, imm;
+    return buildImmediate(immediate, rot, imm) == 0;
+}
+
+uint32_t ARMAssembler::imm(uint32_t immediate)
+{
+    uint32_t rot, imm;
+    int err = buildImmediate(immediate, rot, imm);
+
+    LOG_ALWAYS_FATAL_IF(err==-EINVAL,
+                        "immediate %08x cannot be encoded",
+                        immediate);
+
+    LOG_ALWAYS_FATAL_IF(err,
+                        "immediate (%08x) encoding bogus!",
+                        immediate);
+
+    return (1<<25) | (rot<<8) | imm;
+}
+
+uint32_t ARMAssembler::reg_imm(int Rm, int type, uint32_t shift)
+{
+    return ((shift&0x1F)<<7) | ((type&0x3)<<5) | (Rm&0xF);
+}
+
+uint32_t ARMAssembler::reg_rrx(int Rm)
+{
+    return (ROR<<5) | (Rm&0xF);
+}
+
+uint32_t ARMAssembler::reg_reg(int Rm, int type, int Rs)
+{
+    return ((Rs&0xF)<<8) | ((type&0x3)<<5) | (1<<4) | (Rm&0xF);
+}
+
+// addressing modes...
+// LDR(B)/STR(B)/PLD (immediate and Rm can be negative, which indicate U=0)
+uint32_t ARMAssembler::immed12_pre(int32_t immed12, int W)
+{
+    LOG_ALWAYS_FATAL_IF(abs(immed12) >= 0x800,
+                        "LDR(B)/STR(B)/PLD immediate too big (%08x)",
+                        immed12);
+    return (1<<24) | (((uint32_t(immed12)>>31)^1)<<23) |
+            ((W&1)<<21) | (abs(immed12)&0x7FF);
+}
+
+uint32_t ARMAssembler::immed12_post(int32_t immed12)
+{
+    LOG_ALWAYS_FATAL_IF(abs(immed12) >= 0x800,
+                        "LDR(B)/STR(B)/PLD immediate too big (%08x)",
+                        immed12);
+
+    return (((uint32_t(immed12)>>31)^1)<<23) | (abs(immed12)&0x7FF);
+}
+
+uint32_t ARMAssembler::reg_scale_pre(int Rm, int type,
+        uint32_t shift, int W)
+{
+    return  (1<<25) | (1<<24) |
+            (((uint32_t(Rm)>>31)^1)<<23) | ((W&1)<<21) |
+            reg_imm(abs(Rm), type, shift);
+}
+
+uint32_t ARMAssembler::reg_scale_post(int Rm, int type, uint32_t shift)
+{
+    return (1<<25) | (((uint32_t(Rm)>>31)^1)<<23) | reg_imm(abs(Rm), type, shift);
+}
+
+// LDRH/LDRSB/LDRSH/STRH (immediate and Rm can be negative, which indicate U=0)
+uint32_t ARMAssembler::immed8_pre(int32_t immed8, int W)
+{
+    uint32_t offset = abs(immed8);
+
+    LOG_ALWAYS_FATAL_IF(abs(immed8) >= 0x100,
+                        "LDRH/LDRSB/LDRSH/STRH immediate too big (%08x)",
+                        immed8);
+
+    return  (1<<24) | (1<<22) | (((uint32_t(immed8)>>31)^1)<<23) |
+            ((W&1)<<21) | (((offset&0xF0)<<4)|(offset&0xF));
+}
+
+uint32_t ARMAssembler::immed8_post(int32_t immed8)
+{
+    uint32_t offset = abs(immed8);
+
+    LOG_ALWAYS_FATAL_IF(abs(immed8) >= 0x100,
+                        "LDRH/LDRSB/LDRSH/STRH immediate too big (%08x)",
+                        immed8);
+
+    return (1<<22) | (((uint32_t(immed8)>>31)^1)<<23) |
+            (((offset&0xF0)<<4) | (offset&0xF));
+}
+
+uint32_t ARMAssembler::reg_pre(int Rm, int W)
+{
+    return (1<<24) | (((uint32_t(Rm)>>31)^1)<<23) | ((W&1)<<21) | (abs(Rm)&0xF);
+}
+
+uint32_t ARMAssembler::reg_post(int Rm)
+{
+    return (((uint32_t(Rm)>>31)^1)<<23) | (abs(Rm)&0xF);
+}
+
 }; // namespace android
 
diff --git a/libpixelflinger/codeflinger/ARMAssembler.h b/libpixelflinger/codeflinger/ARMAssembler.h
index e7f038a..06c66dd 100644
--- a/libpixelflinger/codeflinger/ARMAssembler.h
+++ b/libpixelflinger/codeflinger/ARMAssembler.h
@@ -52,11 +52,42 @@ public:
     virtual void    reset();
 
     virtual int     generate(const char* name);
+    virtual int     getCodegenArch();
 
     virtual void    prolog();
     virtual void    epilog(uint32_t touched);
     virtual void    comment(const char* string);
 
+
+    // -----------------------------------------------------------------------
+    // shifters and addressing modes
+    // -----------------------------------------------------------------------
+
+    // shifters...
+    virtual bool        isValidImmediate(uint32_t immed);
+    virtual int         buildImmediate(uint32_t i, uint32_t& rot, uint32_t& imm);
+
+    virtual uint32_t    imm(uint32_t immediate);
+    virtual uint32_t    reg_imm(int Rm, int type, uint32_t shift);
+    virtual uint32_t    reg_rrx(int Rm);
+    virtual uint32_t    reg_reg(int Rm, int type, int Rs);
+
+    // addressing modes...
+    // LDR(B)/STR(B)/PLD
+    // (immediate and Rm can be negative, which indicates U=0)
+    virtual uint32_t    immed12_pre(int32_t immed12, int W=0);
+    virtual uint32_t    immed12_post(int32_t immed12);
+    virtual uint32_t    reg_scale_pre(int Rm, int type=0, uint32_t shift=0, int W=0);
+    virtual uint32_t    reg_scale_post(int Rm, int type=0, uint32_t shift=0);
+
+    // LDRH/LDRSB/LDRSH/STRH
+    // (immediate and Rm can be negative, which indicates U=0)
+    virtual uint32_t    immed8_pre(int32_t immed8, int W=0);
+    virtual uint32_t    immed8_post(int32_t immed8);
+    virtual uint32_t    reg_pre(int Rm, int W=0);
+    virtual uint32_t    reg_post(int Rm);
+
+
     virtual void    dataProcessing(int opcode, int cc, int s,
                                 int Rd, int Rn,
                                 uint32_t Op2);
@@ -83,21 +114,23 @@ public:
     virtual uint32_t* pcForLabel(const char* label);
 
     virtual void LDR (int cc, int Rd,
-                int Rn, uint32_t offset = immed12_pre(0));
+                int Rn, uint32_t offset = __immed12_pre(0));
     virtual void LDRB(int cc, int Rd,
-                int Rn, uint32_t offset = immed12_pre(0));
+                int Rn, uint32_t offset = __immed12_pre(0));
     virtual void STR (int cc, int Rd,
-                int Rn, uint32_t offset = immed12_pre(0));
+                int Rn, uint32_t offset = __immed12_pre(0));
     virtual void STRB(int cc, int Rd,
-                int Rn, uint32_t offset = immed12_pre(0));
+                int Rn, uint32_t offset = __immed12_pre(0));
     virtual void LDRH (int cc, int Rd,
-                int Rn, uint32_t offset = immed8_pre(0));
+                int Rn, uint32_t offset = __immed8_pre(0));
     virtual void LDRSB(int cc, int Rd, 
-                int Rn, uint32_t offset = immed8_pre(0));
+                int Rn, uint32_t offset = __immed8_pre(0));
     virtual void LDRSH(int cc, int Rd,
-                int Rn, uint32_t offset = immed8_pre(0));
+                int Rn, uint32_t offset = __immed8_pre(0));
     virtual void STRH (int cc, int Rd,
-                int Rn, uint32_t offset = immed8_pre(0));
+                int Rn, uint32_t offset = __immed8_pre(0));
+
+
     virtual void LDM(int cc, int dir,
                 int Rn, int W, uint32_t reg_list);
     virtual void STM(int cc, int dir,
diff --git a/libpixelflinger/codeflinger/ARMAssemblerInterface.cpp b/libpixelflinger/codeflinger/ARMAssemblerInterface.cpp
index 7fa0de0..82180ee 100644
--- a/libpixelflinger/codeflinger/ARMAssemblerInterface.cpp
+++ b/libpixelflinger/codeflinger/ARMAssemblerInterface.cpp
@@ -32,77 +32,15 @@ ARMAssemblerInterface::~ARMAssemblerInterface()
 {
 }
 
-int ARMAssemblerInterface::buildImmediate(
-        uint32_t immediate, uint32_t& rot, uint32_t& imm)
-{
-    rot = 0;
-    imm = immediate;
-    if (imm > 0x7F) { // skip the easy cases
-        while (!(imm&3)  || (imm&0xFC000000)) {
-            uint32_t newval;
-            newval = imm >> 2;
-            newval |= (imm&3) << 30;
-            imm = newval;
-            rot += 2;
-            if (rot == 32) {
-                rot = 0;
-                break;
-            }
-        }
-    }
-    rot = (16 - (rot>>1)) & 0xF;
-
-    if (imm>=0x100)
-        return -EINVAL;
-
-    if (((imm>>(rot<<1)) | (imm<<(32-(rot<<1)))) != immediate)
-        return -1;
-
-    return 0;
-}
-
-// shifters...
-
-bool ARMAssemblerInterface::isValidImmediate(uint32_t immediate)
-{
-    uint32_t rot, imm;
-    return buildImmediate(immediate, rot, imm) == 0;
-}
-
-uint32_t ARMAssemblerInterface::imm(uint32_t immediate)
-{
-    uint32_t rot, imm;
-    int err = buildImmediate(immediate, rot, imm);
-
-    LOG_ALWAYS_FATAL_IF(err==-EINVAL,
-                        "immediate %08x cannot be encoded",
-                        immediate);
-
-    LOG_ALWAYS_FATAL_IF(err,
-                        "immediate (%08x) encoding bogus!",
-                        immediate);
+// --------------------------------------------------------------------
 
-    return (1<<25) | (rot<<8) | imm;
-}
-
-uint32_t ARMAssemblerInterface::reg_imm(int Rm, int type, uint32_t shift)
-{
-    return ((shift&0x1F)<<7) | ((type&0x3)<<5) | (Rm&0xF);
-}
-
-uint32_t ARMAssemblerInterface::reg_rrx(int Rm)
-{
-    return (ROR<<5) | (Rm&0xF);
-}
+// The following two functions are static and used for initializers
+// in the original ARM code. The above versions (without __), are now
+// virtual, and can be overridden in the MIPS code. But since these are
+// needed at initialization time, they must be static. Not thrilled with
+// this implementation, but it works...
 
-uint32_t ARMAssemblerInterface::reg_reg(int Rm, int type, int Rs)
-{
-    return ((Rs&0xF)<<8) | ((type&0x3)<<5) | (1<<4) | (Rm&0xF);
-}
-
-// addressing modes... 
-// LDR(B)/STR(B)/PLD (immediate and Rm can be negative, which indicate U=0)
-uint32_t ARMAssemblerInterface::immed12_pre(int32_t immed12, int W)
+uint32_t ARMAssemblerInterface::__immed12_pre(int32_t immed12, int W)
 {
     LOG_ALWAYS_FATAL_IF(abs(immed12) >= 0x800,
                         "LDR(B)/STR(B)/PLD immediate too big (%08x)",
@@ -111,30 +49,7 @@ uint32_t ARMAssemblerInterface::immed12_pre(int32_t immed12, int W)
             ((W&1)<<21) | (abs(immed12)&0x7FF);
 }
 
-uint32_t ARMAssemblerInterface::immed12_post(int32_t immed12)
-{
-    LOG_ALWAYS_FATAL_IF(abs(immed12) >= 0x800,
-                        "LDR(B)/STR(B)/PLD immediate too big (%08x)",
-                        immed12);
-
-    return (((uint32_t(immed12)>>31)^1)<<23) | (abs(immed12)&0x7FF);
-}
-
-uint32_t ARMAssemblerInterface::reg_scale_pre(int Rm, int type, 
-        uint32_t shift, int W)
-{
-    return  (1<<25) | (1<<24) | 
-            (((uint32_t(Rm)>>31)^1)<<23) | ((W&1)<<21) |
-            reg_imm(abs(Rm), type, shift);
-}
-
-uint32_t ARMAssemblerInterface::reg_scale_post(int Rm, int type, uint32_t shift)
-{
-    return (1<<25) | (((uint32_t(Rm)>>31)^1)<<23) | reg_imm(abs(Rm), type, shift);
-}
-
-// LDRH/LDRSB/LDRSH/STRH (immediate and Rm can be negative, which indicate U=0)
-uint32_t ARMAssemblerInterface::immed8_pre(int32_t immed8, int W)
+uint32_t ARMAssemblerInterface::__immed8_pre(int32_t immed8, int W)
 {
     uint32_t offset = abs(immed8);
 
@@ -146,28 +61,6 @@ uint32_t ARMAssemblerInterface::immed8_pre(int32_t immed8, int W)
             ((W&1)<<21) | (((offset&0xF0)<<4)|(offset&0xF));
 }
 
-uint32_t ARMAssemblerInterface::immed8_post(int32_t immed8)
-{
-    uint32_t offset = abs(immed8);
-
-    LOG_ALWAYS_FATAL_IF(abs(immed8) >= 0x100,
-                        "LDRH/LDRSB/LDRSH/STRH immediate too big (%08x)",
-                        immed8);
-
-    return (1<<22) | (((uint32_t(immed8)>>31)^1)<<23) |
-            (((offset&0xF0)<<4) | (offset&0xF));
-}
-
-uint32_t ARMAssemblerInterface::reg_pre(int Rm, int W)
-{
-    return (1<<24) | (((uint32_t(Rm)>>31)^1)<<23) | ((W&1)<<21) | (abs(Rm)&0xF);
-}
-
-uint32_t ARMAssemblerInterface::reg_post(int Rm)
-{
-    return (((uint32_t(Rm)>>31)^1)<<23) | (abs(Rm)&0xF);
-}
-
 
 }; // namespace android
 
diff --git a/libpixelflinger/codeflinger/ARMAssemblerInterface.h b/libpixelflinger/codeflinger/ARMAssemblerInterface.h
index 796342a..9991980 100644
--- a/libpixelflinger/codeflinger/ARMAssemblerInterface.h
+++ b/libpixelflinger/codeflinger/ARMAssemblerInterface.h
@@ -62,33 +62,40 @@ public:
         LSAVED = LR4|LR5|LR6|LR7|LR8|LR9|LR10|LR11 | LLR
     };
 
+    enum {
+        CODEGEN_ARCH_ARM = 1, CODEGEN_ARCH_MIPS
+    };
+
     // -----------------------------------------------------------------------
     // shifters and addressing modes
     // -----------------------------------------------------------------------
 
-    // shifters...
-    static bool        isValidImmediate(uint32_t immed);
-    static int         buildImmediate(uint32_t i, uint32_t& rot, uint32_t& imm);
+    // these static versions are used for initializers on LDxx/STxx below
+    static uint32_t    __immed12_pre(int32_t immed12, int W=0);
+    static uint32_t    __immed8_pre(int32_t immed12, int W=0);
+
+    virtual bool        isValidImmediate(uint32_t immed) = 0;
+    virtual int         buildImmediate(uint32_t i, uint32_t& rot, uint32_t& imm) = 0;
 
-    static uint32_t    imm(uint32_t immediate);
-    static uint32_t    reg_imm(int Rm, int type, uint32_t shift);
-    static uint32_t    reg_rrx(int Rm);
-    static uint32_t    reg_reg(int Rm, int type, int Rs);
+    virtual uint32_t    imm(uint32_t immediate) = 0;
+    virtual uint32_t    reg_imm(int Rm, int type, uint32_t shift) = 0;
+    virtual uint32_t    reg_rrx(int Rm) = 0;
+    virtual uint32_t    reg_reg(int Rm, int type, int Rs) = 0;
 
     // addressing modes... 
     // LDR(B)/STR(B)/PLD
     // (immediate and Rm can be negative, which indicates U=0)
-    static uint32_t    immed12_pre(int32_t immed12, int W=0);
-    static uint32_t    immed12_post(int32_t immed12);
-    static uint32_t    reg_scale_pre(int Rm, int type=0, uint32_t shift=0, int W=0);
-    static uint32_t    reg_scale_post(int Rm, int type=0, uint32_t shift=0);
+    virtual uint32_t    immed12_pre(int32_t immed12, int W=0) = 0;
+    virtual uint32_t    immed12_post(int32_t immed12) = 0;
+    virtual uint32_t    reg_scale_pre(int Rm, int type=0, uint32_t shift=0, int W=0) = 0;
+    virtual uint32_t    reg_scale_post(int Rm, int type=0, uint32_t shift=0) = 0;
 
     // LDRH/LDRSB/LDRSH/STRH
     // (immediate and Rm can be negative, which indicates U=0)
-    static uint32_t    immed8_pre(int32_t immed8, int W=0);
-    static uint32_t    immed8_post(int32_t immed8);
-    static uint32_t    reg_pre(int Rm, int W=0);
-    static uint32_t    reg_post(int Rm);
+    virtual uint32_t    immed8_pre(int32_t immed8, int W=0) = 0;
+    virtual uint32_t    immed8_post(int32_t immed8) = 0;
+    virtual uint32_t    reg_pre(int Rm, int W=0) = 0;
+    virtual uint32_t    reg_post(int Rm) = 0;
 
     // -----------------------------------------------------------------------
     // basic instructions & code generation
@@ -98,6 +105,7 @@ public:
     virtual void reset() = 0;
     virtual int  generate(const char* name) = 0;
     virtual void disassemble(const char* name) = 0;
+    virtual int  getCodegenArch() = 0;
     
     // construct prolog and epilog
     virtual void prolog() = 0;
@@ -143,22 +151,22 @@ public:
 
     // data transfer...
     virtual void LDR (int cc, int Rd,
-                int Rn, uint32_t offset = immed12_pre(0)) = 0;
+                int Rn, uint32_t offset = __immed12_pre(0)) = 0;
     virtual void LDRB(int cc, int Rd,
-                int Rn, uint32_t offset = immed12_pre(0)) = 0;
+                int Rn, uint32_t offset = __immed12_pre(0)) = 0;
     virtual void STR (int cc, int Rd,
-                int Rn, uint32_t offset = immed12_pre(0)) = 0;
+                int Rn, uint32_t offset = __immed12_pre(0)) = 0;
     virtual void STRB(int cc, int Rd,
-                int Rn, uint32_t offset = immed12_pre(0)) = 0;
+                int Rn, uint32_t offset = __immed12_pre(0)) = 0;
 
     virtual void LDRH (int cc, int Rd,
-                int Rn, uint32_t offset = immed8_pre(0)) = 0;
+                int Rn, uint32_t offset = __immed8_pre(0)) = 0;
     virtual void LDRSB(int cc, int Rd, 
-                int Rn, uint32_t offset = immed8_pre(0)) = 0;
+                int Rn, uint32_t offset = __immed8_pre(0)) = 0;
     virtual void LDRSH(int cc, int Rd,
-                int Rn, uint32_t offset = immed8_pre(0)) = 0;
+                int Rn, uint32_t offset = __immed8_pre(0)) = 0;
     virtual void STRH (int cc, int Rd,
-                int Rn, uint32_t offset = immed8_pre(0)) = 0;
+                int Rn, uint32_t offset = __immed8_pre(0)) = 0;
 
     // block data transfer...
     virtual void LDM(int cc, int dir,
diff --git a/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp b/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp
index c57d7da..7feed62 100644
--- a/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp
+++ b/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp
@@ -55,6 +55,10 @@ int ARMAssemblerProxy::generate(const char* name) {
 void ARMAssemblerProxy::disassemble(const char* name) {
     return mTarget->disassemble(name);
 }
+int ARMAssemblerProxy::getCodegenArch()
+{
+    return mTarget->getCodegenArch();
+}
 void ARMAssemblerProxy::prolog() {
     mTarget->prolog();
 }
@@ -66,6 +70,93 @@ void ARMAssemblerProxy::comment(const char* string) {
 }
 
 
+
+// addressing modes
+
+bool ARMAssemblerProxy::isValidImmediate(uint32_t immed)
+{
+    return mTarget->isValidImmediate(immed);
+}
+
+int ARMAssemblerProxy::buildImmediate(uint32_t i, uint32_t& rot, uint32_t& imm)
+{
+    return mTarget->buildImmediate(i, rot, imm);
+}
+
+
+
+uint32_t ARMAssemblerProxy::imm(uint32_t immediate)
+{
+    return mTarget->imm(immediate);
+}
+
+uint32_t ARMAssemblerProxy::reg_imm(int Rm, int type, uint32_t shift)
+{
+    return mTarget->reg_imm(Rm, type, shift);
+}
+
+uint32_t ARMAssemblerProxy::reg_rrx(int Rm)
+{
+    return mTarget->reg_rrx(Rm);
+}
+
+uint32_t ARMAssemblerProxy::reg_reg(int Rm, int type, int Rs)
+{
+    return mTarget->reg_reg(Rm, type, Rs);
+}
+
+
+// addressing modes...
+// LDR(B)/STR(B)/PLD
+// (immediate and Rm can be negative, which indicates U=0)
+uint32_t ARMAssemblerProxy::immed12_pre(int32_t immed12, int W)
+{
+    return mTarget->immed12_pre(immed12, W);
+}
+
+uint32_t ARMAssemblerProxy::immed12_post(int32_t immed12)
+{
+    return mTarget->immed12_post(immed12);
+}
+
+uint32_t ARMAssemblerProxy::reg_scale_pre(int Rm, int type, uint32_t shift, int W)
+{
+    return mTarget->reg_scale_pre(Rm, type, shift, W);
+}
+
+uint32_t ARMAssemblerProxy::reg_scale_post(int Rm, int type, uint32_t shift)
+{
+    return mTarget->reg_scale_post(Rm, type, shift);
+}
+
+
+// LDRH/LDRSB/LDRSH/STRH
+// (immediate and Rm can be negative, which indicates U=0)
+uint32_t ARMAssemblerProxy::immed8_pre(int32_t immed8, int W)
+{
+    return mTarget->immed8_pre(immed8, W);
+}
+
+uint32_t ARMAssemblerProxy::immed8_post(int32_t immed8)
+{
+    return mTarget->immed8_post(immed8);
+}
+
+uint32_t ARMAssemblerProxy::reg_pre(int Rm, int W)
+{
+    return mTarget->reg_pre(Rm, W);
+}
+
+uint32_t ARMAssemblerProxy::reg_post(int Rm)
+{
+    return mTarget->reg_post(Rm);
+}
+
+
+//------------------------------------------------------------------------
+
+
+
 void ARMAssemblerProxy::dataProcessing( int opcode, int cc, int s,
                                         int Rd, int Rn, uint32_t Op2)
 {
diff --git a/libpixelflinger/codeflinger/ARMAssemblerProxy.h b/libpixelflinger/codeflinger/ARMAssemblerProxy.h
index 8c7f270..5e3f763 100644
--- a/libpixelflinger/codeflinger/ARMAssemblerProxy.h
+++ b/libpixelflinger/codeflinger/ARMAssemblerProxy.h
@@ -42,11 +42,40 @@ public:
     virtual void    reset();
     virtual int     generate(const char* name);
     virtual void    disassemble(const char* name);
+    virtual int     getCodegenArch();
 
     virtual void    prolog();
     virtual void    epilog(uint32_t touched);
     virtual void    comment(const char* string);
 
+    // -----------------------------------------------------------------------
+    // shifters and addressing modes
+    // -----------------------------------------------------------------------
+
+    virtual bool        isValidImmediate(uint32_t immed);
+    virtual int         buildImmediate(uint32_t i, uint32_t& rot, uint32_t& imm);
+
+    virtual uint32_t    imm(uint32_t immediate);
+    virtual uint32_t    reg_imm(int Rm, int type, uint32_t shift);
+    virtual uint32_t    reg_rrx(int Rm);
+    virtual uint32_t    reg_reg(int Rm, int type, int Rs);
+
+    // addressing modes...
+    // LDR(B)/STR(B)/PLD
+    // (immediate and Rm can be negative, which indicates U=0)
+    virtual uint32_t    immed12_pre(int32_t immed12, int W=0);
+    virtual uint32_t    immed12_post(int32_t immed12);
+    virtual uint32_t    reg_scale_pre(int Rm, int type=0, uint32_t shift=0, int W=0);
+    virtual uint32_t    reg_scale_post(int Rm, int type=0, uint32_t shift=0);
+
+    // LDRH/LDRSB/LDRSH/STRH
+    // (immediate and Rm can be negative, which indicates U=0)
+    virtual uint32_t    immed8_pre(int32_t immed8, int W=0);
+    virtual uint32_t    immed8_post(int32_t immed8);
+    virtual uint32_t    reg_pre(int Rm, int W=0);
+    virtual uint32_t    reg_post(int Rm);
+
+
     virtual void    dataProcessing(int opcode, int cc, int s,
                                 int Rd, int Rn,
                                 uint32_t Op2);
@@ -73,21 +102,21 @@ public:
     uint32_t* pcForLabel(const char* label);
 
     virtual void LDR (int cc, int Rd,
-                int Rn, uint32_t offset = immed12_pre(0));
+                int Rn, uint32_t offset = __immed12_pre(0));
     virtual void LDRB(int cc, int Rd,
-                int Rn, uint32_t offset = immed12_pre(0));
+                int Rn, uint32_t offset = __immed12_pre(0));
     virtual void STR (int cc, int Rd,
-                int Rn, uint32_t offset = immed12_pre(0));
+                int Rn, uint32_t offset = __immed12_pre(0));
     virtual void STRB(int cc, int Rd,
-                int Rn, uint32_t offset = immed12_pre(0));
+                int Rn, uint32_t offset = __immed12_pre(0));
     virtual void LDRH (int cc, int Rd,
-                int Rn, uint32_t offset = immed8_pre(0));
+                int Rn, uint32_t offset = __immed8_pre(0));
     virtual void LDRSB(int cc, int Rd, 
-                int Rn, uint32_t offset = immed8_pre(0));
+                int Rn, uint32_t offset = __immed8_pre(0));
     virtual void LDRSH(int cc, int Rd,
-                int Rn, uint32_t offset = immed8_pre(0));
+                int Rn, uint32_t offset = __immed8_pre(0));
     virtual void STRH (int cc, int Rd,
-                int Rn, uint32_t offset = immed8_pre(0));
+                int Rn, uint32_t offset = __immed8_pre(0));
     virtual void LDM(int cc, int dir,
                 int Rn, int W, uint32_t reg_list);
     virtual void STM(int cc, int dir,
diff --git a/libpixelflinger/codeflinger/CodeCache.cpp b/libpixelflinger/codeflinger/CodeCache.cpp
index a713feb..4f2ede3 100644
--- a/libpixelflinger/codeflinger/CodeCache.cpp
+++ b/libpixelflinger/codeflinger/CodeCache.cpp
@@ -36,6 +36,12 @@ namespace android {
 #include <errno.h>
 #endif
 
+#if defined(__mips__)
+#include <asm/cachectl.h>
+#include <errno.h>
+#endif
+
+// ----------------------------------------------------------------------------
 // ----------------------------------------------------------------------------
 
 Assembly::Assembly(size_t size)
@@ -155,12 +161,12 @@ int CodeCache::cache(  const AssemblyKeyBase& keyBase,
         mCacheInUse += assemblySize;
         mWhen++;
         // synchronize caches...
-#if defined(__arm__)
+#if defined(__arm__) || defined(__mips__)
         const long base = long(assembly->base());
         const long curr = base + long(assembly->size());
         err = cacheflush(base, curr, 0);
-        ALOGE_IF(err, "__ARM_NR_cacheflush error %s\n",
-                strerror(errno));
+        ALOGE_IF(err, "cacheflush error %s\n",
+                 strerror(errno));
 #endif
     }
 
diff --git a/libpixelflinger/codeflinger/GGLAssembler.cpp b/libpixelflinger/codeflinger/GGLAssembler.cpp
index f1d81b2..1ddf93d 100644
--- a/libpixelflinger/codeflinger/GGLAssembler.cpp
+++ b/libpixelflinger/codeflinger/GGLAssembler.cpp
@@ -31,7 +31,8 @@ namespace android {
 // ----------------------------------------------------------------------------
 
 GGLAssembler::GGLAssembler(ARMAssemblerInterface* target)
-    : ARMAssemblerProxy(target), RegisterAllocator(), mOptLevel(7)
+    : ARMAssemblerProxy(target),
+      RegisterAllocator(ARMAssemblerProxy::getCodegenArch()), mOptLevel(7)
 {
 }
 
@@ -230,7 +231,9 @@ int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c)
 
             // texel generation
             build_textures(parts, regs);
-        }        
+            if (registerFile().status())
+                return registerFile().status();
+        }
 
         if ((blending & (FACTOR_DST|BLEND_DST)) || 
                 (mMasking && !mAllMasked) ||
@@ -890,6 +893,15 @@ void GGLAssembler::build_and_immediate(int d, int s, uint32_t mask, int bits)
         return;
     }
     
+    if (getCodegenArch() == CODEGEN_ARCH_MIPS) {
+        // MIPS can do 16-bit imm in 1 instr, 32-bit in 3 instr
+        // the below ' while (mask)' code is buggy on mips
+        // since mips returns true on isValidImmediate()
+        // then we get multiple AND instr (positive logic)
+        AND( AL, 0, d, s, imm(mask) );
+        return;
+    }
+
     int negative_logic = !isValidImmediate(mask);
     if (negative_logic) {
         mask = ~mask & size;
@@ -1002,6 +1014,15 @@ void GGLAssembler::base_offset(
 // cheezy register allocator...
 // ----------------------------------------------------------------------------
 
+// Modified to support MIPS processors, in a very simple way. We retain the
+// (Arm) limit of 16 total registers, but shift the mapping of those registers
+// from 0-15, to 2-17. Register 0 on Mips cannot be used as GP registers, and
+// register 1 has a traditional use as a temp).
+
+RegisterAllocator::RegisterAllocator(int arch) : mRegs(arch)
+{
+}
+
 void RegisterAllocator::reset()
 {
     mRegs.reset();
@@ -1029,16 +1050,22 @@ RegisterAllocator::RegisterFile& RegisterAllocator::registerFile()
 
 // ----------------------------------------------------------------------------
 
-RegisterAllocator::RegisterFile::RegisterFile()
-    : mRegs(0), mTouched(0), mStatus(0)
+RegisterAllocator::RegisterFile::RegisterFile(int codegen_arch)
+    : mRegs(0), mTouched(0), mStatus(0), mArch(codegen_arch), mRegisterOffset(0)
 {
+    if (mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS) {
+        mRegisterOffset = 2;    // ARM has regs 0..15, MIPS offset to 2..17
+    }
     reserve(ARMAssemblerInterface::SP);
     reserve(ARMAssemblerInterface::PC);
 }
 
-RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs)
-    : mRegs(rhs.mRegs), mTouched(rhs.mTouched)
+RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs, int codegen_arch)
+    : mRegs(rhs.mRegs), mTouched(rhs.mTouched), mArch(codegen_arch), mRegisterOffset(0)
 {
+    if (mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS) {
+        mRegisterOffset = 2;    // ARM has regs 0..15, MIPS offset to 2..17
+    }
 }
 
 RegisterAllocator::RegisterFile::~RegisterFile()
@@ -1057,8 +1084,12 @@ void RegisterAllocator::RegisterFile::reset()
     reserve(ARMAssemblerInterface::PC);
 }
 
+// RegisterFile::reserve() take a register parameter in the
+// range 0-15 (Arm compatible), but on a Mips processor, will
+// return the actual allocated register in the range 2-17.
 int RegisterAllocator::RegisterFile::reserve(int reg)
 {
+    reg += mRegisterOffset;
     LOG_ALWAYS_FATAL_IF(isUsed(reg),
                         "reserving register %d, but already in use",
                         reg);
@@ -1067,6 +1098,7 @@ int RegisterAllocator::RegisterFile::reserve(int reg)
     return reg;
 }
 
+// This interface uses regMask in range 2-17 on MIPS, no translation.
 void RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask)
 {
     mRegs |= regMask;
@@ -1075,7 +1107,7 @@ void RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask)
 
 int RegisterAllocator::RegisterFile::isUsed(int reg) const
 {
-    LOG_ALWAYS_FATAL_IF(reg>=16, "invalid register %d", reg);
+    LOG_ALWAYS_FATAL_IF(reg>=16+(int)mRegisterOffset, "invalid register %d", reg);
     return mRegs & (1<<reg);
 }
 
@@ -1086,10 +1118,10 @@ int RegisterAllocator::RegisterFile::obtain()
                                      6,  7, 8, 9,
                                     10, 11 };
     const int nbreg = sizeof(priorityList);
-    int i, r;
+    int i, r, reg;
     for (i=0 ; i<nbreg ; i++) {
         r = priorityList[i];
-        if (!isUsed(r)) {
+        if (!isUsed(r + mRegisterOffset)) {
             break;
         }
     }
@@ -1102,18 +1134,20 @@ int RegisterAllocator::RegisterFile::obtain()
         // the code will never be run anyway.
         return ARMAssemblerInterface::SP; 
     }
-    reserve(r);
-    return r;
+    reg = reserve(r);  // Param in Arm range 0-15, returns range 2-17 on Mips.
+    return reg;
 }
 
 bool RegisterAllocator::RegisterFile::hasFreeRegs() const
 {
-    return ((mRegs & 0xFFFF) == 0xFFFF) ? false : true;
+    uint32_t regs = mRegs >> mRegisterOffset;   // MIPS fix.
+    return ((regs & 0xFFFF) == 0xFFFF) ? false : true;
 }
 
 int RegisterAllocator::RegisterFile::countFreeRegs() const
 {
-    int f = ~mRegs & 0xFFFF;
+    uint32_t regs = mRegs >> mRegisterOffset;   // MIPS fix.
+    int f = ~regs & 0xFFFF;
     // now count number of 1
    f = (f & 0x5555) + ((f>>1) & 0x5555);
    f = (f & 0x3333) + ((f>>2) & 0x3333);
@@ -1124,18 +1158,24 @@ int RegisterAllocator::RegisterFile::countFreeRegs() const
 
 void RegisterAllocator::RegisterFile::recycle(int reg)
 {
-    LOG_FATAL_IF(!isUsed(reg),
-            "recycling unallocated register %d",
-            reg);
+    // commented out, since common failure of running out of regs
+    // triggers this assertion. Since the code is not execectued
+    // in that case, it does not matter. No reason to FATAL err.
+    // LOG_FATAL_IF(!isUsed(reg),
+    //         "recycling unallocated register %d",
+    //         reg);
     mRegs &= ~(1<<reg);
 }
 
 void RegisterAllocator::RegisterFile::recycleSeveral(uint32_t regMask)
 {
-    LOG_FATAL_IF((mRegs & regMask)!=regMask,
-            "recycling unallocated registers "
-            "(recycle=%08x, allocated=%08x, unallocated=%08x)",
-            regMask, mRegs, mRegs&regMask);
+    // commented out, since common failure of running out of regs
+    // triggers this assertion. Since the code is not execectued
+    // in that case, it does not matter. No reason to FATAL err.
+    // LOG_FATAL_IF((mRegs & regMask)!=regMask,
+    //         "recycling unallocated registers "
+    //         "(recycle=%08x, allocated=%08x, unallocated=%08x)",
+    //         regMask, mRegs, mRegs&regMask);
     mRegs &= ~regMask;
 }
 
diff --git a/libpixelflinger/codeflinger/GGLAssembler.h b/libpixelflinger/codeflinger/GGLAssembler.h
index d1d29f0..dd5f48e 100644
--- a/libpixelflinger/codeflinger/GGLAssembler.h
+++ b/libpixelflinger/codeflinger/GGLAssembler.h
@@ -43,6 +43,7 @@ class RegisterAllocator
 public:
     class RegisterFile;
     
+                    RegisterAllocator(int arch);
     RegisterFile&   registerFile();
     int             reserveReg(int reg);
     int             obtainReg();
@@ -52,8 +53,8 @@ public:
     class RegisterFile
     {
     public:
-                            RegisterFile();
-                            RegisterFile(const RegisterFile& rhs);
+                            RegisterFile(int arch);
+                            RegisterFile(const RegisterFile& rhs, int arch);
                             ~RegisterFile();
 
                 void        reset();
@@ -86,6 +87,9 @@ public:
         uint32_t    mRegs;
         uint32_t    mTouched;
         uint32_t    mStatus;
+        int         mArch;
+        uint32_t    mRegisterOffset;    // lets reg alloc use 2..17 for mips
+                                        // while arm uses 0..15
     };
  
     class Scratch
diff --git a/libpixelflinger/codeflinger/MIPSAssembler.cpp b/libpixelflinger/codeflinger/MIPSAssembler.cpp
new file mode 100644
index 0000000..7888a0e
--- /dev/null
+++ b/libpixelflinger/codeflinger/MIPSAssembler.cpp
@@ -0,0 +1,1957 @@
+/* libs/pixelflinger/codeflinger/MIPSAssembler.cpp
+**
+** Copyright 2012, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+**     http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+
+/* MIPS assembler and ARM->MIPS assembly translator
+**
+** The approach is to leave the GGLAssembler and associated files largely
+** un-changed, still utilizing all Arm instruction generation. Via the
+** ArmToMipsAssembler (subclassed from ArmAssemblerInterface) each Arm
+** instruction is translated to one or more Mips instructions as necessary. This
+** is clearly less efficient than a direct implementation within the
+** GGLAssembler, but is far cleaner, more maintainable, and has yielded very
+** significant performance gains on Mips compared to the generic pixel pipeline.
+**
+**
+** GGLAssembler changes
+**
+** - The register allocator has been modified to re-map Arm registers 0-15 to mips
+** registers 2-17. Mips register 0 cannot be used as general-purpose register,
+** and register 1 has traditional uses as a short-term temporary.
+**
+** - Added some early bailouts for OUT_OF_REGISTERS in texturing.cpp and
+** GGLAssembler.cpp, since this is not fatal, and can be retried at lower
+** optimization level.
+**
+**
+** ARMAssembler and ARMAssemblerInterface changes
+**
+** Refactored ARM address-mode static functions (imm(), reg_imm(), imm12_pre(), etc.)
+** to virtual, so they can be overridden in MIPSAssembler. The implementation of these
+** functions on ARM is moved from ARMAssemblerInterface.cpp to ARMAssembler.cpp, and
+** is unchanged from the original. (This required duplicating 2 of these as static
+** functions in ARMAssemblerInterface.cpp so they could be used as static initializers).
+*/
+
+
+#define LOG_TAG "MIPSAssembler"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <cutils/log.h>
+#include <cutils/properties.h>
+
+#if defined(WITH_LIB_HARDWARE)
+#include <hardware_legacy/qemu_tracing.h>
+#endif
+
+#include <private/pixelflinger/ggl_context.h>
+
+#include "codeflinger/MIPSAssembler.h"
+#include "codeflinger/CodeCache.h"
+#include "codeflinger/mips_disassem.h"
+
+// Choose MIPS arch variant following gcc flags
+#if defined(__mips__) && __mips==32 && __mips_isa_rev>=2
+#define mips32r2 1
+#else
+#define mips32r2 0
+#endif
+
+
+#define NOT_IMPLEMENTED()  LOG_ALWAYS_FATAL("Arm instruction %s not yet implemented\n", __func__)
+
+
+
+// ----------------------------------------------------------------------------
+
+namespace android {
+
+// ----------------------------------------------------------------------------
+#if 0
+#pragma mark -
+#pragma mark ArmToMipsAssembler...
+#endif
+
+ArmToMipsAssembler::ArmToMipsAssembler(const sp<Assembly>& assembly,
+                                       char *abuf, int linesz, int instr_count)
+    :   ARMAssemblerInterface(),
+        mArmDisassemblyBuffer(abuf),
+        mArmLineLength(linesz),
+        mArmInstrCount(instr_count),
+        mInum(0),
+        mAssembly(assembly)
+{
+    mMips = new MIPSAssembler(assembly, this);
+    mArmPC = (uint32_t **) malloc(ARM_MAX_INSTUCTIONS * sizeof(uint32_t *));
+    init_conditional_labels();
+}
+
+ArmToMipsAssembler::~ArmToMipsAssembler()
+{
+    delete mMips;
+    free((void *) mArmPC);
+}
+
+uint32_t* ArmToMipsAssembler::pc() const
+{
+    return mMips->pc();
+}
+
+uint32_t* ArmToMipsAssembler::base() const
+{
+    return mMips->base();
+}
+
+void ArmToMipsAssembler::reset()
+{
+    cond.labelnum = 0;
+    mInum = 0;
+    mMips->reset();
+}
+
+int ArmToMipsAssembler::getCodegenArch()
+{
+    return CODEGEN_ARCH_MIPS;
+}
+
+void ArmToMipsAssembler::comment(const char* string)
+{
+    mMips->comment(string);
+}
+
+void ArmToMipsAssembler::label(const char* theLabel)
+{
+    mMips->label(theLabel);
+}
+
+void ArmToMipsAssembler::disassemble(const char* name)
+{
+    mMips->disassemble(name);
+}
+
+void ArmToMipsAssembler::init_conditional_labels()
+{
+    int i;
+    for (i=0;i<99; ++i) {
+        sprintf(cond.label[i], "cond_%d", i);
+    }
+}
+
+
+
+#if 0
+#pragma mark -
+#pragma mark Prolog/Epilog & Generate...
+#endif
+
+void ArmToMipsAssembler::prolog()
+{
+    mArmPC[mInum++] = pc();  // save starting PC for this instr
+
+    mMips->ADDIU(R_sp, R_sp, -(5 * 4));
+    mMips->SW(R_s0, R_sp, 0);
+    mMips->SW(R_s1, R_sp, 4);
+    mMips->SW(R_s2, R_sp, 8);
+    mMips->SW(R_s3, R_sp, 12);
+    mMips->SW(R_s4, R_sp, 16);
+    mMips->MOVE(R_v0, R_a0);    // move context * passed in a0 to v0 (arm r0)
+}
+
+void ArmToMipsAssembler::epilog(uint32_t touched)
+{
+    mArmPC[mInum++] = pc();  // save starting PC for this instr
+
+    mMips->LW(R_s0, R_sp, 0);
+    mMips->LW(R_s1, R_sp, 4);
+    mMips->LW(R_s2, R_sp, 8);
+    mMips->LW(R_s3, R_sp, 12);
+    mMips->LW(R_s4, R_sp, 16);
+    mMips->ADDIU(R_sp, R_sp, (5 * 4));
+    mMips->JR(R_ra);
+
+}
+
+int ArmToMipsAssembler::generate(const char* name)
+{
+    return mMips->generate(name);
+}
+
+uint32_t* ArmToMipsAssembler::pcForLabel(const char* label)
+{
+    return mMips->pcForLabel(label);
+}
+
+
+
+//----------------------------------------------------------
+
+#if 0
+#pragma mark -
+#pragma mark Addressing modes & shifters...
+#endif
+
+
+// do not need this for MIPS, but it is in the Interface (virtual)
+int ArmToMipsAssembler::buildImmediate(
+        uint32_t immediate, uint32_t& rot, uint32_t& imm)
+{
+    // for MIPS, any 32-bit immediate is OK
+    rot = 0;
+    imm = immediate;
+    return 0;
+}
+
+// shifters...
+
+bool ArmToMipsAssembler::isValidImmediate(uint32_t immediate)
+{
+    // for MIPS, any 32-bit immediate is OK
+    return true;
+}
+
+uint32_t ArmToMipsAssembler::imm(uint32_t immediate)
+{
+    // ALOGW("immediate value %08x at pc %08x\n", immediate, (int)pc());
+    amode.value = immediate;
+    return AMODE_IMM;
+}
+
+uint32_t ArmToMipsAssembler::reg_imm(int Rm, int type, uint32_t shift)
+{
+    amode.reg = Rm;
+    amode.stype = type;
+    amode.value = shift;
+    return AMODE_REG_IMM;
+}
+
+uint32_t ArmToMipsAssembler::reg_rrx(int Rm)
+{
+    // reg_rrx mode is not used in the GLLAssember code at this time
+    return AMODE_UNSUPPORTED;
+}
+
+uint32_t ArmToMipsAssembler::reg_reg(int Rm, int type, int Rs)
+{
+    // reg_reg mode is not used in the GLLAssember code at this time
+    return AMODE_UNSUPPORTED;
+}
+
+
+// addressing modes...
+// LDR(B)/STR(B)/PLD (immediate and Rm can be negative, which indicate U=0)
+uint32_t ArmToMipsAssembler::immed12_pre(int32_t immed12, int W)
+{
+    LOG_ALWAYS_FATAL_IF(abs(immed12) >= 0x800,
+                        "LDR(B)/STR(B)/PLD immediate too big (%08x)",
+                        immed12);
+    amode.value = immed12;
+    amode.writeback = W;
+    return AMODE_IMM_12_PRE;
+}
+
+uint32_t ArmToMipsAssembler::immed12_post(int32_t immed12)
+{
+    LOG_ALWAYS_FATAL_IF(abs(immed12) >= 0x800,
+                        "LDR(B)/STR(B)/PLD immediate too big (%08x)",
+                        immed12);
+
+    amode.value = immed12;
+    return AMODE_IMM_12_POST;
+}
+
+uint32_t ArmToMipsAssembler::reg_scale_pre(int Rm, int type,
+        uint32_t shift, int W)
+{
+    LOG_ALWAYS_FATAL_IF(W | type | shift, "reg_scale_pre adv modes not yet implemented");
+
+    amode.reg = Rm;
+    // amode.stype = type;      // more advanced modes not used in GGLAssembler yet
+    // amode.value = shift;
+    // amode.writeback = W;
+    return AMODE_REG_SCALE_PRE;
+}
+
+uint32_t ArmToMipsAssembler::reg_scale_post(int Rm, int type, uint32_t shift)
+{
+    LOG_ALWAYS_FATAL("adr mode reg_scale_post not yet implemented\n");
+    return AMODE_UNSUPPORTED;
+}
+
+// LDRH/LDRSB/LDRSH/STRH (immediate and Rm can be negative, which indicate U=0)
+uint32_t ArmToMipsAssembler::immed8_pre(int32_t immed8, int W)
+{
+    // uint32_t offset = abs(immed8);
+
+    LOG_ALWAYS_FATAL("adr mode immed8_pre not yet implemented\n");
+
+    LOG_ALWAYS_FATAL_IF(abs(immed8) >= 0x100,
+                        "LDRH/LDRSB/LDRSH/STRH immediate too big (%08x)",
+                        immed8);
+    return AMODE_IMM_8_PRE;
+}
+
+uint32_t ArmToMipsAssembler::immed8_post(int32_t immed8)
+{
+    // uint32_t offset = abs(immed8);
+
+    LOG_ALWAYS_FATAL_IF(abs(immed8) >= 0x100,
+                        "LDRH/LDRSB/LDRSH/STRH immediate too big (%08x)",
+                        immed8);
+    amode.value = immed8;
+    return AMODE_IMM_8_POST;
+}
+
+uint32_t ArmToMipsAssembler::reg_pre(int Rm, int W)
+{
+    LOG_ALWAYS_FATAL_IF(W, "reg_pre writeback not yet implemented");
+    amode.reg = Rm;
+    return AMODE_REG_PRE;
+}
+
+uint32_t ArmToMipsAssembler::reg_post(int Rm)
+{
+    LOG_ALWAYS_FATAL("adr mode reg_post not yet implemented\n");
+    return AMODE_UNSUPPORTED;
+}
+
+
+
+// ----------------------------------------------------------------------------
+
+#if 0
+#pragma mark -
+#pragma mark Data Processing...
+#endif
+
+
+static const char * const dpOpNames[] = {
+    "AND", "EOR", "SUB", "RSB", "ADD", "ADC", "SBC", "RSC",
+    "TST", "TEQ", "CMP", "CMN", "ORR", "MOV", "BIC", "MVN"
+};
+
+// check if the operand registers from a previous CMP or S-bit instruction
+// would be overwritten by this instruction. If so, move the value to a
+// safe register.
+// Note that we cannot tell at _this_ instruction time if a future (conditional)
+// instruction will _also_ use this value (a defect of the simple 1-pass, one-
+// instruction-at-a-time translation). Therefore we must be conservative and
+// save the value before it is overwritten. This costs an extra MOVE instr.
+
+void ArmToMipsAssembler::protectConditionalOperands(int Rd)
+{
+    if (Rd == cond.r1) {
+        mMips->MOVE(R_cmp, cond.r1);
+        cond.r1 = R_cmp;
+    }
+    if (cond.type == CMP_COND && Rd == cond.r2) {
+        mMips->MOVE(R_cmp2, cond.r2);
+        cond.r2 = R_cmp2;
+    }
+}
+
+
+// interprets the addressing mode, and generates the common code
+// used by the majority of data-processing ops. Many MIPS instructions
+// have a register-based form and a different immediate form. See
+// opAND below for an example. (this could be inlined)
+//
+// this works with the imm(), reg_imm() methods above, which are directly
+// called by the GLLAssembler.
+// note: _signed parameter defaults to false (un-signed)
+// note: tmpReg parameter defaults to 1, MIPS register AT
+int ArmToMipsAssembler::dataProcAdrModes(int op, int& source, bool _signed, int tmpReg)
+{
+    if (op < AMODE_REG) {
+        source = op;
+        return SRC_REG;
+    } else if (op == AMODE_IMM) {
+        if ((!_signed && amode.value > 0xffff)
+                || (_signed && ((int)amode.value < -32768 || (int)amode.value > 32767) )) {
+            mMips->LUI(tmpReg, (amode.value >> 16));
+            if (amode.value & 0x0000ffff) {
+                mMips->ORI(tmpReg, tmpReg, (amode.value & 0x0000ffff));
+            }
+            source = tmpReg;
+            return SRC_REG;
+        } else {
+            source = amode.value;
+            return SRC_IMM;
+        }
+    } else if (op == AMODE_REG_IMM) {
+        switch (amode.stype) {
+            case LSL: mMips->SLL(tmpReg, amode.reg, amode.value); break;
+            case LSR: mMips->SRL(tmpReg, amode.reg, amode.value); break;
+            case ASR: mMips->SRA(tmpReg, amode.reg, amode.value); break;
+            case ROR: if (mips32r2) {
+                          mMips->ROTR(tmpReg, amode.reg, amode.value);
+                      } else {
+                          mMips->RORIsyn(tmpReg, amode.reg, amode.value);
+                      }
+                      break;
+        }
+        source = tmpReg;
+        return SRC_REG;
+    } else {  // adr mode RRX is not used in GGL Assembler at this time
+        // we are screwed, this should be exception, assert-fail or something
+        LOG_ALWAYS_FATAL("adr mode reg_rrx not yet implemented\n");
+        return SRC_ERROR;
+    }
+}
+
+
+void ArmToMipsAssembler::dataProcessing(int opcode, int cc,
+        int s, int Rd, int Rn, uint32_t Op2)
+{
+    int src;    // src is modified by dataProcAdrModes() - passed as int&
+
+
+    if (cc != AL) {
+        protectConditionalOperands(Rd);
+        // the branch tests register(s) set by prev CMP or instr with 'S' bit set
+        // inverse the condition to jump past this conditional instruction
+        ArmToMipsAssembler::B(cc^1, cond.label[++cond.labelnum]);
+    } else {
+        mArmPC[mInum++] = pc();  // save starting PC for this instr
+    }
+
+    switch (opcode) {
+    case opAND:
+        if (dataProcAdrModes(Op2, src) == SRC_REG) {
+            mMips->AND(Rd, Rn, src);
+        } else {                        // adr mode was SRC_IMM
+            mMips->ANDI(Rd, Rn, src);
+        }
+        break;
+
+    case opADD:
+        // set "signed" to true for adr modes
+        if (dataProcAdrModes(Op2, src, true) == SRC_REG) {
+            mMips->ADDU(Rd, Rn, src);
+        } else {                        // adr mode was SRC_IMM
+            mMips->ADDIU(Rd, Rn, src);
+        }
+        break;
+
+    case opSUB:
+        // set "signed" to true for adr modes
+        if (dataProcAdrModes(Op2, src, true) == SRC_REG) {
+            mMips->SUBU(Rd, Rn, src);
+        } else {                        // adr mode was SRC_IMM
+            mMips->SUBIU(Rd, Rn, src);
+        }
+        break;
+
+    case opEOR:
+        if (dataProcAdrModes(Op2, src) == SRC_REG) {
+            mMips->XOR(Rd, Rn, src);
+        } else {                        // adr mode was SRC_IMM
+            mMips->XORI(Rd, Rn, src);
+        }
+        break;
+
+    case opORR:
+        if (dataProcAdrModes(Op2, src) == SRC_REG) {
+            mMips->OR(Rd, Rn, src);
+        } else {                        // adr mode was SRC_IMM
+            mMips->ORI(Rd, Rn, src);
+        }
+        break;
+
+    case opBIC:
+        if (dataProcAdrModes(Op2, src) == SRC_IMM) {
+            // if we are 16-bit imnmediate, load to AT reg
+            mMips->ORI(R_at, 0, src);
+            src = R_at;
+        }
+        mMips->NOT(R_at, src);
+        mMips->AND(Rd, Rn, R_at);
+        break;
+
+    case opRSB:
+        if (dataProcAdrModes(Op2, src) == SRC_IMM) {
+            // if we are 16-bit imnmediate, load to AT reg
+            mMips->ORI(R_at, 0, src);
+            src = R_at;
+        }
+        mMips->SUBU(Rd, src, Rn);   // subu with the parameters reversed
+        break;
+
+    case opMOV:
+        if (Op2 < AMODE_REG) {  // op2 is reg # in this case
+            mMips->MOVE(Rd, Op2);
+        } else if (Op2 == AMODE_IMM) {
+            if (amode.value > 0xffff) {
+                mMips->LUI(Rd, (amode.value >> 16));
+                if (amode.value & 0x0000ffff) {
+                    mMips->ORI(Rd, Rd, (amode.value & 0x0000ffff));
+                }
+             } else {
+                mMips->ORI(Rd, 0, amode.value);
+            }
+        } else if (Op2 == AMODE_REG_IMM) {
+            switch (amode.stype) {
+            case LSL: mMips->SLL(Rd, amode.reg, amode.value); break;
+            case LSR: mMips->SRL(Rd, amode.reg, amode.value); break;
+            case ASR: mMips->SRA(Rd, amode.reg, amode.value); break;
+            case ROR: if (mips32r2) {
+                          mMips->ROTR(Rd, amode.reg, amode.value);
+                      } else {
+                          mMips->RORIsyn(Rd, amode.reg, amode.value);
+                      }
+                      break;
+            }
+        }
+        else {
+            // adr mode RRX is not used in GGL Assembler at this time
+            mMips->UNIMPL();
+        }
+        break;
+
+    case opMVN:     // this is a 1's complement: NOT
+        if (Op2 < AMODE_REG) {  // op2 is reg # in this case
+            mMips->NOR(Rd, Op2, 0);     // NOT is NOR with 0
+            break;
+        } else if (Op2 == AMODE_IMM) {
+            if (amode.value > 0xffff) {
+                mMips->LUI(Rd, (amode.value >> 16));
+                if (amode.value & 0x0000ffff) {
+                    mMips->ORI(Rd, Rd, (amode.value & 0x0000ffff));
+                }
+             } else {
+                mMips->ORI(Rd, 0, amode.value);
+             }
+        } else if (Op2 == AMODE_REG_IMM) {
+            switch (amode.stype) {
+            case LSL: mMips->SLL(Rd, amode.reg, amode.value); break;
+            case LSR: mMips->SRL(Rd, amode.reg, amode.value); break;
+            case ASR: mMips->SRA(Rd, amode.reg, amode.value); break;
+            case ROR: if (mips32r2) {
+                          mMips->ROTR(Rd, amode.reg, amode.value);
+                      } else {
+                          mMips->RORIsyn(Rd, amode.reg, amode.value);
+                      }
+                      break;
+            }
+        }
+        else {
+            // adr mode RRX is not used in GGL Assembler at this time
+            mMips->UNIMPL();
+        }
+        mMips->NOR(Rd, Rd, 0);     // NOT is NOR with 0
+        break;
+
+    case opCMP:
+        // Either operand of a CMP instr could get overwritten by a subsequent
+        // conditional instruction, which is ok, _UNLESS_ there is a _second_
+        // conditional instruction. Under MIPS, this requires doing the comparison
+        // again (SLT), and the original operands must be available. (and this
+        // pattern of multiple conditional instructions from same CMP _is_ used
+        // in GGL-Assembler)
+        //
+        // For now, if a conditional instr overwrites the operands, we will
+        // move them to dedicated temp regs. This is ugly, and inefficient,
+        // and should be optimized.
+        //
+        // WARNING: making an _Assumption_ that CMP operand regs will NOT be
+        // trashed by intervening NON-conditional instructions. In the general
+        // case this is legal, but it is NOT currently done in GGL-Assembler.
+
+        cond.type = CMP_COND;
+        cond.r1 = Rn;
+        if (dataProcAdrModes(Op2, src, false, R_cmp2) == SRC_REG) {
+            cond.r2 = src;
+        } else {                        // adr mode was SRC_IMM
+            mMips->ORI(R_cmp2, R_zero, src);
+            cond.r2 = R_cmp2;
+        }
+
+        break;
+
+
+    case opTST:
+    case opTEQ:
+    case opCMN:
+    case opADC:
+    case opSBC:
+    case opRSC:
+        mMips->UNIMPL(); // currently unused in GGL Assembler code
+        break;
+    }
+
+    if (cc != AL) {
+        mMips->label(cond.label[cond.labelnum]);
+    }
+    if (s && opcode != opCMP) {
+        cond.type = SBIT_COND;
+        cond.r1 = Rd;
+    }
+}
+
+
+
+#if 0
+#pragma mark -
+#pragma mark Multiply...
+#endif
+
+// multiply, accumulate
+void ArmToMipsAssembler::MLA(int cc, int s,
+        int Rd, int Rm, int Rs, int Rn) {
+
+    mArmPC[mInum++] = pc();  // save starting PC for this instr
+
+    mMips->MUL(R_at, Rm, Rs);
+    mMips->ADDU(Rd, R_at, Rn);
+    if (s) {
+        cond.type = SBIT_COND;
+        cond.r1 = Rd;
+    }
+}
+
+void ArmToMipsAssembler::MUL(int cc, int s,
+        int Rd, int Rm, int Rs) {
+    mArmPC[mInum++] = pc();
+    mMips->MUL(Rd, Rm, Rs);
+    if (s) {
+        cond.type = SBIT_COND;
+        cond.r1 = Rd;
+    }
+}
+
+void ArmToMipsAssembler::UMULL(int cc, int s,
+        int RdLo, int RdHi, int Rm, int Rs) {
+    mArmPC[mInum++] = pc();
+    mMips->MULT(Rm, Rs);
+    mMips->MFHI(RdHi);
+    mMips->MFLO(RdLo);
+    if (s) {
+        cond.type = SBIT_COND;
+        cond.r1 = RdHi;     // BUG...
+        LOG_ALWAYS_FATAL("Condition on UMULL must be on 64-bit result\n");
+    }
+}
+
+void ArmToMipsAssembler::UMUAL(int cc, int s,
+        int RdLo, int RdHi, int Rm, int Rs) {
+    LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi,
+                        "UMUAL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs);
+    // *mPC++ =    (cc<<28) | (1<<23) | (1<<21) | (s<<20) |
+    //             (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm;
+    mArmPC[mInum++] = pc();
+    mMips->NOP2();
+    NOT_IMPLEMENTED();
+    if (s) {
+        cond.type = SBIT_COND;
+        cond.r1 = RdHi;     // BUG...
+        LOG_ALWAYS_FATAL("Condition on UMULL must be on 64-bit result\n");
+    }
+}
+
+void ArmToMipsAssembler::SMULL(int cc, int s,
+        int RdLo, int RdHi, int Rm, int Rs) {
+    LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi,
+                        "SMULL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs);
+    // *mPC++ =    (cc<<28) | (1<<23) | (1<<22) | (s<<20) |
+    //             (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm;
+    mArmPC[mInum++] = pc();
+    mMips->NOP2();
+    NOT_IMPLEMENTED();
+    if (s) {
+        cond.type = SBIT_COND;
+        cond.r1 = RdHi;     // BUG...
+        LOG_ALWAYS_FATAL("Condition on SMULL must be on 64-bit result\n");
+    }
+}
+void ArmToMipsAssembler::SMUAL(int cc, int s,
+        int RdLo, int RdHi, int Rm, int Rs) {
+    LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi,
+                        "SMUAL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs);
+    // *mPC++ =    (cc<<28) | (1<<23) | (1<<22) | (1<<21) | (s<<20) |
+    //             (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm;
+    mArmPC[mInum++] = pc();
+    mMips->NOP2();
+    NOT_IMPLEMENTED();
+    if (s) {
+        cond.type = SBIT_COND;
+        cond.r1 = RdHi;     // BUG...
+        LOG_ALWAYS_FATAL("Condition on SMUAL must be on 64-bit result\n");
+    }
+}
+
+
+
+#if 0
+#pragma mark -
+#pragma mark Branches...
+#endif
+
+// branches...
+
+void ArmToMipsAssembler::B(int cc, const char* label)
+{
+    mArmPC[mInum++] = pc();
+    if (cond.type == SBIT_COND) { cond.r2 = R_zero; }
+
+    switch(cc) {
+        case EQ: mMips->BEQ(cond.r1, cond.r2, label); break;
+        case NE: mMips->BNE(cond.r1, cond.r2, label); break;
+        case HS: mMips->BGEU(cond.r1, cond.r2, label); break;
+        case LO: mMips->BLTU(cond.r1, cond.r2, label); break;
+        case MI: mMips->BLT(cond.r1, cond.r2, label); break;
+        case PL: mMips->BGE(cond.r1, cond.r2, label); break;
+
+        case HI: mMips->BGTU(cond.r1, cond.r2, label); break;
+        case LS: mMips->BLEU(cond.r1, cond.r2, label); break;
+        case GE: mMips->BGE(cond.r1, cond.r2, label); break;
+        case LT: mMips->BLT(cond.r1, cond.r2, label); break;
+        case GT: mMips->BGT(cond.r1, cond.r2, label); break;
+        case LE: mMips->BLE(cond.r1, cond.r2, label); break;
+        case AL: mMips->B(label); break;
+        case NV: /* B Never - no instruction */ break;
+
+        case VS:
+        case VC:
+        default:
+            LOG_ALWAYS_FATAL("Unsupported cc: %02x\n", cc);
+            break;
+    }
+}
+
+void ArmToMipsAssembler::BL(int cc, const char* label)
+{
+    LOG_ALWAYS_FATAL("branch-and-link not supported yet\n");
+    mArmPC[mInum++] = pc();
+}
+
+// no use for Branches with integer PC, but they're in the Interface class ....
+void ArmToMipsAssembler::B(int cc, uint32_t* to_pc)
+{
+    LOG_ALWAYS_FATAL("branch to absolute PC not supported, use Label\n");
+    mArmPC[mInum++] = pc();
+}
+
+void ArmToMipsAssembler::BL(int cc, uint32_t* to_pc)
+{
+    LOG_ALWAYS_FATAL("branch to absolute PC not supported, use Label\n");
+    mArmPC[mInum++] = pc();
+}
+
+void ArmToMipsAssembler::BX(int cc, int Rn)
+{
+    LOG_ALWAYS_FATAL("branch to absolute PC not supported, use Label\n");
+    mArmPC[mInum++] = pc();
+}
+
+
+
+#if 0
+#pragma mark -
+#pragma mark Data Transfer...
+#endif
+
+// data transfer...
+void ArmToMipsAssembler::LDR(int cc, int Rd, int Rn, uint32_t offset)
+{
+    mArmPC[mInum++] = pc();
+    // work-around for ARM default address mode of immed12_pre(0)
+    if (offset > AMODE_UNSUPPORTED) offset = 0;
+    switch (offset) {
+        case 0:
+            amode.value = 0;
+            amode.writeback = 0;
+            // fall thru to next case ....
+        case AMODE_IMM_12_PRE:
+            if (Rn == ARMAssemblerInterface::SP) {
+                Rn = R_sp;      // convert LDR via Arm SP to LW via Mips SP
+            }
+            mMips->LW(Rd, Rn, amode.value);
+            if (amode.writeback) {      // OPTIONAL writeback on pre-index mode
+                mMips->ADDIU(Rn, Rn, amode.value);
+            }
+            break;
+        case AMODE_IMM_12_POST:
+            if (Rn == ARMAssemblerInterface::SP) {
+                Rn = R_sp;      // convert STR thru Arm SP to STR thru Mips SP
+            }
+            mMips->LW(Rd, Rn, 0);
+            mMips->ADDIU(Rn, Rn, amode.value);
+            break;
+        case AMODE_REG_SCALE_PRE:
+            // we only support simple base + index, no advanced modes for this one yet
+            mMips->ADDU(R_at, Rn, amode.reg);
+            mMips->LW(Rd, R_at, 0);
+            break;
+    }
+}
+
+void ArmToMipsAssembler::LDRB(int cc, int Rd, int Rn, uint32_t offset)
+{
+    mArmPC[mInum++] = pc();
+    // work-around for ARM default address mode of immed12_pre(0)
+    if (offset > AMODE_UNSUPPORTED) offset = 0;
+    switch (offset) {
+        case 0:
+            amode.value = 0;
+            amode.writeback = 0;
+            // fall thru to next case ....
+        case AMODE_IMM_12_PRE:
+            mMips->LBU(Rd, Rn, amode.value);
+            if (amode.writeback) {      // OPTIONAL writeback on pre-index mode
+                mMips->ADDIU(Rn, Rn, amode.value);
+            }
+            break;
+        case AMODE_IMM_12_POST:
+            mMips->LBU(Rd, Rn, 0);
+            mMips->ADDIU(Rn, Rn, amode.value);
+            break;
+        case AMODE_REG_SCALE_PRE:
+            // we only support simple base + index, no advanced modes for this one yet
+            mMips->ADDU(R_at, Rn, amode.reg);
+            mMips->LBU(Rd, R_at, 0);
+            break;
+    }
+
+}
+
+void ArmToMipsAssembler::STR(int cc, int Rd, int Rn, uint32_t offset)
+{
+    mArmPC[mInum++] = pc();
+    // work-around for ARM default address mode of immed12_pre(0)
+    if (offset > AMODE_UNSUPPORTED) offset = 0;
+    switch (offset) {
+        case 0:
+            amode.value = 0;
+            amode.writeback = 0;
+            // fall thru to next case ....
+        case AMODE_IMM_12_PRE:
+            if (Rn == ARMAssemblerInterface::SP) {
+                Rn = R_sp;  // convert STR thru Arm SP to SW thru Mips SP
+            }
+            if (amode.writeback) {      // OPTIONAL writeback on pre-index mode
+                // If we will writeback, then update the index reg, then store.
+                // This correctly handles stack-push case.
+                mMips->ADDIU(Rn, Rn, amode.value);
+                mMips->SW(Rd, Rn, 0);
+            } else {
+                // No writeback so store offset by value
+                mMips->SW(Rd, Rn, amode.value);
+            }
+            break;
+        case AMODE_IMM_12_POST:
+            mMips->SW(Rd, Rn, 0);
+            mMips->ADDIU(Rn, Rn, amode.value);  // post index always writes back
+            break;
+        case AMODE_REG_SCALE_PRE:
+            // we only support simple base + index, no advanced modes for this one yet
+            mMips->ADDU(R_at, Rn, amode.reg);
+            mMips->SW(Rd, R_at, 0);
+            break;
+    }
+}
+
+void ArmToMipsAssembler::STRB(int cc, int Rd, int Rn, uint32_t offset)
+{
+    mArmPC[mInum++] = pc();
+    // work-around for ARM default address mode of immed12_pre(0)
+    if (offset > AMODE_UNSUPPORTED) offset = 0;
+    switch (offset) {
+        case 0:
+            amode.value = 0;
+            amode.writeback = 0;
+            // fall thru to next case ....
+        case AMODE_IMM_12_PRE:
+            mMips->SB(Rd, Rn, amode.value);
+            if (amode.writeback) {      // OPTIONAL writeback on pre-index mode
+                mMips->ADDIU(Rn, Rn, amode.value);
+            }
+            break;
+        case AMODE_IMM_12_POST:
+            mMips->SB(Rd, Rn, 0);
+            mMips->ADDIU(Rn, Rn, amode.value);
+            break;
+        case AMODE_REG_SCALE_PRE:
+            // we only support simple base + index, no advanced modes for this one yet
+            mMips->ADDU(R_at, Rn, amode.reg);
+            mMips->SB(Rd, R_at, 0);
+            break;
+    }
+}
+
+void ArmToMipsAssembler::LDRH(int cc, int Rd, int Rn, uint32_t offset)
+{
+    mArmPC[mInum++] = pc();
+    // work-around for ARM default address mode of immed8_pre(0)
+    if (offset > AMODE_UNSUPPORTED) offset = 0;
+    switch (offset) {
+        case 0:
+            amode.value = 0;
+            // fall thru to next case ....
+        case AMODE_IMM_8_PRE:      // no support yet for writeback
+            mMips->LHU(Rd, Rn, amode.value);
+            break;
+        case AMODE_IMM_8_POST:
+            mMips->LHU(Rd, Rn, 0);
+            mMips->ADDIU(Rn, Rn, amode.value);
+            break;
+        case AMODE_REG_PRE:
+            // we only support simple base +/- index
+            if (amode.reg >= 0) {
+                mMips->ADDU(R_at, Rn, amode.reg);
+            } else {
+                mMips->SUBU(R_at, Rn, abs(amode.reg));
+            }
+            mMips->LHU(Rd, R_at, 0);
+            break;
+    }
+}
+
+void ArmToMipsAssembler::LDRSB(int cc, int Rd, int Rn, uint32_t offset)
+{
+    mArmPC[mInum++] = pc();
+    mMips->NOP2();
+    NOT_IMPLEMENTED();
+}
+
+void ArmToMipsAssembler::LDRSH(int cc, int Rd, int Rn, uint32_t offset)
+{
+    mArmPC[mInum++] = pc();
+    mMips->NOP2();
+    NOT_IMPLEMENTED();
+}
+
+void ArmToMipsAssembler::STRH(int cc, int Rd, int Rn, uint32_t offset)
+{
+    mArmPC[mInum++] = pc();
+    // work-around for ARM default address mode of immed8_pre(0)
+    if (offset > AMODE_UNSUPPORTED) offset = 0;
+    switch (offset) {
+        case 0:
+            amode.value = 0;
+            // fall thru to next case ....
+        case AMODE_IMM_8_PRE:      // no support yet for writeback
+            mMips->SH(Rd, Rn, amode.value);
+            break;
+        case AMODE_IMM_8_POST:
+            mMips->SH(Rd, Rn, 0);
+            mMips->ADDIU(Rn, Rn, amode.value);
+            break;
+        case AMODE_REG_PRE:
+            // we only support simple base +/- index
+            if (amode.reg >= 0) {
+                mMips->ADDU(R_at, Rn, amode.reg);
+            } else {
+                mMips->SUBU(R_at, Rn, abs(amode.reg));
+            }
+            mMips->SH(Rd, R_at, 0);
+            break;
+    }
+}
+
+
+
+#if 0
+#pragma mark -
+#pragma mark Block Data Transfer...
+#endif
+
+// block data transfer...
+void ArmToMipsAssembler::LDM(int cc, int dir,
+        int Rn, int W, uint32_t reg_list)
+{   //                        ED FD EA FA      IB IA DB DA
+    // const uint8_t P[8] = { 1, 0, 1, 0,      1, 0, 1, 0 };
+    // const uint8_t U[8] = { 1, 1, 0, 0,      1, 1, 0, 0 };
+    // *mPC++ = (cc<<28) | (4<<25) | (uint32_t(P[dir])<<24) |
+    //         (uint32_t(U[dir])<<23) | (1<<20) | (W<<21) | (Rn<<16) | reg_list;
+    mArmPC[mInum++] = pc();
+    mMips->NOP2();
+    NOT_IMPLEMENTED();
+}
+
+void ArmToMipsAssembler::STM(int cc, int dir,
+        int Rn, int W, uint32_t reg_list)
+{   //                        FA EA FD ED      IB IA DB DA
+    // const uint8_t P[8] = { 0, 1, 0, 1,      1, 0, 1, 0 };
+    // const uint8_t U[8] = { 0, 0, 1, 1,      1, 1, 0, 0 };
+    // *mPC++ = (cc<<28) | (4<<25) | (uint32_t(P[dir])<<24) |
+    //         (uint32_t(U[dir])<<23) | (0<<20) | (W<<21) | (Rn<<16) | reg_list;
+    mArmPC[mInum++] = pc();
+    mMips->NOP2();
+    NOT_IMPLEMENTED();
+}
+
+
+
+#if 0
+#pragma mark -
+#pragma mark Special...
+#endif
+
+// special...
+void ArmToMipsAssembler::SWP(int cc, int Rn, int Rd, int Rm) {
+    // *mPC++ = (cc<<28) | (2<<23) | (Rn<<16) | (Rd << 12) | 0x90 | Rm;
+    mArmPC[mInum++] = pc();
+    mMips->NOP2();
+    NOT_IMPLEMENTED();
+}
+
+void ArmToMipsAssembler::SWPB(int cc, int Rn, int Rd, int Rm) {
+    // *mPC++ = (cc<<28) | (2<<23) | (1<<22) | (Rn<<16) | (Rd << 12) | 0x90 | Rm;
+    mArmPC[mInum++] = pc();
+    mMips->NOP2();
+    NOT_IMPLEMENTED();
+}
+
+void ArmToMipsAssembler::SWI(int cc, uint32_t comment) {
+    // *mPC++ = (cc<<28) | (0xF<<24) | comment;
+    mArmPC[mInum++] = pc();
+    mMips->NOP2();
+    NOT_IMPLEMENTED();
+}
+
+
+#if 0
+#pragma mark -
+#pragma mark DSP instructions...
+#endif
+
+// DSP instructions...
+void ArmToMipsAssembler::PLD(int Rn, uint32_t offset) {
+    LOG_ALWAYS_FATAL_IF(!((offset&(1<<24)) && !(offset&(1<<21))),
+                        "PLD only P=1, W=0");
+    // *mPC++ = 0xF550F000 | (Rn<<16) | offset;
+    mArmPC[mInum++] = pc();
+    mMips->NOP2();
+    NOT_IMPLEMENTED();
+}
+
+void ArmToMipsAssembler::CLZ(int cc, int Rd, int Rm)
+{
+    mArmPC[mInum++] = pc();
+    mMips->CLZ(Rd, Rm);
+}
+
+void ArmToMipsAssembler::QADD(int cc,  int Rd, int Rm, int Rn)
+{
+    // *mPC++ = (cc<<28) | 0x1000050 | (Rn<<16) | (Rd<<12) | Rm;
+    mArmPC[mInum++] = pc();
+    mMips->NOP2();
+    NOT_IMPLEMENTED();
+}
+
+void ArmToMipsAssembler::QDADD(int cc,  int Rd, int Rm, int Rn)
+{
+    // *mPC++ = (cc<<28) | 0x1400050 | (Rn<<16) | (Rd<<12) | Rm;
+    mArmPC[mInum++] = pc();
+    mMips->NOP2();
+    NOT_IMPLEMENTED();
+}
+
+void ArmToMipsAssembler::QSUB(int cc,  int Rd, int Rm, int Rn)
+{
+    // *mPC++ = (cc<<28) | 0x1200050 | (Rn<<16) | (Rd<<12) | Rm;
+    mArmPC[mInum++] = pc();
+    mMips->NOP2();
+    NOT_IMPLEMENTED();
+}
+
+void ArmToMipsAssembler::QDSUB(int cc,  int Rd, int Rm, int Rn)
+{
+    // *mPC++ = (cc<<28) | 0x1600050 | (Rn<<16) | (Rd<<12) | Rm;
+    mArmPC[mInum++] = pc();
+    mMips->NOP2();
+    NOT_IMPLEMENTED();
+}
+
+// 16 x 16 signed multiply (like SMLAxx without the accumulate)
+void ArmToMipsAssembler::SMUL(int cc, int xy,
+                int Rd, int Rm, int Rs)
+{
+    mArmPC[mInum++] = pc();
+
+    // the 16 bits may be in the top or bottom half of 32-bit source reg,
+    // as defined by the codes BB, BT, TB, TT (compressed param xy)
+    // where x corresponds to Rm and y to Rs
+
+    // select half-reg for Rm
+    if (xy & xyTB) {
+        // use top 16-bits
+        mMips->SRA(R_at, Rm, 16);
+    } else {
+        // use bottom 16, but sign-extend to 32
+        if (mips32r2) {
+            mMips->SEH(R_at, Rm);
+        } else {
+            mMips->SLL(R_at, Rm, 16);
+            mMips->SRA(R_at, R_at, 16);
+        }
+    }
+    // select half-reg for Rs
+    if (xy & xyBT) {
+        // use top 16-bits
+        mMips->SRA(R_at2, Rs, 16);
+    } else {
+        // use bottom 16, but sign-extend to 32
+        if (mips32r2) {
+            mMips->SEH(R_at2, Rs);
+        } else {
+            mMips->SLL(R_at2, Rs, 16);
+            mMips->SRA(R_at2, R_at2, 16);
+        }
+    }
+    mMips->MUL(Rd, R_at, R_at2);
+}
+
+// signed 32b x 16b multiple, save top 32-bits of 48-bit result
+void ArmToMipsAssembler::SMULW(int cc, int y,
+                int Rd, int Rm, int Rs)
+{
+    mArmPC[mInum++] = pc();
+
+    // the selector yT or yB refers to reg Rs
+    if (y & yT) {
+        // zero the bottom 16-bits, with 2 shifts, it can affect result
+        mMips->SRL(R_at, Rs, 16);
+        mMips->SLL(R_at, R_at, 16);
+
+    } else {
+        // move low 16-bit half, to high half
+        mMips->SLL(R_at, Rs, 16);
+    }
+    mMips->MULT(Rm, R_at);
+    mMips->MFHI(Rd);
+}
+
+// 16 x 16 signed multiply, accumulate: Rd = Rm{16} * Rs{16} + Rn
+void ArmToMipsAssembler::SMLA(int cc, int xy,
+                int Rd, int Rm, int Rs, int Rn)
+{
+    mArmPC[mInum++] = pc();
+
+    // the 16 bits may be in the top or bottom half of 32-bit source reg,
+    // as defined by the codes BB, BT, TB, TT (compressed param xy)
+    // where x corresponds to Rm and y to Rs
+
+    // select half-reg for Rm
+    if (xy & xyTB) {
+        // use top 16-bits
+        mMips->SRA(R_at, Rm, 16);
+    } else {
+        // use bottom 16, but sign-extend to 32
+        if (mips32r2) {
+            mMips->SEH(R_at, Rm);
+        } else {
+            mMips->SLL(R_at, Rm, 16);
+            mMips->SRA(R_at, R_at, 16);
+        }
+    }
+    // select half-reg for Rs
+    if (xy & xyBT) {
+        // use top 16-bits
+        mMips->SRA(R_at2, Rs, 16);
+    } else {
+        // use bottom 16, but sign-extend to 32
+        if (mips32r2) {
+            mMips->SEH(R_at2, Rs);
+        } else {
+            mMips->SLL(R_at2, Rs, 16);
+            mMips->SRA(R_at2, R_at2, 16);
+        }
+    }
+
+    mMips->MUL(R_at, R_at, R_at2);
+    mMips->ADDU(Rd, R_at, Rn);
+}
+
+void ArmToMipsAssembler::SMLAL(int cc, int xy,
+                int RdHi, int RdLo, int Rs, int Rm)
+{
+    // *mPC++ = (cc<<28) | 0x1400080 | (RdHi<<16) | (RdLo<<12) | (Rs<<8) | (xy<<4) | Rm;
+    mArmPC[mInum++] = pc();
+    mMips->NOP2();
+    NOT_IMPLEMENTED();
+}
+
+void ArmToMipsAssembler::SMLAW(int cc, int y,
+                int Rd, int Rm, int Rs, int Rn)
+{
+    // *mPC++ = (cc<<28) | 0x1200080 | (Rd<<16) | (Rn<<12) | (Rs<<8) | (y<<4) | Rm;
+    mArmPC[mInum++] = pc();
+    mMips->NOP2();
+    NOT_IMPLEMENTED();
+}
+
+// used by ARMv6 version of GGLAssembler::filter32
+void ArmToMipsAssembler::UXTB16(int cc, int Rd, int Rm, int rotate)
+{
+    mArmPC[mInum++] = pc();
+
+    //Rd[31:16] := ZeroExtend((Rm ROR (8 * sh))[23:16]),
+    //Rd[15:0] := ZeroExtend((Rm ROR (8 * sh))[7:0]). sh 0-3.
+
+    mMips->ROTR(Rm, Rm, rotate * 8);
+    mMips->AND(Rd, Rm, 0x00FF00FF);
+}
+
+void ArmToMipsAssembler::UBFX(int cc, int Rd, int Rn, int lsb, int width)
+{
+     /* Placeholder for UBFX */
+     mArmPC[mInum++] = pc();
+
+     mMips->NOP2();
+     NOT_IMPLEMENTED();
+}
+
+
+
+
+
+#if 0
+#pragma mark -
+#pragma mark MIPS Assembler...
+#endif
+
+
+//**************************************************************************
+//**************************************************************************
+//**************************************************************************
+
+
+/* mips assembler
+** this is a subset of mips32r2, targeted specifically at ARM instruction
+** replacement in the pixelflinger/codeflinger code.
+**
+** To that end, there is no need for floating point, or priviledged
+** instructions. This all runs in user space, no float.
+**
+** The syntax makes no attempt to be as complete as the assember, with
+** synthetic instructions, and automatic recognition of immedate operands
+** (use the immediate form of the instruction), etc.
+**
+** We start with mips32r1, and may add r2 and dsp extensions if cpu
+** supports. Decision will be made at compile time, based on gcc
+** options. (makes sense since android will be built for a a specific
+** device)
+*/
+
+MIPSAssembler::MIPSAssembler(const sp<Assembly>& assembly, ArmToMipsAssembler *parent)
+    : mParent(parent),
+    mAssembly(assembly)
+{
+    mBase = mPC = (uint32_t *)assembly->base();
+    mDuration = ggl_system_time();
+}
+
+MIPSAssembler::~MIPSAssembler()
+{
+}
+
+
+uint32_t* MIPSAssembler::pc() const
+{
+    return mPC;
+}
+
+uint32_t* MIPSAssembler::base() const
+{
+    return mBase;
+}
+
+void MIPSAssembler::reset()
+{
+    mBase = mPC = (uint32_t *)mAssembly->base();
+    mBranchTargets.clear();
+    mLabels.clear();
+    mLabelsInverseMapping.clear();
+    mComments.clear();
+}
+
+
+// convert tabs to spaces, and remove any newline
+// works with strings of limited size (makes a temp copy)
+#define TABSTOP 8
+void MIPSAssembler::string_detab(char *s)
+{
+    char *os = s;
+    char temp[100];
+    char *t = temp;
+    int len = 99;
+    int i = TABSTOP;
+
+    while (*s && len-- > 0) {
+        if (*s == '\n') { s++; continue; }
+        if (*s == '\t') {
+            s++;
+            for ( ; i>0; i--) {*t++ = ' '; len--; }
+        } else {
+            *t++ = *s++;
+        }
+        if (i <= 0) i = TABSTOP;
+        i--;
+    }
+    *t = '\0';
+    strcpy(os, temp);
+}
+
+void MIPSAssembler::string_pad(char *s, int padded_len)
+{
+    int len = strlen(s);
+    s += len;
+    for (int i = padded_len - len; i > 0; --i) {
+        *s++ = ' ';
+    }
+    *s = '\0';
+}
+
+// ----------------------------------------------------------------------------
+
+void MIPSAssembler::disassemble(const char* name)
+{
+    char di_buf[140];
+
+    if (name) {
+        ALOGW("%s:\n", name);
+    }
+
+    bool arm_disasm_fmt = (mParent->mArmDisassemblyBuffer == NULL) ? false : true;
+
+    typedef char dstr[40];
+    dstr *lines = (dstr *)mParent->mArmDisassemblyBuffer;
+
+    if (mParent->mArmDisassemblyBuffer != NULL) {
+        for (int i=0; i<mParent->mArmInstrCount; ++i) {
+            string_detab(lines[i]);
+        }
+    }
+
+    // iArm is an index to Arm instructions 1...n for this assembly sequence
+    // mArmPC[iArm] holds the value of the Mips-PC for the first MIPS
+    // instruction corresponding to that Arm instruction number
+
+    int iArm = 0;
+    size_t count = pc()-base();
+    uint32_t* mipsPC = base();
+    while (count--) {
+        ssize_t label = mLabelsInverseMapping.indexOfKey(mipsPC);
+        if (label >= 0) {
+            ALOGW("%s:\n", mLabelsInverseMapping.valueAt(label));
+        }
+        ssize_t comment = mComments.indexOfKey(mipsPC);
+        if (comment >= 0) {
+            ALOGW("; %s\n", mComments.valueAt(comment));
+        }
+        // ALOGW("%08x:    %08x    ", int(i), int(i[0]));
+        ::mips_disassem(mipsPC, di_buf, arm_disasm_fmt);
+        string_detab(di_buf);
+        string_pad(di_buf, 30);
+        ALOGW("%08x:    %08x    %s", uint32_t(mipsPC), uint32_t(*mipsPC), di_buf);
+        mipsPC++;
+    }
+}
+
+void MIPSAssembler::comment(const char* string)
+{
+    mComments.add(pc(), string);
+}
+
+void MIPSAssembler::label(const char* theLabel)
+{
+    mLabels.add(theLabel, pc());
+    mLabelsInverseMapping.add(pc(), theLabel);
+}
+
+
+void MIPSAssembler::prolog()
+{
+    // empty - done in ArmToMipsAssembler
+}
+
+void MIPSAssembler::epilog(uint32_t touched)
+{
+    // empty - done in ArmToMipsAssembler
+}
+
+int MIPSAssembler::generate(const char* name)
+{
+    // fixup all the branches
+    size_t count = mBranchTargets.size();
+    while (count--) {
+        const branch_target_t& bt = mBranchTargets[count];
+        uint32_t* target_pc = mLabels.valueFor(bt.label);
+        LOG_ALWAYS_FATAL_IF(!target_pc,
+                "error resolving branch targets, target_pc is null");
+        int32_t offset = int32_t(target_pc - (bt.pc+1));
+        *bt.pc |= offset & 0x00FFFF;
+    }
+
+    mAssembly->resize( int(pc()-base())*4 );
+
+    // the instruction & data caches are flushed by CodeCache
+    const int64_t duration = ggl_system_time() - mDuration;
+    const char * const format = "generated %s (%d ins) at [%p:%p] in %lld ns\n";
+    ALOGI(format, name, int(pc()-base()), base(), pc(), duration);
+
+#if defined(WITH_LIB_HARDWARE)
+    if (__builtin_expect(mQemuTracing, 0)) {
+        int err = qemu_add_mapping(int(base()), name);
+        mQemuTracing = (err >= 0);
+    }
+#endif
+
+    char value[PROPERTY_VALUE_MAX];
+    value[0] = '\0';
+
+    property_get("debug.pf.disasm", value, "0");
+
+    if (atoi(value) != 0) {
+        disassemble(name);
+    }
+
+    return NO_ERROR;
+}
+
+uint32_t* MIPSAssembler::pcForLabel(const char* label)
+{
+    return mLabels.valueFor(label);
+}
+
+
+
+#if 0
+#pragma mark -
+#pragma mark Arithmetic...
+#endif
+
+void MIPSAssembler::ADDU(int Rd, int Rs, int Rt)
+{
+    *mPC++ = (spec_op<<OP_SHF) | (addu_fn<<FUNC_SHF)
+                    | (Rs<<RS_SHF) | (Rt<<RT_SHF) | (Rd<<RD_SHF);
+}
+
+// MD00086 pdf says this is: ADDIU rt, rs, imm -- they do not use Rd
+void MIPSAssembler::ADDIU(int Rt, int Rs, int16_t imm)
+{
+    *mPC++ = (addiu_op<<OP_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF) | (imm & MSK_16);
+}
+
+
+void MIPSAssembler::SUBU(int Rd, int Rs, int Rt)
+{
+    *mPC++ = (spec_op<<OP_SHF) | (subu_fn<<FUNC_SHF) |
+                        (Rs<<RS_SHF) | (Rt<<RT_SHF) | (Rd<<RD_SHF) ;
+}
+
+
+void MIPSAssembler::SUBIU(int Rt, int Rs, int16_t imm)   // really addiu(d, s, -j)
+{
+    *mPC++ = (addiu_op<<OP_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF) | ((-imm) & MSK_16);
+}
+
+
+void MIPSAssembler::NEGU(int Rd, int Rs)    // really subu(d, zero, s)
+{
+    MIPSAssembler::SUBU(Rd, 0, Rs);
+}
+
+void MIPSAssembler::MUL(int Rd, int Rs, int Rt)
+{
+    *mPC++ = (spec2_op<<OP_SHF) | (mul_fn<<FUNC_SHF) |
+                        (Rs<<RS_SHF) | (Rt<<RT_SHF) | (Rd<<RD_SHF) ;
+}
+
+void MIPSAssembler::MULT(int Rs, int Rt)    // dest is hi,lo
+{
+    *mPC++ = (spec_op<<OP_SHF) | (mult_fn<<FUNC_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF);
+}
+
+void MIPSAssembler::MULTU(int Rs, int Rt)    // dest is hi,lo
+{
+    *mPC++ = (spec_op<<OP_SHF) | (multu_fn<<FUNC_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF);
+}
+
+void MIPSAssembler::MADD(int Rs, int Rt)    // hi,lo = hi,lo + Rs * Rt
+{
+    *mPC++ = (spec2_op<<OP_SHF) | (madd_fn<<FUNC_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF);
+}
+
+void MIPSAssembler::MADDU(int Rs, int Rt)    // hi,lo = hi,lo + Rs * Rt
+{
+    *mPC++ = (spec2_op<<OP_SHF) | (maddu_fn<<FUNC_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF);
+}
+
+
+void MIPSAssembler::MSUB(int Rs, int Rt)    // hi,lo = hi,lo - Rs * Rt
+{
+    *mPC++ = (spec2_op<<OP_SHF) | (msub_fn<<FUNC_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF);
+}
+
+void MIPSAssembler::MSUBU(int Rs, int Rt)    // hi,lo = hi,lo - Rs * Rt
+{
+    *mPC++ = (spec2_op<<OP_SHF) | (msubu_fn<<FUNC_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF);
+}
+
+
+void MIPSAssembler::SEB(int Rd, int Rt)    // sign-extend byte (mips32r2)
+{
+    *mPC++ = (spec3_op<<OP_SHF) | (bshfl_fn<<FUNC_SHF) | (seb_fn << SA_SHF) |
+                    (Rt<<RT_SHF) | (Rd<<RD_SHF);
+}
+
+void MIPSAssembler::SEH(int Rd, int Rt)    // sign-extend half-word (mips32r2)
+{
+    *mPC++ = (spec3_op<<OP_SHF) | (bshfl_fn<<FUNC_SHF) | (seh_fn << SA_SHF) |
+                    (Rt<<RT_SHF) | (Rd<<RD_SHF);
+}
+
+
+
+#if 0
+#pragma mark -
+#pragma mark Comparisons...
+#endif
+
+void MIPSAssembler::SLT(int Rd, int Rs, int Rt)
+{
+    *mPC++ = (spec_op<<OP_SHF) | (slt_fn<<FUNC_SHF) |
+                        (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rt<<RT_SHF);
+}
+
+void MIPSAssembler::SLTI(int Rt, int Rs, int16_t imm)
+{
+    *mPC++ = (slti_op<<OP_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF) | (imm & MSK_16);
+}
+
+
+void MIPSAssembler::SLTU(int Rd, int Rs, int Rt)
+{
+    *mPC++ = (spec_op<<OP_SHF) | (sltu_fn<<FUNC_SHF) |
+                        (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rt<<RT_SHF);
+}
+
+void MIPSAssembler::SLTIU(int Rt, int Rs, int16_t imm)
+{
+    *mPC++ = (sltiu_op<<OP_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF) | (imm & MSK_16);
+}
+
+
+
+#if 0
+#pragma mark -
+#pragma mark Logical...
+#endif
+
+void MIPSAssembler::AND(int Rd, int Rs, int Rt)
+{
+    *mPC++ = (spec_op<<OP_SHF) | (and_fn<<FUNC_SHF) |
+                        (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rt<<RT_SHF);
+}
+
+void MIPSAssembler::ANDI(int Rt, int Rs, uint16_t imm)      // todo: support larger immediate
+{
+    *mPC++ = (andi_op<<OP_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF) | (imm & MSK_16);
+}
+
+
+void MIPSAssembler::OR(int Rd, int Rs, int Rt)
+{
+    *mPC++ = (spec_op<<OP_SHF) | (or_fn<<FUNC_SHF) |
+                        (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rt<<RT_SHF);
+}
+
+void MIPSAssembler::ORI(int Rt, int Rs, uint16_t imm)
+{
+    *mPC++ = (ori_op<<OP_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF) | (imm & MSK_16);
+}
+
+void MIPSAssembler::NOR(int Rd, int Rs, int Rt)
+{
+    *mPC++ = (spec_op<<OP_SHF) | (nor_fn<<FUNC_SHF) |
+                        (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rt<<RT_SHF);
+}
+
+void MIPSAssembler::NOT(int Rd, int Rs)
+{
+    MIPSAssembler::NOR(Rd, Rs, 0);  // NOT(d,s) = NOR(d,s,zero)
+}
+
+void MIPSAssembler::XOR(int Rd, int Rs, int Rt)
+{
+    *mPC++ = (spec_op<<OP_SHF) | (xor_fn<<FUNC_SHF) |
+                        (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rt<<RT_SHF);
+}
+
+void MIPSAssembler::XORI(int Rt, int Rs, uint16_t imm)  // todo: support larger immediate
+{
+    *mPC++ = (xori_op<<OP_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF) | (imm & MSK_16);
+}
+
+void MIPSAssembler::SLL(int Rd, int Rt, int shft)
+{
+    *mPC++ = (spec_op<<OP_SHF) | (sll_fn<<FUNC_SHF) |
+                        (Rd<<RD_SHF) | (Rt<<RT_SHF) | (shft<<RE_SHF);
+}
+
+void MIPSAssembler::SLLV(int Rd, int Rt, int Rs)
+{
+    *mPC++ = (spec_op<<OP_SHF) | (sllv_fn<<FUNC_SHF) |
+                        (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rt<<RT_SHF);
+}
+
+void MIPSAssembler::SRL(int Rd, int Rt, int shft)
+{
+    *mPC++ = (spec_op<<OP_SHF) | (srl_fn<<FUNC_SHF) |
+                        (Rd<<RD_SHF) | (Rt<<RT_SHF) | (shft<<RE_SHF);
+}
+
+void MIPSAssembler::SRLV(int Rd, int Rt, int Rs)
+{
+    *mPC++ = (spec_op<<OP_SHF) | (srlv_fn<<FUNC_SHF) |
+                        (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rt<<RT_SHF);
+}
+
+void MIPSAssembler::SRA(int Rd, int Rt, int shft)
+{
+    *mPC++ = (spec_op<<OP_SHF) | (sra_fn<<FUNC_SHF) |
+                        (Rd<<RD_SHF) | (Rt<<RT_SHF) | (shft<<RE_SHF);
+}
+
+void MIPSAssembler::SRAV(int Rd, int Rt, int Rs)
+{
+    *mPC++ = (spec_op<<OP_SHF) | (srav_fn<<FUNC_SHF) |
+                        (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rt<<RT_SHF);
+}
+
+void MIPSAssembler::ROTR(int Rd, int Rt, int shft)      // mips32r2
+{
+    // note weird encoding (SRL + 1)
+    *mPC++ = (spec_op<<OP_SHF) | (srl_fn<<FUNC_SHF) |
+                        (1<<RS_SHF) | (Rd<<RD_SHF) | (Rt<<RT_SHF) | (shft<<RE_SHF);
+}
+
+void MIPSAssembler::ROTRV(int Rd, int Rt, int Rs)       // mips32r2
+{
+    // note weird encoding (SRLV + 1)
+    *mPC++ = (spec_op<<OP_SHF) | (srlv_fn<<FUNC_SHF) |
+                        (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rt<<RT_SHF) | (1<<RE_SHF);
+}
+
+// uses at2 register (mapped to some appropriate mips reg)
+void MIPSAssembler::RORsyn(int Rd, int Rt, int Rs)
+{
+    // synthetic: d = t rotated by s
+    MIPSAssembler::NEGU(R_at2, Rs);
+    MIPSAssembler::SLLV(R_at2, Rt, R_at2);
+    MIPSAssembler::SRLV(Rd, Rt, Rs);
+    MIPSAssembler::OR(Rd, Rd, R_at2);
+}
+
+// immediate version - uses at2 register (mapped to some appropriate mips reg)
+void MIPSAssembler::RORIsyn(int Rd, int Rt, int rot)
+{
+    // synthetic: d = t rotated by immed rot
+    // d = s >> rot | s << (32-rot)
+    MIPSAssembler::SLL(R_at2, Rt, 32-rot);
+    MIPSAssembler::SRL(Rd, Rt, rot);
+    MIPSAssembler::OR(Rd, Rd, R_at2);
+}
+
+void MIPSAssembler::CLO(int Rd, int Rs)
+{
+    // Rt field must have same gpr # as Rd
+    *mPC++ = (spec2_op<<OP_SHF) | (clo_fn<<FUNC_SHF) |
+                        (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rd<<RT_SHF);
+}
+
+void MIPSAssembler::CLZ(int Rd, int Rs)
+{
+    // Rt field must have same gpr # as Rd
+    *mPC++ = (spec2_op<<OP_SHF) | (clz_fn<<FUNC_SHF) |
+                        (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rd<<RT_SHF);
+}
+
+void MIPSAssembler::WSBH(int Rd, int Rt)      // mips32r2
+{
+    *mPC++ = (spec3_op<<OP_SHF) | (bshfl_fn<<FUNC_SHF) | (wsbh_fn << SA_SHF) |
+                        (Rt<<RT_SHF) | (Rd<<RD_SHF);
+}
+
+
+
+#if 0
+#pragma mark -
+#pragma mark Load/store...
+#endif
+
+void MIPSAssembler::LW(int Rt, int Rbase, int16_t offset)
+{
+    *mPC++ = (lw_op<<OP_SHF) | (Rbase<<RS_SHF) | (Rt<<RT_SHF) | (offset & MSK_16);
+}
+
+void MIPSAssembler::SW(int Rt, int Rbase, int16_t offset)
+{
+    *mPC++ = (sw_op<<OP_SHF) | (Rbase<<RS_SHF) | (Rt<<RT_SHF) | (offset & MSK_16);
+}
+
+// lb is sign-extended
+void MIPSAssembler::LB(int Rt, int Rbase, int16_t offset)
+{
+    *mPC++ = (lb_op<<OP_SHF) | (Rbase<<RS_SHF) | (Rt<<RT_SHF) | (offset & MSK_16);
+}
+
+void MIPSAssembler::LBU(int Rt, int Rbase, int16_t offset)
+{
+    *mPC++ = (lbu_op<<OP_SHF) | (Rbase<<RS_SHF) | (Rt<<RT_SHF) | (offset & MSK_16);
+}
+
+void MIPSAssembler::SB(int Rt, int Rbase, int16_t offset)
+{
+    *mPC++ = (sb_op<<OP_SHF) | (Rbase<<RS_SHF) | (Rt<<RT_SHF) | (offset & MSK_16);
+}
+
+// lh is sign-extended
+void MIPSAssembler::LH(int Rt, int Rbase, int16_t offset)
+{
+    *mPC++ = (lh_op<<OP_SHF) | (Rbase<<RS_SHF) | (Rt<<RT_SHF) | (offset & MSK_16);
+}
+
+void MIPSAssembler::LHU(int Rt, int Rbase, int16_t offset)
+{
+    *mPC++ = (lhu_op<<OP_SHF) | (Rbase<<RS_SHF) | (Rt<<RT_SHF) | (offset & MSK_16);
+}
+
+void MIPSAssembler::SH(int Rt, int Rbase, int16_t offset)
+{
+    *mPC++ = (sh_op<<OP_SHF) | (Rbase<<RS_SHF) | (Rt<<RT_SHF) | (offset & MSK_16);
+}
+
+void MIPSAssembler::LUI(int Rt, int16_t offset)
+{
+    *mPC++ = (lui_op<<OP_SHF) | (Rt<<RT_SHF) | (offset & MSK_16);
+}
+
+
+
+#if 0
+#pragma mark -
+#pragma mark Register move...
+#endif
+
+void MIPSAssembler::MOVE(int Rd, int Rs)
+{
+    // encoded as "or rd, rs, zero"
+    *mPC++ = (spec_op<<OP_SHF) | (or_fn<<FUNC_SHF) |
+                        (Rd<<RD_SHF) | (Rs<<RS_SHF) | (0<<RT_SHF);
+}
+
+void MIPSAssembler::MOVN(int Rd, int Rs, int Rt)
+{
+    *mPC++ = (spec_op<<OP_SHF) | (movn_fn<<FUNC_SHF) |
+                        (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rt<<RT_SHF);
+}
+
+void MIPSAssembler::MOVZ(int Rd, int Rs, int Rt)
+{
+    *mPC++ = (spec_op<<OP_SHF) | (movz_fn<<FUNC_SHF) |
+                        (Rd<<RD_SHF) | (Rs<<RS_SHF) | (Rt<<RT_SHF);
+}
+
+void MIPSAssembler::MFHI(int Rd)
+{
+    *mPC++ = (spec_op<<OP_SHF) | (mfhi_fn<<FUNC_SHF) | (Rd<<RD_SHF);
+}
+
+void MIPSAssembler::MFLO(int Rd)
+{
+    *mPC++ = (spec_op<<OP_SHF) | (mflo_fn<<FUNC_SHF) | (Rd<<RD_SHF);
+}
+
+void MIPSAssembler::MTHI(int Rs)
+{
+    *mPC++ = (spec_op<<OP_SHF) | (mthi_fn<<FUNC_SHF) | (Rs<<RS_SHF);
+}
+
+void MIPSAssembler::MTLO(int Rs)
+{
+    *mPC++ = (spec_op<<OP_SHF) | (mtlo_fn<<FUNC_SHF) | (Rs<<RS_SHF);
+}
+
+
+
+#if 0
+#pragma mark -
+#pragma mark Branch...
+#endif
+
+// temporarily forcing a NOP into branch-delay slot, just to be safe
+// todo: remove NOP, optimze use of delay slots
+void MIPSAssembler::B(const char* label)
+{
+    mBranchTargets.add(branch_target_t(label, mPC));
+
+    // encoded as BEQ zero, zero, offset
+    *mPC++ = (beq_op<<OP_SHF) | (0<<RT_SHF)
+                        | (0<<RS_SHF) | 0;  // offset filled in later
+
+    MIPSAssembler::NOP();
+}
+
+void MIPSAssembler::BEQ(int Rs, int Rt, const char* label)
+{
+    mBranchTargets.add(branch_target_t(label, mPC));
+    *mPC++ = (beq_op<<OP_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF) | 0;
+    MIPSAssembler::NOP();
+}
+
+void MIPSAssembler::BNE(int Rs, int Rt, const char* label)
+{
+    mBranchTargets.add(branch_target_t(label, mPC));
+    *mPC++ = (bne_op<<OP_SHF) | (Rt<<RT_SHF) | (Rs<<RS_SHF) | 0;
+    MIPSAssembler::NOP();
+}
+
+void MIPSAssembler::BLEZ(int Rs, const char* label)
+{
+    mBranchTargets.add(branch_target_t(label, mPC));
+    *mPC++ = (blez_op<<OP_SHF) | (0<<RT_SHF) | (Rs<<RS_SHF) | 0;
+    MIPSAssembler::NOP();
+}
+
+void MIPSAssembler::BLTZ(int Rs, const char* label)
+{
+    mBranchTargets.add(branch_target_t(label, mPC));
+    *mPC++ = (regimm_op<<OP_SHF) | (bltz_fn<<RT_SHF) | (Rs<<RS_SHF) | 0;
+    MIPSAssembler::NOP();
+}
+
+void MIPSAssembler::BGTZ(int Rs, const char* label)
+{
+    mBranchTargets.add(branch_target_t(label, mPC));
+    *mPC++ = (bgtz_op<<OP_SHF) | (0<<RT_SHF) | (Rs<<RS_SHF) | 0;
+    MIPSAssembler::NOP();
+}
+
+
+void MIPSAssembler::BGEZ(int Rs, const char* label)
+{
+    mBranchTargets.add(branch_target_t(label, mPC));
+    *mPC++ = (regimm_op<<OP_SHF) | (bgez_fn<<RT_SHF) | (Rs<<RS_SHF) | 0;
+    MIPSAssembler::NOP();
+}
+
+void MIPSAssembler::JR(int Rs)
+{
+    *mPC++ = (spec_op<<OP_SHF) | (Rs<<RS_SHF) | (jr_fn << FUNC_SHF);
+    MIPSAssembler::NOP();
+}
+
+
+#if 0
+#pragma mark -
+#pragma mark Synthesized Branch...
+#endif
+
+// synthetic variants of branches (using slt & friends)
+void MIPSAssembler::BEQZ(int Rs, const char* label)
+{
+    BEQ(Rs, R_zero, label);
+}
+
+void MIPSAssembler::BNEZ(int Rs, const char* label)
+{
+    BNE(R_at, R_zero, label);
+}
+
+void MIPSAssembler::BGE(int Rs, int Rt, const char* label)
+{
+    SLT(R_at, Rs, Rt);
+    BEQ(R_at, R_zero, label);
+}
+
+void MIPSAssembler::BGEU(int Rs, int Rt, const char* label)
+{
+    SLTU(R_at, Rs, Rt);
+    BEQ(R_at, R_zero, label);
+}
+
+void MIPSAssembler::BGT(int Rs, int Rt, const char* label)
+{
+    SLT(R_at, Rt, Rs);   // rev
+    BNE(R_at, R_zero, label);
+}
+
+void MIPSAssembler::BGTU(int Rs, int Rt, const char* label)
+{
+    SLTU(R_at, Rt, Rs);   // rev
+    BNE(R_at, R_zero, label);
+}
+
+void MIPSAssembler::BLE(int Rs, int Rt, const char* label)
+{
+    SLT(R_at, Rt, Rs);   // rev
+    BEQ(R_at, R_zero, label);
+}
+
+void MIPSAssembler::BLEU(int Rs, int Rt, const char* label)
+{
+    SLTU(R_at, Rt, Rs);  // rev
+    BEQ(R_at, R_zero, label);
+}
+
+void MIPSAssembler::BLT(int Rs, int Rt, const char* label)
+{
+    SLT(R_at, Rs, Rt);
+    BNE(R_at, R_zero, label);
+}
+
+void MIPSAssembler::BLTU(int Rs, int Rt, const char* label)
+{
+    SLTU(R_at, Rs, Rt);
+    BNE(R_at, R_zero, label);
+}
+
+
+
+
+#if 0
+#pragma mark -
+#pragma mark Misc...
+#endif
+
+void MIPSAssembler::NOP(void)
+{
+    // encoded as "sll zero, zero, 0", which is all zero
+    *mPC++ = (spec_op<<OP_SHF) | (sll_fn<<FUNC_SHF);
+}
+
+// using this as special opcode for not-yet-implemented ARM instruction
+void MIPSAssembler::NOP2(void)
+{
+    // encoded as "sll zero, zero, 2", still a nop, but a unique code
+    *mPC++ = (spec_op<<OP_SHF) | (sll_fn<<FUNC_SHF) | (2 << RE_SHF);
+}
+
+// using this as special opcode for purposefully NOT implemented ARM instruction
+void MIPSAssembler::UNIMPL(void)
+{
+    // encoded as "sll zero, zero, 3", still a nop, but a unique code
+    *mPC++ = (spec_op<<OP_SHF) | (sll_fn<<FUNC_SHF) | (3 << RE_SHF);
+}
+
+
+}; // namespace android:
+
+
diff --git a/libpixelflinger/codeflinger/MIPSAssembler.h b/libpixelflinger/codeflinger/MIPSAssembler.h
new file mode 100644
index 0000000..d8e8165
--- /dev/null
+++ b/libpixelflinger/codeflinger/MIPSAssembler.h
@@ -0,0 +1,555 @@
+/* libs/pixelflinger/codeflinger/MIPSAssembler.h
+**
+** Copyright 2012, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+**     http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+#ifndef ANDROID_MIPSASSEMBLER_H
+#define ANDROID_MIPSASSEMBLER_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <utils/Vector.h>
+#include <utils/KeyedVector.h>
+
+#include "tinyutils/smartpointer.h"
+#include "codeflinger/ARMAssemblerInterface.h"
+#include "codeflinger/CodeCache.h"
+
+namespace android {
+
+class MIPSAssembler;    // forward reference
+
+// this class mimics ARMAssembler interface
+//  intent is to translate each ARM instruction to 1 or more MIPS instr
+//  implementation calls MIPSAssembler class to generate mips code
+class ArmToMipsAssembler : public ARMAssemblerInterface
+{
+public:
+                ArmToMipsAssembler(const sp<Assembly>& assembly,
+                        char *abuf = 0, int linesz = 0, int instr_count = 0);
+    virtual     ~ArmToMipsAssembler();
+
+    uint32_t*   base() const;
+    uint32_t*   pc() const;
+    void        disassemble(const char* name);
+
+    virtual void    reset();
+
+    virtual int     generate(const char* name);
+    virtual int     getCodegenArch();
+
+    virtual void    prolog();
+    virtual void    epilog(uint32_t touched);
+    virtual void    comment(const char* string);
+
+
+    // -----------------------------------------------------------------------
+    // shifters and addressing modes
+    // -----------------------------------------------------------------------
+
+    // shifters...
+    virtual bool        isValidImmediate(uint32_t immed);
+    virtual int         buildImmediate(uint32_t i, uint32_t& rot, uint32_t& imm);
+
+    virtual uint32_t    imm(uint32_t immediate);
+    virtual uint32_t    reg_imm(int Rm, int type, uint32_t shift);
+    virtual uint32_t    reg_rrx(int Rm);
+    virtual uint32_t    reg_reg(int Rm, int type, int Rs);
+
+    // addressing modes...
+    // LDR(B)/STR(B)/PLD
+    // (immediate and Rm can be negative, which indicates U=0)
+    virtual uint32_t    immed12_pre(int32_t immed12, int W=0);
+    virtual uint32_t    immed12_post(int32_t immed12);
+    virtual uint32_t    reg_scale_pre(int Rm, int type=0, uint32_t shift=0, int W=0);
+    virtual uint32_t    reg_scale_post(int Rm, int type=0, uint32_t shift=0);
+
+    // LDRH/LDRSB/LDRSH/STRH
+    // (immediate and Rm can be negative, which indicates U=0)
+    virtual uint32_t    immed8_pre(int32_t immed8, int W=0);
+    virtual uint32_t    immed8_post(int32_t immed8);
+    virtual uint32_t    reg_pre(int Rm, int W=0);
+    virtual uint32_t    reg_post(int Rm);
+
+
+
+
+    virtual void    dataProcessing(int opcode, int cc, int s,
+                                int Rd, int Rn,
+                                uint32_t Op2);
+    virtual void MLA(int cc, int s,
+                int Rd, int Rm, int Rs, int Rn);
+    virtual void MUL(int cc, int s,
+                int Rd, int Rm, int Rs);
+    virtual void UMULL(int cc, int s,
+                int RdLo, int RdHi, int Rm, int Rs);
+    virtual void UMUAL(int cc, int s,
+                int RdLo, int RdHi, int Rm, int Rs);
+    virtual void SMULL(int cc, int s,
+                int RdLo, int RdHi, int Rm, int Rs);
+    virtual void SMUAL(int cc, int s,
+                int RdLo, int RdHi, int Rm, int Rs);
+
+    virtual void B(int cc, uint32_t* pc);
+    virtual void BL(int cc, uint32_t* pc);
+    virtual void BX(int cc, int Rn);
+    virtual void label(const char* theLabel);
+    virtual void B(int cc, const char* label);
+    virtual void BL(int cc, const char* label);
+
+    virtual uint32_t* pcForLabel(const char* label);
+
+    virtual void LDR (int cc, int Rd,
+                int Rn, uint32_t offset = 0);
+    virtual void LDRB(int cc, int Rd,
+                int Rn, uint32_t offset = 0);
+    virtual void STR (int cc, int Rd,
+                int Rn, uint32_t offset = 0);
+    virtual void STRB(int cc, int Rd,
+                int Rn, uint32_t offset = 0);
+    virtual void LDRH (int cc, int Rd,
+                int Rn, uint32_t offset = 0);
+    virtual void LDRSB(int cc, int Rd,
+                int Rn, uint32_t offset = 0);
+    virtual void LDRSH(int cc, int Rd,
+                int Rn, uint32_t offset = 0);
+    virtual void STRH (int cc, int Rd,
+                int Rn, uint32_t offset = 0);
+
+    virtual void LDM(int cc, int dir,
+                int Rn, int W, uint32_t reg_list);
+    virtual void STM(int cc, int dir,
+                int Rn, int W, uint32_t reg_list);
+
+    virtual void SWP(int cc, int Rn, int Rd, int Rm);
+    virtual void SWPB(int cc, int Rn, int Rd, int Rm);
+    virtual void SWI(int cc, uint32_t comment);
+
+    virtual void PLD(int Rn, uint32_t offset);
+    virtual void CLZ(int cc, int Rd, int Rm);
+    virtual void QADD(int cc, int Rd, int Rm, int Rn);
+    virtual void QDADD(int cc, int Rd, int Rm, int Rn);
+    virtual void QSUB(int cc, int Rd, int Rm, int Rn);
+    virtual void QDSUB(int cc, int Rd, int Rm, int Rn);
+    virtual void SMUL(int cc, int xy,
+                int Rd, int Rm, int Rs);
+    virtual void SMULW(int cc, int y,
+                int Rd, int Rm, int Rs);
+    virtual void SMLA(int cc, int xy,
+                int Rd, int Rm, int Rs, int Rn);
+    virtual void SMLAL(int cc, int xy,
+                int RdHi, int RdLo, int Rs, int Rm);
+    virtual void SMLAW(int cc, int y,
+                int Rd, int Rm, int Rs, int Rn);
+
+    // byte/half word extract...
+    virtual void UXTB16(int cc, int Rd, int Rm, int rotate);
+
+    // bit manipulation...
+    virtual void UBFX(int cc, int Rd, int Rn, int lsb, int width);
+
+    // this is some crap to share is MIPSAssembler class for debug
+    char *      mArmDisassemblyBuffer;
+    int         mArmLineLength;
+    int         mArmInstrCount;
+
+    int         mInum;      // current arm instuction number (0..n)
+    uint32_t**  mArmPC;     // array: PC for 1st mips instr of
+                            //      each translated ARM instr
+
+
+private:
+    ArmToMipsAssembler(const ArmToMipsAssembler& rhs);
+    ArmToMipsAssembler& operator = (const ArmToMipsAssembler& rhs);
+
+    void init_conditional_labels(void);
+
+    void protectConditionalOperands(int Rd);
+
+    // reg__tmp set to MIPS AT, reg 1
+    int dataProcAdrModes(int op, int& source, bool sign = false, int reg_tmp = 1);
+
+    sp<Assembly>        mAssembly;
+    MIPSAssembler*      mMips;
+
+
+    enum misc_constants_t {
+        ARM_MAX_INSTUCTIONS = 512  // based on ASSEMBLY_SCRATCH_SIZE
+    };
+
+    enum {
+        SRC_REG = 0,
+        SRC_IMM,
+        SRC_ERROR = -1
+    };
+
+    enum addr_modes {
+        // start above the range of legal mips reg #'s (0-31)
+        AMODE_REG = 0x20,
+        AMODE_IMM, AMODE_REG_IMM,               // for data processing
+        AMODE_IMM_12_PRE, AMODE_IMM_12_POST,    // for load/store
+        AMODE_REG_SCALE_PRE, AMODE_IMM_8_PRE,
+        AMODE_IMM_8_POST, AMODE_REG_PRE,
+        AMODE_UNSUPPORTED
+    };
+
+    struct addr_mode_t {    // address modes for current ARM instruction
+        int         reg;
+        int         stype;
+        uint32_t    value;
+        bool        writeback;  // writeback the adr reg after modification
+    } amode;
+
+    enum cond_types {
+        CMP_COND = 1,
+        SBIT_COND
+    };
+
+    struct cond_mode_t {    // conditional-execution info for current ARM instruction
+        cond_types  type;
+        int         r1;
+        int         r2;
+        int         labelnum;
+        char        label[100][10];
+    } cond;
+
+};
+
+
+
+
+// ----------------------------------------------------------------------------
+// ----------------------------------------------------------------------------
+// ----------------------------------------------------------------------------
+
+// This is the basic MIPS assembler, which just creates the opcodes in memory.
+// All the more complicated work is done in ArmToMipsAssember above.
+
+class MIPSAssembler
+{
+public:
+                MIPSAssembler(const sp<Assembly>& assembly, ArmToMipsAssembler *parent);
+    virtual     ~MIPSAssembler();
+
+    uint32_t*   base() const;
+    uint32_t*   pc() const;
+    void        reset();
+
+    void        disassemble(const char* name);
+
+    void        prolog();
+    void        epilog(uint32_t touched);
+    int         generate(const char* name);
+    void        comment(const char* string);
+    void        label(const char* string);
+
+    // valid only after generate() has been called
+    uint32_t*   pcForLabel(const char* label);
+
+
+    // ------------------------------------------------------------------------
+    // MIPSAssemblerInterface...
+    // ------------------------------------------------------------------------
+
+#if 0
+#pragma mark -
+#pragma mark Arithmetic...
+#endif
+
+    void ADDU(int Rd, int Rs, int Rt);
+    void ADDIU(int Rt, int Rs, int16_t imm);
+    void SUBU(int Rd, int Rs, int Rt);
+    void SUBIU(int Rt, int Rs, int16_t imm);
+    void NEGU(int Rd, int Rs);
+    void MUL(int Rd, int Rs, int Rt);
+    void MULT(int Rs, int Rt);      // dest is hi,lo
+    void MULTU(int Rs, int Rt);     // dest is hi,lo
+    void MADD(int Rs, int Rt);      // hi,lo = hi,lo + Rs * Rt
+    void MADDU(int Rs, int Rt);     // hi,lo = hi,lo + Rs * Rt
+    void MSUB(int Rs, int Rt);      // hi,lo = hi,lo - Rs * Rt
+    void MSUBU(int Rs, int Rt);     // hi,lo = hi,lo - Rs * Rt
+    void SEB(int Rd, int Rt);       // sign-extend byte (mips32r2)
+    void SEH(int Rd, int Rt);       // sign-extend half-word (mips32r2)
+
+
+#if 0
+#pragma mark -
+#pragma mark Comparisons...
+#endif
+
+    void SLT(int Rd, int Rs, int Rt);
+    void SLTI(int Rt, int Rs, int16_t imm);
+    void SLTU(int Rd, int Rs, int Rt);
+    void SLTIU(int Rt, int Rs, int16_t imm);
+
+
+#if 0
+#pragma mark -
+#pragma mark Logical...
+#endif
+
+    void AND(int Rd, int Rs, int Rt);
+    void ANDI(int Rd, int Rs, uint16_t imm);
+    void OR(int Rd, int Rs, int Rt);
+    void ORI(int Rt, int Rs, uint16_t imm);
+    void NOR(int Rd, int Rs, int Rt);
+    void NOT(int Rd, int Rs);
+    void XOR(int Rd, int Rs, int Rt);
+    void XORI(int Rt, int Rs, uint16_t imm);
+
+    void SLL(int Rd, int Rt, int shft);
+    void SLLV(int Rd, int Rt, int Rs);
+    void SRL(int Rd, int Rt, int shft);
+    void SRLV(int Rd, int Rt, int Rs);
+    void SRA(int Rd, int Rt, int shft);
+    void SRAV(int Rd, int Rt, int Rs);
+    void ROTR(int Rd, int Rt, int shft);    // mips32r2
+    void ROTRV(int Rd, int Rt, int Rs);     // mips32r2
+    void RORsyn(int Rd, int Rs, int Rt);    // synthetic: d = s rotated by t
+    void RORIsyn(int Rd, int Rt, int rot);  // synthetic: d = s rotated by immed
+
+    void CLO(int Rd, int Rs);
+    void CLZ(int Rd, int Rs);
+    void WSBH(int Rd, int Rt);
+
+
+#if 0
+#pragma mark -
+#pragma mark Load/store...
+#endif
+
+    void LW(int Rt, int Rbase, int16_t offset);
+    void SW(int Rt, int Rbase, int16_t offset);
+    void LB(int Rt, int Rbase, int16_t offset);
+    void LBU(int Rt, int Rbase, int16_t offset);
+    void SB(int Rt, int Rbase, int16_t offset);
+    void LH(int Rt, int Rbase, int16_t offset);
+    void LHU(int Rt, int Rbase, int16_t offset);
+    void SH(int Rt, int Rbase, int16_t offset);
+    void LUI(int Rt, int16_t offset);
+
+#if 0
+#pragma mark -
+#pragma mark Register moves...
+#endif
+
+    void MOVE(int Rd, int Rs);
+    void MOVN(int Rd, int Rs, int Rt);
+    void MOVZ(int Rd, int Rs, int Rt);
+    void MFHI(int Rd);
+    void MFLO(int Rd);
+    void MTHI(int Rs);
+    void MTLO(int Rs);
+
+#if 0
+#pragma mark -
+#pragma mark Branch...
+#endif
+
+    void B(const char* label);
+    void BEQ(int Rs, int Rt, const char* label);
+    void BNE(int Rs, int Rt, const char* label);
+    void BGEZ(int Rs, const char* label);
+    void BGTZ(int Rs, const char* label);
+    void BLEZ(int Rs, const char* label);
+    void BLTZ(int Rs, const char* label);
+    void JR(int Rs);
+
+
+#if 0
+#pragma mark -
+#pragma mark Synthesized Branch...
+#endif
+
+    // synthetic variants of above (using slt & friends)
+    void BEQZ(int Rs, const char* label);
+    void BNEZ(int Rs, const char* label);
+    void BGE(int Rs, int Rt, const char* label);
+    void BGEU(int Rs, int Rt, const char* label);
+    void BGT(int Rs, int Rt, const char* label);
+    void BGTU(int Rs, int Rt, const char* label);
+    void BLE(int Rs, int Rt, const char* label);
+    void BLEU(int Rs, int Rt, const char* label);
+    void BLT(int Rs, int Rt, const char* label);
+    void BLTU(int Rs, int Rt, const char* label);
+
+#if 0
+#pragma mark -
+#pragma mark Misc...
+#endif
+
+    void NOP(void);
+    void NOP2(void);
+    void UNIMPL(void);
+
+
+
+
+
+private:
+    void string_detab(char *s);
+    void string_pad(char *s, int padded_len);
+
+    ArmToMipsAssembler *mParent;
+    sp<Assembly>    mAssembly;
+    uint32_t*       mBase;
+    uint32_t*       mPC;
+    uint32_t*       mPrologPC;
+    int64_t         mDuration;
+#if defined(WITH_LIB_HARDWARE)
+    bool            mQemuTracing;
+#endif
+
+    struct branch_target_t {
+        inline branch_target_t() : label(0), pc(0) { }
+        inline branch_target_t(const char* l, uint32_t* p)
+            : label(l), pc(p) { }
+        const char* label;
+        uint32_t*   pc;
+    };
+
+    Vector<branch_target_t>                 mBranchTargets;
+    KeyedVector< const char*, uint32_t* >   mLabels;
+    KeyedVector< uint32_t*, const char* >   mLabelsInverseMapping;
+    KeyedVector< uint32_t*, const char* >   mComments;
+
+
+
+
+    // opcode field of all instructions
+    enum opcode_field {
+        spec_op, regimm_op, j_op, jal_op,           // 00
+        beq_op, bne_op, blez_op, bgtz_op,
+        addi_op, addiu_op, slti_op, sltiu_op,       // 08
+        andi_op, ori_op, xori_op, lui_op,
+        cop0_op, cop1_op, cop2_op, cop1x_op,        // 10
+        beql_op, bnel_op, blezl_op, bgtzl_op,
+        daddi_op, daddiu_op, ldl_op, ldr_op,        // 18
+        spec2_op, jalx_op, mdmx_op, spec3_op,
+        lb_op, lh_op, lwl_op, lw_op,                // 20
+        lbu_op, lhu_op, lwr_op, lwu_op,
+        sb_op, sh_op, swl_op, sw_op,                // 28
+        sdl_op, sdr_op, swr_op, cache_op,
+        ll_op, lwc1_op, lwc2_op, pref_op,           // 30
+        lld_op, ldc1_op, ldc2_op, ld_op,
+        sc_op, swc1_op, swc2_op, rsrv_3b_op,        // 38
+        scd_op, sdc1_op, sdc2_op, sd_op
+    };
+
+
+    // func field for special opcode
+    enum func_spec_op {
+        sll_fn, movc_fn, srl_fn, sra_fn,            // 00
+        sllv_fn, pmon_fn, srlv_fn, srav_fn,
+        jr_fn, jalr_fn, movz_fn, movn_fn,           // 08
+        syscall_fn, break_fn, spim_fn, sync_fn,
+        mfhi_fn, mthi_fn, mflo_fn, mtlo_fn,         // 10
+        dsllv_fn, rsrv_spec_2, dsrlv_fn, dsrav_fn,
+        mult_fn, multu_fn, div_fn, divu_fn,         // 18
+        dmult_fn, dmultu_fn, ddiv_fn, ddivu_fn,
+        add_fn, addu_fn, sub_fn, subu_fn,           // 20
+        and_fn, or_fn, xor_fn, nor_fn,
+        rsrv_spec_3, rsrv_spec_4, slt_fn, sltu_fn,  // 28
+        dadd_fn, daddu_fn, dsub_fn, dsubu_fn,
+        tge_fn, tgeu_fn, tlt_fn, tltu_fn,           // 30
+        teq_fn, rsrv_spec_5, tne_fn, rsrv_spec_6,
+        dsll_fn, rsrv_spec_7, dsrl_fn, dsra_fn,     // 38
+        dsll32_fn, rsrv_spec_8, dsrl32_fn, dsra32_fn
+    };
+
+    // func field for spec2 opcode
+    enum func_spec2_op {
+        madd_fn, maddu_fn, mul_fn, rsrv_spec2_3,
+        msub_fn, msubu_fn,
+        clz_fn = 0x20, clo_fn,
+        dclz_fn = 0x24, dclo_fn,
+        sdbbp_fn = 0x3f
+    };
+
+    // func field for spec3 opcode
+    enum func_spec3_op {
+        ext_fn, dextm_fn, dextu_fn, dext_fn,
+        ins_fn, dinsm_fn, dinsu_fn, dins_fn,
+        bshfl_fn = 0x20,
+        dbshfl_fn = 0x24,
+        rdhwr_fn = 0x3b
+    };
+
+    // sa field for spec3 opcodes, with BSHFL function
+    enum func_spec3_bshfl {
+        wsbh_fn = 0x02,
+        seb_fn = 0x10,
+        seh_fn = 0x18
+    };
+
+    // rt field of regimm opcodes.
+    enum regimm_fn {
+        bltz_fn, bgez_fn, bltzl_fn, bgezl_fn,
+        rsrv_ri_fn4, rsrv_ri_fn5, rsrv_ri_fn6, rsrv_ri_fn7,
+        tgei_fn, tgeiu_fn, tlti_fn, tltiu_fn,
+        teqi_fn, rsrv_ri_fn_0d, tnei_fn, rsrv_ri_fn0f,
+        bltzal_fn, bgezal_fn, bltzall_fn, bgezall_fn,
+        bposge32_fn= 0x1c,
+        synci_fn = 0x1f
+    };
+
+
+    // func field for mad opcodes (MIPS IV).
+    enum mad_func {
+        madd_fp_op      = 0x08, msub_fp_op      = 0x0a,
+        nmadd_fp_op     = 0x0c, nmsub_fp_op     = 0x0e
+    };
+
+
+    enum mips_inst_shifts {
+        OP_SHF       = 26,
+        JTARGET_SHF  = 0,
+        RS_SHF       = 21,
+        RT_SHF       = 16,
+        RD_SHF       = 11,
+        RE_SHF       = 6,
+        SA_SHF       = RE_SHF,  // synonym
+        IMM_SHF      = 0,
+        FUNC_SHF     = 0,
+
+        // mask values
+        MSK_16       = 0xffff,
+
+
+        CACHEOP_SHF  = 18,
+        CACHESEL_SHF = 16,
+    };
+};
+
+enum mips_regnames {
+    R_zero = 0,
+            R_at,   R_v0,   R_v1,   R_a0,   R_a1,   R_a2,   R_a3,
+    R_t0,   R_t1,   R_t2,   R_t3,   R_t4,   R_t5,   R_t6,   R_t7,
+    R_s0,   R_s1,   R_s2,   R_s3,   R_s4,   R_s5,   R_s6,   R_s7,
+    R_t8,   R_t9,   R_k0,   R_k1,   R_gp,   R_sp,   R_s8,   R_ra,
+    R_lr = R_s8,
+
+    // arm regs 0-15 are mips regs 2-17 (meaning s0 & s1 are used)
+    R_at2  = R_s2,    // R_at2 = 18 = s2
+    R_cmp  = R_s3,    // R_cmp = 19 = s3
+    R_cmp2 = R_s4     // R_cmp2 = 20 = s4
+};
+
+
+
+}; // namespace android
+
+#endif //ANDROID_MIPSASSEMBLER_H
diff --git a/libpixelflinger/codeflinger/load_store.cpp b/libpixelflinger/codeflinger/load_store.cpp
index 62aa05c..146fa52 100644
--- a/libpixelflinger/codeflinger/load_store.cpp
+++ b/libpixelflinger/codeflinger/load_store.cpp
@@ -110,7 +110,11 @@ void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
 {
     const int maskLen = h-l;
 
+#ifdef __mips__
+    assert(maskLen<=11);
+#else
     assert(maskLen<=8);
+#endif
     assert(h);
     
 #if __ARM_ARCH__ >= 7
diff --git a/libpixelflinger/codeflinger/mips_disassem.c b/libpixelflinger/codeflinger/mips_disassem.c
new file mode 100644
index 0000000..4ab9bd3
--- /dev/null
+++ b/libpixelflinger/codeflinger/mips_disassem.c
@@ -0,0 +1,590 @@
+/*  $NetBSD: db_disasm.c,v 1.19 2007/02/28 04:21:53 thorpej Exp $   */
+
+/*-
+ * Copyright (c) 1991, 1993
+ *  The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Ralph Campbell.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *  from: @(#)kadb.c    8.1 (Berkeley) 6/10/93
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <sys/cdefs.h>
+
+#include <sys/types.h>
+#include "mips_opcode.h"
+
+
+// #include <sys/systm.h>
+// #include <sys/param.h>
+
+// #include <machine/reg.h>
+// #include <machine/cpu.h>
+/*#include <machine/param.h>*/
+// #include <machine/db_machdep.h>
+
+// #include <ddb/db_interface.h>
+// #include <ddb/db_output.h>
+// #include <ddb/db_extern.h>
+// #include <ddb/db_sym.h>
+
+
+static char *sprintf_buffer;
+static int sprintf_buf_len;
+
+
+typedef uint32_t db_addr_t;
+static void db_printf(const char* fmt, ...);
+
+static const char * const op_name[64] = {
+/* 0 */ "spec", "bcond","j  ",    "jal",  "beq",  "bne",  "blez", "bgtz",
+/* 8 */ "addi", "addiu","slti", "sltiu","andi", "ori",  "xori", "lui",
+/*16 */ "cop0", "cop1", "cop2", "cop3", "beql", "bnel", "blezl","bgtzl",
+/*24 */ "daddi","daddiu","ldl", "ldr",  "op34", "op35", "op36", "op37",
+/*32 */ "lb ",   "lh ",   "lwl",  "lw ",   "lbu",  "lhu",  "lwr",  "lwu",
+/*40 */ "sb ",   "sh ",   "swl",  "sw ",   "sdl",  "sdr",  "swr",  "cache",
+/*48 */ "ll ",   "lwc1", "lwc2", "lwc3", "lld",  "ldc1", "ldc2", "ld ",
+/*56 */ "sc ",   "swc1", "swc2", "swc3", "scd",  "sdc1", "sdc2", "sd "
+};
+
+static const char * const spec_name[64] = {
+/* 0 */ "sll",  "spec01","srl", "sra",  "sllv", "spec05","srlv","srav",
+/* 8 */ "jr",   "jalr", "movz","movn","syscall","break","spec16","sync",
+/*16 */ "mfhi", "mthi", "mflo", "mtlo", "dsllv","spec25","dsrlv","dsrav",
+/*24 */ "mult", "multu","div",  "divu", "dmult","dmultu","ddiv","ddivu",
+/*32 */ "add",  "addu", "sub",  "subu", "and",  "or ",   "xor",  "nor",
+/*40 */ "spec50","spec51","slt","sltu", "dadd","daddu","dsub","dsubu",
+/*48 */ "tge","tgeu","tlt","tltu","teq","spec65","tne","spec67",
+/*56 */ "dsll","spec71","dsrl","dsra","dsll32","spec75","dsrl32","dsra32"
+};
+
+static const char * const spec2_name[64] = {     /* QED RM4650, R5000, etc. */
+/* 0x00 */ "madd", "maddu", "mul", "spec3", "msub", "msubu", "rsrv6", "rsrv7",
+/* 0x08 */ "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv",
+/* 0x10 */ "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv",
+/* 0x18 */ "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv",
+/* 0x20 */ "clz",  "clo",  "rsrv", "rsrv", "dclz", "dclo", "rsrv", "rsrv",
+/* 0x28 */ "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv",
+/* 0x30 */ "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv", "rsrv",
+/* 0x38 */ "rsrv", "rsrv", "rsrv", "resv", "rsrv", "rsrv", "rsrv", "sdbbp"
+};
+
+static const char * const bcond_name[32] = {
+/* 0 */ "bltz", "bgez", "bltzl", "bgezl", "?", "?", "?", "?",
+/* 8 */ "tgei", "tgeiu", "tlti", "tltiu", "teqi", "?", "tnei", "?",
+/*16 */ "bltzal", "bgezal", "bltzall", "bgezall", "?", "?", "?", "?",
+/*24 */ "?", "?", "?", "?", "?", "?", "?", "?",
+};
+
+static const char * const cop1_name[64] = {
+/* 0 */ "fadd",  "fsub", "fmpy", "fdiv", "fsqrt","fabs", "fmov", "fneg",
+/* 8 */ "fop08","fop09","fop0a","fop0b","fop0c","fop0d","fop0e","fop0f",
+/*16 */ "fop10","fop11","fop12","fop13","fop14","fop15","fop16","fop17",
+/*24 */ "fop18","fop19","fop1a","fop1b","fop1c","fop1d","fop1e","fop1f",
+/*32 */ "fcvts","fcvtd","fcvte","fop23","fcvtw","fop25","fop26","fop27",
+/*40 */ "fop28","fop29","fop2a","fop2b","fop2c","fop2d","fop2e","fop2f",
+/*48 */ "fcmp.f","fcmp.un","fcmp.eq","fcmp.ueq","fcmp.olt","fcmp.ult",
+    "fcmp.ole","fcmp.ule",
+/*56 */ "fcmp.sf","fcmp.ngle","fcmp.seq","fcmp.ngl","fcmp.lt","fcmp.nge",
+    "fcmp.le","fcmp.ngt"
+};
+
+static const char * const fmt_name[16] = {
+    "s",    "d",    "e",    "fmt3",
+    "w",    "fmt5", "fmt6", "fmt7",
+    "fmt8", "fmt9", "fmta", "fmtb",
+    "fmtc", "fmtd", "fmte", "fmtf"
+};
+
+#if defined(__mips_n32) || defined(__mips_n64)
+static char * const reg_name[32] = {
+    "zero", "at",   "v0",   "v1",   "a0",   "a1",   "a2",   "a3",
+    "a4",   "a5",   "a6",   "a7",   "t0",   "t1",   "t2",   "t3",
+    "s0",   "s1",   "s2",   "s3",   "s4",   "s5",   "s6",   "s7",
+    "t8",   "t9",   "k0",   "k1",   "gp",   "sp",   "s8",   "ra"
+};
+#else
+
+static char * alt_arm_reg_name[32] = {  // hacked names for comparison with ARM code
+    "zero", "at",   "r0",   "r1",   "r2",   "r3",   "r4",   "r5",
+    "r6",   "r7",   "r8",   "r9",   "r10",  "r11",  "r12",  "r13",
+    "r14",  "r15",  "at2",  "cmp",  "s4",   "s5",   "s6",   "s7",
+    "t8",   "t9",   "k0",   "k1",   "gp",   "sp",   "s8",   "ra"
+};
+
+static char * mips_reg_name[32] = {
+    "zero", "at",   "v0",   "v1",   "a0",   "a1",   "a2",   "a3",
+    "t0",   "t1",   "t2",   "t3",   "t4",   "t5",   "t6",   "t7",
+    "s0",   "s1",   "s2",   "s3",   "s4",   "s5",   "s6",   "s7",
+    "t8",   "t9",   "k0",   "k1",   "gp",   "sp",   "s8",   "ra"
+};
+
+static char ** reg_name =  &mips_reg_name[0];
+
+#endif /* __mips_n32 || __mips_n64 */
+
+static const char * const c0_opname[64] = {
+    "c0op00","tlbr",  "tlbwi", "c0op03","c0op04","c0op05","tlbwr", "c0op07",
+    "tlbp",  "c0op11","c0op12","c0op13","c0op14","c0op15","c0op16","c0op17",
+    "rfe",   "c0op21","c0op22","c0op23","c0op24","c0op25","c0op26","c0op27",
+    "eret",  "c0op31","c0op32","c0op33","c0op34","c0op35","c0op36","c0op37",
+    "c0op40","c0op41","c0op42","c0op43","c0op44","c0op45","c0op46","c0op47",
+    "c0op50","c0op51","c0op52","c0op53","c0op54","c0op55","c0op56","c0op57",
+    "c0op60","c0op61","c0op62","c0op63","c0op64","c0op65","c0op66","c0op67",
+    "c0op70","c0op71","c0op72","c0op73","c0op74","c0op75","c0op77","c0op77",
+};
+
+static const char * const c0_reg[32] = {
+    "index",    "random",   "tlblo0",  "tlblo1",
+    "context",  "pagemask", "wired",   "cp0r7",
+    "badvaddr", "count",    "tlbhi",   "compare",
+    "status",   "cause",    "epc",     "prid",
+    "config",   "lladdr",   "watchlo", "watchhi",
+    "xcontext", "cp0r21",   "cp0r22",  "debug",
+    "depc",     "perfcnt",  "ecc",     "cacheerr",
+    "taglo",    "taghi",    "errepc",  "desave"
+};
+
+static void print_addr(db_addr_t);
+db_addr_t mips_disassem(db_addr_t loc, char *di_buffer, int alt_dis_format);
+
+
+/*
+ * Disassemble instruction 'insn' nominally at 'loc'.
+ * 'loc' may in fact contain a breakpoint instruction.
+ */
+static db_addr_t
+db_disasm_insn(int insn, db_addr_t loc, bool altfmt)
+{
+    bool bdslot = false;
+    InstFmt i;
+
+    i.word = insn;
+
+    switch (i.JType.op) {
+    case OP_SPECIAL:
+        if (i.word == 0) {
+            db_printf("nop");
+            break;
+        }
+        if (i.word == 0x0080) {
+            db_printf("NIY");
+            break;
+        }
+        if (i.word == 0x00c0) {
+            db_printf("NOT IMPL");
+            break;
+        }
+        /* Special cases --------------------------------------------------
+         * "addu" is a "move" only in 32-bit mode.  What's the correct
+         * answer - never decode addu/daddu as "move"?
+         */
+        if ( (i.RType.func == OP_ADDU && i.RType.rt == 0)  ||
+             (i.RType.func == OP_OR   && i.RType.rt == 0) ) {
+            db_printf("move\t%s,%s",
+                reg_name[i.RType.rd],
+                reg_name[i.RType.rs]);
+            break;
+        }
+        // mips32r2, rotr & rotrv
+        if (i.RType.func == OP_SRL && (i.RType.rs & 1) == 1) {
+            db_printf("rotr\t%s,%s,%d", reg_name[i.RType.rd],
+                reg_name[i.RType.rt], i.RType.shamt);
+            break;
+        }
+        if (i.RType.func == OP_SRLV && (i.RType.shamt & 1) == 1) {
+            db_printf("rotrv\t%s,%s,%s", reg_name[i.RType.rd],
+                reg_name[i.RType.rt], reg_name[i.RType.rs]);
+            break;
+        }
+
+
+        db_printf("%s", spec_name[i.RType.func]);
+        switch (i.RType.func) {
+        case OP_SLL:
+        case OP_SRL:
+        case OP_SRA:
+        case OP_DSLL:
+
+        case OP_DSRL:
+        case OP_DSRA:
+        case OP_DSLL32:
+        case OP_DSRL32:
+        case OP_DSRA32:
+            db_printf("\t%s,%s,%d",
+                reg_name[i.RType.rd],
+                reg_name[i.RType.rt],
+                i.RType.shamt);
+            break;
+
+        case OP_SLLV:
+        case OP_SRLV:
+        case OP_SRAV:
+        case OP_DSLLV:
+        case OP_DSRLV:
+        case OP_DSRAV:
+            db_printf("\t%s,%s,%s",
+                reg_name[i.RType.rd],
+                reg_name[i.RType.rt],
+                reg_name[i.RType.rs]);
+            break;
+
+        case OP_MFHI:
+        case OP_MFLO:
+            db_printf("\t%s", reg_name[i.RType.rd]);
+            break;
+
+        case OP_JR:
+        case OP_JALR:
+            db_printf("\t%s", reg_name[i.RType.rs]);
+            bdslot = true;
+            break;
+        case OP_MTLO:
+        case OP_MTHI:
+            db_printf("\t%s", reg_name[i.RType.rs]);
+            break;
+
+        case OP_MULT:
+        case OP_MULTU:
+        case OP_DMULT:
+        case OP_DMULTU:
+        case OP_DIV:
+        case OP_DIVU:
+        case OP_DDIV:
+        case OP_DDIVU:
+            db_printf("\t%s,%s",
+                reg_name[i.RType.rs],
+                reg_name[i.RType.rt]);
+            break;
+
+
+        case OP_SYSCALL:
+        case OP_SYNC:
+            break;
+
+        case OP_BREAK:
+            db_printf("\t%d", (i.RType.rs << 5) | i.RType.rt);
+            break;
+
+        default:
+            db_printf("\t%s,%s,%s",
+                reg_name[i.RType.rd],
+                reg_name[i.RType.rs],
+                reg_name[i.RType.rt]);
+        }
+        break;
+
+    case OP_SPECIAL2:
+        if (i.RType.func == OP_MUL)
+            db_printf("%s\t%s,%s,%s",
+                spec2_name[i.RType.func & 0x3f],
+                    reg_name[i.RType.rd],
+                    reg_name[i.RType.rs],
+                    reg_name[i.RType.rt]);
+        else
+            db_printf("%s\t%s,%s",
+                spec2_name[i.RType.func & 0x3f],
+                    reg_name[i.RType.rs],
+                    reg_name[i.RType.rt]);
+
+        break;
+
+    case OP_SPECIAL3:
+        if (i.RType.func == OP_EXT)
+            db_printf("ext\t%s,%s,%d,%d",
+                    reg_name[i.RType.rt],
+                    reg_name[i.RType.rs],
+                    i.RType.rd+1,
+                    i.RType.shamt);
+        else if (i.RType.func == OP_INS)
+            db_printf("ins\t%s,%s,%d,%d",
+                    reg_name[i.RType.rt],
+                    reg_name[i.RType.rs],
+                    i.RType.rd+1,
+                    i.RType.shamt);
+        else if (i.RType.func == OP_BSHFL && i.RType.shamt == OP_WSBH)
+            db_printf("wsbh\t%s,%s",
+                reg_name[i.RType.rd],
+                reg_name[i.RType.rt]);
+        else if (i.RType.func == OP_BSHFL && i.RType.shamt == OP_SEB)
+            db_printf("seb\t%s,%s",
+                reg_name[i.RType.rd],
+                reg_name[i.RType.rt]);
+        else if (i.RType.func == OP_BSHFL && i.RType.shamt == OP_SEH)
+            db_printf("seh\t%s,%s",
+                reg_name[i.RType.rd],
+                reg_name[i.RType.rt]);
+        else
+            db_printf("Unknown");
+        break;
+
+    case OP_BCOND:
+        db_printf("%s\t%s,", bcond_name[i.IType.rt],
+            reg_name[i.IType.rs]);
+        goto pr_displ;
+
+    case OP_BLEZ:
+    case OP_BLEZL:
+    case OP_BGTZ:
+    case OP_BGTZL:
+        db_printf("%s\t%s,", op_name[i.IType.op],
+            reg_name[i.IType.rs]);
+        goto pr_displ;
+
+    case OP_BEQ:
+    case OP_BEQL:
+        if (i.IType.rs == 0 && i.IType.rt == 0) {
+            db_printf("b  \t");
+            goto pr_displ;
+        }
+        /* FALLTHROUGH */
+    case OP_BNE:
+    case OP_BNEL:
+        db_printf("%s\t%s,%s,", op_name[i.IType.op],
+            reg_name[i.IType.rs],
+            reg_name[i.IType.rt]);
+    pr_displ:
+        print_addr(loc + 4 + ((short)i.IType.imm << 2));
+        bdslot = true;
+        break;
+
+    case OP_COP0:
+        switch (i.RType.rs) {
+        case OP_BCx:
+        case OP_BCy:
+
+            db_printf("bc0%c\t",
+                "ft"[i.RType.rt & COPz_BC_TF_MASK]);
+            goto pr_displ;
+
+        case OP_MT:
+            db_printf("mtc0\t%s,%s",
+                reg_name[i.RType.rt],
+                c0_reg[i.RType.rd]);
+            break;
+
+        case OP_DMT:
+            db_printf("dmtc0\t%s,%s",
+                reg_name[i.RType.rt],
+                c0_reg[i.RType.rd]);
+            break;
+
+        case OP_MF:
+            db_printf("mfc0\t%s,%s",
+                reg_name[i.RType.rt],
+                c0_reg[i.RType.rd]);
+            break;
+
+        case OP_DMF:
+            db_printf("dmfc0\t%s,%s",
+                reg_name[i.RType.rt],
+                c0_reg[i.RType.rd]);
+            break;
+
+        default:
+            db_printf("%s", c0_opname[i.FRType.func]);
+        }
+        break;
+
+    case OP_COP1:
+        switch (i.RType.rs) {
+        case OP_BCx:
+        case OP_BCy:
+            db_printf("bc1%c\t",
+                "ft"[i.RType.rt & COPz_BC_TF_MASK]);
+            goto pr_displ;
+
+        case OP_MT:
+            db_printf("mtc1\t%s,f%d",
+                reg_name[i.RType.rt],
+                i.RType.rd);
+            break;
+
+        case OP_MF:
+            db_printf("mfc1\t%s,f%d",
+                reg_name[i.RType.rt],
+                i.RType.rd);
+            break;
+
+        case OP_CT:
+            db_printf("ctc1\t%s,f%d",
+                reg_name[i.RType.rt],
+                i.RType.rd);
+            break;
+
+        case OP_CF:
+            db_printf("cfc1\t%s,f%d",
+                reg_name[i.RType.rt],
+                i.RType.rd);
+            break;
+
+        default:
+            db_printf("%s.%s\tf%d,f%d,f%d",
+                cop1_name[i.FRType.func],
+                fmt_name[i.FRType.fmt],
+                i.FRType.fd, i.FRType.fs, i.FRType.ft);
+        }
+        break;
+
+    case OP_J:
+    case OP_JAL:
+        db_printf("%s\t", op_name[i.JType.op]);
+        print_addr((loc & 0xF0000000) | (i.JType.target << 2));
+        bdslot = true;
+        break;
+
+    case OP_LWC1:
+    case OP_SWC1:
+        db_printf("%s\tf%d,", op_name[i.IType.op],
+            i.IType.rt);
+        goto loadstore;
+
+    case OP_LB:
+    case OP_LH:
+    case OP_LW:
+    case OP_LD:
+    case OP_LBU:
+    case OP_LHU:
+    case OP_LWU:
+    case OP_SB:
+    case OP_SH:
+    case OP_SW:
+    case OP_SD:
+        db_printf("%s\t%s,", op_name[i.IType.op],
+            reg_name[i.IType.rt]);
+    loadstore:
+        db_printf("%d(%s)", (short)i.IType.imm,
+            reg_name[i.IType.rs]);
+        break;
+
+    case OP_ORI:
+    case OP_XORI:
+        if (i.IType.rs == 0) {
+            db_printf("li\t%s,0x%x",
+                reg_name[i.IType.rt],
+                i.IType.imm);
+            break;
+        }
+        /* FALLTHROUGH */
+    case OP_ANDI:
+        db_printf("%s\t%s,%s,0x%x", op_name[i.IType.op],
+            reg_name[i.IType.rt],
+            reg_name[i.IType.rs],
+            i.IType.imm);
+        break;
+
+    case OP_LUI:
+        db_printf("%s\t%s,0x%x", op_name[i.IType.op],
+            reg_name[i.IType.rt],
+            i.IType.imm);
+        break;
+
+    case OP_CACHE:
+        db_printf("%s\t0x%x,0x%x(%s)",
+            op_name[i.IType.op],
+            i.IType.rt,
+            i.IType.imm,
+            reg_name[i.IType.rs]);
+        break;
+
+    case OP_ADDI:
+    case OP_DADDI:
+    case OP_ADDIU:
+    case OP_DADDIU:
+        if (i.IType.rs == 0) {
+            db_printf("li\t%s,%d",
+                reg_name[i.IType.rt],
+                (short)i.IType.imm);
+            break;
+        }
+        /* FALLTHROUGH */
+    default:
+        db_printf("%s\t%s,%s,%d", op_name[i.IType.op],
+            reg_name[i.IType.rt],
+            reg_name[i.IType.rs],
+            (short)i.IType.imm);
+    }
+    // db_printf("\n");
+    // if (bdslot) {
+    //     db_printf("   bd: ");
+    //     mips_disassem(loc+4);
+    //     return (loc + 8);
+    // }
+    return (loc + 4);
+}
+
+static void
+print_addr(db_addr_t loc)
+{
+    db_printf("0x%08x", loc);
+}
+
+
+
+static void db_printf(const char* fmt, ...)
+{
+    int cnt;
+    va_list argp;
+    va_start(argp, fmt);
+    if (sprintf_buffer) {
+        cnt = vsnprintf(sprintf_buffer, sprintf_buf_len, fmt, argp);
+        sprintf_buffer += cnt;
+        sprintf_buf_len -= cnt;
+    } else {
+        vprintf(fmt, argp);
+    }
+}
+
+
+/*
+ * Disassemble instruction at 'loc'.
+ * Return address of start of next instruction.
+ * Since this function is used by 'examine' and by 'step'
+ * "next instruction" does NOT mean the next instruction to
+ * be executed but the 'linear' next instruction.
+ */
+db_addr_t
+mips_disassem(db_addr_t loc, char *di_buffer, int alt_dis_format)
+{
+    u_int32_t instr;
+
+    if (alt_dis_format) {   // use ARM register names for disassembly
+        reg_name = &alt_arm_reg_name[0];
+    }
+
+    sprintf_buffer = di_buffer;     // quick 'n' dirty printf() vs sprintf()
+    sprintf_buf_len = 39;           // should be passed in
+
+    instr =  *(u_int32_t *)loc;
+    return (db_disasm_insn(instr, loc, false));
+}
+
diff --git a/libpixelflinger/codeflinger/mips_disassem.h b/libpixelflinger/codeflinger/mips_disassem.h
new file mode 100644
index 0000000..2d5b7f5
--- /dev/null
+++ b/libpixelflinger/codeflinger/mips_disassem.h
@@ -0,0 +1,66 @@
+/*  $NetBSD: db_disasm.c,v 1.19 2007/02/28 04:21:53 thorpej Exp $   */
+
+/*-
+ * Copyright (c) 1991, 1993
+ *  The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Ralph Campbell.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *  from: @(#)kadb.c    8.1 (Berkeley) 6/10/93
+ */
+
+
+
+#ifndef ANDROID_MIPS_DISASSEM_H
+#define ANDROID_MIPS_DISASSEM_H
+
+#include <sys/types.h>
+
+#if __cplusplus
+extern "C" {
+#endif
+
+
+// could add an interface like this, but I have not
+// typedef struct {
+//  u_int   (*di_readword)(u_int);
+//  void    (*di_printaddr)(u_int);
+//  void    (*di_printf)(const char *, ...);
+// } disasm_interface_t;
+
+/* Prototypes for callable functions */
+
+// u_int disasm(const disasm_interface_t *, u_int, int);
+
+void mips_disassem(uint32_t *location, char *di_buffer, int alt_fmt);
+
+#if __cplusplus
+}
+#endif
+
+#endif /* !ANDROID_MIPS_DISASSEM_H */
diff --git a/libpixelflinger/codeflinger/mips_opcode.h b/libpixelflinger/codeflinger/mips_opcode.h
new file mode 100644
index 0000000..7ed5ef5
--- /dev/null
+++ b/libpixelflinger/codeflinger/mips_opcode.h
@@ -0,0 +1,316 @@
+/*  $NetBSD: mips_opcode.h,v 1.12 2005/12/11 12:18:09 christos Exp $    */
+
+/*-
+ * Copyright (c) 1992, 1993
+ *  The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Ralph Campbell.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *  @(#)mips_opcode.h   8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Define the instruction formats and opcode values for the
+ * MIPS instruction set.
+ */
+
+#include <endian.h>
+
+/*
+ * Define the instruction formats.
+ */
+typedef union {
+    unsigned word;
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+    struct {
+        unsigned imm: 16;
+        unsigned rt: 5;
+        unsigned rs: 5;
+        unsigned op: 6;
+    } IType;
+
+    struct {
+        unsigned target: 26;
+        unsigned op: 6;
+    } JType;
+
+    struct {
+        unsigned func: 6;
+        unsigned shamt: 5;
+        unsigned rd: 5;
+        unsigned rt: 5;
+        unsigned rs: 5;
+        unsigned op: 6;
+    } RType;
+
+    struct {
+        unsigned func: 6;
+        unsigned fd: 5;
+        unsigned fs: 5;
+        unsigned ft: 5;
+        unsigned fmt: 4;
+        unsigned : 1;       /* always '1' */
+        unsigned op: 6;     /* always '0x11' */
+    } FRType;
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+    struct {
+        unsigned op: 6;
+        unsigned rs: 5;
+        unsigned rt: 5;
+        unsigned imm: 16;
+    } IType;
+
+    struct {
+        unsigned op: 6;
+        unsigned target: 26;
+    } JType;
+
+    struct {
+        unsigned op: 6;
+        unsigned rs: 5;
+        unsigned rt: 5;
+        unsigned rd: 5;
+        unsigned shamt: 5;
+        unsigned func: 6;
+    } RType;
+
+    struct {
+        unsigned op: 6;     /* always '0x11' */
+        unsigned : 1;       /* always '1' */
+        unsigned fmt: 4;
+        unsigned ft: 5;
+        unsigned fs: 5;
+        unsigned fd: 5;
+        unsigned func: 6;
+    } FRType;
+#endif
+} InstFmt;
+
+/*
+ * Values for the 'op' field.
+ */
+#define OP_SPECIAL  000
+#define OP_BCOND    001
+#define OP_J        002
+#define OP_JAL      003
+#define OP_BEQ      004
+#define OP_BNE      005
+#define OP_BLEZ     006
+#define OP_BGTZ     007
+
+#define OP_ADDI     010
+#define OP_ADDIU    011
+#define OP_SLTI     012
+#define OP_SLTIU    013
+#define OP_ANDI     014
+#define OP_ORI      015
+#define OP_XORI     016
+#define OP_LUI      017
+
+#define OP_COP0     020
+#define OP_COP1     021
+#define OP_COP2     022
+#define OP_COP3     023
+#define OP_BEQL     024     /* MIPS-II, for r4000 port */
+#define OP_BNEL     025     /* MIPS-II, for r4000 port */
+#define OP_BLEZL    026     /* MIPS-II, for r4000 port */
+#define OP_BGTZL    027     /* MIPS-II, for r4000 port */
+
+#define OP_DADDI    030     /* MIPS-II, for r4000 port */
+#define OP_DADDIU   031     /* MIPS-II, for r4000 port */
+#define OP_LDL      032     /* MIPS-II, for r4000 port */
+#define OP_LDR      033     /* MIPS-II, for r4000 port */
+
+#define OP_SPECIAL2 034     /* QED opcodes */
+#define OP_SPECIAL3 037     /* mips32r2 opcodes */
+
+#define OP_LB       040
+#define OP_LH       041
+#define OP_LWL      042
+#define OP_LW       043
+#define OP_LBU      044
+#define OP_LHU      045
+#define OP_LWR      046
+#define OP_LHU      045
+#define OP_LWR      046
+#define OP_LWU      047     /* MIPS-II, for r4000 port */
+
+#define OP_SB       050
+#define OP_SH       051
+#define OP_SWL      052
+#define OP_SW       053
+#define OP_SDL      054     /* MIPS-II, for r4000 port */
+#define OP_SDR      055     /* MIPS-II, for r4000 port */
+#define OP_SWR      056
+#define OP_CACHE    057     /* MIPS-II, for r4000 port */
+
+#define OP_LL       060
+#define OP_LWC0     OP_LL   /* backwards source compatibility */
+#define OP_LWC1     061
+#define OP_LWC2     062
+#define OP_LWC3     063
+#define OP_LLD      064     /* MIPS-II, for r4000 port */
+#define OP_LDC1     065
+#define OP_LD       067     /* MIPS-II, for r4000 port */
+
+#define OP_SC       070
+#define OP_SWC0     OP_SC   /* backwards source compatibility */
+#define OP_SWC1     071
+#define OP_SWC2     072
+#define OP_SWC3     073
+#define OP_SCD      074     /* MIPS-II, for r4000 port */
+#define OP_SDC1     075
+#define OP_SD       077     /* MIPS-II, for r4000 port */
+
+/*
+ * Values for the 'func' field when 'op' == OP_SPECIAL.
+ */
+#define OP_SLL      000
+#define OP_SRL      002
+#define OP_SRA      003
+#define OP_SLLV     004
+#define OP_SRLV     006
+#define OP_SRAV     007
+
+#define OP_JR       010
+#define OP_JALR     011
+#define OP_SYSCALL  014
+#define OP_BREAK    015
+#define OP_SYNC     017     /* MIPS-II, for r4000 port */
+
+#define OP_MFHI     020
+#define OP_MTHI     021
+#define OP_MFLO     022
+#define OP_MTLO     023
+#define OP_DSLLV    024     /* MIPS-II, for r4000 port */
+#define OP_DSRLV    026     /* MIPS-II, for r4000 port */
+#define OP_DSRAV    027     /* MIPS-II, for r4000 port */
+
+#define OP_MULT     030
+#define OP_MULTU    031
+#define OP_DIV      032
+#define OP_DIVU     033
+#define OP_DMULT    034     /* MIPS-II, for r4000 port */
+#define OP_DMULTU   035     /* MIPS-II, for r4000 port */
+#define OP_DDIV     036     /* MIPS-II, for r4000 port */
+#define OP_DDIVU    037     /* MIPS-II, for r4000 port */
+
+#define OP_ADD      040
+#define OP_ADDU     041
+#define OP_SUB      042
+#define OP_SUBU     043
+#define OP_AND      044
+#define OP_OR       045
+#define OP_XOR      046
+#define OP_NOR      047
+
+#define OP_SLT      052
+#define OP_SLTU     053
+#define OP_DADD     054     /* MIPS-II, for r4000 port */
+#define OP_DADDU    055     /* MIPS-II, for r4000 port */
+#define OP_DSUB     056     /* MIPS-II, for r4000 port */
+#define OP_DSUBU    057     /* MIPS-II, for r4000 port */
+
+#define OP_TGE      060     /* MIPS-II, for r4000 port */
+#define OP_TGEU     061     /* MIPS-II, for r4000 port */
+#define OP_TLT      062     /* MIPS-II, for r4000 port */
+#define OP_TLTU     063     /* MIPS-II, for r4000 port */
+#define OP_TEQ      064     /* MIPS-II, for r4000 port */
+#define OP_TNE      066     /* MIPS-II, for r4000 port */
+
+#define OP_DSLL     070     /* MIPS-II, for r4000 port */
+#define OP_DSRL     072     /* MIPS-II, for r4000 port */
+#define OP_DSRA     073     /* MIPS-II, for r4000 port */
+#define OP_DSLL32   074     /* MIPS-II, for r4000 port */
+#define OP_DSRL32   076     /* MIPS-II, for r4000 port */
+#define OP_DSRA32   077     /* MIPS-II, for r4000 port */
+
+/*
+ * Values for the 'func' field when 'op' == OP_SPECIAL2.
+ */
+#define OP_MAD      000     /* QED */
+#define OP_MADU     001     /* QED */
+#define OP_MUL      002     /* QED */
+
+/*
+ * Values for the 'func' field when 'op' == OP_SPECIAL3.
+ */
+#define OP_EXT      000
+#define OP_INS      004
+#define OP_BSHFL    040
+
+/*
+ * Values for the 'shamt' field when OP_SPECIAL3 && func OP_BSHFL.
+ */
+#define OP_WSBH     002
+#define OP_SEB      020
+#define OP_SEH      030
+
+/*
+ * Values for the 'func' field when 'op' == OP_BCOND.
+ */
+#define OP_BLTZ     000
+#define OP_BGEZ     001
+#define OP_BLTZL    002     /* MIPS-II, for r4000 port */
+#define OP_BGEZL    003     /* MIPS-II, for r4000 port */
+
+#define OP_TGEI     010     /* MIPS-II, for r4000 port */
+#define OP_TGEIU    011     /* MIPS-II, for r4000 port */
+#define OP_TLTI     012     /* MIPS-II, for r4000 port */
+#define OP_TLTIU    013     /* MIPS-II, for r4000 port */
+#define OP_TEQI     014     /* MIPS-II, for r4000 port */
+#define OP_TNEI     016     /* MIPS-II, for r4000 port */
+
+#define OP_BLTZAL   020     /* MIPS-II, for r4000 port */
+#define OP_BGEZAL   021
+#define OP_BLTZALL  022
+#define OP_BGEZALL  023
+
+/*
+ * Values for the 'rs' field when 'op' == OP_COPz.
+ */
+#define OP_MF       000
+#define OP_DMF      001     /* MIPS-II, for r4000 port */
+#define OP_MT       004
+#define OP_DMT      005     /* MIPS-II, for r4000 port */
+#define OP_BCx      010
+#define OP_BCy      014
+#define OP_CF       002
+#define OP_CT       006
+
+/*
+ * Values for the 'rt' field when 'op' == OP_COPz.
+ */
+#define COPz_BC_TF_MASK 0x01
+#define COPz_BC_TRUE    0x01
+#define COPz_BC_FALSE   0x00
+#define COPz_BCL_TF_MASK    0x02        /* MIPS-II, for r4000 port */
+#define COPz_BCL_TRUE   0x02        /* MIPS-II, for r4000 port */
+#define COPz_BCL_FALSE  0x00        /* MIPS-II, for r4000 port */
diff --git a/libpixelflinger/codeflinger/texturing.cpp b/libpixelflinger/codeflinger/texturing.cpp
index 8464fbd..4d5a50f 100644
--- a/libpixelflinger/codeflinger/texturing.cpp
+++ b/libpixelflinger/codeflinger/texturing.cpp
@@ -464,6 +464,9 @@ void GGLAssembler::build_textures(  fragment_parts_t& parts,
                 CONTEXT_LOAD(t.reg, generated_vars.texture[i].spill[1]);
             }
 
+            if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
+                return;
+
             comment("compute repeat/clamp");
             int u       = scratches.obtain();
             int v       = scratches.obtain();
@@ -472,6 +475,9 @@ void GGLAssembler::build_textures(  fragment_parts_t& parts,
             int U = 0;
             int V = 0;
 
+            if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
+                return;
+
             CONTEXT_LOAD(width,  generated_vars.texture[i].width);
             CONTEXT_LOAD(height, generated_vars.texture[i].height);
 
@@ -510,6 +516,9 @@ void GGLAssembler::build_textures(  fragment_parts_t& parts,
                 U = scratches.obtain();
                 V = scratches.obtain();
 
+                if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
+                    return;
+
                 // sample the texel center
                 SUB(AL, 0, u, u, imm(1<<(FRAC_BITS-1)));
                 SUB(AL, 0, v, v, imm(1<<(FRAC_BITS-1)));
@@ -593,6 +602,10 @@ void GGLAssembler::build_textures(  fragment_parts_t& parts,
             comment("iterate s,t");
             int dsdx = scratches.obtain();
             int dtdx = scratches.obtain();
+
+            if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
+                return;
+
             CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx);
             CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx);
             ADD(AL, 0, s.reg, s.reg, dsdx);
@@ -611,6 +624,10 @@ void GGLAssembler::build_textures(  fragment_parts_t& parts,
             texel.setTo(regs.obtain(), &tmu.format);
             txPtr.setTo(texel.reg, tmu.bits);
             int stride = scratches.obtain();
+
+            if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
+                return;
+
             CONTEXT_LOAD(stride,    generated_vars.texture[i].stride);
             CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].data);
             SMLABB(AL, u, v, stride, u);    // u+v*stride 
@@ -1078,6 +1095,7 @@ void GGLAssembler::build_texture_environment(
 
                 Scratch scratches(registerFile());
                 pixel_t texel(parts.texel[i]);
+
                 if (multiTexture && 
                     tmu.swrap == GGL_NEEDS_WRAP_11 &&
                     tmu.twrap == GGL_NEEDS_WRAP_11)
diff --git a/libpixelflinger/scanline.cpp b/libpixelflinger/scanline.cpp
index d1f3d96..a5d28b2 100644
--- a/libpixelflinger/scanline.cpp
+++ b/libpixelflinger/scanline.cpp
@@ -32,6 +32,9 @@
 #include "codeflinger/CodeCache.h"
 #include "codeflinger/GGLAssembler.h"
 #include "codeflinger/ARMAssembler.h"
+#if defined(__mips__)
+#include "codeflinger/MIPSAssembler.h"
+#endif
 //#include "codeflinger/ARMAssemblerOptimizer.h"
 
 // ----------------------------------------------------------------------------
@@ -49,7 +52,7 @@
 #   define ANDROID_CODEGEN      ANDROID_CODEGEN_GENERATED
 #endif
 
-#if defined(__arm__)
+#if defined(__arm__) || defined(__mips__)
 #   define ANDROID_ARM_CODEGEN  1
 #else
 #   define ANDROID_ARM_CODEGEN  0
@@ -63,7 +66,11 @@
  */
 #define DEBUG_NEEDS  0
 
+#ifdef __mips__
+#define ASSEMBLY_SCRATCH_SIZE   4096
+#else
 #define ASSEMBLY_SCRATCH_SIZE   2048
+#endif
 
 // ----------------------------------------------------------------------------
 namespace android {
@@ -266,7 +273,12 @@ static  const needs_filter_t fill16noblend = {
 // ----------------------------------------------------------------------------
 
 #if ANDROID_ARM_CODEGEN
+
+#if defined(__mips__)
+static CodeCache gCodeCache(32 * 1024);
+#else
 static CodeCache gCodeCache(12 * 1024);
+#endif
 
 class ScanlineAssembly : public Assembly {
     AssemblyKey<needs_t> mKey;
@@ -375,9 +387,14 @@ static void pick_scanline(context_t* c)
         sp<ScanlineAssembly> a = new ScanlineAssembly(c->state.needs, 
                 ASSEMBLY_SCRATCH_SIZE);
         // initialize our assembler
+#if defined(__arm__)
         GGLAssembler assembler( new ARMAssembler(a) );
         //GGLAssembler assembler(
         //        new ARMAssemblerOptimizer(new ARMAssembler(a)) );
+#endif
+#if defined(__mips__)
+        GGLAssembler assembler( new ArmToMipsAssembler(a) );
+#endif
         // generate the scanline code for the given needs
         int err = assembler.scanline(c->state.needs, c);
         if (ggl_likely(!err)) {
diff --git a/libpixelflinger/tests/codegen/codegen.cpp b/libpixelflinger/tests/codegen/codegen.cpp
index 94e2481..3d5a040 100644
--- a/libpixelflinger/tests/codegen/codegen.cpp
+++ b/libpixelflinger/tests/codegen/codegen.cpp
@@ -9,14 +9,19 @@
 #include "codeflinger/CodeCache.h"
 #include "codeflinger/GGLAssembler.h"
 #include "codeflinger/ARMAssembler.h"
+#include "codeflinger/MIPSAssembler.h"
 
-#if defined(__arm__)
+#if defined(__arm__) || defined(__mips__)
 #   define ANDROID_ARM_CODEGEN  1
 #else
 #   define ANDROID_ARM_CODEGEN  0
 #endif
 
+#if defined (__mips__)
+#define ASSEMBLY_SCRATCH_SIZE   4096
+#else
 #define ASSEMBLY_SCRATCH_SIZE   2048
+#endif
 
 using namespace android;
 
@@ -39,14 +44,22 @@ static void ggl_test_codegen(uint32_t n, uint32_t p, uint32_t t0, uint32_t t1)
     needs.t[0] = t0;
     needs.t[1] = t1;
     sp<ScanlineAssembly> a(new ScanlineAssembly(needs, ASSEMBLY_SCRATCH_SIZE));
+
+#if defined(__arm__)
     GGLAssembler assembler( new ARMAssembler(a) );
+#endif
+
+#if defined(__mips__)
+    GGLAssembler assembler( new ArmToMipsAssembler(a) );
+#endif
+
     int err = assembler.scanline(needs, (context_t*)c);
     if (err != 0) {
         printf("error %08x (%s)\n", err, strerror(-err));
     }
     gglUninit(c);
 #else
-    printf("This test runs only on ARM\n");
+    printf("This test runs only on ARM or MIPS\n");
 #endif
 }
 
-- 
cgit v1.1