author    Ashok Bhat <ashok.bhat@arm.com>        2013-02-21 10:27:40 +0000
committer David Butcher <david.butcher@arm.com>  2013-12-12 17:30:13 +0000
commit    bfc6dc4ca8d5a1ee498e80a18350b91519a5412c
tree      9df6c0b02646b3635ac04a0aa7623be572997ab6
parent    d10afb17486f87a56baf97a893b4e4533eb5b851
Pixelflinger: Support for handling 64-bit addresses in GGL Assembler
GGLAssembler assumes addresses are 32-bit and uses 32-bit ARM instructions to load, store and manipulate them. To support 64-bit architectures, the following changes have been made:

1. ARMAssemblerInterface has been extended with four new operations: ADDR_LDR, ADDR_STR, ADDR_SUB and ADDR_ADD. The base class implements these virtual functions using the 32-bit equivalents, so existing 32-bit assembler backends such as ARMAssembler and MIPSAssembler do not have to map the new functions onto their existing routines themselves, while 64-bit architectures such as AArch64 can override them in their own backend implementations.

2. The GGLAssembler code (spread over GGLAssembler.cpp, GGLAssembler.h and texturing.cpp) has been changed to use the new operations wherever addresses are loaded, stored or manipulated.

Change-Id: I3d7eace4691e3e47cef737d97ac67ce6ef4fb18d
Signed-off-by: Ashok Bhat <ashok.bhat@arm.com>
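The idea behind change 1 can be summarised with a small sketch (illustrative only; the AArch64 class and its names are invented here, and the real interface carries many more operations):

    // Illustrative sketch only -- not part of the patch. The interface gains
    // virtual ADDR_* operations whose default implementations fall back to the
    // existing 32-bit instructions; a hypothetical 64-bit backend overrides
    // just those operations.
    #include <stdint.h>

    enum { opSUB = 2, opADD = 4 };   // ARM data-processing opcodes

    struct AssemblerSketch {
        virtual ~AssemblerSketch() {}
        // Existing 32-bit primitives (no-op stubs here; the real ones emit code).
        virtual void LDR(int cc, int Rd, int Rn, uint32_t offset) {}
        virtual void STR(int cc, int Rd, int Rn, uint32_t offset) {}
        virtual void dataProcessing(int op, int cc, int s,
                                    int Rd, int Rn, uint32_t Op2) {}

        // New address operations: the defaults reuse the 32-bit instructions,
        // so existing backends (ARMAssembler, MIPSAssembler) need no changes.
        virtual void ADDR_LDR(int cc, int Rd, int Rn, uint32_t offset) {
            LDR(cc, Rd, Rn, offset);
        }
        virtual void ADDR_STR(int cc, int Rd, int Rn, uint32_t offset) {
            STR(cc, Rd, Rn, offset);
        }
        virtual void ADDR_ADD(int cc, int s, int Rd, int Rn, uint32_t Op2) {
            dataProcessing(opADD, cc, s, Rd, Rn, Op2);
        }
        virtual void ADDR_SUB(int cc, int s, int Rd, int Rn, uint32_t Op2) {
            dataProcessing(opSUB, cc, s, Rd, Rn, Op2);
        }
    };

    // Hypothetical AArch64 backend: only the address operations are overridden
    // to work on 64-bit registers; everything else keeps the inherited defaults.
    struct AArch64AssemblerSketch : AssemblerSketch {
        void ADDR_LDR(int cc, int Rd, int Rn, uint32_t offset) override {
            // would emit a 64-bit load of the address instead of a 32-bit LDR
        }
        void ADDR_ADD(int cc, int s, int Rd, int Rn, uint32_t Op2) override {
            // would emit a 64-bit ADD so address arithmetic does not truncate
        }
    };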
-rw-r--r--  libpixelflinger/codeflinger/ARMAssemblerInterface.cpp  | 23
-rw-r--r--  libpixelflinger/codeflinger/ARMAssemblerInterface.h    | 10
-rw-r--r--  libpixelflinger/codeflinger/ARMAssemblerProxy.cpp      | 13
-rw-r--r--  libpixelflinger/codeflinger/ARMAssemblerProxy.h        |  9
-rw-r--r--  libpixelflinger/codeflinger/GGLAssembler.cpp           | 34
-rw-r--r--  libpixelflinger/codeflinger/GGLAssembler.h             |  6
-rw-r--r--  libpixelflinger/codeflinger/texturing.cpp              |  4
7 files changed, 80 insertions(+), 19 deletions(-)
diff --git a/libpixelflinger/codeflinger/ARMAssemblerInterface.cpp b/libpixelflinger/codeflinger/ARMAssemblerInterface.cpp
index 073633c..5041999 100644
--- a/libpixelflinger/codeflinger/ARMAssemblerInterface.cpp
+++ b/libpixelflinger/codeflinger/ARMAssemblerInterface.cpp
@@ -61,6 +61,29 @@ uint32_t ARMAssemblerInterface::__immed8_pre(int32_t immed8, int W)
((W&1)<<21) | (((offset&0xF0)<<4)|(offset&0xF));
}
+// The following four functions are required for address manipulation
+// These are virtual functions, which can be overridden by architectures
+// that need special handling of address values (e.g. 64-bit arch)
+void ARMAssemblerInterface::ADDR_LDR(int cc, int Rd,
+ int Rn, uint32_t offset)
+{
+ LDR(cc, Rd, Rn, offset);
+}
+void ARMAssemblerInterface::ADDR_STR(int cc, int Rd,
+ int Rn, uint32_t offset)
+{
+ STR(cc, Rd, Rn, offset);
+}
+void ARMAssemblerInterface::ADDR_ADD(int cc, int s,
+ int Rd, int Rn, uint32_t Op2)
+{
+ dataProcessing(opADD, cc, s, Rd, Rn, Op2);
+}
+void ARMAssemblerInterface::ADDR_SUB(int cc, int s,
+ int Rd, int Rn, uint32_t Op2)
+{
+ dataProcessing(opSUB, cc, s, Rd, Rn, Op2);
+}
}; // namespace android
diff --git a/libpixelflinger/codeflinger/ARMAssemblerInterface.h b/libpixelflinger/codeflinger/ARMAssemblerInterface.h
index 9991980..e5a9a26 100644
--- a/libpixelflinger/codeflinger/ARMAssemblerInterface.h
+++ b/libpixelflinger/codeflinger/ARMAssemblerInterface.h
@@ -331,6 +331,16 @@ public:
inline void
SMLAWT(int cc, int Rd, int Rm, int Rs, int Rn) {
SMLAW(cc, yT, Rd, Rm, Rs, Rn); }
+
+ // Address loading/storing/manipulation
+ virtual void ADDR_LDR(int cc, int Rd,
+ int Rn, uint32_t offset = __immed12_pre(0));
+ virtual void ADDR_STR (int cc, int Rd,
+ int Rn, uint32_t offset = __immed12_pre(0));
+ virtual void ADDR_ADD(int cc, int s, int Rd,
+ int Rn, uint32_t Op2);
+ virtual void ADDR_SUB(int cc, int s, int Rd,
+ int Rn, uint32_t Op2);
};
}; // namespace android
diff --git a/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp b/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp
index 1c7bc76..816de48 100644
--- a/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp
+++ b/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp
@@ -294,5 +294,18 @@ void ARMAssemblerProxy::UBFX(int cc, int Rd, int Rn, int lsb, int width) {
mTarget->UBFX(cc, Rd, Rn, lsb, width);
}
+void ARMAssemblerProxy::ADDR_LDR(int cc, int Rd, int Rn, uint32_t offset) {
+ mTarget->ADDR_LDR(cc, Rd, Rn, offset);
+}
+void ARMAssemblerProxy::ADDR_STR(int cc, int Rd, int Rn, uint32_t offset) {
+ mTarget->ADDR_STR(cc, Rd, Rn, offset);
+}
+void ARMAssemblerProxy::ADDR_ADD(int cc, int s, int Rd, int Rn, uint32_t Op2){
+ mTarget->ADDR_ADD(cc, s, Rd, Rn, Op2);
+}
+void ARMAssemblerProxy::ADDR_SUB(int cc, int s, int Rd, int Rn, uint32_t Op2){
+ mTarget->ADDR_SUB(cc, s, Rd, Rn, Op2);
+}
+
}; // namespace android
diff --git a/libpixelflinger/codeflinger/ARMAssemblerProxy.h b/libpixelflinger/codeflinger/ARMAssemblerProxy.h
index 70cb464..b852794 100644
--- a/libpixelflinger/codeflinger/ARMAssemblerProxy.h
+++ b/libpixelflinger/codeflinger/ARMAssemblerProxy.h
@@ -146,6 +146,15 @@ public:
virtual void UXTB16(int cc, int Rd, int Rm, int rotate);
virtual void UBFX(int cc, int Rd, int Rn, int lsb, int width);
+ virtual void ADDR_LDR(int cc, int Rd,
+ int Rn, uint32_t offset = __immed12_pre(0));
+ virtual void ADDR_STR (int cc, int Rd,
+ int Rn, uint32_t offset = __immed12_pre(0));
+ virtual void ADDR_ADD(int cc, int s, int Rd,
+ int Rn, uint32_t Op2);
+ virtual void ADDR_SUB(int cc, int s, int Rd,
+ int Rn, uint32_t Op2);
+
private:
ARMAssemblerInterface* mTarget;
};
diff --git a/libpixelflinger/codeflinger/GGLAssembler.cpp b/libpixelflinger/codeflinger/GGLAssembler.cpp
index 0cb042e..725495f 100644
--- a/libpixelflinger/codeflinger/GGLAssembler.cpp
+++ b/libpixelflinger/codeflinger/GGLAssembler.cpp
@@ -263,7 +263,7 @@ int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c)
const int mask = GGL_DITHER_SIZE-1;
parts.dither = reg_t(regs.obtain());
AND(AL, 0, parts.dither.reg, parts.count.reg, imm(mask));
- ADD(AL, 0, parts.dither.reg, parts.dither.reg, ctxtReg);
+ ADDR_ADD(AL, 0, parts.dither.reg, ctxtReg, parts.dither.reg);
LDRB(AL, parts.dither.reg, parts.dither.reg,
immed12_pre(GGL_OFFSETOF(ditherMatrix)));
}
@@ -336,7 +336,7 @@ int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c)
build_iterate_z(parts);
build_iterate_f(parts);
if (!mAllMasked) {
- ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3));
+ ADDR_ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3));
}
SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
B(PL, "fragment_loop");
@@ -392,7 +392,7 @@ void GGLAssembler::build_scanline_prolog(
int Rs = scratches.obtain();
parts.cbPtr.setTo(obtainReg(), cb_bits);
CONTEXT_LOAD(Rs, state.buffers.color.stride);
- CONTEXT_LOAD(parts.cbPtr.reg, state.buffers.color.data);
+ CONTEXT_ADDR_LOAD(parts.cbPtr.reg, state.buffers.color.data);
SMLABB(AL, Rs, Ry, Rs, Rx); // Rs = Rx + Ry*Rs
base_offset(parts.cbPtr, parts.cbPtr, Rs);
scratches.recycle(Rs);
@@ -428,11 +428,11 @@ void GGLAssembler::build_scanline_prolog(
int Rs = dzdx;
int zbase = scratches.obtain();
CONTEXT_LOAD(Rs, state.buffers.depth.stride);
- CONTEXT_LOAD(zbase, state.buffers.depth.data);
+ CONTEXT_ADDR_LOAD(zbase, state.buffers.depth.data);
SMLABB(AL, Rs, Ry, Rs, Rx);
ADD(AL, 0, Rs, Rs, reg_imm(parts.count.reg, LSR, 16));
- ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1));
- CONTEXT_STORE(zbase, generated_vars.zbase);
+ ADDR_ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1));
+ CONTEXT_ADDR_STORE(zbase, generated_vars.zbase);
}
// init texture coordinates
@@ -445,8 +445,8 @@ void GGLAssembler::build_scanline_prolog(
// init coverage factor application (anti-aliasing)
if (mAA) {
parts.covPtr.setTo(obtainReg(), 16);
- CONTEXT_LOAD(parts.covPtr.reg, state.buffers.coverage);
- ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1));
+ CONTEXT_ADDR_LOAD(parts.covPtr.reg, state.buffers.coverage);
+ ADDR_ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1));
}
}
@@ -765,8 +765,8 @@ void GGLAssembler::build_depth_test(
int depth = scratches.obtain();
int z = parts.z.reg;
- CONTEXT_LOAD(zbase, generated_vars.zbase); // stall
- SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15));
+ CONTEXT_ADDR_LOAD(zbase, generated_vars.zbase); // stall
+ ADDR_SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15));
// above does zbase = zbase + ((count >> 16) << 1)
if (mask & Z_TEST) {
@@ -990,22 +990,22 @@ void GGLAssembler::base_offset(
{
switch (b.size) {
case 32:
- ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2));
+ ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2));
break;
case 24:
if (d.reg == b.reg) {
- ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
- ADD(AL, 0, d.reg, d.reg, o.reg);
+ ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
+ ADDR_ADD(AL, 0, d.reg, d.reg, o.reg);
} else {
- ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1));
- ADD(AL, 0, d.reg, d.reg, b.reg);
+ ADDR_ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1));
+ ADDR_ADD(AL, 0, d.reg, d.reg, b.reg);
}
break;
case 16:
- ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
+ ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
break;
case 8:
- ADD(AL, 0, d.reg, b.reg, o.reg);
+ ADDR_ADD(AL, 0, d.reg, b.reg, o.reg);
break;
}
}
diff --git a/libpixelflinger/codeflinger/GGLAssembler.h b/libpixelflinger/codeflinger/GGLAssembler.h
index d993684..9db20df 100644
--- a/libpixelflinger/codeflinger/GGLAssembler.h
+++ b/libpixelflinger/codeflinger/GGLAssembler.h
@@ -31,6 +31,12 @@ namespace android {
// ----------------------------------------------------------------------------
+#define CONTEXT_ADDR_LOAD(REG, FIELD) \
+ ADDR_LDR(AL, REG, mBuilderContext.Rctx, immed12_pre(GGL_OFFSETOF(FIELD)))
+
+#define CONTEXT_ADDR_STORE(REG, FIELD) \
+ ADDR_STR(AL, REG, mBuilderContext.Rctx, immed12_pre(GGL_OFFSETOF(FIELD)))
+
#define CONTEXT_LOAD(REG, FIELD) \
LDR(AL, REG, mBuilderContext.Rctx, immed12_pre(GGL_OFFSETOF(FIELD)))
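For illustration only (not part of the patch): at a call site such as CONTEXT_ADDR_LOAD(parts.cbPtr.reg, state.buffers.color.data) in GGLAssembler.cpp above, the new macro expands to

    ADDR_LDR(AL, parts.cbPtr.reg, mBuilderContext.Rctx,
             immed12_pre(GGL_OFFSETOF(state.buffers.color.data)));

which the base class lowers to an ordinary 32-bit LDR, while a 64-bit backend can override ADDR_LDR to emit a full-width load of the pointer.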
diff --git a/libpixelflinger/codeflinger/texturing.cpp b/libpixelflinger/codeflinger/texturing.cpp
index 9e3d217..b2cfbb3 100644
--- a/libpixelflinger/codeflinger/texturing.cpp
+++ b/libpixelflinger/codeflinger/texturing.cpp
@@ -356,7 +356,7 @@ void GGLAssembler::init_textures(
// merge base & offset
CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].stride);
SMLABB(AL, Rx, Ry, txPtr.reg, Rx); // x+y*stride
- CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].data);
+ CONTEXT_ADDR_LOAD(txPtr.reg, generated_vars.texture[i].data);
base_offset(txPtr, txPtr, Rx);
} else {
Scratch scratches(registerFile());
@@ -629,7 +629,7 @@ void GGLAssembler::build_textures( fragment_parts_t& parts,
return;
CONTEXT_LOAD(stride, generated_vars.texture[i].stride);
- CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].data);
+ CONTEXT_ADDR_LOAD(txPtr.reg, generated_vars.texture[i].data);
SMLABB(AL, u, v, stride, u); // u+v*stride
base_offset(txPtr, txPtr, u);