25 files changed, 4244 insertions, 12 deletions
diff --git a/include/private/pixelflinger/ggl_fixed.h b/include/private/pixelflinger/ggl_fixed.h index 217ec04..d0493f3 100644 --- a/include/private/pixelflinger/ggl_fixed.h +++ b/include/private/pixelflinger/ggl_fixed.h @@ -457,6 +457,69 @@ inline int64_t gglMulii(int32_t x, int32_t y) { return u.res; } +#elif defined(__aarch64__) + +// inline AArch64 implementations + +inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) CONST; +inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) +{ + GGLfixed result; + GGLfixed round; + + asm("mov %x[round], #1 \n" + "lsl %x[round], %x[round], %x[shift] \n" + "lsr %x[round], %x[round], #1 \n" + "smaddl %x[result], %w[x], %w[y],%x[round] \n" + "lsr %x[result], %x[result], %x[shift] \n" + : [round]"=&r"(round), [result]"=&r"(result) \ + : [x]"r"(x), [y]"r"(y), [shift] "r"(shift) \ + : + ); + return result; +} +inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST; +inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) +{ + GGLfixed result; + asm("smull %x[result], %w[x], %w[y] \n" + "lsr %x[result], %x[result], %x[shift] \n" + "add %w[result], %w[result], %w[a] \n" + : [result]"=&r"(result) \ + : [x]"r"(x), [y]"r"(y), [a]"r"(a), [shift] "r"(shift) \ + : + ); + return result; +} + +inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST; +inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) +{ + + GGLfixed result; + int rshift; + + asm("smull %x[result], %w[x], %w[y] \n" + "lsr %x[result], %x[result], %x[shift] \n" + "sub %w[result], %w[result], %w[a] \n" + : [result]"=&r"(result) \ + : [x]"r"(x), [y]"r"(y), [a]"r"(a), [shift] "r"(shift) \ + : + ); + return result; +} +inline int64_t gglMulii(int32_t x, int32_t y) CONST; +inline int64_t gglMulii(int32_t x, int32_t y) +{ + int64_t res; + asm("smull %x0, %w1, %w2 \n" + : "=r"(res) + : "%r"(x), "r"(y) + : + ); + return res; +} + #else // ---------------------------------------------------------------------- inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST; @@ -498,7 +561,7 @@ inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c) { inline int32_t gglClz(int32_t x) CONST; inline int32_t gglClz(int32_t x) { -#if (defined(__arm__) && !defined(__thumb__)) || defined(__mips__) +#if (defined(__arm__) && !defined(__thumb__)) || defined(__mips__) || defined(__aarch64__) return __builtin_clz(x); #else if (!x) return 32; @@ -554,6 +617,8 @@ inline GGLfixed gglClampx(GGLfixed c) // clamps to zero in one instruction, but gcc won't generate it and // replace it by a cmp + movlt (it's quite amazing actually). 
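The rounded fixed-point multiply that the AArch64 gglMulx above implements reduces to the following portable C (a minimal sketch; it mirrors the generic fallback path, and the round term is what the mov/lsl/lsr prologue materializes):

```c
// Sketch of gglMulx semantics: multiply two fixed-point values at 64-bit
// precision, add half a ULP for rounding, then shift back down.
inline GGLfixed gglMulx_ref(GGLfixed x, GGLfixed y, int shift)
{
    int64_t round = (int64_t)1 << shift >> 1;   // 1 << (shift-1); 0 when shift == 0
    return (GGLfixed)(((int64_t)x * y + round) >> shift);
}
```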
asm("bic %0, %1, %1, asr #31\n" : "=r"(c) : "r"(c)); +#elif defined(__aarch64__) + asm("bic %w0, %w1, %w1, asr #31\n" : "=r"(c) : "r"(c)); #else c &= ~(c>>31); #endif diff --git a/libpixelflinger/Android.mk b/libpixelflinger/Android.mk index 7f20e5b..0f502c0 100644 --- a/libpixelflinger/Android.mk +++ b/libpixelflinger/Android.mk @@ -9,13 +9,11 @@ include $(CLEAR_VARS) PIXELFLINGER_SRC_FILES:= \ codeflinger/ARMAssemblerInterface.cpp \ codeflinger/ARMAssemblerProxy.cpp \ - codeflinger/ARMAssembler.cpp \ codeflinger/CodeCache.cpp \ codeflinger/GGLAssembler.cpp \ codeflinger/load_store.cpp \ codeflinger/blending.cpp \ codeflinger/texturing.cpp \ - codeflinger/disassem.c \ codeflinger/tinyutils/SharedBuffer.cpp \ codeflinger/tinyutils/VectorImpl.cpp \ fixed.cpp.arm \ @@ -39,6 +37,8 @@ endif endif ifeq ($(TARGET_ARCH),arm) +PIXELFLINGER_SRC_FILES += codeflinger/ARMAssembler.cpp +PIXELFLINGER_SRC_FILES += codeflinger/disassem.c # special optimization flags for pixelflinger PIXELFLINGER_CFLAGS += -fstrict-aliasing -fomit-frame-pointer endif @@ -52,6 +52,14 @@ endif LOCAL_SHARED_LIBRARIES := libcutils liblog +ifeq ($(TARGET_ARCH),aarch64) +PIXELFLINGER_SRC_FILES += arch-aarch64/t32cb16blend.S +PIXELFLINGER_SRC_FILES += arch-aarch64/col32cb16blend.S +PIXELFLINGER_SRC_FILES += codeflinger/Aarch64Assembler.cpp +PIXELFLINGER_SRC_FILES += codeflinger/Aarch64Disassembler.cpp +PIXELFLINGER_CFLAGS += -fstrict-aliasing -fomit-frame-pointer +endif + # # Shared library # diff --git a/libpixelflinger/arch-aarch64/col32cb16blend.S b/libpixelflinger/arch-aarch64/col32cb16blend.S new file mode 100644 index 0000000..aa969a4 --- /dev/null +++ b/libpixelflinger/arch-aarch64/col32cb16blend.S @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + .text + .align + + .global scanline_col32cb16blend_aarch64 + +// +// This function alpha blends a fixed color into a destination scanline, using +// the formula: +// +// d = s + (((a + (a >> 7)) * d) >> 8) +// +// where d is the destination pixel, +// s is the source color, +// a is the alpha channel of the source color. 
+// + +// x0 = destination buffer pointer +// w1 = color value +// w2 = count + + +scanline_col32cb16blend_aarch64: + + lsr w5, w1, #24 // shift down alpha + mov w9, #0xff // create mask + add w5, w5, w5, lsr #7 // add in top bit + mov w4, #256 // create #0x100 + sub w5, w4, w5 // invert alpha + and w10, w1, #0xff // extract red + and w12, w9, w1, lsr #8 // extract green + and w4, w9, w1, lsr #16 // extract blue + lsl w10, w10, #5 // prescale red + lsl w12, w12, #6 // prescale green + lsl w4, w4, #5 // prescale blue + lsr w9, w9, #2 // create dest green mask + +1: + ldrh w8, [x0] // load dest pixel + subs w2, w2, #1 // decrement loop counter + lsr w6, w8, #11 // extract dest red + and w7, w9, w8, lsr #5 // extract dest green + and w8, w8, #0x1f // extract dest blue + + madd w6, w6, w5, w10 // dest red * alpha + src red + madd w7, w7, w5, w12 // dest green * alpha + src green + madd w8, w8, w5, w4 // dest blue * alpha + src blue + + lsr w6, w6, #8 // shift down red + lsr w7, w7, #8 // shift down green + lsl w6, w6, #11 // shift red into 565 + orr w6, w6, w7, lsl #5 // shift green into 565 + orr w6, w6, w8, lsr #8 // shift blue into 565 + + strh w6, [x0], #2 // store pixel to dest, update ptr + b.ne 1b // if count != 0, loop + + ret + + + diff --git a/libpixelflinger/arch-aarch64/t32cb16blend.S b/libpixelflinger/arch-aarch64/t32cb16blend.S new file mode 100644 index 0000000..b62ed36 --- /dev/null +++ b/libpixelflinger/arch-aarch64/t32cb16blend.S @@ -0,0 +1,213 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + .text + .align + + .global scanline_t32cb16blend_aarch64 + +/* + * .macro pixel + * + * This macro alpha blends RGB565 original pixel located in either + * top or bottom 16 bits of DREG register with SRC 32 bit pixel value + * and writes the result to FB register + * + * \DREG is a 32-bit register containing *two* original destination RGB565 + * pixels, with the even one in the low-16 bits, and the odd one in the + * high 16 bits. + * + * \SRC is a 32-bit 0xAABBGGRR pixel value, with pre-multiplied colors. + * + * \FB is a target register that will contain the blended pixel values. 
+ * + * \ODD is either 0 or 1 and indicates if we're blending the lower or + * upper 16-bit pixels in DREG into FB + * + * + * clobbered: w6, w7, w16, w17, w18 + * + */ + +.macro pixel, DREG, SRC, FB, ODD + + // SRC = 0xAABBGGRR + lsr w7, \SRC, #24 // sA + add w7, w7, w7, lsr #7 // sA + (sA >> 7) + mov w6, #0x100 + sub w7, w6, w7 // sA = 0x100 - (sA+(sA>>7)) + +1: + +.if \ODD //Blending odd pixel present in top 16 bits of DREG register + + // red + lsr w16, \DREG, #(16 + 11) + mul w16, w7, w16 + lsr w6, \SRC, #3 + and w6, w6, #0x1F + add w16, w6, w16, lsr #8 + cmp w16, #0x1F + orr w17, \FB, #(0x1F<<(16 + 11)) + orr w18, \FB, w16, lsl #(16 + 11) + csel \FB, w17, w18, hi + // green + and w6, \DREG, #(0x3F<<(16 + 5)) + lsr w17,w6,#(16+5) + mul w6, w7, w17 + lsr w16, \SRC, #(8+2) + and w16, w16, #0x3F + add w6, w16, w6, lsr #8 + cmp w6, #0x3F + orr w17, \FB, #(0x3F<<(16 + 5)) + orr w18, \FB, w6, lsl #(16 + 5) + csel \FB, w17, w18, hi + // blue + and w16, \DREG, #(0x1F << 16) + lsr w17,w16,#16 + mul w16, w7, w17 + lsr w6, \SRC, #(8+8+3) + and w6, w6, #0x1F + add w16, w6, w16, lsr #8 + cmp w16, #0x1F + orr w17, \FB, #(0x1F << 16) + orr w18, \FB, w16, lsl #16 + csel \FB, w17, w18, hi + +.else //Blending even pixel present in bottom 16 bits of DREG register + + // red + lsr w16, \DREG, #11 + and w16, w16, #0x1F + mul w16, w7, w16 + lsr w6, \SRC, #3 + and w6, w6, #0x1F + add w16, w6, w16, lsr #8 + cmp w16, #0x1F + mov w17, #(0x1F<<11) + lsl w18, w16, #11 + csel \FB, w17, w18, hi + + + // green + and w6, \DREG, #(0x3F<<5) + mul w6, w7, w6 + lsr w16, \SRC, #(8+2) + and w16, w16, #0x3F + add w6, w16, w6, lsr #(5+8) + cmp w6, #0x3F + orr w17, \FB, #(0x3F<<5) + orr w18, \FB, w6, lsl #5 + csel \FB, w17, w18, hi + + // blue + and w16, \DREG, #0x1F + mul w16, w7, w16 + lsr w6, \SRC, #(8+8+3) + and w6, w6, #0x1F + add w16, w6, w16, lsr #8 + cmp w16, #0x1F + orr w17, \FB, #0x1F + orr w18, \FB, w16 + csel \FB, w17, w18, hi + +.endif // End of blending even pixel + +.endm // End of pixel macro + + +// x0: dst ptr +// x1: src ptr +// w2: count +// w3: d +// w4: s0 +// w5: s1 +// w6: pixel +// w7: pixel +// w8: free +// w9: free +// w10: free +// w11: free +// w12: scratch +// w14: pixel + +scanline_t32cb16blend_aarch64: + + // align DST to 32 bits + tst x0, #0x3 + b.eq aligned + subs w2, w2, #1 + b.lo return + +last: + ldr w4, [x1], #4 + ldrh w3, [x0] + pixel w3, w4, w12, 0 + strh w12, [x0], #2 + +aligned: + subs w2, w2, #2 + b.lo 9f + + // The main loop is unrolled twice and processes 4 pixels +8: + ldp w4,w5, [x1], #8 + add x0, x0, #4 + // it's all zero, skip this pixel + orr w3, w4, w5 + cbz w3, 7f + + // load the destination + ldr w3, [x0, #-4] + // stream the destination + pixel w3, w4, w12, 0 + pixel w3, w5, w12, 1 + str w12, [x0, #-4] + + // 2nd iteration of the loop, don't stream anything + subs w2, w2, #2 + csel w4, w5, w4, lt + blt 9f + ldp w4,w5, [x1], #8 + add x0, x0, #4 + orr w3, w4, w5 + cbz w3, 7f + ldr w3, [x0, #-4] + pixel w3, w4, w12, 0 + pixel w3, w5, w12, 1 + str w12, [x0, #-4] + +7: subs w2, w2, #2 + bhs 8b + mov w4, w5 + +9: adds w2, w2, #1 + b.lo return + b last + +return: + ret diff --git a/libpixelflinger/codeflinger/ARMAssemblerInterface.h b/libpixelflinger/codeflinger/ARMAssemblerInterface.h index e5a9a26..6e0d7c6 100644 --- a/libpixelflinger/codeflinger/ARMAssemblerInterface.h +++ b/libpixelflinger/codeflinger/ARMAssemblerInterface.h @@ -63,7 +63,7 @@ public: }; enum { - CODEGEN_ARCH_ARM = 1, CODEGEN_ARCH_MIPS + CODEGEN_ARCH_ARM = 1, CODEGEN_ARCH_MIPS, CODEGEN_ARCH_AARCH64 }; // 
----------------------------------------------------------------------- diff --git a/libpixelflinger/codeflinger/Aarch64Assembler.cpp b/libpixelflinger/codeflinger/Aarch64Assembler.cpp new file mode 100644 index 0000000..0e4f7df --- /dev/null +++ b/libpixelflinger/codeflinger/Aarch64Assembler.cpp @@ -0,0 +1,1242 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#define LOG_TAG "ArmToAarch64Assembler" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <cutils/log.h> +#include <cutils/properties.h> +#include <private/pixelflinger/ggl_context.h> + +#include "codeflinger/Aarch64Assembler.h" +#include "codeflinger/CodeCache.h" +#include "codeflinger/Aarch64Disassembler.h" + + +/* +** -------------------------------------------- +** Support for Aarch64 in GGLAssembler JIT +** -------------------------------------------- +** +** Approach +** - GGLAssembler and associated files are largely un-changed. +** - A translator class maps ArmAssemblerInterface calls to +** generate AArch64 instructions. +** +** ---------------------- +** ArmToAarch64Assembler +** ---------------------- +** +** - Subclassed from ArmAssemblerInterface +** +** - Translates each ArmAssemblerInterface call to generate +** one or more Aarch64 instructions as necessary. +** +** - Does not implement ArmAssemblerInterface portions unused by GGLAssembler +** It calls NOT_IMPLEMENTED() for such cases, which in turn logs +** a fatal message. +** +** - Uses A64_.. series of functions to generate instruction machine code +** for Aarch64 instructions. These functions also log the instruction +** to LOG, if AARCH64_ASM_DEBUG define is set to 1 +** +** - Dumps machine code and eqvt assembly if "debug.pf.disasm" option is set +** It uses aarch64_disassemble to perform disassembly +** +** - Uses register 13 (SP in ARM), 15 (PC in ARM), 16, 17 for storing +** intermediate results. GGLAssembler does not use SP and PC as these +** registers are marked as reserved. The temporary registers are not +** saved/restored on stack as these are caller-saved registers in Aarch64 +** +** - Uses CSEL instruction to support conditional execution. 
The result is +** stored in a temporary register and then copied to the target register +** if the condition is true. +** +** - In the case of conditional data transfer instructions, conditional +** branch is used to skip over instruction, if the condition is false +** +** - Wherever possible, immediate values are transferred to temporary +** register prior to processing. This simplifies overall implementation +** as instructions requiring immediate values are converted to +** move immediate instructions followed by register-register instruction. +** +** -------------------------------------------- +** ArmToAarch64Assembler unit test bench +** -------------------------------------------- +** +** - Tests ArmToAarch64Assembler interface for all the possible +** ways in which GGLAssembler uses ArmAssemblerInterface interface. +** +** - Uses test jacket (written in assembly) to set the registers, +** condition flags prior to calling generated instruction. It also +** copies registers and flags at the end of execution. Caller then +** checks if generated code performed correct operation based on +** output registers and flags. +** +** - Broadly contains three type of tests, (i) data operation tests +** (ii) data transfer tests and (iii) LDM/STM tests. +** +** ---------------------- +** Aarch64 disassembler +** ---------------------- +** - This disassembler disassembles only those machine codes which can be +** generated by ArmToAarch64Assembler. It has a unit testbench which +** tests all the instructions supported by the disassembler. +** +** ------------------------------------------------------------------ +** ARMAssembler/ARMAssemblerInterface/ARMAssemblerProxy changes +** ------------------------------------------------------------------ +** +** - In existing code, addresses were being handled as 32 bit values at +** certain places. +** +** - Added a new set of functions for address load/store/manipulation. +** These are ADDR_LDR, ADDR_STR, ADDR_ADD, ADDR_SUB and they map to +** default 32 bit implementations in ARMAssemblerInterface. +** +** - ArmToAarch64Assembler maps these functions to appropriate 64 bit +** functions. +** +** ---------------------- +** GGLAssembler changes +** ---------------------- +** - Since ArmToAarch64Assembler can generate 4 Aarch64 instructions for +** each call in worst case, the memory required is set to 4 times +** ARM memory +** +** - Address load/store/manipulation were changed to use new functions +** added in the ARMAssemblerInterface. +** +*/ + + +#define NOT_IMPLEMENTED() LOG_FATAL("Arm instruction %s not yet implemented\n", __func__) + +#define AARCH64_ASM_DEBUG 0 + +#if AARCH64_ASM_DEBUG + #define LOG_INSTR(...) ALOGD("\t" __VA_ARGS__) + #define LOG_LABEL(...) ALOGD(__VA_ARGS__) +#else + #define LOG_INSTR(...) ((void)0) + #define LOG_LABEL(...) 
((void)0) +#endif + +namespace android { + +static const char* shift_codes[] = +{ + "LSL", "LSR", "ASR", "ROR" +}; +static const char *cc_codes[] = +{ + "EQ", "NE", "CS", "CC", "MI", + "PL", "VS", "VC", "HI", "LS", + "GE", "LT", "GT", "LE", "AL", "NV" +}; + +ArmToAarch64Assembler::ArmToAarch64Assembler(const sp<Assembly>& assembly) + : ARMAssemblerInterface(), + mAssembly(assembly) +{ + mBase = mPC = (uint32_t *)assembly->base(); + mDuration = ggl_system_time(); + mZeroReg = 13; + mTmpReg1 = 15; + mTmpReg2 = 16; + mTmpReg3 = 17; +} + +ArmToAarch64Assembler::ArmToAarch64Assembler(void *base) + : ARMAssemblerInterface(), mAssembly(NULL) +{ + mBase = mPC = (uint32_t *)base; + mDuration = ggl_system_time(); + // Regs 13, 15, 16, 17 are used as temporary registers + mZeroReg = 13; + mTmpReg1 = 15; + mTmpReg2 = 16; + mTmpReg3 = 17; +} + +ArmToAarch64Assembler::~ArmToAarch64Assembler() +{ +} + +uint32_t* ArmToAarch64Assembler::pc() const +{ + return mPC; +} + +uint32_t* ArmToAarch64Assembler::base() const +{ + return mBase; +} + +void ArmToAarch64Assembler::reset() +{ + if(mAssembly == NULL) + mPC = mBase; + else + mBase = mPC = (uint32_t *)mAssembly->base(); + mBranchTargets.clear(); + mLabels.clear(); + mLabelsInverseMapping.clear(); + mComments.clear(); +#if AARCH64_ASM_DEBUG + ALOGI("RESET\n"); +#endif +} + +int ArmToAarch64Assembler::getCodegenArch() +{ + return CODEGEN_ARCH_AARCH64; +} + +// ---------------------------------------------------------------------------- + +void ArmToAarch64Assembler::disassemble(const char* name) +{ + if(name) + { + printf("%s:\n", name); + } + size_t count = pc()-base(); + uint32_t* i = base(); + while (count--) + { + ssize_t label = mLabelsInverseMapping.indexOfKey(i); + if (label >= 0) + { + printf("%s:\n", mLabelsInverseMapping.valueAt(label)); + } + ssize_t comment = mComments.indexOfKey(i); + if (comment >= 0) + { + printf("; %s\n", mComments.valueAt(comment)); + } + printf("%p: %08x ", i, uint32_t(i[0])); + { + char instr[256]; + ::aarch64_disassemble(*i, instr); + printf("%s\n", instr); + } + i++; + } +} + +void ArmToAarch64Assembler::comment(const char* string) +{ + mComments.add(mPC, string); + LOG_INSTR("//%s\n", string); +} + +void ArmToAarch64Assembler::label(const char* theLabel) +{ + mLabels.add(theLabel, mPC); + mLabelsInverseMapping.add(mPC, theLabel); + LOG_LABEL("%s:\n", theLabel); +} + +void ArmToAarch64Assembler::B(int cc, const char* label) +{ + mBranchTargets.add(branch_target_t(label, mPC)); + LOG_INSTR("B%s %s\n", cc_codes[cc], label ); + *mPC++ = (0x54 << 24) | cc; +} + +void ArmToAarch64Assembler::BL(int cc, const char* label) +{ + NOT_IMPLEMENTED(); //Not Required +} + +// ---------------------------------------------------------------------------- +//Prolog/Epilog & Generate... 
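The B() emitter above writes a bare B.cond opcode and records the site in mBranchTargets; the branch offset is resolved later. In outline, the two halves look like this (a sketch of what generate(), in the next hunk, does at fixup time):

```c
#include <stdint.h>

// Emit a placeholder B.<cc> with a zero offset...
static inline uint32_t a64_b_cond_placeholder(uint32_t cc) {
    return (0x54u << 24) | cc;
}
// ...then patch the signed 19-bit word offset into bits [23:5] at fixup time.
static inline void a64_patch_b_cond(uint32_t* site, uint32_t* target) {
    int32_t offset = (int32_t)(target - site);      // offset in 32-bit words
    *site |= ((uint32_t)offset & 0x7ffff) << 5;     // imm19 field
}
```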
+// ---------------------------------------------------------------------------- + +void ArmToAarch64Assembler::prolog() +{ + // write prolog code + mPrologPC = mPC; + *mPC++ = A64_MOVZ_X(mZeroReg,0,0); +} + +void ArmToAarch64Assembler::epilog(uint32_t touched) +{ + // write epilog code + static const int XLR = 30; + *mPC++ = A64_RET(XLR); +} + +int ArmToAarch64Assembler::generate(const char* name) +{ + // fixup all the branches + size_t count = mBranchTargets.size(); + while (count--) + { + const branch_target_t& bt = mBranchTargets[count]; + uint32_t* target_pc = mLabels.valueFor(bt.label); + LOG_ALWAYS_FATAL_IF(!target_pc, + "error resolving branch targets, target_pc is null"); + int32_t offset = int32_t(target_pc - bt.pc); + *bt.pc |= (offset & 0x7FFFF) << 5; + } + + if(mAssembly != NULL) + mAssembly->resize( int(pc()-base())*4 ); + + // the instruction cache is flushed by CodeCache + const int64_t duration = ggl_system_time() - mDuration; + const char * const format = "generated %s (%d ins) at [%p:%p] in %ld ns\n"; + ALOGI(format, name, int(pc()-base()), base(), pc(), duration); + + + char value[PROPERTY_VALUE_MAX]; + property_get("debug.pf.disasm", value, "0"); + if (atoi(value) != 0) + { + printf(format, name, int(pc()-base()), base(), pc(), duration); + disassemble(name); + } + return NO_ERROR; +} + +uint32_t* ArmToAarch64Assembler::pcForLabel(const char* label) +{ + return mLabels.valueFor(label); +} + +// ---------------------------------------------------------------------------- +// Data Processing... +// ---------------------------------------------------------------------------- +void ArmToAarch64Assembler::dataProcessingCommon(int opcode, + int s, int Rd, int Rn, uint32_t Op2) +{ + if(opcode != opSUB && s == 1) + { + NOT_IMPLEMENTED(); //Not required + return; + } + + if(opcode != opSUB && opcode != opADD && opcode != opAND && + opcode != opORR && opcode != opMVN) + { + NOT_IMPLEMENTED(); //Not required + return; + } + + if(Op2 == OPERAND_REG_IMM && mAddrMode.reg_imm_shift > 31) + { + NOT_IMPLEMENTED(); + return; + } + + //Store immediate in temporary register and convert + //immediate operation into register operation + if(Op2 == OPERAND_IMM) + { + int imm = mAddrMode.immediate; + *mPC++ = A64_MOVZ_W(mTmpReg2, imm & 0x0000FFFF, 0); + *mPC++ = A64_MOVK_W(mTmpReg2, (imm >> 16) & 0x0000FFFF, 16); + Op2 = mTmpReg2; + } + + + { + uint32_t shift; + uint32_t amount; + uint32_t Rm; + + if(Op2 == OPERAND_REG_IMM) + { + shift = mAddrMode.reg_imm_type; + amount = mAddrMode.reg_imm_shift; + Rm = mAddrMode.reg_imm_Rm; + } + else if(Op2 < OPERAND_REG) + { + shift = 0; + amount = 0; + Rm = Op2; + } + else + { + NOT_IMPLEMENTED(); //Not required + return; + } + + switch(opcode) + { + case opADD: *mPC++ = A64_ADD_W(Rd, Rn, Rm, shift, amount); break; + case opAND: *mPC++ = A64_AND_W(Rd, Rn, Rm, shift, amount); break; + case opORR: *mPC++ = A64_ORR_W(Rd, Rn, Rm, shift, amount); break; + case opMVN: *mPC++ = A64_ORN_W(Rd, Rn, Rm, shift, amount); break; + case opSUB: *mPC++ = A64_SUB_W(Rd, Rn, Rm, shift, amount, s);break; + }; + + } +} + +void ArmToAarch64Assembler::dataProcessing(int opcode, int cc, + int s, int Rd, int Rn, uint32_t Op2) +{ + uint32_t Wd; + + if(cc != AL) + Wd = mTmpReg1; + else + Wd = Rd; + + if(opcode == opADD || opcode == opAND || opcode == opORR ||opcode == opSUB) + { + dataProcessingCommon(opcode, s, Wd, Rn, Op2); + } + else if(opcode == opCMP) + { + dataProcessingCommon(opSUB, 1, mTmpReg3, Rn, Op2); + } + else if(opcode == opRSB) + { + dataProcessingCommon(opSUB, s, Wd, 
Rn, Op2); + dataProcessingCommon(opSUB, s, Wd, mZeroReg, Wd); + } + else if(opcode == opMOV) + { + dataProcessingCommon(opORR, 0, Wd, mZeroReg, Op2); + if(s == 1) + { + dataProcessingCommon(opSUB, 1, mTmpReg3, Wd, mZeroReg); + } + } + else if(opcode == opMVN) + { + dataProcessingCommon(opMVN, s, Wd, mZeroReg, Op2); + } + else if(opcode == opBIC) + { + dataProcessingCommon(opMVN, s, mTmpReg3, mZeroReg, Op2); + dataProcessingCommon(opAND, s, Wd, Rn, mTmpReg3); + } + else + { + NOT_IMPLEMENTED(); + return; + } + + if(cc != AL) + { + *mPC++ = A64_CSEL_W(Rd, mTmpReg1, Rd, cc); + } +} +// ---------------------------------------------------------------------------- +// Address Processing... +// ---------------------------------------------------------------------------- + +void ArmToAarch64Assembler::ADDR_ADD(int cc, + int s, int Rd, int Rn, uint32_t Op2) +{ + if(cc != AL){ NOT_IMPLEMENTED(); return;} //Not required + if(s != 0) { NOT_IMPLEMENTED(); return;} //Not required + + + if(Op2 == OPERAND_REG_IMM && mAddrMode.reg_imm_type == LSL) + { + int Rm = mAddrMode.reg_imm_Rm; + int amount = mAddrMode.reg_imm_shift; + *mPC++ = A64_ADD_X_Wm_SXTW(Rd, Rn, Rm, amount); + } + else if(Op2 < OPERAND_REG) + { + int Rm = Op2; + int amount = 0; + *mPC++ = A64_ADD_X_Wm_SXTW(Rd, Rn, Rm, amount); + } + else if(Op2 == OPERAND_IMM) + { + int imm = mAddrMode.immediate; + *mPC++ = A64_MOVZ_W(mTmpReg1, imm & 0x0000FFFF, 0); + *mPC++ = A64_MOVK_W(mTmpReg1, (imm >> 16) & 0x0000FFFF, 16); + + int Rm = mTmpReg1; + int amount = 0; + *mPC++ = A64_ADD_X_Wm_SXTW(Rd, Rn, Rm, amount); + } + else + { + NOT_IMPLEMENTED(); //Not required + } +} + +void ArmToAarch64Assembler::ADDR_SUB(int cc, + int s, int Rd, int Rn, uint32_t Op2) +{ + if(cc != AL){ NOT_IMPLEMENTED(); return;} //Not required + if(s != 0) { NOT_IMPLEMENTED(); return;} //Not required + + if(Op2 == OPERAND_REG_IMM && mAddrMode.reg_imm_type == LSR) + { + *mPC++ = A64_ADD_W(mTmpReg1, mZeroReg, mAddrMode.reg_imm_Rm, + LSR, mAddrMode.reg_imm_shift); + *mPC++ = A64_SUB_X_Wm_SXTW(Rd, Rn, mTmpReg1, 0); + } + else + { + NOT_IMPLEMENTED(); //Not required + } +} + +// ---------------------------------------------------------------------------- +// multiply... +// ---------------------------------------------------------------------------- +void ArmToAarch64Assembler::MLA(int cc, int s,int Rd, int Rm, int Rs, int Rn) +{ + if(cc != AL){ NOT_IMPLEMENTED(); return;} //Not required + + *mPC++ = A64_MADD_W(Rd, Rm, Rs, Rn); + if(s == 1) + dataProcessingCommon(opSUB, 1, mTmpReg1, Rd, mZeroReg); +} +void ArmToAarch64Assembler::MUL(int cc, int s, int Rd, int Rm, int Rs) +{ + if(cc != AL){ NOT_IMPLEMENTED(); return;} //Not required + if(s != 0) { NOT_IMPLEMENTED(); return;} //Not required + *mPC++ = A64_MADD_W(Rd, Rm, Rs, mZeroReg); +} +void ArmToAarch64Assembler::UMULL(int cc, int s, + int RdLo, int RdHi, int Rm, int Rs) +{ + NOT_IMPLEMENTED(); //Not required +} +void ArmToAarch64Assembler::UMUAL(int cc, int s, + int RdLo, int RdHi, int Rm, int Rs) +{ + NOT_IMPLEMENTED(); //Not required +} +void ArmToAarch64Assembler::SMULL(int cc, int s, + int RdLo, int RdHi, int Rm, int Rs) +{ + NOT_IMPLEMENTED(); //Not required +} +void ArmToAarch64Assembler::SMUAL(int cc, int s, + int RdLo, int RdHi, int Rm, int Rs) +{ + NOT_IMPLEMENTED(); //Not required +} + +// ---------------------------------------------------------------------------- +// branches relative to PC... 
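Two of the translation patterns used above, spelled out as generated code: immediates are first materialized into a scratch register with MOVZ/MOVK, and ARM conditional execution becomes an unconditional ALU op into the scratch register followed by CSEL (illustrative output, using the scratch registers named in the constructor):

```
// ARM input:  ADDNE r1, r2, #0x12345678
// Generated:  MOVZ w16, #0x5678            // low half of immediate (mTmpReg2)
//             MOVK w16, #0x1234, LSL #16   // high half
//             ADD  w15, w2, w16            // result into scratch w15 (mTmpReg1)
//             CSEL w1, w15, w1, NE         // commit to w1 only if NE holds
```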
+// ---------------------------------------------------------------------------- +void ArmToAarch64Assembler::B(int cc, uint32_t* pc){ + NOT_IMPLEMENTED(); //Not required +} + +void ArmToAarch64Assembler::BL(int cc, uint32_t* pc){ + NOT_IMPLEMENTED(); //Not required +} + +void ArmToAarch64Assembler::BX(int cc, int Rn){ + NOT_IMPLEMENTED(); //Not required +} + +// ---------------------------------------------------------------------------- +// data transfer... +// ---------------------------------------------------------------------------- +enum dataTransferOp +{ + opLDR,opLDRB,opLDRH,opSTR,opSTRB,opSTRH +}; + +void ArmToAarch64Assembler::dataTransfer(int op, int cc, + int Rd, int Rn, uint32_t op_type, uint32_t size) +{ + const int XSP = 31; + if(Rn == SP) + Rn = XSP; + + if(op_type == OPERAND_IMM) + { + int addrReg; + int imm = mAddrMode.immediate; + if(imm >= 0 && imm < (1<<12)) + *mPC++ = A64_ADD_IMM_X(mTmpReg1, mZeroReg, imm, 0); + else if(imm < 0 && -imm < (1<<12)) + *mPC++ = A64_SUB_IMM_X(mTmpReg1, mZeroReg, -imm, 0); + else + { + NOT_IMPLEMENTED(); + return; + } + + addrReg = Rn; + if(mAddrMode.preindex == true || mAddrMode.postindex == true) + { + *mPC++ = A64_ADD_X(mTmpReg2, addrReg, mTmpReg1); + if(mAddrMode.preindex == true) + addrReg = mTmpReg2; + } + + if(cc != AL) + *mPC++ = A64_B_COND(cc^1, 8); + + *mPC++ = A64_LDRSTR_Wm_SXTW_0(op, size, Rd, addrReg, mZeroReg); + + if(mAddrMode.writeback == true) + *mPC++ = A64_CSEL_X(Rn, mTmpReg2, Rn, cc); + } + else if(op_type == OPERAND_REG_OFFSET) + { + if(cc != AL) + *mPC++ = A64_B_COND(cc^1, 8); + *mPC++ = A64_LDRSTR_Wm_SXTW_0(op, size, Rd, Rn, mAddrMode.reg_offset); + + } + else if(op_type > OPERAND_UNSUPPORTED) + { + if(cc != AL) + *mPC++ = A64_B_COND(cc^1, 8); + *mPC++ = A64_LDRSTR_Wm_SXTW_0(op, size, Rd, Rn, mZeroReg); + } + else + { + NOT_IMPLEMENTED(); // Not required + } + return; + +} +void ArmToAarch64Assembler::ADDR_LDR(int cc, int Rd, int Rn, uint32_t op_type) +{ + return dataTransfer(opLDR, cc, Rd, Rn, op_type, 64); +} +void ArmToAarch64Assembler::ADDR_STR(int cc, int Rd, int Rn, uint32_t op_type) +{ + return dataTransfer(opSTR, cc, Rd, Rn, op_type, 64); +} +void ArmToAarch64Assembler::LDR(int cc, int Rd, int Rn, uint32_t op_type) +{ + return dataTransfer(opLDR, cc, Rd, Rn, op_type); +} +void ArmToAarch64Assembler::LDRB(int cc, int Rd, int Rn, uint32_t op_type) +{ + return dataTransfer(opLDRB, cc, Rd, Rn, op_type); +} +void ArmToAarch64Assembler::STR(int cc, int Rd, int Rn, uint32_t op_type) +{ + return dataTransfer(opSTR, cc, Rd, Rn, op_type); +} + +void ArmToAarch64Assembler::STRB(int cc, int Rd, int Rn, uint32_t op_type) +{ + return dataTransfer(opSTRB, cc, Rd, Rn, op_type); +} + +void ArmToAarch64Assembler::LDRH(int cc, int Rd, int Rn, uint32_t op_type) +{ + return dataTransfer(opLDRH, cc, Rd, Rn, op_type); +} +void ArmToAarch64Assembler::LDRSB(int cc, int Rd, int Rn, uint32_t offset) +{ + NOT_IMPLEMENTED(); //Not required +} +void ArmToAarch64Assembler::LDRSH(int cc, int Rd, int Rn, uint32_t offset) +{ + NOT_IMPLEMENTED(); //Not required +} + +void ArmToAarch64Assembler::STRH(int cc, int Rd, int Rn, uint32_t op_type) +{ + return dataTransfer(opSTRH, cc, Rd, Rn, op_type); +} + +// ---------------------------------------------------------------------------- +// block data transfer... 
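Conditional loads and stores cannot use CSEL, since the memory access itself must be suppressed, so dataTransfer() above branches over the access on the inverted condition instead; cc^1 flips the ARM condition's low bit (EQ and NE, CS and CC, and so on are paired). For example:

```
// ARM input:  LDRNE r1, [r2, r3]
// Generated:  B.EQ  .+8                      // inverted condition skips the load
//             LDR   w1, [x2, w3, SXTW #0]    // A64_LDRSTR_Wm_SXTW_0
```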
+// ---------------------------------------------------------------------------- +void ArmToAarch64Assembler::LDM(int cc, int dir, + int Rn, int W, uint32_t reg_list) +{ + const int XSP = 31; + if(cc != AL || dir != IA || W == 0 || Rn != SP) + { + NOT_IMPLEMENTED(); + return; + } + + for(int i = 0; i < 32; ++i) + { + if((reg_list & (1 << i))) + { + int reg = i; + int size = 16; + *mPC++ = A64_LDR_IMM_PostIndex(reg, XSP, size); + } + } +} + +void ArmToAarch64Assembler::STM(int cc, int dir, + int Rn, int W, uint32_t reg_list) +{ + const int XSP = 31; + if(cc != AL || dir != DB || W == 0 || Rn != SP) + { + NOT_IMPLEMENTED(); + return; + } + + for(int i = 31; i >= 0; --i) + { + if((reg_list & (1 << i))) + { + int size = -16; + int reg = i; + *mPC++ = A64_STR_IMM_PreIndex(reg, XSP, size); + } + } +} + +// ---------------------------------------------------------------------------- +// special... +// ---------------------------------------------------------------------------- +void ArmToAarch64Assembler::SWP(int cc, int Rn, int Rd, int Rm) +{ + NOT_IMPLEMENTED(); //Not required +} +void ArmToAarch64Assembler::SWPB(int cc, int Rn, int Rd, int Rm) +{ + NOT_IMPLEMENTED(); //Not required +} +void ArmToAarch64Assembler::SWI(int cc, uint32_t comment) +{ + NOT_IMPLEMENTED(); //Not required +} + +// ---------------------------------------------------------------------------- +// DSP instructions... +// ---------------------------------------------------------------------------- +void ArmToAarch64Assembler::PLD(int Rn, uint32_t offset) { + NOT_IMPLEMENTED(); //Not required +} + +void ArmToAarch64Assembler::CLZ(int cc, int Rd, int Rm) +{ + NOT_IMPLEMENTED(); //Not required +} + +void ArmToAarch64Assembler::QADD(int cc, int Rd, int Rm, int Rn) +{ + NOT_IMPLEMENTED(); //Not required +} + +void ArmToAarch64Assembler::QDADD(int cc, int Rd, int Rm, int Rn) +{ + NOT_IMPLEMENTED(); //Not required +} + +void ArmToAarch64Assembler::QSUB(int cc, int Rd, int Rm, int Rn) +{ + NOT_IMPLEMENTED(); //Not required +} + +void ArmToAarch64Assembler::QDSUB(int cc, int Rd, int Rm, int Rn) +{ + NOT_IMPLEMENTED(); //Not required +} + +// ---------------------------------------------------------------------------- +// 16 x 16 multiplication +// ---------------------------------------------------------------------------- +void ArmToAarch64Assembler::SMUL(int cc, int xy, + int Rd, int Rm, int Rs) +{ + if(cc != AL){ NOT_IMPLEMENTED(); return;} //Not required + + if (xy & xyTB) + *mPC++ = A64_SBFM_W(mTmpReg1, Rm, 16, 31); + else + *mPC++ = A64_SBFM_W(mTmpReg1, Rm, 0, 15); + + if (xy & xyBT) + *mPC++ = A64_SBFM_W(mTmpReg2, Rs, 16, 31); + else + *mPC++ = A64_SBFM_W(mTmpReg2, Rs, 0, 15); + + *mPC++ = A64_MADD_W(Rd,mTmpReg1,mTmpReg2, mZeroReg); +} +// ---------------------------------------------------------------------------- +// 32 x 16 multiplication +// ---------------------------------------------------------------------------- +void ArmToAarch64Assembler::SMULW(int cc, int y, int Rd, int Rm, int Rs) +{ + if(cc != AL){ NOT_IMPLEMENTED(); return;} //Not required + + if (y & yT) + *mPC++ = A64_SBFM_W(mTmpReg1, Rs, 16, 31); + else + *mPC++ = A64_SBFM_W(mTmpReg1, Rs, 0, 15); + + *mPC++ = A64_SBFM_W(mTmpReg2, Rm, 0, 31); + *mPC++ = A64_SMADDL(mTmpReg3,mTmpReg1,mTmpReg2, mZeroReg); + *mPC++ = A64_UBFM_X(Rd,mTmpReg3, 16, 47); +} +// ---------------------------------------------------------------------------- +// 16 x 16 multiplication and accumulate +// ---------------------------------------------------------------------------- +void 
ArmToAarch64Assembler::SMLA(int cc, int xy, int Rd, int Rm, int Rs, int Rn) +{ + if(cc != AL){ NOT_IMPLEMENTED(); return;} //Not required + if(xy != xyBB) { NOT_IMPLEMENTED(); return;} //Not required + + *mPC++ = A64_SBFM_W(mTmpReg1, Rm, 0, 15); + *mPC++ = A64_SBFM_W(mTmpReg2, Rs, 0, 15); + *mPC++ = A64_MADD_W(Rd, mTmpReg1, mTmpReg2, Rn); +} + +void ArmToAarch64Assembler::SMLAL(int cc, int xy, + int RdHi, int RdLo, int Rs, int Rm) +{ + NOT_IMPLEMENTED(); //Not required + return; +} + +void ArmToAarch64Assembler::SMLAW(int cc, int y, + int Rd, int Rm, int Rs, int Rn) +{ + NOT_IMPLEMENTED(); //Not required + return; +} + +// ---------------------------------------------------------------------------- +// Byte/half word extract and extend +// ---------------------------------------------------------------------------- +void ArmToAarch64Assembler::UXTB16(int cc, int Rd, int Rm, int rotate) +{ + if(cc != AL){ NOT_IMPLEMENTED(); return;} //Not required + + *mPC++ = A64_EXTR_W(mTmpReg1, Rm, Rm, rotate * 8); + + uint32_t imm = 0x00FF00FF; + *mPC++ = A64_MOVZ_W(mTmpReg2, imm & 0xFFFF, 0); + *mPC++ = A64_MOVK_W(mTmpReg2, (imm >> 16) & 0x0000FFFF, 16); + *mPC++ = A64_AND_W(Rd,mTmpReg1, mTmpReg2); +} + +// ---------------------------------------------------------------------------- +// Bit manipulation +// ---------------------------------------------------------------------------- +void ArmToAarch64Assembler::UBFX(int cc, int Rd, int Rn, int lsb, int width) +{ + if(cc != AL){ NOT_IMPLEMENTED(); return;} //Not required + *mPC++ = A64_UBFM_W(Rd, Rn, lsb, lsb + width - 1); +} +// ---------------------------------------------------------------------------- +// Shifters... +// ---------------------------------------------------------------------------- +int ArmToAarch64Assembler::buildImmediate( + uint32_t immediate, uint32_t& rot, uint32_t& imm) +{ + rot = 0; + imm = immediate; + return 0; // Always true +} + + +bool ArmToAarch64Assembler::isValidImmediate(uint32_t immediate) +{ + uint32_t rot, imm; + return buildImmediate(immediate, rot, imm) == 0; +} + +uint32_t ArmToAarch64Assembler::imm(uint32_t immediate) +{ + mAddrMode.immediate = immediate; + mAddrMode.writeback = false; + mAddrMode.preindex = false; + mAddrMode.postindex = false; + return OPERAND_IMM; + +} + +uint32_t ArmToAarch64Assembler::reg_imm(int Rm, int type, uint32_t shift) +{ + mAddrMode.reg_imm_Rm = Rm; + mAddrMode.reg_imm_type = type; + mAddrMode.reg_imm_shift = shift; + return OPERAND_REG_IMM; +} + +uint32_t ArmToAarch64Assembler::reg_rrx(int Rm) +{ + NOT_IMPLEMENTED(); + return OPERAND_UNSUPPORTED; +} + +uint32_t ArmToAarch64Assembler::reg_reg(int Rm, int type, int Rs) +{ + NOT_IMPLEMENTED(); //Not required + return OPERAND_UNSUPPORTED; +} +// ---------------------------------------------------------------------------- +// Addressing modes... 
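The UBFX mapping above relies on AArch64's UBFM encoding, of which UBFX is an alias: the ARM (lsb, width) pair becomes immr = lsb and imms = lsb + width - 1. For example:

```
// ARM input:  UBFX w1, w2, #3, #5     // extract bits [7:3]
// Generated:  UBFM w1, w2, #3, #7     // immr = 3, imms = 3 + 5 - 1
```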
+// ---------------------------------------------------------------------------- +uint32_t ArmToAarch64Assembler::immed12_pre(int32_t immed12, int W) +{ + mAddrMode.immediate = immed12; + mAddrMode.writeback = W; + mAddrMode.preindex = true; + mAddrMode.postindex = false; + return OPERAND_IMM; +} + +uint32_t ArmToAarch64Assembler::immed12_post(int32_t immed12) +{ + mAddrMode.immediate = immed12; + mAddrMode.writeback = true; + mAddrMode.preindex = false; + mAddrMode.postindex = true; + return OPERAND_IMM; +} + +uint32_t ArmToAarch64Assembler::reg_scale_pre(int Rm, int type, + uint32_t shift, int W) +{ + if(type != 0 || shift != 0 || W != 0) + { + NOT_IMPLEMENTED(); //Not required + return OPERAND_UNSUPPORTED; + } + else + { + mAddrMode.reg_offset = Rm; + return OPERAND_REG_OFFSET; + } +} + +uint32_t ArmToAarch64Assembler::reg_scale_post(int Rm, int type, uint32_t shift) +{ + NOT_IMPLEMENTED(); //Not required + return OPERAND_UNSUPPORTED; +} + +uint32_t ArmToAarch64Assembler::immed8_pre(int32_t immed8, int W) +{ + mAddrMode.immediate = immed8; + mAddrMode.writeback = W; + mAddrMode.preindex = true; + mAddrMode.postindex = false; + return OPERAND_IMM; +} + +uint32_t ArmToAarch64Assembler::immed8_post(int32_t immed8) +{ + mAddrMode.immediate = immed8; + mAddrMode.writeback = true; + mAddrMode.preindex = false; + mAddrMode.postindex = true; + return OPERAND_IMM; +} + +uint32_t ArmToAarch64Assembler::reg_pre(int Rm, int W) +{ + if(W != 0) + { + NOT_IMPLEMENTED(); //Not required + return OPERAND_UNSUPPORTED; + } + else + { + mAddrMode.reg_offset = Rm; + return OPERAND_REG_OFFSET; + } +} + +uint32_t ArmToAarch64Assembler::reg_post(int Rm) +{ + NOT_IMPLEMENTED(); //Not required + return OPERAND_UNSUPPORTED; +} + +// ---------------------------------------------------------------------------- +// A64 instructions +// ---------------------------------------------------------------------------- + +static const char * dataTransferOpName[] = +{ + "LDR","LDRB","LDRH","STR","STRB","STRH" +}; + +static const uint32_t dataTransferOpCode [] = +{ + ((0xB8u << 24) | (0x3 << 21) | (0x6 << 13) | (0x0 << 12) |(0x1 << 11)), + ((0x38u << 24) | (0x3 << 21) | (0x6 << 13) | (0x1 << 12) |(0x1 << 11)), + ((0x78u << 24) | (0x3 << 21) | (0x6 << 13) | (0x0 << 12) |(0x1 << 11)), + ((0xB8u << 24) | (0x1 << 21) | (0x6 << 13) | (0x0 << 12) |(0x1 << 11)), + ((0x38u << 24) | (0x1 << 21) | (0x6 << 13) | (0x1 << 12) |(0x1 << 11)), + ((0x78u << 24) | (0x1 << 21) | (0x6 << 13) | (0x0 << 12) |(0x1 << 11)) +}; +uint32_t ArmToAarch64Assembler::A64_LDRSTR_Wm_SXTW_0(uint32_t op, + uint32_t size, uint32_t Rt, + uint32_t Rn, uint32_t Rm) +{ + if(size == 32) + { + LOG_INSTR("%s W%d, [X%d, W%d, SXTW #0]\n", + dataTransferOpName[op], Rt, Rn, Rm); + return(dataTransferOpCode[op] | (Rm << 16) | (Rn << 5) | Rt); + } + else + { + LOG_INSTR("%s X%d, [X%d, W%d, SXTW #0]\n", + dataTransferOpName[op], Rt, Rn, Rm); + return(dataTransferOpCode[op] | (0x1<<30) | (Rm<<16) | (Rn<<5)|Rt); + } +} + +uint32_t ArmToAarch64Assembler::A64_STR_IMM_PreIndex(uint32_t Rt, + uint32_t Rn, int32_t simm) +{ + if(Rn == 31) + LOG_INSTR("STR W%d, [SP, #%d]!\n", Rt, simm); + else + LOG_INSTR("STR W%d, [X%d, #%d]!\n", Rt, Rn, simm); + + uint32_t imm9 = (unsigned)(simm) & 0x01FF; + return (0xB8 << 24) | (imm9 << 12) | (0x3 << 10) | (Rn << 5) | Rt; +} + +uint32_t ArmToAarch64Assembler::A64_LDR_IMM_PostIndex(uint32_t Rt, + uint32_t Rn, int32_t simm) +{ + if(Rn == 31) + LOG_INSTR("LDR W%d, [SP], #%d\n",Rt,simm); + else + LOG_INSTR("LDR W%d, [X%d], #%d\n",Rt, Rn, simm); + + 
uint32_t imm9 = (unsigned)(simm) & 0x01FF; + return (0xB8 << 24) | (0x1 << 22) | + (imm9 << 12) | (0x1 << 10) | (Rn << 5) | Rt; + +} +uint32_t ArmToAarch64Assembler::A64_ADD_X_Wm_SXTW(uint32_t Rd, + uint32_t Rn, + uint32_t Rm, + uint32_t amount) +{ + LOG_INSTR("ADD X%d, X%d, W%d, SXTW #%d\n", Rd, Rn, Rm, amount); + return ((0x8B << 24) | (0x1 << 21) |(Rm << 16) | + (0x6 << 13) | (amount << 10) | (Rn << 5) | Rd); + +} + +uint32_t ArmToAarch64Assembler::A64_SUB_X_Wm_SXTW(uint32_t Rd, + uint32_t Rn, + uint32_t Rm, + uint32_t amount) +{ + LOG_INSTR("SUB X%d, X%d, W%d, SXTW #%d\n", Rd, Rn, Rm, amount); + return ((0xCB << 24) | (0x1 << 21) |(Rm << 16) | + (0x6 << 13) | (amount << 10) | (Rn << 5) | Rd); + +} + +uint32_t ArmToAarch64Assembler::A64_B_COND(uint32_t cc, uint32_t offset) +{ + LOG_INSTR("B.%s #.+%d\n", cc_codes[cc], offset); + return (0x54 << 24) | ((offset/4) << 5) | (cc); + +} +uint32_t ArmToAarch64Assembler::A64_ADD_X(uint32_t Rd, uint32_t Rn, + uint32_t Rm, uint32_t shift, + uint32_t amount) +{ + LOG_INSTR("ADD X%d, X%d, X%d, %s #%d\n", + Rd, Rn, Rm, shift_codes[shift], amount); + return ((0x8B << 24) | (shift << 22) | ( Rm << 16) | + (amount << 10) |(Rn << 5) | Rd); +} +uint32_t ArmToAarch64Assembler::A64_ADD_IMM_X(uint32_t Rd, uint32_t Rn, + uint32_t imm, uint32_t shift) +{ + LOG_INSTR("ADD X%d, X%d, #%d, LSL #%d\n", Rd, Rn, imm, shift); + return (0x91 << 24) | ((shift/12) << 22) | (imm << 10) | (Rn << 5) | Rd; +} + +uint32_t ArmToAarch64Assembler::A64_SUB_IMM_X(uint32_t Rd, uint32_t Rn, + uint32_t imm, uint32_t shift) +{ + LOG_INSTR("SUB X%d, X%d, #%d, LSL #%d\n", Rd, Rn, imm, shift); + return (0xD1 << 24) | ((shift/12) << 22) | (imm << 10) | (Rn << 5) | Rd; +} + +uint32_t ArmToAarch64Assembler::A64_ADD_W(uint32_t Rd, uint32_t Rn, + uint32_t Rm, uint32_t shift, + uint32_t amount) +{ + LOG_INSTR("ADD W%d, W%d, W%d, %s #%d\n", + Rd, Rn, Rm, shift_codes[shift], amount); + return ((0x0B << 24) | (shift << 22) | ( Rm << 16) | + (amount << 10) |(Rn << 5) | Rd); +} + +uint32_t ArmToAarch64Assembler::A64_SUB_W(uint32_t Rd, uint32_t Rn, + uint32_t Rm, uint32_t shift, + uint32_t amount, + uint32_t setflag) +{ + if(setflag == 0) + { + LOG_INSTR("SUB W%d, W%d, W%d, %s #%d\n", + Rd, Rn, Rm, shift_codes[shift], amount); + return ((0x4B << 24) | (shift << 22) | ( Rm << 16) | + (amount << 10) |(Rn << 5) | Rd); + } + else + { + LOG_INSTR("SUBS W%d, W%d, W%d, %s #%d\n", + Rd, Rn, Rm, shift_codes[shift], amount); + return ((0x6B << 24) | (shift << 22) | ( Rm << 16) | + (amount << 10) |(Rn << 5) | Rd); + } +} + +uint32_t ArmToAarch64Assembler::A64_AND_W(uint32_t Rd, uint32_t Rn, + uint32_t Rm, uint32_t shift, + uint32_t amount) +{ + LOG_INSTR("AND W%d, W%d, W%d, %s #%d\n", + Rd, Rn, Rm, shift_codes[shift], amount); + return ((0x0A << 24) | (shift << 22) | ( Rm << 16) | + (amount << 10) |(Rn << 5) | Rd); +} + +uint32_t ArmToAarch64Assembler::A64_ORR_W(uint32_t Rd, uint32_t Rn, + uint32_t Rm, uint32_t shift, + uint32_t amount) +{ + LOG_INSTR("ORR W%d, W%d, W%d, %s #%d\n", + Rd, Rn, Rm, shift_codes[shift], amount); + return ((0x2A << 24) | (shift << 22) | ( Rm << 16) | + (amount << 10) |(Rn << 5) | Rd); +} + +uint32_t ArmToAarch64Assembler::A64_ORN_W(uint32_t Rd, uint32_t Rn, + uint32_t Rm, uint32_t shift, + uint32_t amount) +{ + LOG_INSTR("ORN W%d, W%d, W%d, %s #%d\n", + Rd, Rn, Rm, shift_codes[shift], amount); + return ((0x2A << 24) | (shift << 22) | (0x1 << 21) | ( Rm << 16) | + (amount << 10) |(Rn << 5) | Rd); +} + +uint32_t ArmToAarch64Assembler::A64_CSEL_X(uint32_t Rd, uint32_t Rn, + uint32_t 
Rm, uint32_t cond) +{ + LOG_INSTR("CSEL X%d, X%d, X%d, %s\n", Rd, Rn, Rm, cc_codes[cond]); + return ((0x9A << 24)|(0x1 << 23)|(Rm << 16) |(cond << 12)| (Rn << 5) | Rd); +} + +uint32_t ArmToAarch64Assembler::A64_CSEL_W(uint32_t Rd, uint32_t Rn, + uint32_t Rm, uint32_t cond) +{ + LOG_INSTR("CSEL W%d, W%d, W%d, %s\n", Rd, Rn, Rm, cc_codes[cond]); + return ((0x1A << 24)|(0x1 << 23)|(Rm << 16) |(cond << 12)| (Rn << 5) | Rd); +} + +uint32_t ArmToAarch64Assembler::A64_RET(uint32_t Rn) +{ + LOG_INSTR("RET X%d\n", Rn); + return ((0xD6 << 24) | (0x1 << 22) | (0x1F << 16) | (Rn << 5)); +} + +uint32_t ArmToAarch64Assembler::A64_MOVZ_X(uint32_t Rd, uint32_t imm, + uint32_t shift) +{ + LOG_INSTR("MOVZ X%d, #0x%x, LSL #%d\n", Rd, imm, shift); + return(0xD2 << 24) | (0x1 << 23) | ((shift/16) << 21) | (imm << 5) | Rd; +} + +uint32_t ArmToAarch64Assembler::A64_MOVK_W(uint32_t Rd, uint32_t imm, + uint32_t shift) +{ + LOG_INSTR("MOVK W%d, #0x%x, LSL #%d\n", Rd, imm, shift); + return (0x72 << 24) | (0x1 << 23) | ((shift/16) << 21) | (imm << 5) | Rd; +} + +uint32_t ArmToAarch64Assembler::A64_MOVZ_W(uint32_t Rd, uint32_t imm, + uint32_t shift) +{ + LOG_INSTR("MOVZ W%d, #0x%x, LSL #%d\n", Rd, imm, shift); + return(0x52 << 24) | (0x1 << 23) | ((shift/16) << 21) | (imm << 5) | Rd; +} + +uint32_t ArmToAarch64Assembler::A64_SMADDL(uint32_t Rd, uint32_t Rn, + uint32_t Rm, uint32_t Ra) +{ + LOG_INSTR("SMADDL X%d, W%d, W%d, X%d\n",Rd, Rn, Rm, Ra); + return ((0x9B << 24) | (0x1 << 21) | (Rm << 16)|(Ra << 10)|(Rn << 5) | Rd); +} + +uint32_t ArmToAarch64Assembler::A64_MADD_W(uint32_t Rd, uint32_t Rn, + uint32_t Rm, uint32_t Ra) +{ + LOG_INSTR("MADD W%d, W%d, W%d, W%d\n",Rd, Rn, Rm, Ra); + return ((0x1B << 24) | (Rm << 16) | (Ra << 10) |(Rn << 5) | Rd); +} + +uint32_t ArmToAarch64Assembler::A64_SBFM_W(uint32_t Rd, uint32_t Rn, + uint32_t immr, uint32_t imms) +{ + LOG_INSTR("SBFM W%d, W%d, #%d, #%d\n", Rd, Rn, immr, imms); + return ((0x13 << 24) | (immr << 16) | (imms << 10) | (Rn << 5) | Rd); + +} +uint32_t ArmToAarch64Assembler::A64_UBFM_W(uint32_t Rd, uint32_t Rn, + uint32_t immr, uint32_t imms) +{ + LOG_INSTR("UBFM W%d, W%d, #%d, #%d\n", Rd, Rn, immr, imms); + return ((0x53 << 24) | (immr << 16) | (imms << 10) | (Rn << 5) | Rd); + +} +uint32_t ArmToAarch64Assembler::A64_UBFM_X(uint32_t Rd, uint32_t Rn, + uint32_t immr, uint32_t imms) +{ + LOG_INSTR("UBFM X%d, X%d, #%d, #%d\n", Rd, Rn, immr, imms); + return ((0xD3 << 24) | (0x1 << 22) | + (immr << 16) | (imms << 10) | (Rn << 5) | Rd); + +} +uint32_t ArmToAarch64Assembler::A64_EXTR_W(uint32_t Rd, uint32_t Rn, + uint32_t Rm, uint32_t lsb) +{ + LOG_INSTR("EXTR W%d, W%d, W%d, #%d\n", Rd, Rn, Rm, lsb); + return (0x13 << 24)|(0x1 << 23) | (Rm << 16) | (lsb << 10)|(Rn << 5) | Rd; +} + +}; // namespace android + diff --git a/libpixelflinger/codeflinger/Aarch64Assembler.h b/libpixelflinger/codeflinger/Aarch64Assembler.h new file mode 100644 index 0000000..79c912b --- /dev/null +++ b/libpixelflinger/codeflinger/Aarch64Assembler.h @@ -0,0 +1,290 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef ANDROID_ARMTOAARCH64ASSEMBLER_H +#define ANDROID_ARMTOAARCH64ASSEMBLER_H + +#include <stdint.h> +#include <sys/types.h> + +#include "tinyutils/Vector.h" +#include "tinyutils/KeyedVector.h" +#include "tinyutils/smartpointer.h" + +#include "tinyutils/smartpointer.h" +#include "codeflinger/ARMAssemblerInterface.h" +#include "codeflinger/CodeCache.h" + +namespace android { + +// ---------------------------------------------------------------------------- + +class ArmToAarch64Assembler : public ARMAssemblerInterface +{ +public: + ArmToAarch64Assembler(const sp<Assembly>& assembly); + ArmToAarch64Assembler(void *base); + virtual ~ArmToAarch64Assembler(); + + uint32_t* base() const; + uint32_t* pc() const; + + + void disassemble(const char* name); + + // ------------------------------------------------------------------------ + // ARMAssemblerInterface... + // ------------------------------------------------------------------------ + + virtual void reset(); + + virtual int generate(const char* name); + virtual int getCodegenArch(); + + virtual void prolog(); + virtual void epilog(uint32_t touched); + virtual void comment(const char* string); + + + // ----------------------------------------------------------------------- + // shifters and addressing modes + // ----------------------------------------------------------------------- + + // shifters... + virtual bool isValidImmediate(uint32_t immed); + virtual int buildImmediate(uint32_t i, uint32_t& rot, uint32_t& imm); + + virtual uint32_t imm(uint32_t immediate); + virtual uint32_t reg_imm(int Rm, int type, uint32_t shift); + virtual uint32_t reg_rrx(int Rm); + virtual uint32_t reg_reg(int Rm, int type, int Rs); + + // addressing modes... 
+ virtual uint32_t immed12_pre(int32_t immed12, int W=0); + virtual uint32_t immed12_post(int32_t immed12); + virtual uint32_t reg_scale_pre(int Rm, int type=0, uint32_t shift=0, int W=0); + virtual uint32_t reg_scale_post(int Rm, int type=0, uint32_t shift=0); + virtual uint32_t immed8_pre(int32_t immed8, int W=0); + virtual uint32_t immed8_post(int32_t immed8); + virtual uint32_t reg_pre(int Rm, int W=0); + virtual uint32_t reg_post(int Rm); + + + virtual void dataProcessing(int opcode, int cc, int s, + int Rd, int Rn, + uint32_t Op2); + virtual void MLA(int cc, int s, + int Rd, int Rm, int Rs, int Rn); + virtual void MUL(int cc, int s, + int Rd, int Rm, int Rs); + virtual void UMULL(int cc, int s, + int RdLo, int RdHi, int Rm, int Rs); + virtual void UMUAL(int cc, int s, + int RdLo, int RdHi, int Rm, int Rs); + virtual void SMULL(int cc, int s, + int RdLo, int RdHi, int Rm, int Rs); + virtual void SMUAL(int cc, int s, + int RdLo, int RdHi, int Rm, int Rs); + + virtual void B(int cc, uint32_t* pc); + virtual void BL(int cc, uint32_t* pc); + virtual void BX(int cc, int Rn); + virtual void label(const char* theLabel); + virtual void B(int cc, const char* label); + virtual void BL(int cc, const char* label); + + virtual uint32_t* pcForLabel(const char* label); + + virtual void ADDR_LDR(int cc, int Rd, + int Rn, uint32_t offset = 0); + virtual void ADDR_ADD(int cc, int s, int Rd, + int Rn, uint32_t Op2); + virtual void ADDR_SUB(int cc, int s, int Rd, + int Rn, uint32_t Op2); + virtual void ADDR_STR (int cc, int Rd, + int Rn, uint32_t offset = 0); + + virtual void LDR (int cc, int Rd, + int Rn, uint32_t offset = 0); + virtual void LDRB(int cc, int Rd, + int Rn, uint32_t offset = 0); + virtual void STR (int cc, int Rd, + int Rn, uint32_t offset = 0); + virtual void STRB(int cc, int Rd, + int Rn, uint32_t offset = 0); + virtual void LDRH (int cc, int Rd, + int Rn, uint32_t offset = 0); + virtual void LDRSB(int cc, int Rd, + int Rn, uint32_t offset = 0); + virtual void LDRSH(int cc, int Rd, + int Rn, uint32_t offset = 0); + virtual void STRH (int cc, int Rd, + int Rn, uint32_t offset = 0); + + + virtual void LDM(int cc, int dir, + int Rn, int W, uint32_t reg_list); + virtual void STM(int cc, int dir, + int Rn, int W, uint32_t reg_list); + + virtual void SWP(int cc, int Rn, int Rd, int Rm); + virtual void SWPB(int cc, int Rn, int Rd, int Rm); + virtual void SWI(int cc, uint32_t comment); + + virtual void PLD(int Rn, uint32_t offset); + virtual void CLZ(int cc, int Rd, int Rm); + virtual void QADD(int cc, int Rd, int Rm, int Rn); + virtual void QDADD(int cc, int Rd, int Rm, int Rn); + virtual void QSUB(int cc, int Rd, int Rm, int Rn); + virtual void QDSUB(int cc, int Rd, int Rm, int Rn); + virtual void SMUL(int cc, int xy, + int Rd, int Rm, int Rs); + virtual void SMULW(int cc, int y, + int Rd, int Rm, int Rs); + virtual void SMLA(int cc, int xy, + int Rd, int Rm, int Rs, int Rn); + virtual void SMLAL(int cc, int xy, + int RdHi, int RdLo, int Rs, int Rm); + virtual void SMLAW(int cc, int y, + int Rd, int Rm, int Rs, int Rn); + virtual void UXTB16(int cc, int Rd, int Rm, int rotate); + virtual void UBFX(int cc, int Rd, int Rn, int lsb, int width); + +private: + ArmToAarch64Assembler(const ArmToAarch64Assembler& rhs); + ArmToAarch64Assembler& operator = (const ArmToAarch64Assembler& rhs); + + // ----------------------------------------------------------------------- + // helper functions + // ----------------------------------------------------------------------- + + void dataTransfer(int operation, 
int cc, int Rd, int Rn, + uint32_t operand_type, uint32_t size = 32); + void dataProcessingCommon(int opcode, int s, + int Rd, int Rn, uint32_t Op2); + + // ----------------------------------------------------------------------- + // Aarch64 instructions + // ----------------------------------------------------------------------- + uint32_t A64_B_COND(uint32_t cc, uint32_t offset); + uint32_t A64_RET(uint32_t Rn); + + uint32_t A64_LDRSTR_Wm_SXTW_0(uint32_t operation, + uint32_t size, uint32_t Rt, + uint32_t Rn, uint32_t Rm); + + uint32_t A64_STR_IMM_PreIndex(uint32_t Rt, uint32_t Rn, int32_t simm); + uint32_t A64_LDR_IMM_PostIndex(uint32_t Rt,uint32_t Rn, int32_t simm); + + uint32_t A64_ADD_X_Wm_SXTW(uint32_t Rd, uint32_t Rn, uint32_t Rm, + uint32_t amount); + uint32_t A64_SUB_X_Wm_SXTW(uint32_t Rd, uint32_t Rn, uint32_t Rm, + uint32_t amount); + + uint32_t A64_ADD_IMM_X(uint32_t Rd, uint32_t Rn, + uint32_t imm, uint32_t shift = 0); + uint32_t A64_SUB_IMM_X(uint32_t Rd, uint32_t Rn, + uint32_t imm, uint32_t shift = 0); + + uint32_t A64_ADD_X(uint32_t Rd, uint32_t Rn, + uint32_t Rm, uint32_t shift = 0, uint32_t amount = 0); + uint32_t A64_ADD_W(uint32_t Rd, uint32_t Rn, uint32_t Rm, + uint32_t shift = 0, uint32_t amount = 0); + uint32_t A64_SUB_W(uint32_t Rd, uint32_t Rn, uint32_t Rm, + uint32_t shift = 0, uint32_t amount = 0, + uint32_t setflag = 0); + uint32_t A64_AND_W(uint32_t Rd, uint32_t Rn, + uint32_t Rm, uint32_t shift = 0, uint32_t amount = 0); + uint32_t A64_ORR_W(uint32_t Rd, uint32_t Rn, + uint32_t Rm, uint32_t shift = 0, uint32_t amount = 0); + uint32_t A64_ORN_W(uint32_t Rd, uint32_t Rn, + uint32_t Rm, uint32_t shift = 0, uint32_t amount = 0); + + uint32_t A64_MOVZ_W(uint32_t Rd, uint32_t imm, uint32_t shift); + uint32_t A64_MOVZ_X(uint32_t Rd, uint32_t imm, uint32_t shift); + uint32_t A64_MOVK_W(uint32_t Rd, uint32_t imm, uint32_t shift); + + uint32_t A64_SMADDL(uint32_t Rd, uint32_t Rn, uint32_t Rm, uint32_t Ra); + uint32_t A64_MADD_W(uint32_t Rd, uint32_t Rn, uint32_t Rm, uint32_t Ra); + + uint32_t A64_SBFM_W(uint32_t Rd, uint32_t Rn, + uint32_t immr, uint32_t imms); + uint32_t A64_UBFM_W(uint32_t Rd, uint32_t Rn, + uint32_t immr, uint32_t imms); + uint32_t A64_UBFM_X(uint32_t Rd, uint32_t Rn, + uint32_t immr, uint32_t imms); + + uint32_t A64_EXTR_W(uint32_t Rd, uint32_t Rn, uint32_t Rm, uint32_t lsb); + uint32_t A64_CSEL_X(uint32_t Rd, uint32_t Rn, uint32_t Rm, uint32_t cond); + uint32_t A64_CSEL_W(uint32_t Rd, uint32_t Rn, uint32_t Rm, uint32_t cond); + + uint32_t* mBase; + uint32_t* mPC; + uint32_t* mPrologPC; + int64_t mDuration; + uint32_t mTmpReg1, mTmpReg2, mTmpReg3, mZeroReg; + + struct branch_target_t { + inline branch_target_t() : label(0), pc(0) { } + inline branch_target_t(const char* l, uint32_t* p) + : label(l), pc(p) { } + const char* label; + uint32_t* pc; + }; + + sp<Assembly> mAssembly; + Vector<branch_target_t> mBranchTargets; + KeyedVector< const char*, uint32_t* > mLabels; + KeyedVector< uint32_t*, const char* > mLabelsInverseMapping; + KeyedVector< uint32_t*, const char* > mComments; + + enum operand_type_t + { + OPERAND_REG = 0x20, + OPERAND_IMM, + OPERAND_REG_IMM, + OPERAND_REG_OFFSET, + OPERAND_UNSUPPORTED + }; + + struct addr_mode_t { + int32_t immediate; + bool writeback; + bool preindex; + bool postindex; + int32_t reg_imm_Rm; + int32_t reg_imm_type; + uint32_t reg_imm_shift; + int32_t reg_offset; + } mAddrMode; + +}; + +}; // namespace android + +#endif //ANDROID_AARCH64ASSEMBLER_H diff --git a/libpixelflinger/codeflinger/Aarch64Disassembler.cpp 
b/libpixelflinger/codeflinger/Aarch64Disassembler.cpp new file mode 100644 index 0000000..4bb97b4 --- /dev/null +++ b/libpixelflinger/codeflinger/Aarch64Disassembler.cpp @@ -0,0 +1,316 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <stdio.h> +#include <inttypes.h> +#include <string.h> + +struct disasm_table_entry_t +{ + uint32_t mask; + uint32_t value; + const char* instr_template; +}; + + +static disasm_table_entry_t disasm_table[] = +{ + {0xff000000, 0x91000000, "add <xd|sp>, <xn|sp>, #<imm1>, <shift1>"}, + {0xff000000, 0xd1000000, "sub <xd|sp>, <xn|sp>, #<imm1>, <shift1>"}, + {0xff200000, 0x8b000000, "add <xd>, <xn>, <xm>, <shift2> #<amt1>"}, + {0xff200000, 0x0b000000, "add <wd>, <wn>, <wm>, <shift2> #<amt1>"}, + {0xff200000, 0x4b000000, "sub <wd>, <wn>, <wm>, <shift2> #<amt1>"}, + {0xff200000, 0x6b000000, "subs <wd>, <wn>, <wm>, <shift2> #<amt1>"}, + {0xff200000, 0x0a000000, "and <wd>, <wn>, <wm>, <shift2> #<amt1>"}, + {0xff200000, 0x2a000000, "orr <wd>, <wn>, <wm>, <shift2> #<amt1>"}, + {0xff200000, 0x2a200000, "orn <wd>, <wn>, <wm>, <shift2> #<amt1>"}, + {0xff800000, 0x72800000, "movk <wd>, #<imm2>, lsl #<shift3>"}, + {0xff800000, 0x52800000, "movz <wd>, #<imm2>, lsl #<shift3>"}, + {0xff800000, 0xd2800000, "movz <xd>, #<imm2>, lsl #<shift3>"}, + {0xffe00c00, 0x1a800000, "csel <wd>, <wn>, <wm>, <cond1>"}, + {0xffe00c00, 0x9a800000, "csel <xd>, <xn>, <xm>, <cond1>"}, + {0xffe00c00, 0x5a800000, "csinv <wd>, <wn>, <wm>, <cond1>"}, + {0xffe08000, 0x1b000000, "madd <wd>, <wn>, <wm>, <wa>"}, + {0xffe08000, 0x9b200000, "smaddl <xd>, <wn>, <wm>, <xa>"}, + {0xffe04c00, 0xb8604800, "ldr <wt>, [<xn|sp>, <r1><m1>, <ext1> #<amt2>]"}, + {0xffe04c00, 0xb8204800, "str <wt>, [<xn|sp>, <r1><m1>, <ext1> #<amt2>]"}, + {0xffe04c00, 0xf8604800, "ldr <xt>, [<xn|sp>, <r1><m1>, <ext1> #<amt3>]"}, + {0xffe04c00, 0xf8204800, "str <xt>, [<xn|sp>, <r1><m1>, <ext1> #<amt3>]"}, + {0xffe04c00, 0x38604800, "ldrb <wt>, [<xn|sp>, <r1><m1>, <ext1> <amt5>]"}, + {0xffe04c00, 0x38204800, "strb <wt>, [<xn|sp>, <r1><m1>, <ext1> <amt5>]"}, + {0xffe04c00, 0x78604800, "ldrh <wt>, [<xn|sp>, <r1><m1>, <ext1> #<amt6>]"}, + {0xffe04c00, 0x78204800, "strh <wt>, [<xn|sp>, <r1><m1>, 
<ext1> #<amt6>]"}, + {0xffe00c00, 0xb8400400, "ldr <wt>, [<xn|sp>], #<simm1>"}, + {0xffe00c00, 0xb8000c00, "str <wt>, [<xn|sp>, #<simm1>]!"}, + {0xffc00000, 0x13000000, "sbfm <wd>, <wn>, #<immr1>, #<imms1>"}, + {0xffc00000, 0x53000000, "ubfm <wd>, <wn>, #<immr1>, #<imms1>"}, + {0xffc00000, 0xd3400000, "ubfm <xd>, <xn>, #<immr1>, #<imms1>"}, + {0xffe00000, 0x13800000, "extr <wd>, <wn>, <wm>, #<lsb1>"}, + {0xff000000, 0x54000000, "b.<cond2> <label1>"}, + {0xfffffc1f, 0xd65f0000, "ret <xn>"}, + {0xffe00000, 0x8b200000, "add <xd|sp>, <xn|sp>, <r2><m1>, <ext2> #<amt4>"}, + {0xffe00000, 0xcb200000, "sub <xd|sp>, <xn|sp>, <r2><m1>, <ext2> #<amt4>"} +}; + +static int32_t bits_signed(uint32_t instr, uint32_t msb, uint32_t lsb) +{ + int32_t value; + value = ((int32_t)instr) << (31 - msb); + value >>= (31 - msb); + value >>= lsb; + return value; +} +static uint32_t bits_unsigned(uint32_t instr, uint32_t msb, uint32_t lsb) +{ + uint32_t width = msb - lsb + 1; + uint32_t mask = (1 << width) - 1; + return ((instr >> lsb) & mask); +} + +static void get_token(const char *instr, uint32_t index, char *token) +{ + uint32_t i, j; + for(i = index, j = 0; i < strlen(instr); ++i) + { + if(instr[index] == '<' && instr[i] == '>') + { + token[j++] = instr[i]; + break; + } + else if(instr[index] != '<' && instr[i] == '<') + { + break; + } + else + { + token[j++] = instr[i]; + } + } + token[j] = '\0'; + return; +} + + +static const char * token_cc_table[] = +{ + "eq", "ne", "cs", "cc", "mi", + "pl", "vs", "vc", "hi", "ls", + "ge", "lt", "gt", "le", "al", "nv" +}; + +static void decode_rx_zr_token(uint32_t reg, const char *prefix, char *instr_part) +{ + if(reg == 31) + sprintf(instr_part, "%s%s", prefix, "zr"); + else + sprintf(instr_part, "%s%d", prefix, reg); +} + +static void decode_token(uint32_t code, char *token, char *instr_part) +{ + if(strcmp(token, "<imm1>") == 0) + sprintf(instr_part, "0x%x", bits_unsigned(code, 21,10)); + else if(strcmp(token, "<imm2>") == 0) + sprintf(instr_part, "0x%x", bits_unsigned(code, 20,5)); + else if(strcmp(token, "<shift1>") == 0) + sprintf(instr_part, "lsl #%d", bits_unsigned(code, 23,22) * 12); + else if(strcmp(token, "<shift2>") == 0) + { + static const char * shift2_table[] = { "lsl", "lsr", "asr", "ror"}; + sprintf(instr_part, "%s", shift2_table[bits_unsigned(code, 23,22)]); + } + else if(strcmp(token, "<shift3>") == 0) + sprintf(instr_part, "%d", bits_unsigned(code, 22,21) * 16); + else if(strcmp(token, "<amt1>") == 0) + sprintf(instr_part, "%d", bits_unsigned(code, 15,10)); + else if(strcmp(token, "<amt2>") == 0) + sprintf(instr_part, "%d", bits_unsigned(code, 12,12) * 2); + else if(strcmp(token, "<amt3>") == 0) + sprintf(instr_part, "%d", bits_unsigned(code, 12,12) * 3); + else if(strcmp(token, "<amt4>") == 0) + sprintf(instr_part, "%d", bits_unsigned(code, 12,10)); + else if(strcmp(token, "<amt5>") == 0) + { + static const char * amt5_table[] = {"", "#0"}; + sprintf(instr_part, "%s", amt5_table[bits_unsigned(code, 12,12)]); + } + else if(strcmp(token, "<amt6>") == 0) + sprintf(instr_part, "%d", bits_unsigned(code, 12,12)); + else if(strcmp(token, "<simm1>") == 0) + sprintf(instr_part, "%d", bits_signed(code, 20,12)); + else if(strcmp(token, "<immr1>") == 0) + sprintf(instr_part, "%d", bits_unsigned(code, 21,16)); + else if(strcmp(token, "<imms1>") == 0) + sprintf(instr_part, "%d", bits_unsigned(code, 15,10)); + else if(strcmp(token, "<lsb1>") == 0) + sprintf(instr_part, "%d", bits_unsigned(code, 15,10)); + else if(strcmp(token, "<cond1>") == 0) + sprintf(instr_part, 
"%s", token_cc_table[bits_unsigned(code, 15,12)]); + else if(strcmp(token, "<cond2>") == 0) + sprintf(instr_part, "%s", token_cc_table[bits_unsigned(code, 4,0)]); + else if(strcmp(token, "<r1>") == 0) + { + const char * token_r1_table[] = + { + "reserved", "reserved", "w", "x", + "reserved", "reserved", "w", "x" + }; + sprintf(instr_part, "%s", token_r1_table[bits_unsigned(code, 15,13)]); + } + else if(strcmp(token, "<r2>") == 0) + { + static const char * token_r2_table[] = + { + "w","w","w", "x", "w", "w", "w", "x" + }; + sprintf(instr_part, "%s", token_r2_table[bits_unsigned(code, 15,13)]); + } + else if(strcmp(token, "<m1>") == 0) + { + uint32_t reg = bits_unsigned(code, 20,16); + if(reg == 31) + sprintf(instr_part, "%s", "zr"); + else + sprintf(instr_part, "%d", reg); + } + else if(strcmp(token, "<ext1>") == 0) + { + static const char * token_ext1_table[] = + { + "reserved","reserved","uxtw", "lsl", + "reserved","reserved", "sxtw", "sxtx" + }; + sprintf(instr_part, "%s", token_ext1_table[bits_unsigned(code, 15,13)]); + } + else if(strcmp(token, "<ext2>") == 0) + { + static const char * token_ext2_table[] = + { + "uxtb","uxth","uxtw","uxtx", + "sxtb","sxth","sxtw","sxtx" + }; + sprintf(instr_part, "%s", token_ext2_table[bits_unsigned(code, 15,13)]); + } + else if (strcmp(token, "<label1>") == 0) + { + int32_t offset = bits_signed(code, 23,5) * 4; + if(offset > 0) + sprintf(instr_part, "#.+%d", offset); + else + sprintf(instr_part, "#.-%d", -offset); + } + else if (strcmp(token, "<xn|sp>") == 0) + { + uint32_t reg = bits_unsigned(code, 9, 5); + if(reg == 31) + sprintf(instr_part, "%s", "sp"); + else + sprintf(instr_part, "x%d", reg); + } + else if (strcmp(token, "<xd|sp>") == 0) + { + uint32_t reg = bits_unsigned(code, 4, 0); + if(reg == 31) + sprintf(instr_part, "%s", "sp"); + else + sprintf(instr_part, "x%d", reg); + } + else if (strcmp(token, "<xn>") == 0) + decode_rx_zr_token(bits_unsigned(code, 9, 5), "x", instr_part); + else if (strcmp(token, "<xd>") == 0) + decode_rx_zr_token(bits_unsigned(code, 4, 0), "x", instr_part); + else if (strcmp(token, "<xm>") == 0) + decode_rx_zr_token(bits_unsigned(code, 20, 16), "x", instr_part); + else if (strcmp(token, "<xa>") == 0) + decode_rx_zr_token(bits_unsigned(code, 14, 10), "x", instr_part); + else if (strcmp(token, "<xt>") == 0) + decode_rx_zr_token(bits_unsigned(code, 4, 0), "x", instr_part); + else if (strcmp(token, "<wn>") == 0) + decode_rx_zr_token(bits_unsigned(code, 9, 5), "w", instr_part); + else if (strcmp(token, "<wd>") == 0) + decode_rx_zr_token(bits_unsigned(code, 4, 0), "w", instr_part); + else if (strcmp(token, "<wm>") == 0) + decode_rx_zr_token(bits_unsigned(code, 20, 16), "w", instr_part); + else if (strcmp(token, "<wa>") == 0) + decode_rx_zr_token(bits_unsigned(code, 14, 10), "w", instr_part); + else if (strcmp(token, "<wt>") == 0) + decode_rx_zr_token(bits_unsigned(code, 4, 0), "w", instr_part); + else + { + sprintf(instr_part, "error"); + } + return; +} + +int aarch64_disassemble(uint32_t code, char* instr) +{ + uint32_t i; + char token[256]; + char instr_part[256]; + + if(instr == NULL) + return -1; + + bool matched = false; + disasm_table_entry_t *entry = NULL; + for(i = 0; i < sizeof(disasm_table)/sizeof(disasm_table_entry_t); ++i) + { + entry = &disasm_table[i]; + if((code & entry->mask) == entry->value) + { + matched = true; + break; + } + } + if(matched == false) + { + strcpy(instr, "Unknown Instruction"); + return -1; + } + else + { + uint32_t index = 0; + uint32_t length = strlen(entry->instr_template); + instr[0] = 
'\0'; + do + { + get_token(entry->instr_template, index, token); + if(token[0] == '<') + { + decode_token(code, token, instr_part); + strcat(instr, instr_part); + } + else + { + strcat(instr, token); + } + index += strlen(token); + } while(index < length); + return 0; + } +}
diff --git a/libpixelflinger/codeflinger/Aarch64Disassembler.h b/libpixelflinger/codeflinger/Aarch64Disassembler.h new file mode 100644 index 0000000..177d692 --- /dev/null +++ b/libpixelflinger/codeflinger/Aarch64Disassembler.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef ANDROID_AARCH64DISASSEMBLER_H +#define ANDROID_AARCH64DISASSEMBLER_H + +#include <inttypes.h> +int aarch64_disassemble(uint32_t code, char* instr); + +#endif //ANDROID_AARCH64DISASSEMBLER_H
diff --git a/libpixelflinger/codeflinger/CodeCache.cpp b/libpixelflinger/codeflinger/CodeCache.cpp index 58fde7e..4fe30d9 100644 --- a/libpixelflinger/codeflinger/CodeCache.cpp +++ b/libpixelflinger/codeflinger/CodeCache.cpp @@ -34,7 +34,7 @@ namespace android { // ---------------------------------------------------------------------------- -#if defined(__arm__) +#if defined(__arm__) || defined(__aarch64__) #include <unistd.h> #include <errno.h> #endif @@ -201,7 +201,7 @@ int CodeCache::cache( const AssemblyKeyBase& keyBase, mCacheInUse += assemblySize; mWhen++; // synchronize caches...
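// (Generated code must not be run through a stale instruction cache: cacheflush() below cleans the data cache and invalidates the icache over the JIT buffer. AArch64 needs this exactly as ARM and MIPS do, hence the widened guard.)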
-#if defined(__arm__) || defined(__mips__) +#if defined(__arm__) || defined(__mips__) || defined(__aarch64__) const long base = long(assembly->base()); const long curr = base + long(assembly->size()); err = cacheflush(base, curr, 0); diff --git a/libpixelflinger/codeflinger/GGLAssembler.cpp b/libpixelflinger/codeflinger/GGLAssembler.cpp index 725495f..7f088db 100644 --- a/libpixelflinger/codeflinger/GGLAssembler.cpp +++ b/libpixelflinger/codeflinger/GGLAssembler.cpp @@ -901,6 +901,10 @@ void GGLAssembler::build_and_immediate(int d, int s, uint32_t mask, int bits) AND( AL, 0, d, s, imm(mask) ); return; } + else if (getCodegenArch() == CODEGEN_ARCH_AARCH64) { + AND( AL, 0, d, s, imm(mask) ); + return; + } int negative_logic = !isValidImmediate(mask); if (negative_logic) { diff --git a/libpixelflinger/scanline.cpp b/libpixelflinger/scanline.cpp index 96a71f3..bc774f3 100644 --- a/libpixelflinger/scanline.cpp +++ b/libpixelflinger/scanline.cpp @@ -31,8 +31,11 @@ #include "codeflinger/CodeCache.h" #include "codeflinger/GGLAssembler.h" +#if defined(__arm__) #include "codeflinger/ARMAssembler.h" -#if defined(__mips__) +#elif defined(__aarch64__) +#include "codeflinger/Aarch64Assembler.h" +#elif defined(__mips__) #include "codeflinger/MIPSAssembler.h" #endif //#include "codeflinger/ARMAssemblerOptimizer.h" @@ -52,7 +55,7 @@ # define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED #endif -#if defined(__arm__) || defined(__mips__) +#if defined(__arm__) || defined(__mips__) || defined(__aarch64__) # define ANDROID_ARM_CODEGEN 1 #else # define ANDROID_ARM_CODEGEN 0 @@ -68,6 +71,8 @@ #ifdef __mips__ #define ASSEMBLY_SCRATCH_SIZE 4096 +#elif defined(__aarch64__) +#define ASSEMBLY_SCRATCH_SIZE 8192 #else #define ASSEMBLY_SCRATCH_SIZE 2048 #endif @@ -122,6 +127,9 @@ extern "C" void scanline_t32cb16blend_arm(uint16_t*, uint32_t*, size_t); extern "C" void scanline_t32cb16_arm(uint16_t *dst, uint32_t *src, size_t ct); extern "C" void scanline_col32cb16blend_neon(uint16_t *dst, uint32_t *col, size_t ct); extern "C" void scanline_col32cb16blend_arm(uint16_t *dst, uint32_t col, size_t ct); +#elif defined(__aarch64__) +extern "C" void scanline_t32cb16blend_aarch64(uint16_t*, uint32_t*, size_t); +extern "C" void scanline_col32cb16blend_aarch64(uint16_t *dst, uint32_t col, size_t ct); #elif defined(__mips__) extern "C" void scanline_t32cb16blend_mips(uint16_t*, uint32_t*, size_t); #endif @@ -276,6 +284,8 @@ static const needs_filter_t fill16noblend = { #if defined(__mips__) static CodeCache gCodeCache(32 * 1024); +#elif defined(__aarch64__) +static CodeCache gCodeCache(48 * 1024); #else static CodeCache gCodeCache(12 * 1024); #endif @@ -394,6 +404,8 @@ static void pick_scanline(context_t* c) #endif #if defined(__mips__) GGLAssembler assembler( new ArmToMipsAssembler(a) ); +#elif defined(__aarch64__) + GGLAssembler assembler( new ArmToAarch64Assembler(a) ); #endif // generate the scanline code for the given needs int err = assembler.scanline(c->state.needs, c); @@ -2085,6 +2097,8 @@ void scanline_col32cb16blend(context_t* c) #else // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN scanline_col32cb16blend_arm(dst, GGL_RGBA_TO_HOST(c->packed8888), ct); #endif // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN +#elif ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__aarch64__)) + scanline_col32cb16blend_aarch64(dst, GGL_RGBA_TO_HOST(c->packed8888), ct); #else uint32_t s = GGL_RGBA_TO_HOST(c->packed8888); int sA = (s>>24); @@ -2157,7 +2171,7 @@ last_one: void scanline_t32cb16blend(context_t* c) { -#if 
((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && (defined(__arm__) || defined(__mips))) +#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && (defined(__arm__) || defined(__mips__) || defined(__aarch64__))) int32_t x = c->iterators.xl; size_t ct = c->iterators.xr - x; int32_t y = c->iterators.y; @@ -2171,7 +2185,9 @@ void scanline_t32cb16blend(context_t* c) #ifdef __arm__ scanline_t32cb16blend_arm(dst, src, ct); -#else +#elif defined(__aarch64__) + scanline_t32cb16blend_aarch64(dst, src, ct); +#elif defined(__mips__) scanline_t32cb16blend_mips(dst, src, ct); #endif #else diff --git a/libpixelflinger/tests/arch-aarch64/Android.mk b/libpixelflinger/tests/arch-aarch64/Android.mk new file mode 100644 index 0000000..f096491 --- /dev/null +++ b/libpixelflinger/tests/arch-aarch64/Android.mk @@ -0,0 +1,3 @@ +ifeq ($(TARGET_ARCH),aarch64) +include $(all-subdir-makefiles) +endif diff --git a/libpixelflinger/tests/arch-aarch64/assembler/Android.mk b/libpixelflinger/tests/arch-aarch64/assembler/Android.mk new file mode 100644 index 0000000..10e06c4 --- /dev/null +++ b/libpixelflinger/tests/arch-aarch64/assembler/Android.mk @@ -0,0 +1,19 @@ +LOCAL_PATH:= $(call my-dir) +include $(CLEAR_VARS) + +LOCAL_SRC_FILES:= \ + aarch64_assembler_test.cpp\ + asm_test_jacket.S + +LOCAL_SHARED_LIBRARIES := \ + libcutils \ + libpixelflinger + +LOCAL_C_INCLUDES := \ + system/core/libpixelflinger + +LOCAL_MODULE:= test-pixelflinger-aarch64-assembler-test + +LOCAL_MODULE_TAGS := tests + +include $(BUILD_EXECUTABLE) diff --git a/libpixelflinger/tests/arch-aarch64/assembler/aarch64_assembler_test.cpp b/libpixelflinger/tests/arch-aarch64/assembler/aarch64_assembler_test.cpp new file mode 100644 index 0000000..d3e57b3 --- /dev/null +++ b/libpixelflinger/tests/arch-aarch64/assembler/aarch64_assembler_test.cpp @@ -0,0 +1,782 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> + +#include <sys/mman.h> +#include <cutils/ashmem.h> +#include <cutils/atomic.h> + +#define __STDC_FORMAT_MACROS +#include <inttypes.h> + +#include "codeflinger/ARMAssemblerInterface.h" +#include "codeflinger/Aarch64Assembler.h" +using namespace android; + +#define TESTS_DATAOP_ENABLE 1 +#define TESTS_DATATRANSFER_ENABLE 1 +#define TESTS_LDMSTM_ENABLE 1 +#define TESTS_REG_CORRUPTION_ENABLE 0 + +void *instrMem; +uint32_t instrMemSize = 128 * 1024; +char dataMem[8192]; + +typedef void (*asm_function_t)(); +extern "C" void asm_test_jacket(asm_function_t function, + int64_t regs[], int32_t flags[]); + +#define MAX_32BIT (uint32_t)(((uint64_t)1 << 32) - 1) +const uint32_t NA = 0; +const uint32_t NUM_REGS = 32; +const uint32_t NUM_FLAGS = 16; + +enum instr_t +{ + INSTR_ADD, + INSTR_SUB, + INSTR_AND, + INSTR_ORR, + INSTR_RSB, + INSTR_BIC, + INSTR_CMP, + INSTR_MOV, + INSTR_MVN, + INSTR_MUL, + INSTR_MLA, + INSTR_SMULBB, + INSTR_SMULBT, + INSTR_SMULTB, + INSTR_SMULTT, + INSTR_SMULWB, + INSTR_SMULWT, + INSTR_SMLABB, + INSTR_UXTB16, + INSTR_UBFX, + INSTR_ADDR_ADD, + INSTR_ADDR_SUB, + INSTR_LDR, + INSTR_LDRB, + INSTR_LDRH, + INSTR_ADDR_LDR, + INSTR_LDM, + INSTR_STR, + INSTR_STRB, + INSTR_STRH, + INSTR_ADDR_STR, + INSTR_STM +}; + +enum shift_t +{ + SHIFT_LSL, + SHIFT_LSR, + SHIFT_ASR, + SHIFT_ROR, + SHIFT_NONE +}; + +enum offset_t +{ + REG_SCALE_OFFSET, + REG_OFFSET, + IMM8_OFFSET, + IMM12_OFFSET, + NO_OFFSET +}; + +enum cond_t +{ + EQ, NE, CS, CC, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL, NV, + HS = CS, + LO = CC +}; + +const char * cc_code[] = +{ + "EQ", "NE", "CS", "CC", "MI", "PL", "VS", "VC", + "HI", "LS","GE","LT", "GT", "LE", "AL", "NV" +}; + + +struct dataOpTest_t +{ + uint32_t id; + instr_t op; + uint32_t preFlag; + cond_t cond; + bool setFlags; + uint64_t RnValue; + uint64_t RsValue; + bool immediate; + uint32_t immValue; + uint64_t RmValue; + uint32_t shiftMode; + uint32_t shiftAmount; + uint64_t RdValue; + bool checkRd; + uint64_t postRdValue; + bool checkFlag; + uint32_t postFlag; +}; + +struct dataTransferTest_t +{ + uint32_t id; + instr_t op; + uint32_t preFlag; + cond_t cond; + bool setMem; + uint64_t memOffset; + uint64_t memValue; + uint64_t RnValue; + offset_t offsetType; + uint64_t RmValue; + uint32_t immValue; + bool writeBack; + bool preIndex; + bool postIndex; + uint64_t RdValue; + uint64_t postRdValue; + uint64_t postRnValue; + bool checkMem; + uint64_t postMemOffset; + uint32_t postMemLength; + uint64_t postMemValue; +}; + + +dataOpTest_t dataOpTests [] = +{ + {0xA000,INSTR_ADD,AL,AL,0,1,NA,1,MAX_32BIT ,NA,NA,NA,NA,1,0,0,0}, + {0xA001,INSTR_ADD,AL,AL,0,1,NA,1,MAX_32BIT -1,NA,NA,NA,NA,1,MAX_32BIT,0,0}, + {0xA002,INSTR_ADD,AL,AL,0,1,NA,0,NA,MAX_32BIT ,NA,NA,NA,1,0,0,0}, + {0xA003,INSTR_ADD,AL,AL,0,1,NA,0,NA,MAX_32BIT -1,NA,NA,NA,1,MAX_32BIT,0,0}, + {0xA004,INSTR_ADD,AL,AL,0,1,NA,0,0,MAX_32BIT ,SHIFT_LSL,0,NA,1,0,0,0}, + {0xA005,INSTR_ADD,AL,AL,0,1,NA,0,0,MAX_32BIT ,SHIFT_LSL,31,NA,1,0x80000001,0,0}, + {0xA006,INSTR_ADD,AL,AL,0,1,NA,0,0,3,SHIFT_LSR,1,NA,1,2,0,0}, + {0xA007,INSTR_ADD,AL,AL,0,1,NA,0,0,MAX_32BIT ,SHIFT_LSR,31,NA,1,2,0,0}, + {0xA008,INSTR_ADD,AL,AL,0,0,NA,0,0,3,SHIFT_ASR,1,NA,1,1,0,0}, + {0xA009,INSTR_ADD,AL,AL,0,1,NA,0,0,MAX_32BIT ,SHIFT_ASR,31,NA,1,0,0,0}, + {0xA010,INSTR_AND,AL,AL,0,1,NA,1,MAX_32BIT ,0,0,0,NA,1,1,0,0}, + {0xA011,INSTR_AND,AL,AL,0,1,NA,1,MAX_32BIT -1,0,0,0,NA,1,0,0,0}, + {0xA012,INSTR_AND,AL,AL,0,1,NA,0,0,MAX_32BIT ,0,0,NA,1,1,0,0}, + 
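// Entry fields follow the dataOpTest_t declaration order: {id, op, preFlag, cond, setFlags, RnValue, RsValue, immediate, immValue, RmValue, shiftMode, shiftAmount, RdValue, checkRd, postRdValue, checkFlag, postFlag} +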
{0xA013,INSTR_AND,AL,AL,0,1,NA,0,0,MAX_32BIT -1,0,0,NA,1,0,0,0}, + {0xA014,INSTR_AND,AL,AL,0,1,NA,0,0,MAX_32BIT ,SHIFT_LSL,0,NA,1,1,0,0}, + {0xA015,INSTR_AND,AL,AL,0,1,NA,0,0,MAX_32BIT ,SHIFT_LSL,31,NA,1,0,0,0}, + {0xA016,INSTR_AND,AL,AL,0,1,NA,0,0,3,SHIFT_LSR,1,NA,1,1,0,0}, + {0xA017,INSTR_AND,AL,AL,0,1,NA,0,0,MAX_32BIT ,SHIFT_LSR,31,NA,1,1,0,0}, + {0xA018,INSTR_AND,AL,AL,0,0,NA,0,0,3,SHIFT_ASR,1,NA,1,0,0,0}, + {0xA019,INSTR_AND,AL,AL,0,1,NA,0,0,MAX_32BIT ,SHIFT_ASR,31,NA,1,1,0,0}, + {0xA020,INSTR_ORR,AL,AL,0,3,NA,1,MAX_32BIT ,0,0,0,NA,1,MAX_32BIT,0,0}, + {0xA021,INSTR_ORR,AL,AL,0,2,NA,1,MAX_32BIT -1,0,0,0,NA,1,MAX_32BIT-1,0,0}, + {0xA022,INSTR_ORR,AL,AL,0,3,NA,0,0,MAX_32BIT ,0,0,NA,1,MAX_32BIT,0,0}, + {0xA023,INSTR_ORR,AL,AL,0,2,NA,0,0,MAX_32BIT -1,0,0,NA,1,MAX_32BIT-1,0,0}, + {0xA024,INSTR_ORR,AL,AL,0,1,NA,0,0,MAX_32BIT ,SHIFT_LSL,0,NA,1,MAX_32BIT,0,0}, + {0xA025,INSTR_ORR,AL,AL,0,1,NA,0,0,MAX_32BIT ,SHIFT_LSL,31,NA,1,0x80000001,0,0}, + {0xA026,INSTR_ORR,AL,AL,0,1,NA,0,0,3,SHIFT_LSR,1,NA,1,1,0,0}, + {0xA027,INSTR_ORR,AL,AL,0,0,NA,0,0,MAX_32BIT ,SHIFT_LSR,31,NA,1,1,0,0}, + {0xA028,INSTR_ORR,AL,AL,0,0,NA,0,0,3,SHIFT_ASR,1,NA,1,1,0,0}, + {0xA029,INSTR_ORR,AL,AL,0,1,NA,0,0,MAX_32BIT ,SHIFT_ASR,31,NA,1,MAX_32BIT ,0,0}, + {0xA030,INSTR_CMP,AL,AL,1,0x10000,NA,1,0x10000,0,0,0,NA,0,0,1,HS}, + {0xA031,INSTR_CMP,AL,AL,1,0x00000,NA,1,0x10000,0,0,0,NA,0,0,1,CC}, + {0xA032,INSTR_CMP,AL,AL,1,0x00000,NA,0,0,0x10000,0,0,NA,0,0,1,LT}, + {0xA033,INSTR_CMP,AL,AL,1,0x10000,NA,0,0,0x10000,0,0,NA,0,0,1,EQ}, + {0xA034,INSTR_CMP,AL,AL,1,0x00000,NA,0,0,0x10000,0,0,NA,0,0,1,LS}, + {0xA035,INSTR_CMP,AL,AL,1,0x10000,NA,0,0,0x10000,0,0,NA,0,0,1,LS}, + {0xA036,INSTR_CMP,AL,AL,1,0x10000,NA,0,0,0x00000,0,0,NA,0,0,1,HI}, + {0xA037,INSTR_CMP,AL,AL,1,0x10000,NA,0,0,0x10000,0,0,NA,0,0,1,HS}, + {0xA038,INSTR_CMP,AL,AL,1,0x10000,NA,0,0,0x00000,0,0,NA,0,0,1,HS}, + {0xA039,INSTR_CMP,AL,AL,1,0x10000,NA,0,0,0x00000,0,0,NA,0,0,1,NE}, + {0xA040,INSTR_CMP,AL,AL,1,0,NA,0,0,MAX_32BIT ,SHIFT_LSR,1,NA,0,0,1,LT}, + {0xA041,INSTR_CMP,AL,AL,1,1,NA,0,0,MAX_32BIT ,SHIFT_LSR,31,NA,0,0,1,EQ}, + {0xA042,INSTR_CMP,AL,AL,1,0,NA,0,0,0x10000,SHIFT_LSR,31,NA,0,0,1,LS}, + {0xA043,INSTR_CMP,AL,AL,1,0x10000,NA,0,0,0x30000,SHIFT_LSR,1,NA,0,0,1,LS}, + {0xA044,INSTR_CMP,AL,AL,1,0x10000,NA,0,0,0x00000,SHIFT_LSR,31,NA,0,0,1,HI}, + {0xA045,INSTR_CMP,AL,AL,1,1,NA,0,0,MAX_32BIT ,SHIFT_LSR,31,NA,0,0,1,HS}, + {0xA046,INSTR_CMP,AL,AL,1,0x10000,NA,0,0,0x2000,SHIFT_LSR,1,NA,0,0,1,HS}, + {0xA047,INSTR_CMP,AL,AL,1,0,NA,0,0,MAX_32BIT ,SHIFT_LSR,1,NA,0,0,1,NE}, + {0xA048,INSTR_CMP,AL,AL,1,0,NA,0,0,0x10000,SHIFT_ASR,2,NA,0,0,1,LT}, + {0xA049,INSTR_CMP,AL,AL,1,MAX_32BIT ,NA,0,0,MAX_32BIT ,SHIFT_ASR,1,NA,0,0,1,EQ}, + {0xA050,INSTR_CMP,AL,AL,1,MAX_32BIT ,NA,0,0,MAX_32BIT ,SHIFT_ASR,31,NA,0,0,1,LS}, + {0xA051,INSTR_CMP,AL,AL,1,0,NA,0,0,0x10000,SHIFT_ASR,1,NA,0,0,1,LS}, + {0xA052,INSTR_CMP,AL,AL,1,0x10000,NA,0,0,0x10000,SHIFT_ASR,1,NA,0,0,1,HI}, + {0xA053,INSTR_CMP,AL,AL,1,1,NA,0,0,0x10000,SHIFT_ASR,31,NA,0,0,1,HS}, + {0xA054,INSTR_CMP,AL,AL,1,1,NA,0,0,0x10000,SHIFT_ASR,16,NA,0,0,1,HS}, + {0xA055,INSTR_CMP,AL,AL,1,1,NA,0,0,MAX_32BIT ,SHIFT_ASR,1,NA,0,0,1,NE}, + {0xA056,INSTR_MUL,AL,AL,0,0,0x10000,0,0,0x10000,0,0,NA,1,0,0,0}, + {0xA057,INSTR_MUL,AL,AL,0,0,0x1000,0,0,0x10000,0,0,NA,1,0x10000000,0,0}, + {0xA058,INSTR_MUL,AL,AL,0,0,MAX_32BIT ,0,0,1,0,0,NA,1,MAX_32BIT ,0,0}, + {0xA059,INSTR_MLA,AL,AL,0,0x10000,0x10000,0,0,0x10000,0,0,NA,1,0x10000,0,0}, + {0xA060,INSTR_MLA,AL,AL,0,0x10000,0x1000,0,0,0x10000,0,0,NA,1,0x10010000,0,0}, + {0xA061,INSTR_MLA,AL,AL,1,1,MAX_32BIT 
,0,0,1,0,0,NA,1,0,1,PL}, + {0xA062,INSTR_MLA,AL,AL,1,0,MAX_32BIT ,0,0,1,0,0,NA,1,MAX_32BIT ,1,MI}, + {0xA063,INSTR_SUB,AL,AL,1,1 << 16,NA,1,1 << 16,NA,NA,NA,NA,1,0,1,PL}, + {0xA064,INSTR_SUB,AL,AL,1,(1 << 16) + 1,NA,1,1 << 16,NA,NA,NA,NA,1,1,1,PL}, + {0xA065,INSTR_SUB,AL,AL,1,0,NA,1,1 << 16,NA,NA,NA,NA,1,(uint32_t)(0 - (1<<16)),1,MI}, + {0xA066,INSTR_SUB,MI,MI,0,2,NA,0,NA,1,NA,NA,2,1,1,0,NA}, + {0xA067,INSTR_SUB,EQ,MI,0,2,NA,0,NA,1,NA,NA,2,1,2,0,NA}, + {0xA068,INSTR_SUB,GT,GE,0,2,NA,1,1,NA,NA,NA,2,1,1,0,NA}, + {0xA069,INSTR_SUB,LT,GE,0,2,NA,1,1,NA,NA,NA,2,1,2,0,NA}, + {0xA070,INSTR_SUB,CS,HS,0,2,NA,1,1,NA,NA,NA,2,1,1,0,NA}, + {0xA071,INSTR_SUB,CC,HS,0,2,NA,1,1,NA,NA,NA,2,1,2,0,NA}, + {0xA072,INSTR_SUB,AL,AL,0,1,NA,1,1 << 16,0,0,0,NA,1,(uint32_t)(1 - (1 << 16)),0,NA}, + {0xA073,INSTR_SUB,AL,AL,0,MAX_32BIT,NA,1,1,0,0,0,NA,1,MAX_32BIT - 1,0,NA}, + {0xA074,INSTR_SUB,AL,AL,0,1,NA,1,1,0,0,0,NA,1,0,0,NA}, + {0xA075,INSTR_SUB,AL,AL,0,1,NA,0,NA,1 << 16,0,0,NA,1,(uint32_t)(1 - (1 << 16)),0,NA}, + {0xA076,INSTR_SUB,AL,AL,0,MAX_32BIT,NA,0,NA,1,0,0,NA,1,MAX_32BIT - 1,0,NA}, + {0xA077,INSTR_SUB,AL,AL,0,1,NA,0,NA,1,0,0,NA,1,0,0,NA}, + {0xA078,INSTR_SUB,AL,AL,0,1,NA,0,NA,1,SHIFT_LSL,16,NA,1,(uint32_t)(1 - (1 << 16)),0,NA}, + {0xA079,INSTR_SUB,AL,AL,0,0x80000001,NA,0,NA,MAX_32BIT ,SHIFT_LSL,31,NA,1,1,0,NA}, + {0xA080,INSTR_SUB,AL,AL,0,1,NA,0,NA,3,SHIFT_LSR,1,NA,1,0,0,NA}, + {0xA081,INSTR_SUB,AL,AL,0,1,NA,0,NA,MAX_32BIT ,SHIFT_LSR,31,NA,1,0,0,NA}, + {0xA082,INSTR_RSB,GT,GE,0,2,NA,1,0,NA,NA,NA,2,1,(uint32_t)-2,0,NA}, + {0xA083,INSTR_RSB,LT,GE,0,2,NA,1,0,NA,NA,NA,2,1,2,0,NA}, + {0xA084,INSTR_RSB,AL,AL,0,1,NA,1,1 << 16,NA,NA,NA,NA,1,(1 << 16) - 1,0,NA}, + {0xA085,INSTR_RSB,AL,AL,0,MAX_32BIT,NA,1,1,NA,NA,NA,NA,1,(uint32_t) (1 - MAX_32BIT),0,NA}, + {0xA086,INSTR_RSB,AL,AL,0,1,NA,1,1,NA,NA,NA,NA,1,0,0,NA}, + {0xA087,INSTR_RSB,AL,AL,0,1,NA,0,NA,1 << 16,0,0,NA,1,(1 << 16) - 1,0,NA}, + {0xA088,INSTR_RSB,AL,AL,0,MAX_32BIT,NA,0,NA,1,0,0,NA,1,(uint32_t) (1 - MAX_32BIT),0,NA}, + {0xA089,INSTR_RSB,AL,AL,0,1,NA,0,NA,1,0,0,NA,1,0,0,NA}, + {0xA090,INSTR_RSB,AL,AL,0,1,NA,0,NA,1,SHIFT_LSL,16,NA,1,(1 << 16) - 1,0,NA}, + {0xA091,INSTR_RSB,AL,AL,0,0x80000001,NA,0,NA,MAX_32BIT ,SHIFT_LSL,31,NA,1,(uint32_t)-1,0,NA}, + {0xA092,INSTR_RSB,AL,AL,0,1,NA,0,NA,3,SHIFT_LSR,1,NA,1,0,0,NA}, + {0xA093,INSTR_RSB,AL,AL,0,1,NA,0,NA,MAX_32BIT ,SHIFT_LSR,31,NA,1,0,0,NA}, + {0xA094,INSTR_MOV,AL,AL,0,NA,NA,1,0x80000001,NA,NA,NA,NA,1,0x80000001,0,0}, + {0xA095,INSTR_MOV,AL,AL,0,NA,NA,0,0,0x80000001,0,0,NA,1,0x80000001,0,0}, + {0xA096,INSTR_MOV,AL,AL,0,NA,NA,0,0,MAX_32BIT ,SHIFT_LSL,1,NA,1,MAX_32BIT -1,0,0}, + {0xA097,INSTR_MOV,AL,AL,0,NA,NA,0,0,MAX_32BIT ,SHIFT_LSL,31,NA,1,0x80000000,0,0}, + {0xA098,INSTR_MOV,AL,AL,0,NA,NA,0,0,3,SHIFT_LSR,1,NA,1,1,0,0}, + {0xA099,INSTR_MOV,AL,AL,0,NA,NA,0,0,MAX_32BIT ,SHIFT_LSR,31,NA,1,1,0,0}, + {0xA100,INSTR_MOV,AL,AL,0,NA,NA,0,0,3,SHIFT_ASR,1,NA,1,1,0,0}, + {0xA101,INSTR_MOV,AL,AL,0,NA,NA,0,0,MAX_32BIT ,SHIFT_ASR,31,NA,1,MAX_32BIT ,0,0}, + {0xA102,INSTR_MOV,AL,AL,0,NA,NA,0,0,3,SHIFT_ROR,1,NA,1,0x80000001,0,0}, + {0xA103,INSTR_MOV,AL,AL,0,NA,NA,0,0,0x80000001,SHIFT_ROR,31,NA,1,3,0,0}, + {0xA104,INSTR_MOV,AL,AL,1,NA,NA,0,0,MAX_32BIT -1,SHIFT_ASR,1,NA,1,MAX_32BIT,1,MI}, + {0xA105,INSTR_MOV,AL,AL,1,NA,NA,0,0,3,SHIFT_ASR,1,NA,1,1,1,PL}, + {0xA106,INSTR_MOV,PL,MI,0,NA,NA,1,0x80000001,NA,NA,NA,2,1,2,0,0}, + {0xA107,INSTR_MOV,MI,MI,0,NA,NA,0,0,0x80000001,0,0,2,1,0x80000001,0,0}, + {0xA108,INSTR_MOV,EQ,LT,0,NA,NA,1,0x80000001,NA,NA,NA,2,1,2,0,0}, + {0xA109,INSTR_MOV,LT,LT,0,NA,NA,1,0x80000001,NA,NA,NA,2,1,0x80000001,0,0}, 
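+ // Conditional MOV/MVN rows: preFlag is raised before the instruction runs; whenever it fails cond, Rd must keep its seeded RdValue (2) untouched.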
+ {0xA110,INSTR_MOV,GT,GE,0,NA,NA,0,0,MAX_32BIT ,SHIFT_LSL,1,2,1,MAX_32BIT -1,0,0}, + {0xA111,INSTR_MOV,EQ,GE,0,NA,NA,0,0,MAX_32BIT ,SHIFT_LSL,31,2,1,0x80000000,0,0}, + {0xA112,INSTR_MOV,LT,GE,0,NA,NA,0,0,MAX_32BIT ,SHIFT_LSL,31,2,1,2,0,0}, + {0xA113,INSTR_MOV,GT,LE,0,NA,NA,0,0,MAX_32BIT ,SHIFT_LSL,1,2,1,2,0,0}, + {0xA114,INSTR_MOV,EQ,LE,0,NA,NA,1,0x80000001,NA,NA,NA,2,1,0x80000001,0,0}, + {0xA115,INSTR_MOV,LT,LE,0,NA,NA,0,0,MAX_32BIT ,SHIFT_LSL,31,2,1,0x80000000,0,0}, + {0xA116,INSTR_MOV,EQ,GT,0,NA,NA,1,0x80000001,NA,NA,NA,2,1,2,0,0}, + {0xA117,INSTR_MOV,GT,GT,0,NA,NA,1,0x80000001,NA,NA,NA,2,1,0x80000001,0,0}, + {0xA118,INSTR_MOV,LE,GT,0,NA,NA,1,0x80000001,NA,NA,NA,2,1,2,0,0}, + {0xA119,INSTR_MOV,EQ,GT,0,NA,NA,0,0,0x80000001,0,0,2,1,2,0,0}, + {0xA120,INSTR_MOV,GT,GT,0,NA,NA,0,0,0x80000001,0,0,2,1,0x80000001,0,0}, + {0xA121,INSTR_MOV,LE,GT,0,NA,NA,0,0,0x80000001,0,0,2,1,2,0,0}, + {0xA122,INSTR_MOV,EQ,GT,0,NA,NA,0,0,MAX_32BIT ,SHIFT_LSL,1,2,1,2,0,0}, + {0xA123,INSTR_MOV,GT,GT,0,NA,NA,0,0,MAX_32BIT ,SHIFT_LSL,1,2,1,MAX_32BIT -1,0,0}, + {0xA124,INSTR_MOV,LE,GT,0,NA,NA,0,0,MAX_32BIT ,SHIFT_LSL,1,2,1,2,0,0}, + {0xA125,INSTR_MOV,LO,HS,0,NA,NA,1,0x80000001,NA,NA,NA,2,1,2,0,0}, + {0xA126,INSTR_MOV,HS,HS,0,NA,NA,1,0x80000001,NA,NA,NA,2,1,0x80000001,0,0}, + {0xA127,INSTR_MVN,LO,HS,0,NA,NA,1,MAX_32BIT -1,NA,NA,NA,2,1,2,0,0}, + {0xA128,INSTR_MVN,HS,HS,0,NA,NA,1,MAX_32BIT -1,NA,NA,NA,2,1,1,0,0}, + {0xA129,INSTR_MVN,AL,AL,0,NA,NA,1,0,NA,NA,NA,2,1,MAX_32BIT,0,NA}, + {0xA130,INSTR_MVN,AL,AL,0,NA,NA,0,NA,MAX_32BIT -1,NA,0,2,1,1,0,NA}, + {0xA131,INSTR_MVN,AL,AL,0,NA,NA,0,NA,0x80000001,NA,0,2,1,0x7FFFFFFE,0,NA}, + {0xA132,INSTR_BIC,AL,AL,0,1,NA,1,MAX_32BIT ,NA,NA,NA,NA,1,0,0,0}, + {0xA133,INSTR_BIC,AL,AL,0,1,NA,1,MAX_32BIT -1,NA,NA,NA,NA,1,1,0,0}, + {0xA134,INSTR_BIC,AL,AL,0,1,NA,0,0,MAX_32BIT ,0,0,NA,1,0,0,0}, + {0xA135,INSTR_BIC,AL,AL,0,1,NA,0,0,MAX_32BIT -1,0,0,NA,1,1,0,0}, + {0xA136,INSTR_BIC,AL,AL,0,0xF0,NA,0,0,3,SHIFT_ASR,1,NA,1,0xF0,0,0}, + {0xA137,INSTR_BIC,AL,AL,0,0xF0,NA,0,0,MAX_32BIT ,SHIFT_ASR,31,NA,1,0,0,0}, + {0xA138,INSTR_SMULBB,AL,AL,0,NA,0xABCDFFFF,0,NA,0xABCD0001,NA,NA,NA,1,0xFFFFFFFF,0,0}, + {0xA139,INSTR_SMULBB,AL,AL,0,NA,0xABCD0001,0,NA,0xABCD0FFF,NA,NA,NA,1,0x00000FFF,0,0}, + {0xA140,INSTR_SMULBB,AL,AL,0,NA,0xABCD0001,0,NA,0xABCDFFFF,NA,NA,NA,1,0xFFFFFFFF,0,0}, + {0xA141,INSTR_SMULBB,AL,AL,0,NA,0xABCDFFFF,0,NA,0xABCDFFFF,NA,NA,NA,1,1,0,0}, + {0xA142,INSTR_SMULBT,AL,AL,0,NA,0xFFFFABCD,0,NA,0xABCD0001,NA,NA,NA,1,0xFFFFFFFF,0,0}, + {0xA143,INSTR_SMULBT,AL,AL,0,NA,0x0001ABCD,0,NA,0xABCD0FFF,NA,NA,NA,1,0x00000FFF,0,0}, + {0xA144,INSTR_SMULBT,AL,AL,0,NA,0x0001ABCD,0,NA,0xABCDFFFF,NA,NA,NA,1,0xFFFFFFFF,0,0}, + {0xA145,INSTR_SMULBT,AL,AL,0,NA,0xFFFFABCD,0,NA,0xABCDFFFF,NA,NA,NA,1,1,0,0}, + {0xA146,INSTR_SMULTB,AL,AL,0,NA,0xABCDFFFF,0,NA,0x0001ABCD,NA,NA,NA,1,0xFFFFFFFF,0,0}, + {0xA147,INSTR_SMULTB,AL,AL,0,NA,0xABCD0001,0,NA,0x0FFFABCD,NA,NA,NA,1,0x00000FFF,0,0}, + {0xA148,INSTR_SMULTB,AL,AL,0,NA,0xABCD0001,0,NA,0xFFFFABCD,NA,NA,NA,1,0xFFFFFFFF,0,0}, + {0xA149,INSTR_SMULTB,AL,AL,0,NA,0xABCDFFFF,0,NA,0xFFFFABCD,NA,NA,NA,1,1,0,0}, + {0xA150,INSTR_SMULTT,AL,AL,0,NA,0xFFFFABCD,0,NA,0x0001ABCD,NA,NA,NA,1,0xFFFFFFFF,0,0}, + {0xA151,INSTR_SMULTT,AL,AL,0,NA,0x0001ABCD,0,NA,0x0FFFABCD,NA,NA,NA,1,0x00000FFF,0,0}, + {0xA152,INSTR_SMULTT,AL,AL,0,NA,0x0001ABCD,0,NA,0xFFFFABCD,NA,NA,NA,1,0xFFFFFFFF,0,0}, + {0xA153,INSTR_SMULTT,AL,AL,0,NA,0xFFFFABCD,0,NA,0xFFFFABCD,NA,NA,NA,1,1,0,0}, + {0xA154,INSTR_SMULWB,AL,AL,0,NA,0xABCDFFFF,0,NA,0x0001ABCD,NA,NA,NA,1,0xFFFFFFFE,0,0}, + 
{0xA155,INSTR_SMULWB,AL,AL,0,NA,0xABCD0001,0,NA,0x0FFFABCD,NA,NA,NA,1,0x00000FFF,0,0}, + {0xA156,INSTR_SMULWB,AL,AL,0,NA,0xABCD0001,0,NA,0xFFFFABCD,NA,NA,NA,1,0xFFFFFFFF,0,0}, + {0xA157,INSTR_SMULWB,AL,AL,0,NA,0xABCDFFFF,0,NA,0xFFFFABCD,NA,NA,NA,1,0,0,0}, + {0xA158,INSTR_SMULWT,AL,AL,0,NA,0xFFFFABCD,0,NA,0x0001ABCD,NA,NA,NA,1,0xFFFFFFFE,0,0}, + {0xA159,INSTR_SMULWT,AL,AL,0,NA,0x0001ABCD,0,NA,0x0FFFABCD,NA,NA,NA,1,0x00000FFF,0,0}, + {0xA160,INSTR_SMULWT,AL,AL,0,NA,0x0001ABCD,0,NA,0xFFFFABCD,NA,NA,NA,1,0xFFFFFFFF,0,0}, + {0xA161,INSTR_SMULWT,AL,AL,0,NA,0xFFFFABCD,0,NA,0xFFFFABCD,NA,NA,NA,1,0,0,0}, + {0xA162,INSTR_SMLABB,AL,AL,0,1,0xABCDFFFF,0,NA,0xABCD0001,NA,NA,NA,1,0,0,0}, + {0xA163,INSTR_SMLABB,AL,AL,0,1,0xABCD0001,0,NA,0xABCD0FFF,NA,NA,NA,1,0x00001000,0,0}, + {0xA164,INSTR_SMLABB,AL,AL,0,0xFFFFFFFF,0xABCD0001,0,NA,0xABCDFFFF,NA,NA,NA,1,0xFFFFFFFE,0,0}, + {0xA165,INSTR_SMLABB,AL,AL,0,0xFFFFFFFF,0xABCDFFFF,0,NA,0xABCDFFFF,NA,NA,NA,1,0,0,0}, + {0xA166,INSTR_UXTB16,AL,AL,0,NA,NA,0,NA,0xABCDEF01,SHIFT_ROR,0,NA,1,0x00CD0001,0,0}, + {0xA167,INSTR_UXTB16,AL,AL,0,NA,NA,0,NA,0xABCDEF01,SHIFT_ROR,1,NA,1,0x00AB00EF,0,0}, + {0xA168,INSTR_UXTB16,AL,AL,0,NA,NA,0,NA,0xABCDEF01,SHIFT_ROR,2,NA,1,0x000100CD,0,0}, + {0xA169,INSTR_UXTB16,AL,AL,0,NA,NA,0,NA,0xABCDEF01,SHIFT_ROR,3,NA,1,0x00EF00AB,0,0}, + {0xA170,INSTR_UBFX,AL,AL,0,0xABCDEF01,4,0,NA,24,NA,NA,NA,1,0x00BCDEF0,0,0}, + {0xA171,INSTR_UBFX,AL,AL,0,0xABCDEF01,1,0,NA,2,NA,NA,NA,1,0,0,0}, + {0xA172,INSTR_UBFX,AL,AL,0,0xABCDEF01,16,0,NA,8,NA,NA,NA,1,0xCD,0,0}, + {0xA173,INSTR_UBFX,AL,AL,0,0xABCDEF01,31,0,NA,1,NA,NA,NA,1,1,0,0}, + {0xA174,INSTR_ADDR_ADD,AL,AL,0,0xCFFFFFFFF,NA,0,NA,0x1,SHIFT_LSL,1,NA,1,0xD00000001,0,0}, + {0xA175,INSTR_ADDR_ADD,AL,AL,0,0x01,NA,0,NA,0x1,SHIFT_LSL,2,NA,1,0x5,0,0}, + {0xA176,INSTR_ADDR_ADD,AL,AL,0,0xCFFFFFFFF,NA,0,NA,0x1,NA,0,NA,1,0xD00000000,0,0}, + {0xA177,INSTR_ADDR_SUB,AL,AL,0,0xD00000001,NA,0,NA,0x010000,SHIFT_LSR,15,NA,1,0xCFFFFFFFF,0,0}, + {0xA178,INSTR_ADDR_SUB,AL,AL,0,0xCFFFFFFFF,NA,0,NA,0x020000,SHIFT_LSR,15,NA,1,0xCFFFFFFFB,0,0}, + {0xA179,INSTR_ADDR_SUB,AL,AL,0,3,NA,0,NA,0x010000,SHIFT_LSR,15,NA,1,1,0,0}, +}; + +dataTransferTest_t dataTransferTests [] = +{ + {0xB000,INSTR_LDR,AL,AL,1,24,0xABCDEF0123456789,0,REG_SCALE_OFFSET,24,NA,NA,NA,NA,NA,0x23456789,0,0,NA,NA,NA}, + {0xB001,INSTR_LDR,AL,AL,1,4064,0xABCDEF0123456789,0,IMM12_OFFSET,NA,4068,0,1,0,NA,0xABCDEF01,0,0,NA,NA,NA}, + {0xB002,INSTR_LDR,AL,AL,1,0,0xABCDEF0123456789,0,IMM12_OFFSET,NA,4,1,0,1,NA,0x23456789,4,0,NA,NA,NA}, + {0xB003,INSTR_LDR,AL,AL,1,0,0xABCDEF0123456789,0,NO_OFFSET,NA,NA,0,0,0,NA,0x23456789,0,0,NA,NA,NA}, + {0xB004,INSTR_LDRB,AL,AL,1,4064,0xABCDEF0123456789,0,REG_SCALE_OFFSET,4064,NA,NA,NA,NA,NA,0x89,0,0,NA,NA,NA}, + {0xB005,INSTR_LDRB,AL,AL,1,4064,0xABCDEF0123456789,0,IMM12_OFFSET,NA,4065,0,1,0,NA,0x67,0,0,NA,NA,NA}, + {0xB006,INSTR_LDRB,AL,AL,1,4064,0xABCDEF0123456789,4065,IMM12_OFFSET,NA,0,0,1,0,NA,0x67,4065,0,NA,NA,NA}, + {0xB007,INSTR_LDRB,AL,AL,1,4064,0xABCDEF0123456789,4065,IMM12_OFFSET,NA,1,0,1,0,NA,0x45,4065,0,NA,NA,NA}, + {0xB008,INSTR_LDRB,AL,AL,1,4064,0xABCDEF0123456789,4065,IMM12_OFFSET,NA,2,0,1,0,NA,0x23,4065,0,NA,NA,NA}, + {0xB009,INSTR_LDRB,AL,AL,1,4064,0xABCDEF0123456789,4065,IMM12_OFFSET,NA,1,1,0,1,NA,0x67,4066,0,NA,NA,NA}, + {0xB010,INSTR_LDRB,AL,AL,1,4064,0xABCDEF0123456789,0,NO_OFFSET,NA,NA,0,0,0,NA,0x89,0,0,NA,NA,NA}, + {0xB011,INSTR_LDRH,AL,AL,1,0,0xABCDEF0123456789,0,IMM8_OFFSET,NA,2,1,0,1,NA,0x6789,2,0,NA,NA,NA}, + {0xB012,INSTR_LDRH,AL,AL,1,4064,0xABCDEF0123456789,0,REG_OFFSET,4064,0,0,1,0,NA,0x6789,0,0,NA,NA,NA}, + 
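// Entry fields follow the dataTransferTest_t declaration order: {id, op, preFlag, cond, setMem, memOffset, memValue, RnValue, offsetType, RmValue, immValue, writeBack, preIndex, postIndex, RdValue, postRdValue, postRnValue, checkMem, postMemOffset, postMemLength, postMemValue} +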
{0xB013,INSTR_LDRH,AL,AL,1,4064,0xABCDEF0123456789,0,REG_OFFSET,4066,0,0,1,0,NA,0x2345,0,0,NA,NA,NA}, + {0xB014,INSTR_LDRH,AL,AL,1,0,0xABCDEF0123456789,0,NO_OFFSET,NA,0,0,0,0,NA,0x6789,0,0,NA,NA,NA}, + {0xB015,INSTR_LDRH,AL,AL,1,0,0xABCDEF0123456789,2,NO_OFFSET,NA,0,0,0,0,NA,0x2345,2,0,NA,NA,NA}, + {0xB016,INSTR_ADDR_LDR,AL,AL,1,4064,0xABCDEF0123456789,0,IMM12_OFFSET,NA,4064,0,1,0,NA,0xABCDEF0123456789,0,0,NA,NA,NA}, + {0xB017,INSTR_STR,AL,AL,1,2,0xDEADBEEFDEADBEEF,4,IMM12_OFFSET,NA,4,1,0,1,0xABCDEF0123456789,0xABCDEF0123456789,8,1,2,8,0xDEAD23456789BEEF}, + {0xB018,INSTR_STR,AL,AL,1,2,0xDEADBEEFDEADBEEF,4,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4,1,2,8,0xDEAD23456789BEEF}, + {0xB019,INSTR_STR,AL,AL,1,4066,0xDEADBEEFDEADBEEF,4,IMM12_OFFSET,NA,4064,0,1,0,0xABCDEF0123456789,0xABCDEF0123456789,4,1,4066,8,0xDEAD23456789BEEF}, + {0xB020,INSTR_STRB,AL,AL,1,0,0xDEADBEEFDEADBEEF,1,IMM12_OFFSET,NA,0,0,1,0,0xABCDEF0123456789,0xABCDEF0123456789,1,1,0,8,0xDEADBEEFDEAD89EF}, + {0xB021,INSTR_STRB,AL,AL,1,0,0xDEADBEEFDEADBEEF,1,IMM12_OFFSET,NA,1,0,1,0,0xABCDEF0123456789,0xABCDEF0123456789,1,1,0,8,0xDEADBEEFDE89BEEF}, + {0xB022,INSTR_STRB,AL,AL,1,0,0xDEADBEEFDEADBEEF,1,IMM12_OFFSET,NA,2,0,1,0,0xABCDEF0123456789,0xABCDEF0123456789,1,1,0,8,0xDEADBEEF89ADBEEF}, + {0xB023,INSTR_STRB,AL,AL,1,0,0xDEADBEEFDEADBEEF,1,IMM12_OFFSET,NA,4,1,0,1,0xABCDEF0123456789,0xABCDEF0123456789,5,1,0,8,0xDEADBEEFDEAD89EF}, + {0xB024,INSTR_STRB,AL,AL,1,0,0xDEADBEEFDEADBEEF,1,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,1,1,0,8,0xDEADBEEFDEAD89EF}, + {0xB025,INSTR_STRH,AL,AL,1,4066,0xDEADBEEFDEADBEEF,4070,IMM12_OFFSET,NA,2,1,0,1,0xABCDEF0123456789,0xABCDEF0123456789,4072,1,4066,8,0xDEAD6789DEADBEEF}, + {0xB026,INSTR_STRH,AL,AL,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEAD6789DEADBEEF}, + {0xB027,INSTR_STRH,EQ,NE,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEADBEEFDEADBEEF}, + {0xB028,INSTR_STRH,NE,NE,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEAD6789DEADBEEF}, + {0xB029,INSTR_STRH,NE,EQ,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEADBEEFDEADBEEF}, + {0xB030,INSTR_STRH,EQ,EQ,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEAD6789DEADBEEF}, + {0xB031,INSTR_STRH,HI,LS,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEADBEEFDEADBEEF}, + {0xB032,INSTR_STRH,LS,LS,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEAD6789DEADBEEF}, + {0xB033,INSTR_STRH,LS,HI,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEADBEEFDEADBEEF}, + {0xB034,INSTR_STRH,HI,HI,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEAD6789DEADBEEF}, + {0xB035,INSTR_STRH,CC,HS,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEADBEEFDEADBEEF}, + {0xB036,INSTR_STRH,CS,HS,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEAD6789DEADBEEF}, + 
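// The condition-code STRH pairs above and below probe predicated stores: when preFlag fails cond the store is suppressed and memory keeps 0xDEADBEEFDEADBEEF; when it matches, 0x6789 lands at offset 4070. +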
{0xB037,INSTR_STRH,GE,LT,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEADBEEFDEADBEEF}, + {0xB038,INSTR_STRH,LT,LT,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEAD6789DEADBEEF}, + {0xB039,INSTR_ADDR_STR,AL,AL,1,4064,0xDEADBEEFDEADBEEF,4,IMM12_OFFSET,NA,4060,0,1,0,0xABCDEF0123456789,0xABCDEF0123456789,4,1,4064,8,0xABCDEF0123456789}, +}; + + +int flushcache() +{ + const long base = long(instrMem); + const long curr = base + long(instrMemSize); + return cacheflush(base, curr, 0); +} +void dataOpTest(dataOpTest_t test, ARMAssemblerInterface *a64asm, uint32_t Rd = 0, + uint32_t Rn = 1, uint32_t Rm = 2, uint32_t Rs = 3) +{ + int64_t regs[NUM_REGS] = {0}; + int32_t flags[NUM_FLAGS] = {0}; + int64_t savedRegs[NUM_REGS] = {0}; + uint32_t i; + uint32_t op2; + + for(i = 0; i < NUM_REGS; ++i) + { + regs[i] = i; + } + + regs[Rd] = test.RdValue; + regs[Rn] = test.RnValue; + regs[Rs] = test.RsValue; + flags[test.preFlag] = 1; + a64asm->reset(); + a64asm->prolog(); + if(test.immediate == true) + { + op2 = a64asm->imm(test.immValue); + } + else if(test.immediate == false && test.shiftAmount == 0) + { + op2 = Rm; + regs[Rm] = test.RmValue; + } + else + { + op2 = a64asm->reg_imm(Rm, test.shiftMode, test.shiftAmount); + regs[Rm] = test.RmValue; + } + switch(test.op) + { + case INSTR_ADD: a64asm->ADD(test.cond, test.setFlags, Rd,Rn,op2); break; + case INSTR_SUB: a64asm->SUB(test.cond, test.setFlags, Rd,Rn,op2); break; + case INSTR_RSB: a64asm->RSB(test.cond, test.setFlags, Rd,Rn,op2); break; + case INSTR_AND: a64asm->AND(test.cond, test.setFlags, Rd,Rn,op2); break; + case INSTR_ORR: a64asm->ORR(test.cond, test.setFlags, Rd,Rn,op2); break; + case INSTR_BIC: a64asm->BIC(test.cond, test.setFlags, Rd,Rn,op2); break; + case INSTR_MUL: a64asm->MUL(test.cond, test.setFlags, Rd,Rm,Rs); break; + case INSTR_MLA: a64asm->MLA(test.cond, test.setFlags, Rd,Rm,Rs,Rn); break; + case INSTR_CMP: a64asm->CMP(test.cond, Rn,op2); break; + case INSTR_MOV: a64asm->MOV(test.cond, test.setFlags,Rd,op2); break; + case INSTR_MVN: a64asm->MVN(test.cond, test.setFlags,Rd,op2); break; + case INSTR_SMULBB:a64asm->SMULBB(test.cond, Rd,Rm,Rs); break; + case INSTR_SMULBT:a64asm->SMULBT(test.cond, Rd,Rm,Rs); break; + case INSTR_SMULTB:a64asm->SMULTB(test.cond, Rd,Rm,Rs); break; + case INSTR_SMULTT:a64asm->SMULTT(test.cond, Rd,Rm,Rs); break; + case INSTR_SMULWB:a64asm->SMULWB(test.cond, Rd,Rm,Rs); break; + case INSTR_SMULWT:a64asm->SMULWT(test.cond, Rd,Rm,Rs); break; + case INSTR_SMLABB:a64asm->SMLABB(test.cond, Rd,Rm,Rs,Rn); break; + case INSTR_UXTB16:a64asm->UXTB16(test.cond, Rd,Rm,test.shiftAmount); break; + case INSTR_UBFX: + { + int32_t lsb = test.RsValue; + int32_t width = test.RmValue; + a64asm->UBFX(test.cond, Rd,Rn,lsb, width); + break; + } + case INSTR_ADDR_ADD: a64asm->ADDR_ADD(test.cond, test.setFlags, Rd,Rn,op2); break; + case INSTR_ADDR_SUB: a64asm->ADDR_SUB(test.cond, test.setFlags, Rd,Rn,op2); break; + default: printf("Error"); return; + } + a64asm->epilog(0); + flushcache(); + + asm_function_t asm_function = (asm_function_t)(instrMem); + + for(i = 0; i < NUM_REGS; ++i) + savedRegs[i] = regs[i]; + + asm_test_jacket(asm_function, regs, flags); + + /* Check if all regs except Rd is same */ + for(i = 0; i < NUM_REGS; ++i) + { + if(i == Rd) continue; + if(regs[i] != savedRegs[i]) + { + printf("Test %x failed Reg(%d) tampered Expected(0x%"PRIx64")," + "Actual(0x%"PRIx64") t\n", test.id, i, savedRegs[i], 
regs[i]); + return; + } + } + + if(test.checkRd == 1 && (uint64_t)regs[Rd] != test.postRdValue) + { + printf("Test %x failed, Expected(%"PRIx64"), Actual(%"PRIx64")\n", + test.id, test.postRdValue, regs[Rd]); + } + else if(test.checkFlag == 1 && flags[test.postFlag] == 0) + { + printf("Test %x failed Flag(%s) NOT set\n", + test.id,cc_code[test.postFlag]); + } + else + { + printf("Test %x passed\n", test.id); + } +} + + +void dataTransferTest(dataTransferTest_t test, ARMAssemblerInterface *a64asm, + uint32_t Rd = 0, uint32_t Rn = 1,uint32_t Rm = 2) +{ + int64_t regs[NUM_REGS] = {0}; + int64_t savedRegs[NUM_REGS] = {0}; + int32_t flags[NUM_FLAGS] = {0}; + uint32_t i; + for(i = 0; i < NUM_REGS; ++i) + { + regs[i] = i; + } + + uint32_t op2; + + regs[Rd] = test.RdValue; + regs[Rn] = (uint64_t)(&dataMem[test.RnValue]); + regs[Rm] = test.RmValue; + flags[test.preFlag] = 1; + + if(test.setMem == true) + { + unsigned char *mem = (unsigned char *)&dataMem[test.memOffset]; + uint64_t value = test.memValue; + for(int j = 0; j < 8; ++j) + { + mem[j] = value & 0x00FF; + value >>= 8; + } + } + a64asm->reset(); + a64asm->prolog(); + if(test.offsetType == REG_SCALE_OFFSET) + { + op2 = a64asm->reg_scale_pre(Rm); + } + else if(test.offsetType == REG_OFFSET) + { + op2 = a64asm->reg_pre(Rm); + } + else if(test.offsetType == IMM12_OFFSET && test.preIndex == true) + { + op2 = a64asm->immed12_pre(test.immValue, test.writeBack); + } + else if(test.offsetType == IMM12_OFFSET && test.postIndex == true) + { + op2 = a64asm->immed12_post(test.immValue); + } + else if(test.offsetType == IMM8_OFFSET && test.preIndex == true) + { + op2 = a64asm->immed8_pre(test.immValue, test.writeBack); + } + else if(test.offsetType == IMM8_OFFSET && test.postIndex == true) + { + op2 = a64asm->immed8_post(test.immValue); + } + else if(test.offsetType == NO_OFFSET) + { + op2 = a64asm->__immed12_pre(0); + } + else + { + printf("Error - Unknown offset\n"); return; + } + + switch(test.op) + { + case INSTR_LDR: a64asm->LDR(test.cond, Rd,Rn,op2); break; + case INSTR_LDRB: a64asm->LDRB(test.cond, Rd,Rn,op2); break; + case INSTR_LDRH: a64asm->LDRH(test.cond, Rd,Rn,op2); break; + case INSTR_ADDR_LDR: a64asm->ADDR_LDR(test.cond, Rd,Rn,op2); break; + case INSTR_STR: a64asm->STR(test.cond, Rd,Rn,op2); break; + case INSTR_STRB: a64asm->STRB(test.cond, Rd,Rn,op2); break; + case INSTR_STRH: a64asm->STRH(test.cond, Rd,Rn,op2); break; + case INSTR_ADDR_STR: a64asm->ADDR_STR(test.cond, Rd,Rn,op2); break; + default: printf("Error"); return; + } + a64asm->epilog(0); + flushcache(); + + asm_function_t asm_function = (asm_function_t)(instrMem); + + for(i = 0; i < NUM_REGS; ++i) + savedRegs[i] = regs[i]; + + + asm_test_jacket(asm_function, regs, flags); + + /* Check if all regs except Rd/Rn are same */ + for(i = 0; i < NUM_REGS; ++i) + { + if(i == Rd || i == Rn) continue; + if(regs[i] != savedRegs[i]) + { + printf("Test %x failed Reg(%d) tampered" + " Expected(0x%"PRIx64"), Actual(0x%"PRIx64") t\n", + test.id, i, savedRegs[i], regs[i]); + return; + } + } + + if((uint64_t)regs[Rd] != test.postRdValue) + { + printf("Test %x failed, " + "Expected in Rd(0x%"PRIx64"), Actual(0x%"PRIx64")\n", + test.id, test.postRdValue, regs[Rd]); + } + else if((uint64_t)regs[Rn] != (uint64_t)(&dataMem[test.postRnValue])) + { + printf("Test %x failed, " + "Expected in Rn(0x%"PRIx64"), Actual(0x%"PRIx64")\n", + test.id, test.postRnValue, regs[Rn] - (uint64_t)dataMem); + } + else if(test.checkMem == true) + { + unsigned char *addr = (unsigned char *)&dataMem[test.postMemOffset]; + 
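// Rebuild the stored bytes (setMem wrote them little-endian) into one value for comparison against postMemValue. +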
uint64_t value; + value = 0; + for(uint32_t j = 0; j < test.postMemLength; ++j) + value = (value << 8) | addr[test.postMemLength-j-1]; + if(value != test.postMemValue) + { + printf("Test %x failed, " + "Expected in Mem(0x%"PRIx64"), Actual(0x%"PRIx64")\n", + test.id, test.postMemValue, value); + } + else + { + printf("Test %x passed\n", test.id); + } + } + else + { + printf("Test %x passed\n", test.id); + } +} + +void dataTransferLDMSTM(ARMAssemblerInterface *a64asm) +{ + int64_t regs[NUM_REGS] = {0}; + int32_t flags[NUM_FLAGS] = {0}; + const uint32_t numArmv7Regs = 16; + + uint32_t Rn = ARMAssemblerInterface::SP; + + uint32_t patterns[] = + { + 0x5A03, + 0x4CF0, + 0x1EA6, + 0x0DBF, + }; + + uint32_t i, j; + for(i = 0; i < sizeof(patterns)/sizeof(uint32_t); ++i) + { + for(j = 0; j < NUM_REGS; ++j) + { + regs[j] = j; + } + a64asm->reset(); + a64asm->prolog(); + a64asm->STM(AL,ARMAssemblerInterface::DB,Rn,1,patterns[i]); + for(j = 0; j < numArmv7Regs; ++j) + { + uint32_t op2 = a64asm->imm(0x31); + a64asm->MOV(AL, 0,j,op2); + } + a64asm->LDM(AL,ARMAssemblerInterface::IA,Rn,1,patterns[i]); + a64asm->epilog(0); + flushcache(); + + asm_function_t asm_function = (asm_function_t)(instrMem); + asm_test_jacket(asm_function, regs, flags); + + for(j = 0; j < numArmv7Regs; ++j) + { + if((1 << j) & patterns[i]) + { + if(regs[j] != j) + { + printf("LDM/STM Test %x failed " + "Reg%d expected(0x%x) Actual(0x%"PRIx64") \n", + patterns[i],j,j,regs[j]); + break; + } + } + } + if(j == numArmv7Regs) + printf("LDM/STM Test %x passed\n", patterns[i]); + } +} + +int main(void) +{ + uint32_t i; + + /* Allocate memory to store instructions generated by ArmToAarch64Assembler */ + { + int fd = ashmem_create_region("code cache", instrMemSize); + if(fd < 0) + printf("Creating code cache, ashmem_create_region " + "failed with error '%s'", strerror(errno)); + instrMem = mmap(NULL, instrMemSize, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE, fd, 0); + } + + ArmToAarch64Assembler a64asm(instrMem); + + if(TESTS_DATAOP_ENABLE) + { + printf("Running data processing tests\n"); + for(i = 0; i < sizeof(dataOpTests)/sizeof(dataOpTest_t); ++i) + dataOpTest(dataOpTests[i], &a64asm); + } + + if(TESTS_DATATRANSFER_ENABLE) + { + printf("Running data transfer tests\n"); + for(i = 0; i < sizeof(dataTransferTests)/sizeof(dataTransferTest_t); ++i) + dataTransferTest(dataTransferTests[i], &a64asm); + } + + if(TESTS_LDMSTM_ENABLE) + { + printf("Running LDM/STM tests\n"); + dataTransferLDMSTM(&a64asm); + } + + + if(TESTS_REG_CORRUPTION_ENABLE) + { + uint32_t reg_list[] = {0,1,12,14}; + uint32_t Rd, Rm, Rs, Rn; + uint32_t i; + uint32_t numRegs = sizeof(reg_list)/sizeof(uint32_t); + + printf("Running Register corruption tests\n"); + for(i = 0; i < sizeof(dataOpTests)/sizeof(dataOpTest_t); ++i) + { + for(Rd = 0; Rd < numRegs; ++Rd) + { + for(Rn = 0; Rn < numRegs; ++Rn) + { + for(Rm = 0; Rm < numRegs; ++Rm) + { + for(Rs = 0; Rs < numRegs;++Rs) + { + if(Rd == Rn || Rd == Rm || Rd == Rs) continue; + if(Rn == Rm || Rn == Rs) continue; + if(Rm == Rs) continue; + printf("Testing combination Rd(%d), Rn(%d)," + " Rm(%d), Rs(%d): ", + reg_list[Rd], reg_list[Rn], reg_list[Rm], reg_list[Rs]); + dataOpTest(dataOpTests[i], &a64asm, reg_list[Rd], + reg_list[Rn], reg_list[Rm], reg_list[Rs]); + } + } + } + } + } + } + return 0; +} diff --git a/libpixelflinger/tests/arch-aarch64/assembler/asm_test_jacket.S b/libpixelflinger/tests/arch-aarch64/assembler/asm_test_jacket.S new file mode 100644 index 0000000..a1392c2 --- /dev/null +++ 
b/libpixelflinger/tests/arch-aarch64/assembler/asm_test_jacket.S @@ -0,0 +1,221 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + + .text + .align + + .global asm_test_jacket + + // Sets the register and flag values + // Calls the asm function + // Reads the register/flag values back to the output arrays + + // Parameters + // X0 - Function to jump to + // X1 - register values array + // X2 - flag values array +asm_test_jacket: + // Save registers to stack + stp x29, x30, [sp,#-16]! + stp x27, x28, [sp,#-16]!
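+ // Park the incoming arguments: the call target goes to x30 for the blr below, while the register/flag array pointers sit in callee-saved x28/x27, which the generated code is expected to preserve.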
+ + mov x30, x0 + mov x28, x1 + mov x27, x2 + + //Set the flags based on flag array + //EQ + ldr w0, [x27,#0] + cmp w0, #1 + b.ne bt_aeq + cmp w0,#1 + b bt_end +bt_aeq: + + //NE + ldr w0, [x27,#4] + cmp w0, #1 + b.ne bt_ane + cmp w0,#2 + b bt_end +bt_ane: + + //CS + ldr w0, [x27,#8] + cmp w0, #1 + b.ne bt_acs + cmp w0,#0 + b bt_end +bt_acs: + + //CC + ldr w0, [x27,#12] + cmp w0, #1 + b.ne bt_acc + cmp w0,#2 + b bt_end +bt_acc: + + //MI + ldr w0, [x27,#16] + cmp w0, #1 + b.ne bt_ami + subs w0,w0,#2 + b bt_end +bt_ami: + + //PL + ldr w0, [x27,#20] + cmp w0, #1 + b.ne bt_apl + subs w0,w0,#0 + b bt_end +bt_apl: + //HI - (C==1) && (Z==0) + ldr w0, [x27,#32] + cmp w0, #1 + b.ne bt_ahi + cmp w0,#0 + b bt_end +bt_ahi: + + //LS - (C==0) || (Z==1) + ldr w0, [x27,#36] + cmp w0, #1 + b.ne bt_als + cmp w0,#1 + b bt_end +bt_als: + + //GE + ldr w0, [x27,#40] + cmp w0, #1 + b.ne bt_age + cmp w0,#0 + b bt_end +bt_age: + + //LT + ldr w0, [x27,#44] + cmp w0, #1 + b.ne bt_alt + cmp w0,#2 + b bt_end +bt_alt: + + //GT + ldr w0, [x27,#48] + cmp w0, #1 + b.ne bt_agt + cmp w0,#0 + b bt_end +bt_agt: + + //LE + ldr w0, [x27,#52] + cmp w0, #1 + b.ne bt_ale + cmp w0,#2 + b bt_end +bt_ale: + + +bt_end: + + // Load the registers from reg array + ldr x0, [x28,#0] + ldr x1, [x28,#8] + ldr x2, [x28,#16] + ldr x3, [x28,#24] + ldr x4, [x28,#32] + ldr x5, [x28,#40] + ldr x6, [x28,#48] + ldr x7, [x28,#56] + ldr x8, [x28,#64] + ldr x9, [x28,#72] + ldr x10, [x28,#80] + ldr x11, [x28,#88] + ldr x12, [x28,#96] + ldr x14, [x28,#112] + + // Call the function + blr X30 + + // Save the registers to reg array + str x0, [x28,#0] + str x1, [x28,#8] + str x2, [x28,#16] + str x3, [x28,#24] + str x4, [x28,#32] + str x5, [x28,#40] + str x6, [x28,#48] + str x7, [x28,#56] + str x8, [x28,#64] + str x9, [x28,#72] + str x10, [x28,#80] + str x11, [x28,#88] + str x12, [x28,#96] + str x14, [x28,#112] + + //Set the flags array based on result flags + movz w0, #0 + movz w1, #1 + csel w2, w1, w0, EQ + str w2, [x27,#0] + csel w2, w1, w0, NE + str w2, [x27,#4] + csel w2, w1, w0, CS + str w2, [x27,#8] + csel w2, w1, w0, CC + str w2, [x27,#12] + csel w2, w1, w0, MI + str w2, [x27,#16] + csel w2, w1, w0, PL + str w2, [x27,#20] + csel w2, w1, w0, VS + str w2, [x27,#24] + csel w2, w1, w0, VC + str w2, [x27,#28] + csel w2, w1, w0, HI + str w2, [x27,#32] + csel w2, w1, w0, LS + str w2, [x27,#36] + csel w2, w1, w0, GE + str w2, [x27,#40] + csel w2, w1, w0, LT + str w2, [x27,#44] + csel w2, w1, w0, GT + str w2, [x27,#48] + csel w2, w1, w0, LE + str w2, [x27,#52] + + // Restore registers from stack + ldp x27, x28, [sp],#16 + ldp x29, x30, [sp],#16 + ret + diff --git a/libpixelflinger/tests/arch-aarch64/col32cb16blend/Android.mk b/libpixelflinger/tests/arch-aarch64/col32cb16blend/Android.mk new file mode 100644 index 0000000..7445fc8 --- /dev/null +++ b/libpixelflinger/tests/arch-aarch64/col32cb16blend/Android.mk @@ -0,0 +1,16 @@ +LOCAL_PATH:= $(call my-dir) +include $(CLEAR_VARS) + +LOCAL_SRC_FILES:= \ + col32cb16blend_test.c \ + ../../../arch-aarch64/col32cb16blend.S + +LOCAL_SHARED_LIBRARIES := + +LOCAL_C_INCLUDES := + +LOCAL_MODULE:= test-pixelflinger-aarch64-col32cb16blend + +LOCAL_MODULE_TAGS := tests + +include $(BUILD_EXECUTABLE) diff --git a/libpixelflinger/tests/arch-aarch64/col32cb16blend/col32cb16blend_test.c b/libpixelflinger/tests/arch-aarch64/col32cb16blend/col32cb16blend_test.c new file mode 100644 index 0000000..f057884 --- /dev/null +++ b/libpixelflinger/tests/arch-aarch64/col32cb16blend/col32cb16blend_test.c @@ -0,0 +1,125 @@ +/* + * Copyright 
diff --git a/libpixelflinger/tests/arch-aarch64/col32cb16blend/Android.mk b/libpixelflinger/tests/arch-aarch64/col32cb16blend/Android.mk
new file mode 100644
index 0000000..7445fc8
--- /dev/null
+++ b/libpixelflinger/tests/arch-aarch64/col32cb16blend/Android.mk
@@ -0,0 +1,16 @@
+LOCAL_PATH:= $(call my-dir)
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES:= \
+    col32cb16blend_test.c \
+    ../../../arch-aarch64/col32cb16blend.S
+
+LOCAL_SHARED_LIBRARIES :=
+
+LOCAL_C_INCLUDES :=
+
+LOCAL_MODULE:= test-pixelflinger-aarch64-col32cb16blend
+
+LOCAL_MODULE_TAGS := tests
+
+include $(BUILD_EXECUTABLE)
diff --git a/libpixelflinger/tests/arch-aarch64/col32cb16blend/col32cb16blend_test.c b/libpixelflinger/tests/arch-aarch64/col32cb16blend/col32cb16blend_test.c
new file mode 100644
index 0000000..f057884
--- /dev/null
+++ b/libpixelflinger/tests/arch-aarch64/col32cb16blend/col32cb16blend_test.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+
+#define ARGB_8888_MAX 0xFFFFFFFF
+#define ARGB_8888_MIN 0x00000000
+#define RGB_565_MAX 0xFFFF
+#define RGB_565_MIN 0x0000
+
+// src_color is a 32-bit ARGB value; dst_color is a 16-bit RGB565 value,
+// matching the initializer order below and the t32cb16blend test layout.
+struct test_t
+{
+    char name[256];
+    uint32_t src_color;
+    uint16_t dst_color;
+    size_t count;
+};
+
+struct test_t tests[] =
+{
+    {"Count 1, Src=Max, Dst=Min", ARGB_8888_MAX, RGB_565_MIN, 1},
+    {"Count 2, Src=Min, Dst=Max", ARGB_8888_MIN, RGB_565_MAX, 2},
+    {"Count 3, Src=Max, Dst=Max", ARGB_8888_MAX, RGB_565_MAX, 3},
+    {"Count 4, Src=Min, Dst=Min", ARGB_8888_MIN, RGB_565_MIN, 4},
+    {"Count 1, Src=Rand, Dst=Rand", 0x12345678, 0x9ABC, 1},
+    {"Count 2, Src=Rand, Dst=Rand", 0xABCDEF12, 0x2345, 2},
+    {"Count 3, Src=Rand, Dst=Rand", 0x11111111, 0xEDFE, 3},
+    {"Count 4, Src=Rand, Dst=Rand", 0x12345678, 0x9ABC, 4},
+    {"Count 5, Src=Rand, Dst=Rand", 0xEFEFFEFE, 0xFACC, 5},
+    {"Count 10, Src=Rand, Dst=Rand", 0x12345678, 0x9ABC, 10}
+};
+
+void scanline_col32cb16blend_aarch64(uint16_t *dst, uint32_t src, size_t count);
+void scanline_col32cb16blend_c(uint16_t * dst, uint32_t src, size_t count)
+{
+    int srcAlpha = (src>>24);
+    int f = 0x100 - (srcAlpha + (srcAlpha>>7));
+    while (count--)
+    {
+        uint16_t d = *dst;
+        int dstR = (d>>11)&0x1f;
+        int dstG = (d>>5)&0x3f;
+        int dstB = (d)&0x1f;
+        int srcR = (src >> (   3))&0x1F;
+        int srcG = (src >> ( 8+2))&0x3F;
+        int srcB = (src >> (16+3))&0x1F;
+        srcR += (f*dstR)>>8;
+        srcG += (f*dstG)>>8;
+        srcB += (f*dstB)>>8;
+        *dst++ = (uint16_t)((srcR<<11)|(srcG<<5)|srcB);
+    }
+}
+
+void scanline_col32cb16blend_test()
+{
+    uint16_t dst_c[16], dst_asm[16];
+    uint32_t i, j;
+
+    for(i = 0; i < sizeof(tests)/sizeof(struct test_t); ++i)
+    {
+        struct test_t test = tests[i];
+
+        printf("Testing - %s:",test.name);
+
+        memset(dst_c, 0, sizeof(dst_c));
+        memset(dst_asm, 0, sizeof(dst_asm));
+
+        for(j = 0; j < test.count; ++j)
+        {
+            dst_c[j]   = test.dst_color;
+            dst_asm[j] = test.dst_color;
+        }
+
+
+        scanline_col32cb16blend_c(dst_c, test.src_color, test.count);
+        scanline_col32cb16blend_aarch64(dst_asm, test.src_color, test.count);
+
+
+        if(memcmp(dst_c, dst_asm, sizeof(dst_c)) == 0)
+            printf("Passed\n");
+        else
+            printf("Failed\n");
+
+        for(j = 0; j < test.count; ++j)
+        {
+            printf("dst_c[%u] = %x, dst_asm[%u] = %x \n", j, dst_c[j], j, dst_asm[j]);
+        }
+    }
+}
+
+int main()
+{
+    scanline_col32cb16blend_test();
+    return 0;
+}
diff --git a/libpixelflinger/tests/arch-aarch64/disassembler/Android.mk b/libpixelflinger/tests/arch-aarch64/disassembler/Android.mk
new file mode 100644
index 0000000..376c3b7
--- /dev/null
+++ b/libpixelflinger/tests/arch-aarch64/disassembler/Android.mk
@@ -0,0 +1,17 @@
+LOCAL_PATH:= $(call my-dir)
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES:= \
+    aarch64_disassembler_test.cpp \
+    ../../../codeflinger/Aarch64Disassembler.cpp
+
+LOCAL_SHARED_LIBRARIES :=
+
+LOCAL_C_INCLUDES := \
+    system/core/libpixelflinger/codeflinger
+
+LOCAL_MODULE:= test-pixelflinger-aarch64-disassembler-test
+
+LOCAL_MODULE_TAGS := tests
+
+include $(BUILD_EXECUTABLE)
diff --git a/libpixelflinger/tests/arch-aarch64/disassembler/aarch64_disassembler_test.cpp b/libpixelflinger/tests/arch-aarch64/disassembler/aarch64_disassembler_test.cpp
new file mode 100644
index 0000000..17caee1
--- /dev/null
+++ b/libpixelflinger/tests/arch-aarch64/disassembler/aarch64_disassembler_test.cpp
@@ -0,0 +1,321 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <stdio.h>
+#include <inttypes.h>
+#include <string.h>
+
+int aarch64_disassemble(uint32_t code, char* instr);
+
+struct test_table_entry_t
+{
+    uint32_t code;
+    const char *instr;
+};
+static test_table_entry_t test_table [] =
+{
+    { 0x91000240, "add x0, x18, #0x0, lsl #0" },
+    { 0x9140041f, "add sp, x0, #0x1, lsl #12" },
+    { 0x917ffff2, "add x18, sp, #0xfff, lsl #12" },
+
+    { 0xd13ffe40, "sub x0, x18, #0xfff, lsl #0" },
+    { 0xd140001f, "sub sp, x0, #0x0, lsl #12" },
+    { 0xd14007f2, "sub x18, sp, #0x1, lsl #12" },
+
+    { 0x8b1e0200, "add x0, x16, x30, lsl #0" },
+    { 0x8b507fdf, "add xzr, x30, x16, lsr #31" },
+    { 0x8b8043f0, "add x16, xzr, x0, asr #16" },
+    { 0x8b5f401e, "add x30, x0, xzr, lsr #16" },
+
+
+    { 0x4b1e0200, "sub w0, w16, w30, lsl #0" },
+    { 0x4b507fdf, "sub wzr, w30, w16, lsr #31" },
+    { 0x4b8043f0, "sub w16, wzr, w0, asr #16" },
+    { 0x4b5f401e, "sub w30, w0, wzr, lsr #16" },
+
+    { 0x6b1e0200, "subs w0, w16, w30, lsl #0" },
+    { 0x6b507fdf, "subs wzr, w30, w16, lsr #31" },
+    { 0x6b8043f0, "subs w16, wzr, w0, asr #16" },
+    { 0x6b5f401e, "subs w30, w0, wzr, lsr #16" },
+
+    { 0x0a1e0200, "and w0, w16, w30, lsl #0" },
+    { 0x0a507fdf, "and wzr, w30, w16, lsr #31" },
+    { 0x0a8043f0, "and w16, wzr, w0, asr #16" },
+    { 0x0adf401e, "and w30, w0, wzr, ror #16" },
+
+    { 0x2a1e0200, "orr w0, w16, w30, lsl #0" },
+    { 0x2a507fdf, "orr wzr, w30, w16, lsr #31" },
+    { 0x2a8043f0, "orr w16, wzr, w0, asr #16" },
+    { 0x2adf401e, "orr w30, w0, wzr, ror #16" },
+
+    { 0x2a3e0200, "orn w0, w16, w30, lsl #0" },
+    { 0x2a707fdf, "orn wzr, w30, w16, lsr #31" },
+    { 0x2aa043f0, "orn w16, wzr, w0, asr #16" },
+    { 0x2aff401e, "orn w30, w0, wzr, ror #16" },
+
+    { 0x729fffe0, "movk w0, #0xffff, lsl #0" },
+    { 0x72a0000f, "movk w15, #0x0, lsl #16" },
+    { 0x7281fffe, "movk w30, #0xfff, lsl #0" },
+    { 0x72a0003f, "movk wzr, #0x1, lsl #16" },
+
+    { 0x529fffe0, "movz w0, #0xffff, lsl #0" },
+    { 0x52a0000f, "movz w15, #0x0, lsl #16" },
+    { 0x5281fffe, "movz w30, #0xfff, lsl #0" },
+    { 0x52a0003f, "movz wzr, #0x1, lsl #16" },
+
+    { 0xd29fffe0, "movz x0, #0xffff, lsl #0" },
+    { 0xd2a0000f, "movz x15, #0x0, lsl #16" },
+    { 0xd2c1fffe, "movz x30, #0xfff, lsl #32" },
+    { 0xd2e0003f, "movz xzr, #0x1, lsl #48" },
+
+    { 0x1a8003e0, "csel w0, wzr, w0, eq" },
+    { 0x1a831001, "csel w1, w0, w3, ne" },
+    { 0x1a9e2022, "csel w2, w1, w30, cs" },
+    { 0x1a8a3083, "csel w3, w4, w10, cc" },
+    { 0x1a8b40e4, "csel w4, w7, w11, mi" },
+    { 0x1a9b5105, "csel w5, w8, w27, pl" },
+    { 0x1a846167, "csel w7, w11, w4, vs" },
+    { 0x1a8671c8, "csel w8, w14, w6, vc" },
+    { 0x1a878289, "csel w9, w20, w7, hi" },
+    { 0x1a8c92aa, "csel w10, w21, w12, ls" },
+    { 0x1a8ea2ce, "csel w14, w22, w14, ge" },
+    { 0x1a9fb3b2, "csel w18, w29, wzr, lt" },
+    { 0x1a9fc3d8, "csel w24, w30, wzr, gt" },
+    { 0x1a82d17e, "csel w30, w11, w2, le" },
+    { 0x1a81e19f, "csel wzr, w12, w1, al" },
+
+    { 0x9a8003e0, "csel x0, xzr, x0, eq" },
+    { 0x9a831001, "csel x1, x0, x3, ne" },
+    { 0x9a9e2022, "csel x2, x1, x30, cs" },
+    { 0x9a8a3083, "csel x3, x4, x10, cc" },
+    { 0x9a8b40e4, "csel x4, x7, x11, mi" },
+    { 0x9a9b5105, "csel x5, x8, x27, pl" },
+    { 0x9a846167, "csel x7, x11, x4, vs" },
+    { 0x9a8671c8, "csel x8, x14, x6, vc" },
+    { 0x9a878289, "csel x9, x20, x7, hi" },
+    { 0x9a8c92aa, "csel x10, x21, x12, ls" },
+    { 0x9a8ea2ce, "csel x14, x22, x14, ge" },
+    { 0x9a9fb3b2, "csel x18, x29, xzr, lt" },
+    { 0x9a9fc3d8, "csel x24, x30, xzr, gt" },
+    { 0x9a82d17e, "csel x30, x11, x2, le" },
+    { 0x9a81e19f, "csel xzr, x12, x1, al" },
+
+    { 0x5a8003e0, "csinv w0, wzr, w0, eq" },
+    { 0x5a831001, "csinv w1, w0, w3, ne" },
+    { 0x5a9e2022, "csinv w2, w1, w30, cs" },
+    { 0x5a8a3083, "csinv w3, w4, w10, cc" },
+    { 0x5a8b40e4, "csinv w4, w7, w11, mi" },
+    { 0x5a9b5105, "csinv w5, w8, w27, pl" },
+    { 0x5a846167, "csinv w7, w11, w4, vs" },
+    { 0x5a8671c8, "csinv w8, w14, w6, vc" },
+    { 0x5a878289, "csinv w9, w20, w7, hi" },
+    { 0x5a8c92aa, "csinv w10, w21, w12, ls" },
+    { 0x5a8ea2ce, "csinv w14, w22, w14, ge" },
+    { 0x5a9fb3b2, "csinv w18, w29, wzr, lt" },
+    { 0x5a9fc3d8, "csinv w24, w30, wzr, gt" },
+    { 0x5a82d17e, "csinv w30, w11, w2, le" },
+    { 0x5a81e19f, "csinv wzr, w12, w1, al" },
+
+    { 0x1b1f3fc0, "madd w0, w30, wzr, w15" },
+    { 0x1b0079ef, "madd w15, w15, w0, w30" },
+    { 0x1b0f7ffe, "madd w30, wzr, w15, wzr" },
+    { 0x1b1e001f, "madd wzr, w0, w30, w0" },
+
+    { 0x9b3f3fc0, "smaddl x0, w30, wzr, x15" },
+    { 0x9b2079ef, "smaddl x15, w15, w0, x30" },
+    { 0x9b2f7ffe, "smaddl x30, wzr, w15, xzr" },
+    { 0x9b3e001f, "smaddl xzr, w0, w30, x0" },
+
+    { 0xd65f0000, "ret x0" },
+    { 0xd65f01e0, "ret x15" },
+    { 0xd65f03c0, "ret x30" },
+    { 0xd65f03e0, "ret xzr" },
+
+    { 0xb87f4be0, "ldr w0, [sp, wzr, uxtw #0]" },
+    { 0xb87ed80f, "ldr w15, [x0, w30, sxtw #2]" },
+    { 0xb86fc9fe, "ldr w30, [x15, w15, sxtw #0]" },
+    { 0xb8605bdf, "ldr wzr, [x30, w0, uxtw #2]" },
+    { 0xb87febe0, "ldr w0, [sp, xzr, sxtx #0]" },
+    { 0xb87e780f, "ldr w15, [x0, x30, lsl #2]" },
+    { 0xb86f69fe, "ldr w30, [x15, x15, lsl #0]" },
+    { 0xb860fbdf, "ldr wzr, [x30, x0, sxtx #2]" },
+
+    { 0xb83f4be0, "str w0, [sp, wzr, uxtw #0]" },
+    { 0xb83ed80f, "str w15, [x0, w30, sxtw #2]" },
+    { 0xb82fc9fe, "str w30, [x15, w15, sxtw #0]" },
+    { 0xb8205bdf, "str wzr, [x30, w0, uxtw #2]" },
+    { 0xb83febe0, "str w0, [sp, xzr, sxtx #0]" },
+    { 0xb83e780f, "str w15, [x0, x30, lsl #2]" },
+    { 0xb82f69fe, "str w30, [x15, x15, lsl #0]" },
+    { 0xb820fbdf, "str wzr, [x30, x0, sxtx #2]" },
+
+    { 0x787f4be0, "ldrh w0, [sp, wzr, uxtw #0]" },
+    { 0x787ed80f, "ldrh w15, [x0, w30, sxtw #1]" },
+    { 0x786fc9fe, "ldrh w30, [x15, w15, sxtw #0]" },
+    { 0x78605bdf, "ldrh wzr, [x30, w0, uxtw #1]" },
+    { 0x787febe0, "ldrh w0, [sp, xzr, sxtx #0]" },
+    { 0x787e780f, "ldrh w15, [x0, x30, lsl #1]" },
+    { 0x786f69fe, "ldrh w30, [x15, x15, lsl #0]" },
+    { 0x7860fbdf, "ldrh wzr, [x30, x0, sxtx #1]" },
+
+    { 0x783f4be0, "strh w0, [sp, wzr, uxtw #0]" },
+    { 0x783ed80f, "strh w15, [x0, w30, sxtw #1]" },
+    { 0x782fc9fe, "strh w30, [x15, w15, sxtw #0]" },
+    { 0x78205bdf, "strh wzr, [x30, w0, uxtw #1]" },
+    { 0x783febe0, "strh w0, [sp, xzr, sxtx #0]" },
+    { 0x783e780f, "strh w15, [x0, x30, lsl #1]" },
+    { 0x782f69fe, "strh w30, [x15, x15, lsl #0]" },
+    { 0x7820fbdf, "strh wzr, [x30, x0, sxtx #1]" },
+
+    { 0x387f5be0, "ldrb w0, [sp, wzr, uxtw #0]" },
+    { 0x387ec80f, "ldrb w15, [x0, w30, sxtw ]" },
+    { 0x386fd9fe, "ldrb w30, [x15, w15, sxtw #0]" },
+    { 0x38604bdf, "ldrb wzr, [x30, w0, uxtw ]" },
+    { 0x387ffbe0, "ldrb w0, [sp, xzr, sxtx #0]" },
+    { 0x387e780f, "ldrb w15, [x0, x30, lsl #0]" },
+    { 0x386f79fe, "ldrb w30, [x15, x15, lsl #0]" },
+    { 0x3860ebdf, "ldrb wzr, [x30, x0, sxtx ]" },
+
+    { 0x383f5be0, "strb w0, [sp, wzr, uxtw #0]" },
+    { 0x383ec80f, "strb w15, [x0, w30, sxtw ]" },
+    { 0x382fd9fe, "strb w30, [x15, w15, sxtw #0]" },
+    { 0x38204bdf, "strb wzr, [x30, w0, uxtw ]" },
+    { 0x383ffbe0, "strb w0, [sp, xzr, sxtx #0]" },
+    { 0x383e780f, "strb w15, [x0, x30, lsl #0]" },
+    { 0x382f79fe, "strb w30, [x15, x15, lsl #0]" },
+    { 0x3820ebdf, "strb wzr, [x30, x0, sxtx ]" },
+
+    { 0xf87f4be0, "ldr x0, [sp, wzr, uxtw #0]" },
+    { 0xf87ed80f, "ldr x15, [x0, w30, sxtw #3]" },
+    { 0xf86fc9fe, "ldr x30, [x15, w15, sxtw #0]" },
+    { 0xf8605bdf, "ldr xzr, [x30, w0, uxtw #3]" },
+    { 0xf87febe0, "ldr x0, [sp, xzr, sxtx #0]" },
+    { 0xf87e780f, "ldr x15, [x0, x30, lsl #3]" },
+    { 0xf86f69fe, "ldr x30, [x15, x15, lsl #0]" },
+    { 0xf860fbdf, "ldr xzr, [x30, x0, sxtx #3]" },
+
+    { 0xf83f4be0, "str x0, [sp, wzr, uxtw #0]" },
+    { 0xf83ed80f, "str x15, [x0, w30, sxtw #3]" },
+    { 0xf82fc9fe, "str x30, [x15, w15, sxtw #0]" },
+    { 0xf8205bdf, "str xzr, [x30, w0, uxtw #3]" },
+    { 0xf83febe0, "str x0, [sp, xzr, sxtx #0]" },
+    { 0xf83e780f, "str x15, [x0, x30, lsl #3]" },
+    { 0xf82f69fe, "str x30, [x15, x15, lsl #0]" },
+    { 0xf820fbdf, "str xzr, [x30, x0, sxtx #3]" },
+
+    { 0xb85007e0, "ldr w0, [sp], #-256" },
+    { 0xb840040f, "ldr w15, [x0], #0" },
+    { 0xb84015fe, "ldr w30, [x15], #1" },
+    { 0xb84ff7df, "ldr wzr, [x30], #255" },
+    { 0xb8100fe0, "str w0, [sp, #-256]!" },
+    { 0xb8000c0f, "str w15, [x0, #0]!" },
+    { 0xb8001dfe, "str w30, [x15, #1]!" },
+    { 0xb80fffdf, "str wzr, [x30, #255]!" },
+
+    { 0x13017be0, "sbfm w0, wzr, #1, #30" },
+    { 0x131e7fcf, "sbfm w15, w30, #30, #31" },
+    { 0x131f01fe, "sbfm w30, w15, #31, #0" },
+    { 0x1300041f, "sbfm wzr, w0, #0, #1" },
+
+    { 0x53017be0, "ubfm w0, wzr, #1, #30" },
+    { 0x531e7fcf, "ubfm w15, w30, #30, #31" },
+    { 0x531f01fe, "ubfm w30, w15, #31, #0" },
+    { 0x5300041f, "ubfm wzr, w0, #0, #1" },
+    { 0xd3417fe0, "ubfm x0, xzr, #1, #31" },
+    { 0xd35fffcf, "ubfm x15, x30, #31, #63" },
+    { 0xd35f01fe, "ubfm x30, x15, #31, #0" },
+    { 0xd340041f, "ubfm xzr, x0, #0, #1" },
+
+    { 0x139e7be0, "extr w0, wzr, w30, #30" },
+    { 0x138f7fcf, "extr w15, w30, w15, #31" },
+    { 0x138001fe, "extr w30, w15, w0, #0" },
+    { 0x139f041f, "extr wzr, w0, wzr, #1" },
+
+    { 0x54000020, "b.eq #.+4" },
+    { 0x54000201, "b.ne #.+64" },
+    { 0x54000802, "b.cs #.+256" },
+    { 0x54002003, "b.cc #.+1024" },
+    { 0x54008004, "b.mi #.+4096" },
+    { 0x54ffffe5, "b.pl #.-4" },
+    { 0x54ffff06, "b.vs #.-32" },
+    { 0x54fffc07, "b.vc #.-128" },
+    { 0x54fff008, "b.hi #.-512" },
+    { 0x54000049, "b.ls #.+8" },
+    { 0x5400006a, "b.ge #.+12" },
+    { 0x5400008b, "b.lt #.+16" },
+    { 0x54ffffcc, "b.gt #.-8" },
+    { 0x54ffffad, "b.le #.-12" },
+    { 0x54ffff8e, "b.al #.-16" },
+
+    { 0x8b2001e0, "add x0, x15, w0, uxtb #0" },
+    { 0x8b2f27cf, "add x15, x30, w15, uxth #1" },
+    { 0x8b3e4bfe, "add x30, sp, w30, uxtw #2" },
+    { 0x8b3f6c1f, "add sp, x0, xzr, uxtx #3" },
+    { 0x8b2091e0, "add x0, x15, w0, sxtb #4" },
+    { 0x8b2fa3cf, "add x15, x30, w15, sxth #0" },
+    { 0x8b3ec7fe, "add x30, sp, w30, sxtw #1" },
+    { 0x8b3fe81f, "add sp, x0, xzr, sxtx #2" },
+
+    { 0xcb2001e0, "sub x0, x15, w0, uxtb #0" },
+    { 0xcb2f27cf, "sub x15, x30, w15, uxth #1" },
+    { 0xcb3e4bfe, "sub x30, sp, w30, uxtw #2" },
+    { 0xcb3f6c1f, "sub sp, x0, xzr, uxtx #3" },
+    { 0xcb2091e0, "sub x0, x15, w0, sxtb #4" },
+    { 0xcb2fa3cf, "sub x15, x30, w15, sxth #0" },
+    { 0xcb3ec7fe, "sub x30, sp, w30, sxtw #1" },
+    { 0xcb3fe81f, "sub sp, x0, xzr, sxtx #2" }
+};
+
+int main()
+{
+    char instr[256];
+    uint32_t failed = 0;
+    for(uint32_t i = 0; i < sizeof(test_table)/sizeof(test_table_entry_t); ++i)
+    {
+        test_table_entry_t *test;
+        test = &test_table[i];
+        aarch64_disassemble(test->code, instr);
+        if(strcmp(instr, test->instr) != 0)
+        {
+            printf("Test Failed \n"
+                   "Code     : 0x%08x\n"
+                   "Expected : %s\n"
+                   "Actual   : %s\n", test->code, test->instr, instr);
+            failed++;
+        }
+    }
+    if(failed == 0)
+    {
+        printf("All tests PASSED\n");
+        return 0;
+    }
+    else
+    {
+        printf("%u tests FAILED\n", failed);
+        return -1;
+    }
+}
diff --git a/libpixelflinger/tests/arch-aarch64/t32cb16blend/Android.mk b/libpixelflinger/tests/arch-aarch64/t32cb16blend/Android.mk
new file mode 100644
index 0000000..a67f0e3
--- /dev/null
+++ b/libpixelflinger/tests/arch-aarch64/t32cb16blend/Android.mk
@@ -0,0 +1,16 @@
+LOCAL_PATH:= $(call my-dir)
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES:= \
+    t32cb16blend_test.c \
+    ../../../arch-aarch64/t32cb16blend.S
+
+LOCAL_SHARED_LIBRARIES :=
+
+LOCAL_C_INCLUDES :=
+
+LOCAL_MODULE:= test-pixelflinger-aarch64-t32cb16blend
+
+LOCAL_MODULE_TAGS := tests
+
+include $(BUILD_EXECUTABLE)
diff --git a/libpixelflinger/tests/arch-aarch64/t32cb16blend/t32cb16blend_test.c b/libpixelflinger/tests/arch-aarch64/t32cb16blend/t32cb16blend_test.c
new file mode 100644
index 0000000..bcde3e6
--- /dev/null
+++ b/libpixelflinger/tests/arch-aarch64/t32cb16blend/t32cb16blend_test.c
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+#define ARGB_8888_MAX 0xFFFFFFFF
+#define ARGB_8888_MIN 0x00000000
+#define RGB_565_MAX 0xFFFF
+#define RGB_565_MIN 0x0000
+
+struct test_t
+{
+    char name[256];
+    uint32_t src_color;
+    uint16_t dst_color;
+    size_t count;
+};
+
+struct test_t tests[] =
+{
+    {"Count 0", 0, 0, 0},
+    {"Count 1, Src=Max, Dst=Min", ARGB_8888_MAX, RGB_565_MIN, 1},
+    {"Count 2, Src=Min, Dst=Max", ARGB_8888_MIN, RGB_565_MAX, 2},
+    {"Count 3, Src=Max, Dst=Max", ARGB_8888_MAX, RGB_565_MAX, 3},
+    {"Count 4, Src=Min, Dst=Min", ARGB_8888_MIN, RGB_565_MIN, 4},
+    {"Count 1, Src=Rand, Dst=Rand", 0x12345678, 0x9ABC, 1},
+    {"Count 2, Src=Rand, Dst=Rand", 0xABCDEF12, 0x2345, 2},
+    {"Count 3, Src=Rand, Dst=Rand", 0x11111111, 0xEDFE, 3},
+    {"Count 4, Src=Rand, Dst=Rand", 0x12345678, 0x9ABC, 4},
+    {"Count 5, Src=Rand, Dst=Rand", 0xEFEFFEFE, 0xFACC, 5},
+    {"Count 10, Src=Rand, Dst=Rand", 0x12345678, 0x9ABC, 10}
+
+};
+
+void scanline_t32cb16blend_aarch64(uint16_t*, uint32_t*, size_t);
+void scanline_t32cb16blend_c(uint16_t * dst, uint32_t* src, size_t count)
+{
+    while (count--)
+    {
+        uint16_t d = *dst;
+        uint32_t s = *src++;
+        int dstR = (d>>11)&0x1f;
+        int dstG = (d>>5)&0x3f;
+        int dstB = (d)&0x1f;
+        int srcR = (s >> (   3))&0x1F;
+        int srcG = (s >> ( 8+2))&0x3F;
+        int srcB = (s >> (16+3))&0x1F;
+        int srcAlpha = (s>>24) & 0xFF;
+
+
+        int f = 0x100 - (srcAlpha + ((srcAlpha>>7) & 0x1));
+        srcR += (f*dstR)>>8;
+        srcG += (f*dstG)>>8;
+        srcB += (f*dstB)>>8;
+        srcR = srcR > 0x1F? 0x1F: srcR;
+        srcG = srcG > 0x3F? 0x3F: srcG;
+        srcB = srcB > 0x1F? 0x1F: srcB;
+        *dst++ = (uint16_t)((srcR<<11)|(srcG<<5)|srcB);
+    }
+}
+
+void scanline_t32cb16blend_test()
+{
+    uint16_t dst_c[16], dst_asm[16];
+    uint32_t src[16];
+    uint32_t i;
+    uint32_t j;
+
+    for(i = 0; i < sizeof(tests)/sizeof(struct test_t); ++i)
+    {
+        struct test_t test = tests[i];
+
+        printf("Testing - %s:",test.name);
+
+        memset(dst_c, 0, sizeof(dst_c));
+        memset(dst_asm, 0, sizeof(dst_asm));
+
+        for(j = 0; j < test.count; ++j)
+        {
+            dst_c[j]   = test.dst_color;
+            dst_asm[j] = test.dst_color;
+            src[j] = test.src_color;
+        }
+
+        scanline_t32cb16blend_c(dst_c,src,test.count);
+        scanline_t32cb16blend_aarch64(dst_asm,src,test.count);
+
+
+        if(memcmp(dst_c, dst_asm, sizeof(dst_c)) == 0)
+            printf("Passed\n");
+        else
+            printf("Failed\n");
+
+        for(j = 0; j < test.count; ++j)
+        {
+            printf("dst_c[%u] = %x, dst_asm[%u] = %x \n", j, dst_c[j], j, dst_asm[j]);
+        }
+    }
+}
+
+int main()
+{
+    scanline_t32cb16blend_test();
+    return 0;
+}
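The reference implementation above attenuates each destination channel by f = 0x100 - (a + (a >> 7)), which maps alpha 0xFF to f = 0 so a fully opaque source replaces the destination outright. A standalone sanity check of that arithmetic (plain C, not part of the patch):

    #include <stdio.h>

    int main(void) {
        /* alpha = 0xFF: f = 0x100 - (0xFF + 1) = 0, so dst drops out */
        int a = 0xFF;
        int f = 0x100 - (a + ((a >> 7) & 0x1));
        /* a 5-bit source channel at max (0x1F) blended over dst = 0x1F */
        int out = 0x1F + ((f * 0x1F) >> 8);
        printf("f = %d, out = 0x%x\n", f, out); /* prints f = 0, out = 0x1f */
        return 0;
    }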
diff --git a/libpixelflinger/tests/codegen/codegen.cpp b/libpixelflinger/tests/codegen/codegen.cpp
index 3d5a040..e8a4f5e 100644
--- a/libpixelflinger/tests/codegen/codegen.cpp
+++ b/libpixelflinger/tests/codegen/codegen.cpp
@@ -10,8 +10,9 @@
 #include "codeflinger/GGLAssembler.h"
 #include "codeflinger/ARMAssembler.h"
 #include "codeflinger/MIPSAssembler.h"
+#include "codeflinger/Aarch64Assembler.h"
 
-#if defined(__arm__) || defined(__mips__)
+#if defined(__arm__) || defined(__mips__) || defined(__aarch64__)
 # define ANDROID_ARM_CODEGEN 1
 #else
 # define ANDROID_ARM_CODEGEN 0
@@ -19,6 +20,8 @@
 #if defined (__mips__)
 #define ASSEMBLY_SCRATCH_SIZE 4096
+#elif defined(__aarch64__)
+#define ASSEMBLY_SCRATCH_SIZE 8192
 #else
 #define ASSEMBLY_SCRATCH_SIZE 2048
 #endif
@@ -53,13 +56,17 @@ static void ggl_test_codegen(uint32_t n, uint32_t p, uint32_t t0, uint32_t t1)
     GGLAssembler assembler( new ArmToMipsAssembler(a) );
 #endif
 
+#if defined(__aarch64__)
+    GGLAssembler assembler( new ArmToAarch64Assembler(a) );
+#endif
+
     int err = assembler.scanline(needs, (context_t*)c);
     if (err != 0) {
         printf("error %08x (%s)\n", err, strerror(-err));
     }
     gglUninit(c);
 #else
-    printf("This test runs only on ARM or MIPS\n");
+    printf("This test runs only on ARM, Aarch64 or MIPS\n");
 #endif
 }
diff --git a/libpixelflinger/tests/gglmul/Android.mk b/libpixelflinger/tests/gglmul/Android.mk
new file mode 100644
index 0000000..64f88b7
--- /dev/null
+++ b/libpixelflinger/tests/gglmul/Android.mk
@@ -0,0 +1,16 @@
+LOCAL_PATH:= $(call my-dir)
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES:= \
+    gglmul_test.cpp
+
+LOCAL_SHARED_LIBRARIES :=
+
+LOCAL_C_INCLUDES := \
+    system/core/libpixelflinger
+
+LOCAL_MODULE:= test-pixelflinger-gglmul
+
+LOCAL_MODULE_TAGS := tests
+
+include $(BUILD_EXECUTABLE)
diff --git a/libpixelflinger/tests/gglmul/gglmul_test.cpp b/libpixelflinger/tests/gglmul/gglmul_test.cpp
new file mode 100644
index 0000000..103e4e9
--- /dev/null
+++ b/libpixelflinger/tests/gglmul/gglmul_test.cpp
@@ -0,0 +1,279 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+
+#include "private/pixelflinger/ggl_fixed.h"
+
+// gglClampx() tests
+struct gglClampx_test_t
+{
+    GGLfixed input;
+    GGLfixed output;
+};
+
+gglClampx_test_t gglClampx_tests[] =
+{
+    {FIXED_ONE + 1, FIXED_ONE},
+    {FIXED_ONE, FIXED_ONE},
+    {FIXED_ONE - 1, FIXED_ONE - 1},
+    {1, 1},
+    {0, 0},
+    {FIXED_MIN,0}
+};
+
+void gglClampx_test()
+{
+    uint32_t i;
+
+    printf("Testing gglClampx\n");
+    for(i = 0; i < sizeof(gglClampx_tests)/sizeof(gglClampx_test_t); ++i)
+    {
+        gglClampx_test_t *test = &gglClampx_tests[i];
+        printf("Test input=0x%08x output=0x%08x :",
+               test->input, test->output);
+        if(gglClampx(test->input) == test->output)
+            printf("Passed\n");
+        else
+            printf("Failed\n");
+    }
+}
+
+// gglClz() tests
+struct gglClz_test_t
+{
+    GGLfixed input;
+    GGLfixed output;
+};
+
+gglClz_test_t gglClz_tests[] =
+{
+    {0, 32},
+    {1, 31},
+    {-1,0}
+};
+
+void gglClz_test()
+{
+    uint32_t i;
+
+    printf("Testing gglClz\n");
+    for(i = 0; i < sizeof(gglClz_tests)/sizeof(gglClz_test_t); ++i)
+    {
+        gglClz_test_t *test = &gglClz_tests[i];
+        printf("Test input=0x%08x output=%2d :", test->input, test->output);
+        if(gglClz(test->input) == test->output)
+            printf("Passed\n");
+        else
+            printf("Failed\n");
+    }
+}
+
+// gglMulx() tests
+struct gglMulx_test_t
+{
+    GGLfixed x;
+    GGLfixed y;
+    int      shift;
+};
+
+gglMulx_test_t gglMulx_tests[] =
+{
+    {1,1,1},
+    {0,1,1},
+    {FIXED_ONE,FIXED_ONE,16},
+    {FIXED_MIN,FIXED_MAX,16},
+    {FIXED_MAX,FIXED_MAX,16},
+    {FIXED_MIN,FIXED_MIN,16},
+    {FIXED_HALF,FIXED_ONE,16},
+    {FIXED_MAX,FIXED_MAX,31},
+    {FIXED_ONE,FIXED_MAX,31}
+};
+
+void gglMulx_test()
+{
+    uint32_t i;
+    GGLfixed actual, expected;
+
+    printf("Testing gglMulx\n");
+    for(i = 0; i < sizeof(gglMulx_tests)/sizeof(gglMulx_test_t); ++i)
+    {
+        gglMulx_test_t *test = &gglMulx_tests[i];
+        printf("Test x=0x%08x y=0x%08x shift=%2d :",
+               test->x, test->y, test->shift);
+        actual = gglMulx(test->x, test->y, test->shift);
+        expected =
+            ((int64_t)test->x * test->y + (1 << (test->shift-1))) >> test->shift;
+        if(actual == expected)
+            printf(" Passed\n");
+        else
+            printf(" Failed Actual(0x%08x) Expected(0x%08x)\n",
+                   actual, expected);
+    }
+}
+// gglMulAddx() tests
+struct gglMulAddx_test_t
+{
+    GGLfixed x;
+    GGLfixed y;
+    int      shift;
+    GGLfixed a;
+};
+
+gglMulAddx_test_t gglMulAddx_tests[] =
+{
+    {1,2,1,1},
+    {0,1,1,1},
+    {FIXED_ONE,FIXED_ONE,16, 0},
+    {FIXED_MIN,FIXED_MAX,16, FIXED_HALF},
+    {FIXED_MAX,FIXED_MAX,16, FIXED_MIN},
+    {FIXED_MIN,FIXED_MIN,16, FIXED_MAX},
+    {FIXED_HALF,FIXED_ONE,16,FIXED_ONE},
+    {FIXED_MAX,FIXED_MAX,31, FIXED_HALF},
+    {FIXED_ONE,FIXED_MAX,31, FIXED_HALF}
+};
+
+void gglMulAddx_test()
+{
+    uint32_t i;
+    GGLfixed actual, expected;
+
+    printf("Testing gglMulAddx\n");
+    for(i = 0; i < sizeof(gglMulAddx_tests)/sizeof(gglMulAddx_test_t); ++i)
+    {
+        gglMulAddx_test_t *test = &gglMulAddx_tests[i];
+        printf("Test x=0x%08x y=0x%08x shift=%2d a=0x%08x :",
+               test->x, test->y, test->shift, test->a);
+        actual = gglMulAddx(test->x, test->y,test->a, test->shift);
+        expected = (((int64_t)test->x * test->y) >> test->shift) + test->a;
+
+        if(actual == expected)
+            printf(" Passed\n");
+        else
+            printf(" Failed Actual(0x%08x) Expected(0x%08x)\n",
+                   actual, expected);
+    }
+}
+// gglMulSubx() tests
+struct gglMulSubx_test_t
+{
+    GGLfixed x;
+    GGLfixed y;
+    int      shift;
+    GGLfixed a;
+};
+
+gglMulSubx_test_t gglMulSubx_tests[] =
+{
+    {1,2,1,1},
+    {0,1,1,1},
+    {FIXED_ONE,FIXED_ONE,16, 0},
+    {FIXED_MIN,FIXED_MAX,16, FIXED_HALF},
+    {FIXED_MAX,FIXED_MAX,16, FIXED_MIN},
+    {FIXED_MIN,FIXED_MIN,16, FIXED_MAX},
+    {FIXED_HALF,FIXED_ONE,16,FIXED_ONE},
+    {FIXED_MAX,FIXED_MAX,31, FIXED_HALF},
+    {FIXED_ONE,FIXED_MAX,31, FIXED_HALF}
+};
+
+void gglMulSubx_test()
+{
+    uint32_t i;
+    GGLfixed actual, expected;
+
+    printf("Testing gglMulSubx\n");
+    for(i = 0; i < sizeof(gglMulSubx_tests)/sizeof(gglMulSubx_test_t); ++i)
+    {
+        gglMulSubx_test_t *test = &gglMulSubx_tests[i];
+        printf("Test x=0x%08x y=0x%08x shift=%2d a=0x%08x :",
+               test->x, test->y, test->shift, test->a);
+        actual = gglMulSubx(test->x, test->y, test->a, test->shift);
+        expected = (((int64_t)test->x * test->y) >> test->shift) - test->a;
+
+        if(actual == expected)
+            printf(" Passed\n");
+        else
+            printf(" Failed Actual(0x%08x) Expected(0x%08x)\n",
+                   actual, expected);
+    }
+}
+
+// gglMulii() tests
+// <stdint.h> may already define INT32_MAX/INT32_MIN as macros (C++11);
+// only provide fallbacks when it does not.
+#ifndef INT32_MAX
+const int32_t INT32_MAX = 0x7FFFFFFF;
+const int32_t INT32_MIN = 0x80000000;
+#endif
+
+struct gglMulii_test_t
+{
+    int32_t x;
+    int32_t y;
+};
+
+gglMulii_test_t gglMulii_tests[] =
+{
+    {1,INT32_MIN},
+    {1,INT32_MAX},
+    {0,INT32_MIN},
+    {0,INT32_MAX},
+    {INT32_MIN, INT32_MAX},
+    {INT32_MAX, INT32_MIN},
+    {INT32_MIN, INT32_MIN},
+    {INT32_MAX, INT32_MAX}
+};
+
+void gglMulii_test()
+{
+    uint32_t i;
+    int64_t actual, expected;
+
+    printf("Testing gglMulii\n");
+    for(i = 0; i < sizeof(gglMulii_tests)/sizeof(gglMulii_test_t); ++i)
+    {
+        gglMulii_test_t *test = &gglMulii_tests[i];
+        printf("Test x=0x%08x y=0x%08x :", test->x, test->y);
+        actual = gglMulii(test->x, test->y);
+        expected = ((int64_t)test->x * test->y);
+
+        if(actual == expected)
+            printf(" Passed\n");
+        else
+            printf(" Failed Actual(%lld) Expected(%lld)\n",
+                   (long long)actual, (long long)expected);
+    }
+}
+
+int main(int argc, char** argv)
+{
+    gglClampx_test();
+    gglClz_test();
+    gglMulx_test();
+    gglMulAddx_test();
+    gglMulSubx_test();
+    gglMulii_test();
+    return 0;
+}
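One detail the expected-value computations above encode: gglMulx is expected to round to nearest (it adds 1 << (shift-1) before shifting), while gglMulAddx and gglMulSubx truncate. A standalone sketch of the two behaviors (plain C, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* round-to-nearest fixed-point multiply, as the gglMulx tests expect */
    static int32_t mulx_round(int32_t x, int32_t y, int shift) {
        return (int32_t)(((int64_t)x * y + ((int64_t)1 << (shift - 1))) >> shift);
    }

    /* truncating variant, as the gglMulAddx/gglMulSubx tests expect (a == 0) */
    static int32_t mulx_trunc(int32_t x, int32_t y, int shift) {
        return (int32_t)(((int64_t)x * y) >> shift);
    }

    int main(void) {
        /* 1 * 0.5 in 16.16: product 0x8000, exactly half an LSB;
         * rounding yields 1, truncation yields 0 */
        printf("round: 0x%08x trunc: 0x%08x\n",
               mulx_round(0x0001, 0x8000, 16),
               mulx_trunc(0x0001, 0x8000, 16));
        return 0;
    }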