diff options
Diffstat (limited to 'libacc/acc.cpp')
| -rw-r--r-- | libacc/acc.cpp | 1629 |
1 files changed, 1629 insertions, 0 deletions
diff --git a/libacc/acc.cpp b/libacc/acc.cpp new file mode 100644 index 0000000..567e25e --- /dev/null +++ b/libacc/acc.cpp @@ -0,0 +1,1629 @@ +/* + Obfuscated Tiny C Compiler + + Copyright (C) 2001-2003 Fabrice Bellard + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product and its documentation + *is* required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + */ + +#include <ctype.h> +#include <dlfcn.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#if defined(__arm__) +#include <unistd.h> +#endif + +#include "disassem.h" + +namespace acc { + +class compiler { + class CodeBuf { + char* ind; + char* pProgramBase; + + void release() { + if (pProgramBase != 0) { + free(pProgramBase); + pProgramBase = 0; + } + } + + public: + CodeBuf() { + pProgramBase = 0; + ind = 0; + } + + ~CodeBuf() { + release(); + } + + void init(int size) { + release(); + pProgramBase = (char*) calloc(1, size); + ind = pProgramBase; + } + + void o(int n) { + /* cannot use unsigned, so we must do a hack */ + while (n && n != -1) { + *ind++ = n; + n = n >> 8; + } + } + + int o4(int n) { + int result = (int) ind; + * (int*) ind = n; + ind += 4; + return result; + } + + /* + * Output a byte. Handles all values, 0..ff. + */ + void ob(int n) { + *ind++ = n; + } + + /* output a symbol and patch all calls to it */ + void gsym(int t) { + int n; + while (t) { + n = *(int *) t; /* next value */ + *(int *) t = ((int) ind) - t - 4; + t = n; + } + } + + /* psym is used to put an instruction with a data field which is a + reference to a symbol. It is in fact the same as oad ! */ + int psym(int n, int t) { + return oad(n, t); + } + + /* instruction + address */ + int oad(int n, int t) { + o(n); + *(int *) ind = t; + t = (int) ind; + ind = ind + 4; + return t; + } + + inline void* getBase() { + return (void*) pProgramBase; + } + + int getSize() { + return ind - pProgramBase; + } + + int getPC() { + return (int) ind; + } + }; + + class CodeGenerator { + public: + CodeGenerator() {} + virtual ~CodeGenerator() {} + + virtual void init(CodeBuf* pCodeBuf) { + this->pCodeBuf = pCodeBuf; + } + + /* returns address to patch with local variable size + */ + virtual int functionEntry(int argCount) = 0; + + virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) = 0; + + /* load immediate value */ + virtual void li(int t) = 0; + + virtual int gjmp(int t) = 0; + + /* l = 0: je, l == 1: jne */ + virtual int gtst(bool l, int t) = 0; + + virtual void gcmp(int op) = 0; + + virtual void genOp(int op) = 0; + + virtual void clearECX() = 0; + + virtual void pushEAX() = 0; + + virtual void popECX() = 0; + + virtual void storeEAXToAddressECX(bool isInt) = 0; + + virtual void loadEAXIndirect(bool isInt) = 0; + + virtual void leaEAX(int ea) = 0; + + virtual void storeEAX(int ea) = 0; + + virtual void loadEAX(int ea, bool isIncDec, int op) = 0; + + virtual int beginFunctionCallArguments() = 0; + + virtual void storeEAToArg(int l) = 0; + + virtual void endFunctionCallArguments(int a, int l) = 0; + + + virtual int callForward(int symbol) = 0; + + virtual void callRelative(int t) = 0; + + virtual void callIndirect(int l) = 0; + + virtual void adjustStackAfterCall(int l, bool isIndirect) = 0; + + virtual int disassemble(FILE* out) = 0; + + /* output a symbol and patch all calls to it */ + virtual void gsym(int t) { + pCodeBuf->gsym(t); + } + + virtual int finishCompile() { +#if defined(__arm__) + const long base = long(pCodeBuf->getBase()); + const long curr = base + long(pCodeBuf->getSize()); + int err = cacheflush(base, curr, 0); + return err; +#else + return 0; +#endif + } + + /** + * Adjust relative branches by this amount. + */ + virtual int jumpOffset() = 0; + + protected: + void o(int n) { + pCodeBuf->o(n); + } + + /* + * Output a byte. Handles all values, 0..ff. + */ + void ob(int n) { + pCodeBuf->ob(n); + } + + /* psym is used to put an instruction with a data field which is a + reference to a symbol. It is in fact the same as oad ! */ + int psym(int n, int t) { + return oad(n, t); + } + + /* instruction + address */ + int oad(int n, int t) { + return pCodeBuf->oad(n,t); + } + + int getBase() { + return (int) pCodeBuf->getBase(); + } + + int getPC() { + return pCodeBuf->getPC(); + } + + int o4(int data) { + return pCodeBuf->o4(data); + } + private: + CodeBuf* pCodeBuf; + }; + + class ARMCodeGenerator : public CodeGenerator { + public: + ARMCodeGenerator() {} + virtual ~ARMCodeGenerator() {} + + /* returns address to patch with local variable size + */ + virtual int functionEntry(int argCount) { + fprintf(stderr, "functionEntry(%d);\n", argCount); + // sp -> arg4 arg5 ... + // Push our register-based arguments back on the stack + if (argCount > 0) { + int regArgCount = argCount <= 4 ? argCount : 4; + o4(0xE92D0000 | ((1 << argCount) - 1)); // stmfd sp!, {} + } + // sp -> arg0 arg1 ... + o4(0xE92D4800); // stmfd sp!, {fp, lr} + // sp, fp -> oldfp, retadr, arg0 arg1 .... + o4(0xE1A0B00D); // mov fp, sp + return o4(0xE24DD000); // sub sp, sp, # <local variables> + } + + virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) { + fprintf(stderr, "functionExit(%d, %d, %d);\n", argCount, localVariableAddress, localVariableSize); + // Patch local variable allocation code: + if (localVariableSize < 0 || localVariableSize > 255) { + error("localVariables out of range: %d", localVariableSize); + } + *(char*) (localVariableAddress) = localVariableSize; + + // sp -> locals .... fp -> oldfp, retadr, arg0, arg1, ... + o4(0xE1A0E00B); // mov lr, fp + o4(0xE59BB000); // ldr fp, [fp] + o4(0xE28ED004); // add sp, lr, #4 + // sp -> retadr, arg0, ... + o4(0xE8BD4000); // ldmfd sp!, {lr} + // sp -> arg0 .... + if (argCount > 0) { + // We store the PC into the lr so we can adjust the sp before + // returning. We need to pull off the registers we pushed + // earlier. We don't need to actually store them anywhere, + // just adjust the stack. + int regArgCount = argCount <= 4 ? argCount : 4; + o4(0xE28DD000 | (regArgCount << 2)); // add sp, sp, #argCount << 2 + } + o4(0xE12FFF1E); // bx lr + } + + /* load immediate value */ + virtual void li(int t) { + fprintf(stderr, "li(%d);\n", t); + if (t >= 0 && t < 255) { + o4(0xE3A00000 + t); // mov r0, #0 + } else if (t >= -256 && t < 0) { + // mvn means move constant ^ ~0 + o4(0xE3E00001 - t); // mvn r0, #0 + } else { + o4(0xE51F0000); // ldr r0, .L3 + o4(0xEA000000); // b .L99 + o4(t); // .L3: .word 0 + // .L99: + } + } + + virtual int gjmp(int t) { + fprintf(stderr, "gjmp(%d);\n", t); + return o4(0xEA000000 | encodeAddress(t)); // b .L33 + } + + /* l = 0: je, l == 1: jne */ + virtual int gtst(bool l, int t) { + fprintf(stderr, "gtst(%d, %d);\n", l, t); + o4(0xE3500000); // cmp r0,#0 + int branch = l ? 0x1A000000 : 0x0A000000; // bne : beq + return o4(branch | encodeAddress(t)); + } + + virtual void gcmp(int op) { + fprintf(stderr, "gcmp(%d);\n", op); + o4(0xE1510000); // cmp r1, r1 + switch(op) { + case OP_EQUALS: + o4(0x03A00001); // moveq r0,#1 + o4(0x13A00000); // movne r0,#0 + break; + case OP_NOT_EQUALS: + o4(0x03A00000); // moveq r0,#0 + o4(0x13A00001); // movne r0,#1 + break; + case OP_LESS_EQUAL: + o4(0xD3A00001); // movle r0,#1 + o4(0xC3A00000); // movgt r0,#0 + break; + case OP_GREATER: + o4(0xD3A00000); // movle r0,#0 + o4(0xC3A00001); // movgt r0,#1 + break; + case OP_GREATER_EQUAL: + o4(0xA3A00001); // movge r0,#1 + o4(0xB3A00000); // movlt r0,#0 + break; + case OP_LESS: + o4(0xA3A00000); // movge r0,#0 + o4(0xB3A00001); // movlt r0,#1 + break; + default: + error("Unknown comparison op %d", op); + break; + } + } + + virtual void genOp(int op) { + fprintf(stderr, "genOp(%d);\n", op); + switch(op) { + case OP_MUL: + o4(0x0E0000091); // mul r0,r1,r0 + break; + case OP_DIV: + callRuntime(runtime_DIV); + break; + case OP_MOD: + callRuntime(runtime_MOD); + break; + case OP_PLUS: + o4(0xE0810000); // add r0,r1,r0 + break; + case OP_MINUS: + o4(0xE0410000); // sub r0,r1,r0 + break; + case OP_SHIFT_LEFT: + o4(0xE1A00011); // lsl r0,r1,r0 + break; + case OP_SHIFT_RIGHT: + o4(0xE1A00051); // asr r0,r1,r0 + break; + case OP_BIT_AND: + o4(0xE0010000); // and r0,r1,r0 + break; + case OP_BIT_XOR: + o4(0xE0210000); // eor r0,r1,r0 + break; + case OP_BIT_OR: + o4(0xE1810000); // orr r0,r1,r0 + break; + case OP_BIT_NOT: + o4(0xE1E00000); // mvn r0, r0 + break; + default: + error("Unimplemented op %d\n", op); + break; + } +#if 0 + o(decodeOp(op)); + if (op == OP_MOD) + o(0x92); /* xchg %edx, %eax */ +#endif + } + + virtual void clearECX() { + fprintf(stderr, "clearECX();\n"); + o4(0xE3A01000); // mov r1, #0 + } + + virtual void pushEAX() { + fprintf(stderr, "pushEAX();\n"); + o4(0xE92D0001); // stmfd sp!,{r0} + } + + virtual void popECX() { + fprintf(stderr, "popECX();\n"); + o4(0xE8BD0002); // ldmfd sp!,{r1} + } + + virtual void storeEAXToAddressECX(bool isInt) { + fprintf(stderr, "storeEAXToAddressECX(%d);\n", isInt); + if (isInt) { + o4(0xE5810000); // str r0, [r1] + } else { + o4(0xE5C10000); // strb r0, [r1] + } + } + + virtual void loadEAXIndirect(bool isInt) { + fprintf(stderr, "loadEAXIndirect(%d);\n", isInt); + if (isInt) + o4(0xE5900000); // ldr r0, [r0] + else + o4(0xE5D00000); // ldrb r0, [r0] + } + + virtual void leaEAX(int ea) { + fprintf(stderr, "leaEAX(%d);\n", ea); + if (ea < LOCAL) { + // Local, fp relative + if (ea < -1023 || ea > 1023 || ((ea & 3) != 0)) { + error("Offset out of range: %08x", ea); + } + if (ea < 0) { + o4(0xE24B0F00 | (0xff & ((-ea) >> 2))); // sub r0, fp, #ea + } else { + o4(0xE28B0F00 | (0xff & (ea >> 2))); // add r0, fp, #ea + } + } else { + // Global, absolute. + o4(0xE59F0000); // ldr r0, .L1 + o4(0xEA000000); // b .L99 + o4(ea); // .L1: .word 0 + // .L99: + } + } + + virtual void storeEAX(int ea) { + fprintf(stderr, "storeEAX(%d);\n", ea); + if (ea < LOCAL) { + // Local, fp relative + if (ea < -4095 || ea > 4095) { + error("Offset out of range: %08x", ea); + } + if (ea < 0) { + o4(0xE50B0000 | (0xfff & (-ea))); // str r0, [fp,#-ea] + } else { + o4(0xE58B0000 | (0xfff & ea)); // str r0, [fp,#ea] + } + } else{ + // Global, absolute + o4(0xE59F1000); // ldr r1, .L1 + o4(0xEA000000); // b .L99 + o4(ea); // .L1: .word 0 + o4(0xE5810000); // .L99: str r0, [r1] + } + } + + virtual void loadEAX(int ea, bool isIncDec, int op) { + fprintf(stderr, "loadEAX(%d, %d, %d);\n", ea, isIncDec, op); + if (ea < LOCAL) { + // Local, fp relative + if (ea < -4095 || ea > 4095) { + error("Offset out of range: %08x", ea); + } + if (ea < 0) { + o4(0xE51B0000 | (0xfff & (-ea))); // ldr r0, [fp,#-ea] + } else { + o4(0xE59B0000 | (0xfff & ea)); // ldr r0, [fp,#ea] + } + } else { + // Global, absolute + o4(0xE59F2000); // ldr r2, .L1 + o4(0xEA000000); // b .L99 + o4(ea); // .L1: .word ea + o4(0xE5920000); // .L99: ldr r0, [r2] + } + + if (isIncDec) { + switch (op) { + case OP_INCREMENT: + o4(0xE2801001); // add r1, r0, #1 + break; + case OP_DECREMENT: + o4(0xE2401001); // sub r1, r0, #1 + break; + default: + error("unknown opcode: %d", op); + } + if (ea < LOCAL) { + // Local, fp relative + // Don't need range check, was already checked above + if (ea < 0) { + o4(0xE50B1000 | (0xfff & (-ea))); // str r1, [fp,#-ea] + } else { + o4(0xE58B1000 | (0xfff & ea)); // str r1, [fp,#ea] + } + } else{ + // Global, absolute + // r2 is already set up from before. + o4(0xE5821000); // str r1, [r2] + } + } + } + + virtual int beginFunctionCallArguments() { + fprintf(stderr, "beginFunctionCallArguments();\n"); + return o4(0xE24DDF00); // Placeholder + } + + virtual void storeEAToArg(int l) { + fprintf(stderr, "storeEAToArg(%d);\n", l); + if (l < 0 || l > 4096-4) { + error("l out of range for stack offset: 0x%08x", l); + } + o4(0xE58D0000 + l); // str r0, [sp, #4] + } + + virtual void endFunctionCallArguments(int a, int l) { + fprintf(stderr, "endFunctionCallArguments(0x%08x, %d);\n", a, l); + if (l < 0 || l > 0x3FC) { + error("L out of range for stack adjustment: 0x%08x", l); + } + * (int*) a = 0xE24DDF00 | (l >> 2); // sub sp, sp, #0 << 2 + int argCount = l >> 2; + if (argCount > 0) { + int regArgCount = argCount > 4 ? 4 : argCount; + o4(0xE8BD0000 | ((1 << regArgCount) - 1)); // ldmfd sp!,{} + } + } + + virtual int callForward(int symbol) { + fprintf(stderr, "callForward(%d);\n", symbol); + // Forward calls are always short (local) + return o4(0xEB000000 | encodeAddress(symbol)); + } + + virtual void callRelative(int t) { + fprintf(stderr, "callRelative(%d);\n", t); + int abs = t + getPC() + jumpOffset(); + fprintf(stderr, "abs=%d (0x%08x)\n", abs, abs); + if (t >= - (1 << 25) && t < (1 << 25)) { + o4(0xEB000000 | encodeAddress(t)); + } else { + // Long call. + o4(0xE59FC000); // ldr r12, .L1 + o4(0xEA000000); // b .L99 + o4(t - 12); // .L1: .word 0 + o4(0xE08CC00F); // .L99: add r12,pc + o4(0xE12FFF3C); // blx r12 + } + } + + virtual void callIndirect(int l) { + fprintf(stderr, "callIndirect(%d);\n", l); + int argCount = l >> 2; + int poppedArgs = argCount > 4 ? 4 : argCount; + int adjustedL = l - (poppedArgs << 2); + if (adjustedL < 0 || adjustedL > 4096-4) { + error("l out of range for stack offset: 0x%08x", l); + } + o4(0xE59DC000 | (0xfff & adjustedL)); // ldr r12, [sp,#adjustedL] + o4(0xE12FFF3C); // blx r12 + } + + virtual void adjustStackAfterCall(int l, bool isIndirect) { + fprintf(stderr, "adjustStackAfterCall(%d, %d);\n", l, isIndirect); + int argCount = l >> 2; + int stackArgs = argCount > 4 ? argCount - 4 : 0; + int stackUse = stackArgs + (isIndirect ? 1 : 0); + if (stackUse) { + if (stackUse < 0 || stackUse > 255) { + error("L out of range for stack adjustment: 0x%08x", l); + } + o4(0xE28DDF00 | stackUse); // add sp, sp, #stackUse << 2 + } + } + + virtual int jumpOffset() { + return 8; + } + + /* output a symbol and patch all calls to it */ + virtual void gsym(int t) { + fprintf(stderr, "gsym(0x%x)\n", t); + int n; + int base = getBase(); + int pc = getPC(); + fprintf(stderr, "pc = 0x%x\n", pc); + while (t) { + int data = * (int*) t; + int decodedOffset = ((BRANCH_REL_ADDRESS_MASK & data) << 2); + if (decodedOffset == 0) { + n = 0; + } else { + n = base + decodedOffset; /* next value */ + } + *(int *) t = (data & ~BRANCH_REL_ADDRESS_MASK) + | encodeRelAddress(pc - t - 8); + t = n; + } + } + + virtual int disassemble(FILE* out) { + disasmOut = out; + disasm_interface_t di; + di.di_readword = disassemble_readword; + di.di_printaddr = disassemble_printaddr; + di.di_printf = disassemble_printf; + + int base = getBase(); + int pc = getPC(); + for(int i = base; i < pc; i += 4) { + fprintf(out, "%08x: %08x ", i, *(int*) i); + ::disasm(&di, i, 0); + } + return 0; + } + + private: + static FILE* disasmOut; + + static u_int + disassemble_readword(u_int address) + { + return(*((u_int *)address)); + } + + static void + disassemble_printaddr(u_int address) + { + fprintf(disasmOut, "0x%08x", address); + } + + static void + disassemble_printf(const char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + vfprintf(disasmOut, fmt, ap); + va_end(ap); + } + + static const int BRANCH_REL_ADDRESS_MASK = 0x00ffffff; + + /** Encode a relative address that might also be + * a label. + */ + int encodeAddress(int value) { + int base = getBase(); + if (value >= base && value <= getPC() ) { + // This is a label, encode it relative to the base. + value = value - base; + } + return encodeRelAddress(value); + } + + int encodeRelAddress(int value) { + return BRANCH_REL_ADDRESS_MASK & (value >> 2); + } + + typedef int (*int2FnPtr)(int a, int b); + void callRuntime(int2FnPtr fn) { + o4(0xE59F2000); // ldr r2, .L1 + o4(0xEA000000); // b .L99 + o4((int) fn); //.L1: .word fn + o4(0xE12FFF32); //.L99: blx r2 + } + + static int runtime_DIV(int a, int b) { + return b / a; + } + + static int runtime_MOD(int a, int b) { + return b % a; + } + + void error(const char* fmt,...) { + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(12); + } + }; + + class X86CodeGenerator : public CodeGenerator { + public: + X86CodeGenerator() {} + virtual ~X86CodeGenerator() {} + + /* returns address to patch with local variable size + */ + virtual int functionEntry(int argCount) { + o(0xe58955); /* push %ebp, mov %esp, %ebp */ + return oad(0xec81, 0); /* sub $xxx, %esp */ + } + + virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) { + o(0xc3c9); /* leave, ret */ + *(int *) localVariableAddress = localVariableSize; /* save local variables */ + } + + /* load immediate value */ + virtual void li(int t) { + oad(0xb8, t); /* mov $xx, %eax */ + } + + virtual int gjmp(int t) { + return psym(0xe9, t); + } + + /* l = 0: je, l == 1: jne */ + virtual int gtst(bool l, int t) { + o(0x0fc085); /* test %eax, %eax, je/jne xxx */ + return psym(0x84 + l, t); + } + + virtual void gcmp(int op) { + int t = decodeOp(op); + o(0xc139); /* cmp %eax,%ecx */ + li(0); + o(0x0f); /* setxx %al */ + o(t + 0x90); + o(0xc0); + } + + virtual void genOp(int op) { + o(decodeOp(op)); + if (op == OP_MOD) + o(0x92); /* xchg %edx, %eax */ + } + + virtual void clearECX() { + oad(0xb9, 0); /* movl $0, %ecx */ + } + + virtual void pushEAX() { + o(0x50); /* push %eax */ + } + + virtual void popECX() { + o(0x59); /* pop %ecx */ + } + + virtual void storeEAXToAddressECX(bool isInt) { + o(0x0188 + isInt); /* movl %eax/%al, (%ecx) */ + } + + virtual void loadEAXIndirect(bool isInt) { + if (isInt) + o(0x8b); /* mov (%eax), %eax */ + else + o(0xbe0f); /* movsbl (%eax), %eax */ + ob(0); /* add zero in code */ + } + + virtual void leaEAX(int ea) { + gmov(10, ea); /* leal EA, %eax */ + } + + virtual void storeEAX(int ea) { + gmov(6, ea); /* mov %eax, EA */ + } + + virtual void loadEAX(int ea, bool isIncDec, int op) { + gmov(8, ea); /* mov EA, %eax */ + if (isIncDec) { + /* Implement post-increment or post decrement. + */ + gmov(0, ea); /* 83 ADD */ + o(decodeOp(op)); + } + } + + virtual int beginFunctionCallArguments() { + return oad(0xec81, 0); /* sub $xxx, %esp */ + } + + virtual void storeEAToArg(int l) { + oad(0x248489, l); /* movl %eax, xxx(%esp) */ + } + + virtual void endFunctionCallArguments(int a, int l) { + * (int*) a = l; + } + + virtual int callForward(int symbol) { + return psym(0xe8, symbol); /* call xxx */ + } + + virtual void callRelative(int t) { + psym(0xe8, t); /* call xxx */ + } + + virtual void callIndirect(int l) { + oad(0x2494ff, l); /* call *xxx(%esp) */ + } + + virtual void adjustStackAfterCall(int l, bool isIndirect) { + if (isIndirect) { + l += 4; + } + oad(0xc481, l); /* add $xxx, %esp */ + } + + virtual int jumpOffset() { + return 5; + } + + virtual int disassemble(FILE* out) { + return 1; + } + + private: + static const int operatorHelper[]; + + int decodeOp(int op) { + if (op < 0 || op > OP_COUNT) { + fprintf(stderr, "Out-of-range operator: %d\n", op); + exit(1); + } + return operatorHelper[op]; + } + + void gmov(int l, int t) { + o(l + 0x83); + oad((t < LOCAL) << 7 | 5, t); + } + }; + + /* vars: value of variables + loc : local variable index + glo : global variable index + ind : output code ptr + rsym: return symbol + prog: output code + dstk: define stack + dptr, dch: macro state + */ + int tok, tokc, tokl, ch, vars, rsym, loc, glo, sym_stk, dstk, + dptr, dch, last_id; + void* pSymbolBase; + void* pGlobalBase; + void* pVarsBase; + FILE* file; + + CodeBuf codeBuf; + CodeGenerator* pGen; + + static const int ALLOC_SIZE = 99999; + + /* depends on the init string */ + static const int TOK_STR_SIZE = 48; + static const int TOK_IDENT = 0x100; + static const int TOK_INT = 0x100; + static const int TOK_IF = 0x120; + static const int TOK_ELSE = 0x138; + static const int TOK_WHILE = 0x160; + static const int TOK_BREAK = 0x190; + static const int TOK_RETURN = 0x1c0; + static const int TOK_FOR = 0x1f8; + static const int TOK_DEFINE = 0x218; + static const int TOK_MAIN = 0x250; + + static const int TOK_DUMMY = 1; + static const int TOK_NUM = 2; + + static const int LOCAL = 0x200; + + static const int SYM_FORWARD = 0; + static const int SYM_DEFINE = 1; + + /* tokens in string heap */ + static const int TAG_TOK = ' '; + static const int TAG_MACRO = 2; + + static const int OP_INCREMENT = 0; + static const int OP_DECREMENT = 1; + static const int OP_MUL = 2; + static const int OP_DIV = 3; + static const int OP_MOD = 4; + static const int OP_PLUS = 5; + static const int OP_MINUS = 6; + static const int OP_SHIFT_LEFT = 7; + static const int OP_SHIFT_RIGHT = 8; + static const int OP_LESS_EQUAL = 9; + static const int OP_GREATER_EQUAL = 10; + static const int OP_LESS = 11; + static const int OP_GREATER = 12; + static const int OP_EQUALS = 13; + static const int OP_NOT_EQUALS = 14; + static const int OP_LOGICAL_AND = 15; + static const int OP_LOGICAL_OR = 16; + static const int OP_BIT_AND = 17; + static const int OP_BIT_XOR = 18; + static const int OP_BIT_OR = 19; + static const int OP_BIT_NOT = 20; + static const int OP_LOGICAL_NOT = 21; + static const int OP_COUNT = 22; + + /* Operators are searched from front, the two-character operators appear + * before the single-character operators with the same first character. + * @ is used to pad out single-character operators. + */ + static const char* operatorChars; + static const char operatorLevel[]; + + void pdef(int t) { + *(char *) dstk++ = t; + } + + void inp() { + if (dptr) { + ch = *(char *) dptr++; + if (ch == TAG_MACRO) { + dptr = 0; + ch = dch; + } + } else + ch = fgetc(file); + /* printf("ch=%c 0x%x\n", ch, ch); */ + } + + int isid() { + return isalnum(ch) | (ch == '_'); + } + + /* read a character constant */ + void getq() { + if (ch == '\\') { + inp(); + if (ch == 'n') + ch = '\n'; + } + } + + void next() { + int l, a; + + while (isspace(ch) | (ch == '#')) { + if (ch == '#') { + inp(); + next(); + if (tok == TOK_DEFINE) { + next(); + pdef(TAG_TOK); /* fill last ident tag */ + *(int *) tok = SYM_DEFINE; + *(int *) (tok + 4) = dstk; /* define stack */ + } + /* well we always save the values ! */ + while (ch != '\n') { + pdef(ch); + inp(); + } + pdef(ch); + pdef(TAG_MACRO); + } + inp(); + } + tokl = 0; + tok = ch; + /* encode identifiers & numbers */ + if (isid()) { + pdef(TAG_TOK); + last_id = dstk; + while (isid()) { + pdef(ch); + inp(); + } + if (isdigit(tok)) { + tokc = strtol((char*) last_id, 0, 0); + tok = TOK_NUM; + } else { + *(char *) dstk = TAG_TOK; /* no need to mark end of string (we + suppose data is initialized to zero by calloc) */ + tok = (int) (strstr((char*) sym_stk, (char*) (last_id - 1)) + - sym_stk); + *(char *) dstk = 0; /* mark real end of ident for dlsym() */ + tok = tok * 8 + TOK_IDENT; + if (tok > TOK_DEFINE) { + tok = vars + tok; + /* printf("tok=%s %x\n", last_id, tok); */ + /* define handling */ + if (*(int *) tok == SYM_DEFINE) { + dptr = *(int *) (tok + 4); + dch = ch; + inp(); + next(); + } + } + } + } else { + inp(); + if (tok == '\'') { + tok = TOK_NUM; + getq(); + tokc = ch; + inp(); + inp(); + } else if ((tok == '/') & (ch == '*')) { + inp(); + while (ch) { + while (ch != '*') + inp(); + inp(); + if (ch == '/') + ch = 0; + } + inp(); + next(); + } else if ((tok == '/') & (ch == '/')) { + inp(); + while (ch && (ch != '\n')) { + inp(); + } + inp(); + next(); + } else { + const char* t = operatorChars; + int opIndex = 0; + while ((l = *t++) != 0) { + a = *t++; + tokl = operatorLevel[opIndex]; + tokc = opIndex; + if ((l == tok) & ((a == ch) | (a == '@'))) { +#if 0 + printf("%c%c -> tokl=%d tokc=0x%x\n", + l, a, tokl, tokc); +#endif + if (a == ch) { + inp(); + tok = TOK_DUMMY; /* dummy token for double tokens */ + } + break; + } + opIndex++; + } + if (l == 0) { + tokl = 0; + tokc = 0; + } + } + } +#if 0 + { + int p; + + printf("tok=0x%x ", tok); + if (tok >= TOK_IDENT) { + printf("'"); + if (tok> TOK_DEFINE) + p = sym_stk + 1 + (tok - vars - TOK_IDENT) / 8; + else + p = sym_stk + 1 + (tok - TOK_IDENT) / 8; + while (*(char *)p != TAG_TOK && *(char *)p) + printf("%c", *(char *)p++); + printf("'\n"); + } else if (tok == TOK_NUM) { + printf("%d\n", tokc); + } else { + printf("'%c'\n", tok); + } + } +#endif + } + + void error(const char *fmt, ...) { + va_list ap; + + va_start(ap, fmt); + fprintf(stderr, "%ld: ", ftell((FILE *) file)); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + va_end(ap); + exit(1); + } + + void skip(int c) { + if (tok != c) { + error("'%c' expected", c); + } + next(); + } + + /* l is one if '=' parsing wanted (quick hack) */ + void unary(int l) { + int n, t, a, c; + t = 0; + n = 1; /* type of expression 0 = forward, 1 = value, other = + lvalue */ + if (tok == '\"') { + pGen->li(glo); + while (ch != '\"') { + getq(); + *(char *) glo++ = ch; + inp(); + } + *(char *) glo = 0; + glo = (glo + 4) & -4; /* align heap */ + inp(); + next(); + } else { + c = tokl; + a = tokc; + t = tok; + next(); + if (t == TOK_NUM) { + pGen->li(a); + } else if (c == 2) { + /* -, +, !, ~ */ + unary(0); + pGen->clearECX(); + if (t == '!') + pGen->gcmp(a); + else + pGen->genOp(a); + } else if (t == '(') { + expr(); + skip(')'); + } else if (t == '*') { + /* parse cast */ + skip('('); + t = tok; /* get type */ + next(); /* skip int/char/void */ + next(); /* skip '*' or '(' */ + if (tok == '*') { + /* function type */ + skip('*'); + skip(')'); + skip('('); + skip(')'); + t = 0; + } + skip(')'); + unary(0); + if (tok == '=') { + next(); + pGen->pushEAX(); + expr(); + pGen->popECX(); + pGen->storeEAXToAddressECX(t == TOK_INT); + } else if (t) { + pGen->loadEAXIndirect(t == TOK_INT); + } + } else if (t == '&') { + pGen->leaEAX(*(int *) tok); + next(); + } else { + n = *(int *) t; + /* forward reference: try dlsym */ + if (!n) { + n = (int) dlsym(RTLD_DEFAULT, (char*) last_id); + } + if ((tok == '=') & l) { + /* assignment */ + next(); + expr(); + pGen->storeEAX(n); + } else if (tok != '(') { + /* variable */ + pGen->loadEAX(n, tokl == 11, tokc); + if (tokl == 11) { + next(); + } + } + } + } + + /* function call */ + if (tok == '(') { + if (n == 1) + pGen->pushEAX(); + + /* push args and invert order */ + a = pGen->beginFunctionCallArguments(); + next(); + l = 0; + while (tok != ')') { + expr(); + pGen->storeEAToArg(l); + if (tok == ',') + next(); + l = l + 4; + } + pGen->endFunctionCallArguments(a, l); + next(); + if (!n) { + /* forward reference */ + t = t + 4; + *(int *) t = pGen->callForward(*(int *) t); + } else if (n == 1) { + pGen->callIndirect(l); + } else { + pGen->callRelative(n - codeBuf.getPC() - pGen->jumpOffset()); + } + if (l | (n == 1)) + pGen->adjustStackAfterCall(l, n == 1); + } + } + + void sum(int l) { + int t, n, a; + t = 0; + if (l-- == 1) + unary(1); + else { + sum(l); + a = 0; + while (l == tokl) { + n = tok; + t = tokc; + next(); + + if (l > 8) { + a = pGen->gtst(t == OP_LOGICAL_OR, a); /* && and || output code generation */ + sum(l); + } else { + pGen->pushEAX(); + sum(l); + pGen->popECX(); + + if ((l == 4) | (l == 5)) { + pGen->gcmp(t); + } else { + pGen->genOp(t); + } + } + } + /* && and || output code generation */ + if (a && l > 8) { + a = pGen->gtst(t == OP_LOGICAL_OR, a); + pGen->li(t != OP_LOGICAL_OR); + pGen->gjmp(5); /* jmp $ + 5 (sizeof li, FIXME for ARM) */ + pGen->gsym(a); + pGen->li(t == OP_LOGICAL_OR); + } + } + } + + void expr() { + sum(11); + } + + int test_expr() { + expr(); + return pGen->gtst(0, 0); + } + + void block(int l) { + int a, n, t; + + if (tok == TOK_IF) { + next(); + skip('('); + a = test_expr(); + skip(')'); + block(l); + if (tok == TOK_ELSE) { + next(); + n = pGen->gjmp(0); /* jmp */ + pGen->gsym(a); + block(l); + pGen->gsym(n); /* patch else jmp */ + } else { + pGen->gsym(a); /* patch if test */ + } + } else if ((tok == TOK_WHILE) | (tok == TOK_FOR)) { + t = tok; + next(); + skip('('); + if (t == TOK_WHILE) { + n = codeBuf.getPC(); // top of loop, target of "next" iteration + a = test_expr(); + } else { + if (tok != ';') + expr(); + skip(';'); + n = codeBuf.getPC(); + a = 0; + if (tok != ';') + a = test_expr(); + skip(';'); + if (tok != ')') { + t = pGen->gjmp(0); + expr(); + pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset()); + pGen->gsym(t); + n = t + 4; + } + } + skip(')'); + block((int) &a); + pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset()); /* jmp */ + pGen->gsym(a); + } else if (tok == '{') { + next(); + /* declarations */ + decl(1); + while (tok != '}') + block(l); + next(); + } else { + if (tok == TOK_RETURN) { + next(); + if (tok != ';') + expr(); + rsym = pGen->gjmp(rsym); /* jmp */ + } else if (tok == TOK_BREAK) { + next(); + *(int *) l = pGen->gjmp(*(int *) l); + } else if (tok != ';') + expr(); + skip(';'); + } + } + + /* 'l' is true if local declarations */ + void decl(int l) { + int a; + + while ((tok == TOK_INT) | ((tok != -1) & (!l))) { + if (tok == TOK_INT) { + next(); + while (tok != ';') { + if (l) { + loc = loc + 4; + *(int *) tok = -loc; + } else { + *(int *) tok = glo; + glo = glo + 4; + } + next(); + if (tok == ',') + next(); + } + skip(';'); + } else { + /* patch forward references (XXX: do not work for function + pointers) */ + pGen->gsym(*(int *) (tok + 4)); + /* put function address */ + *(int *) tok = codeBuf.getPC(); + next(); + skip('('); + a = 8; + int argCount = 0; + while (tok != ')') { + /* read param name and compute offset */ + *(int *) tok = a; + a = a + 4; + next(); + if (tok == ',') + next(); + argCount++; + } + next(); /* skip ')' */ + rsym = loc = 0; + a = pGen->functionEntry(argCount); + block(0); + pGen->gsym(rsym); + pGen->functionExit(argCount, a, loc); + } + } + } + + void cleanup() { + if (sym_stk != 0) { + free((void*) sym_stk); + sym_stk = 0; + } + if (pGlobalBase != 0) { + free((void*) pGlobalBase); + pGlobalBase = 0; + } + if (pVarsBase != 0) { + free(pVarsBase); + pVarsBase = 0; + } + if (pGen) { + delete pGen; + pGen = 0; + } + } + + void clear() { + tok = 0; + tokc = 0; + tokl = 0; + ch = 0; + vars = 0; + rsym = 0; + loc = 0; + glo = 0; + sym_stk = 0; + dstk = 0; + dptr = 0; + dch = 0; + last_id = 0; + file = 0; + pGlobalBase = 0; + pVarsBase = 0; + pGen = 0; + } + + void setArchitecture(const char* architecture) { + delete pGen; + pGen = 0; + + if (architecture != NULL) { + if (strcmp(architecture, "arm") == 0) { + pGen = new ARMCodeGenerator(); + } else if (strcmp(architecture, "x86") == 0) { + pGen = new X86CodeGenerator(); + } else { + fprintf(stderr, "Unknown architecture %s", architecture); + } + } + + if (pGen == NULL) { + pGen = new ARMCodeGenerator(); + } + } + +public: + struct args { + args() { + architecture = 0; + } + const char* architecture; + }; + + compiler() { + clear(); + } + + ~compiler() { + cleanup(); + } + + int compile(FILE* in, args& args) { + cleanup(); + clear(); + codeBuf.init(ALLOC_SIZE); + setArchitecture(args.architecture); + pGen->init(&codeBuf); + file = in; + sym_stk = (int) calloc(1, ALLOC_SIZE); + dstk = (int) strcpy((char*) sym_stk, + " int if else while break return for define main ") + + TOK_STR_SIZE; + pGlobalBase = calloc(1, ALLOC_SIZE); + glo = (int) pGlobalBase; + pVarsBase = calloc(1, ALLOC_SIZE); + vars = (int) pVarsBase; + inp(); + next(); + decl(0); + pGen->finishCompile(); + return 0; + } + + int run(int argc, char** argv) { + typedef int (*mainPtr)(int argc, char** argv); + mainPtr aMain = (mainPtr) *(int*) (vars + TOK_MAIN); + if (!aMain) { + fprintf(stderr, "Could not find function \"main\".\n"); + return -1; + } + return aMain(argc, argv); + } + + int dump(FILE* out) { + fwrite(codeBuf.getBase(), 1, codeBuf.getSize(), out); + return 0; + } + + int disassemble(FILE* out) { + return pGen->disassemble(out); + } + +}; + +const char* compiler::operatorChars = + "++--*@/@%@+@-@<<>><=>=<@>@==!=&&||&@^@|@~@!@"; + +const char compiler::operatorLevel[] = + {11, 11, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, + 5, 5, /* ==, != */ + 9, 10, /* &&, || */ + 6, 7, 8, /* & ^ | */ + 2, 2 /* ~ ! */ + }; + +FILE* compiler::ARMCodeGenerator::disasmOut; + +const int compiler::X86CodeGenerator::operatorHelper[] = { + 0x1, // ++ + 0xff, // -- + 0xc1af0f, // * + 0xf9f79991, // / + 0xf9f79991, // % (With manual assist to swap results) + 0xc801, // + + 0xd8f7c829, // - + 0xe0d391, // << + 0xf8d391, // >> + 0xe, // <= + 0xd, // >= + 0xc, // < + 0xf, // > + 0x4, // == + 0x5, // != + 0x0, // && + 0x1, // || + 0xc821, // & + 0xc831, // ^ + 0xc809, // | + 0xd0f7, // ~ + 0x4 // ! +}; + +} // namespace acc + +// This is a separate function so it can easily be set by breakpoint in gdb. +int run(acc::compiler& c, int argc, char** argv) { + return c.run(argc, argv); +} + +int main(int argc, char** argv) { + bool doDump = false; + bool doDisassemble = false; + const char* inFile = NULL; + const char* outFile = NULL; + const char* architecture = "arm"; + int i; + for (i = 1; i < argc; i++) { + char* arg = argv[i]; + if (arg[0] == '-') { + switch (arg[1]) { + case 'a': + if (i + 1 >= argc) { + fprintf(stderr, "Expected architecture after -a\n"); + return 2; + } + architecture = argv[i+1]; + i += 1; + break; + case 'd': + if (i + 1 >= argc) { + fprintf(stderr, "Expected filename after -d\n"); + return 2; + } + doDump = true; + outFile = argv[i + 1]; + i += 1; + break; + case 'S': + doDisassemble = true; + break; + default: + fprintf(stderr, "Unrecognized flag %s\n", arg); + return 3; + } + } else if (inFile == NULL) { + inFile = arg; + } else { + break; + } + } + + FILE* in = stdin; + if (inFile) { + in = fopen(inFile, "r"); + if (!in) { + fprintf(stderr, "Could not open input file %s\n", inFile); + return 1; + } + } + acc::compiler compiler; + acc::compiler::args args; + args.architecture = architecture; + int compileResult = compiler.compile(in, args); + if (in != stdin) { + fclose(in); + } + if (compileResult) { + fprintf(stderr, "Compile failed: %d\n", compileResult); + return 6; + } + if (doDisassemble) { + compiler.disassemble(stderr); + } + if (doDump) { + FILE* save = fopen(outFile, "w"); + if (!save) { + fprintf(stderr, "Could not open output file %s\n", outFile); + return 5; + } + compiler.dump(save); + fclose(save); + } else { + fprintf(stderr, "Executing compiled code:\n"); + int codeArgc = argc - i + 1; + char** codeArgv = argv + i - 1; + codeArgv[0] = (char*) (inFile ? inFile : "stdin"); + int result = run(compiler, codeArgc, codeArgv); + fprintf(stderr, "result: %d\n", result); + return result; + } + + return 0; +} |
