summary refs log tree commit diff stats
path: root/libacc/acc.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'libacc/acc.cpp')
-rw-r--r-- libacc/acc.cpp 1246
1 files changed, 1021 insertions, 225 deletions
diff --git a/libacc/acc.cpp b/libacc/acc.cpp
index de36ce5..7d14e3f 100644
--- a/libacc/acc.cpp
+++ b/libacc/acc.cpp
@@ -10,12 +10,17 @@
#include <ctype.h>
#include <dlfcn.h>
-#include <setjmp.h>
+#include <errno.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <cutils/hashmap.h>
+
+#if defined(__i386__)
+#include <sys/mman.h>
+#endif
#if defined(__arm__)
#include <unistd.h>
@@ -43,6 +48,8 @@
// #define LOG_API(...) fprintf (stderr, __VA_ARGS__)
// #define ENABLE_ARM_DISASSEMBLY
+// #define PROVIDE_TRACE_CODEGEN
+
namespace acc {
class ErrorSink {
@@ -63,6 +70,7 @@ class Compiler : public ErrorSink {
char* pProgramBase;
ErrorSink* mErrorSink;
int mSize;
+ bool mOverflowed;
void release() {
if (pProgramBase != 0) {
@@ -71,13 +79,16 @@ class Compiler : public ErrorSink {
}
}
- void check(int n) {
+ bool check(int n) {
int newSize = ind - pProgramBase + n;
- if (newSize > mSize) {
+ bool overflow = newSize > mSize;
+ if (overflow && !mOverflowed) {
+ mOverflowed = true;
if (mErrorSink) {
mErrorSink->error("Code too large: %d bytes", newSize);
}
}
+ return overflow;
}
public:
@@ -86,6 +97,7 @@ class Compiler : public ErrorSink {
ind = 0;
mErrorSink = 0;
mSize = 0;
+ mOverflowed = false;
}
~CodeBuf() {
@@ -104,7 +116,9 @@ class Compiler : public ErrorSink {
}
int o4(int n) {
- check(4);
+ if(check(4)) {
+ return 0;
+ }
intptr_t result = (intptr_t) ind;
* (int*) ind = n;
ind += 4;
@@ -115,7 +129,9 @@ class Compiler : public ErrorSink {
* Output a byte. Handles all values, 0..ff.
*/
void ob(int n) {
- check(1);
+ if(check(1)) {
+ return;
+ }
*ind++ = n;
}
@@ -166,7 +182,7 @@ class Compiler : public ErrorSink {
pCodeBuf->setErrorSink(mErrorSink);
}
- void setErrorSink(ErrorSink* pErrorSink) {
+ virtual void setErrorSink(ErrorSink* pErrorSink) {
mErrorSink = pErrorSink;
if (pCodeBuf) {
pCodeBuf->setErrorSink(mErrorSink);
@@ -988,7 +1004,14 @@ class Compiler : public ErrorSink {
}
virtual int finishCompile() {
- return 0;
+ size_t pagesize = 4096;
+ size_t base = (size_t) getBase() & ~ (pagesize - 1);
+ size_t top = ((size_t) getPC() + pagesize - 1) & ~ (pagesize - 1);
+ int err = mprotect((void*) base, top - base, PROT_READ | PROT_WRITE | PROT_EXEC);
+ if (err) {
+ error("mprotect() failed: %d", errno);
+ }
+ return err;
}
private:
@@ -1024,18 +1047,184 @@ class Compiler : public ErrorSink {
int decodeOp(int op) {
if (op < 0 || op > OP_COUNT) {
error("Out-of-range operator: %d\n", op);
+ op = 0;
}
return operatorHelper[op];
}
void gmov(int l, int t) {
o(l + 0x83);
- oad((t < LOCAL) << 7 | 5, t);
+ oad((t > -LOCAL && t < LOCAL) << 7 | 5, t);
}
};
#endif // PROVIDE_X86_CODEGEN
+#ifdef PROVIDE_TRACE_CODEGEN
+ /**
+  * Tracing decorator for a CodeGenerator.
+  *
+  * Every call is forwarded to the wrapped generator. Most calls also write
+  * a one-line trace to stderr: calls that return a value are traced after
+  * the wrapped call (so the result can be shown), void calls are traced
+  * before it. The wrapped generator is deleted by the destructor.
+  */
+ class TraceCodeGenerator : public CodeGenerator {
+ private:
+     CodeGenerator* mpBase;  // wrapped generator; deleted in the destructor
+
+ public:
+     TraceCodeGenerator(CodeGenerator* pBase) : mpBase(pBase) {
+     }
+
+     virtual ~TraceCodeGenerator() {
+         delete mpBase;
+     }
+
+     // The following calls are forwarded without tracing.
+     virtual void init(CodeBuf* pCodeBuf) {
+         mpBase->init(pCodeBuf);
+     }
+
+     void setErrorSink(ErrorSink* pErrorSink) {
+         mpBase->setErrorSink(pErrorSink);
+     }
+
+     /* returns address to patch with local variable size
+      */
+     virtual int functionEntry(int argCount) {
+         const int rv = mpBase->functionEntry(argCount);
+         fprintf(stderr, "functionEntry(%d) -> %d\n", argCount, rv);
+         return rv;
+     }
+
+     virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) {
+         fprintf(stderr, "functionExit(%d, %d, %d)\n",
+                 argCount, localVariableAddress, localVariableSize);
+         mpBase->functionExit(argCount, localVariableAddress, localVariableSize);
+     }
+
+     /* load immediate value */
+     virtual void li(int t) {
+         fprintf(stderr, "li(%d)\n", t);
+         mpBase->li(t);
+     }
+
+     virtual int gjmp(int t) {
+         const int rv = mpBase->gjmp(t);
+         fprintf(stderr, "gjmp(%d) = %d\n", t, rv);
+         return rv;
+     }
+
+     /* l = 0: je, l == 1: jne */
+     virtual int gtst(bool l, int t) {
+         const int rv = mpBase->gtst(l, t);
+         fprintf(stderr, "gtst(%d,%d) = %d\n", l, t, rv);
+         return rv;
+     }
+
+     virtual void gcmp(int op) {
+         fprintf(stderr, "gcmp(%d)\n", op);
+         mpBase->gcmp(op);
+     }
+
+     virtual void genOp(int op) {
+         fprintf(stderr, "genOp(%d)\n", op);
+         mpBase->genOp(op);
+     }
+
+     virtual void clearR1() {
+         fputs("clearR1()\n", stderr);
+         mpBase->clearR1();
+     }
+
+     virtual void pushR0() {
+         fputs("pushR0()\n", stderr);
+         mpBase->pushR0();
+     }
+
+     virtual void popR1() {
+         fputs("popR1()\n", stderr);
+         mpBase->popR1();
+     }
+
+     virtual void storeR0ToR1(bool isInt) {
+         fprintf(stderr, "storeR0ToR1(%d)\n", isInt);
+         mpBase->storeR0ToR1(isInt);
+     }
+
+     virtual void loadR0FromR0(bool isInt) {
+         fprintf(stderr, "loadR0FromR0(%d)\n", isInt);
+         mpBase->loadR0FromR0(isInt);
+     }
+
+     virtual void leaR0(int ea) {
+         fprintf(stderr, "leaR0(%d)\n", ea);
+         mpBase->leaR0(ea);
+     }
+
+     virtual void storeR0(int ea) {
+         fprintf(stderr, "storeR0(%d)\n", ea);
+         mpBase->storeR0(ea);
+     }
+
+     virtual void loadR0(int ea, bool isIncDec, int op) {
+         fprintf(stderr, "loadR0(%d, %d, %d)\n", ea, isIncDec, op);
+         mpBase->loadR0(ea, isIncDec, op);
+     }
+
+     virtual int beginFunctionCallArguments() {
+         const int rv = mpBase->beginFunctionCallArguments();
+         fprintf(stderr, "beginFunctionCallArguments() = %d\n", rv);
+         return rv;
+     }
+
+     virtual void storeR0ToArg(int l) {
+         fprintf(stderr, "storeR0ToArg(%d)\n", l);
+         mpBase->storeR0ToArg(l);
+     }
+
+     virtual void endFunctionCallArguments(int a, int l) {
+         fprintf(stderr, "endFunctionCallArguments(%d, %d)\n", a, l);
+         mpBase->endFunctionCallArguments(a, l);
+     }
+
+     virtual int callForward(int symbol) {
+         const int rv = mpBase->callForward(symbol);
+         fprintf(stderr, "callForward(%d) = %d\n", symbol, rv);
+         return rv;
+     }
+
+     virtual void callRelative(int t) {
+         fprintf(stderr, "callRelative(%d)\n", t);
+         mpBase->callRelative(t);
+     }
+
+     virtual void callIndirect(int l) {
+         fprintf(stderr, "callIndirect(%d)\n", l);
+         mpBase->callIndirect(l);
+     }
+
+     virtual void adjustStackAfterCall(int l, bool isIndirect) {
+         fprintf(stderr, "adjustStackAfterCall(%d, %d)\n", l, isIndirect);
+         mpBase->adjustStackAfterCall(l, isIndirect);
+     }
+
+     // jumpOffset() and disassemble() are forwarded without tracing.
+     virtual int jumpOffset() {
+         return mpBase->jumpOffset();
+     }
+
+     virtual int disassemble(FILE* out) {
+         return mpBase->disassemble(out);
+     }
+
+     /* output a symbol and patch all calls to it */
+     virtual void gsym(int t) {
+         fprintf(stderr, "gsym(%d)\n", t);
+         mpBase->gsym(t);
+     }
+
+     virtual int finishCompile() {
+         const int rv = mpBase->finishCompile();
+         fprintf(stderr, "finishCompile() = %d\n", rv);
+         return rv;
+     }
+ };
+
+#endif // PROVIDE_TRACE_CODEGEN
+
class InputStream {
public:
int getChar() {
@@ -1086,27 +1275,6 @@ class Compiler : public ErrorSink {
size_t mPosition;
};
- int ch; // Current input character, or EOF
- intptr_t tok; // token
- intptr_t tokc; // token extra info
- int tokl; // token operator level
- intptr_t rsym; // return symbol
- intptr_t loc; // local variable index
- char* glo; // global variable index
- char* sym_stk;
- char* dstk; // Define stack
- char* dptr; // Macro state: Points to macro text during macro playback.
- int dch; // Macro state: Saves old value of ch during a macro playback.
- char* last_id;
- void* pSymbolBase;
- char* pGlobalBase;
- char* pVarsBase; // Value of variables
-
- InputStream* file;
-
- CodeBuf codeBuf;
- CodeGenerator* pGen;
-
class String {
public:
String() {
@@ -1115,18 +1283,57 @@ class Compiler : public ErrorSink {
mSize = 0;
}
+ /**
+  * Build a String from a character buffer.
+  *
+  * item  - source characters.
+  * len   - number of characters, or a negative value to use strlen(item).
+  * adopt - when true the String points at item itself instead of copying
+  *         it (mUsed = len, mSize = len + 1); when false the characters
+  *         are copied via appendBytes().
+  */
+ String(const char* item, int len, bool adopt) {
+     const int count = (len < 0) ? (int) strlen(item) : len;
+     if (!adopt) {
+         // Start empty and copy the caller's characters.
+         mpBase = 0;
+         mUsed = 0;
+         mSize = 0;
+         appendBytes(item, count);
+         return;
+     }
+     // Take the caller's buffer as-is.
+     mpBase = (char*) item;
+     mUsed = count;
+     mSize = count + 1;
+ }
+
+ /**
+  * Copy constructor: makes an independent copy of other's characters.
+  *
+  * An empty source (one that never allocated a buffer) has a null base
+  * pointer. appendBytes() copies n + 1 bytes from its source, so it must
+  * not be handed a null pointer; in that case the copy just stays empty.
+  */
+ String(const String& other) {
+     mpBase = 0;
+     mUsed = 0;
+     mSize = 0;
+     if (other.getUnwrapped()) {
+         appendBytes(other.getUnwrapped(), other.len());
+     }
+ }
+
~String() {
if (mpBase) {
free(mpBase);
}
}
- char* getUnwrapped() {
+ /**
+  * Copy assignment: replaces this string's contents with a copy of other.
+  *
+  * Self-assignment is a no-op (clearing first would otherwise discard the
+  * contents before they are copied). A source with a null base pointer
+  * is treated as empty, because appendBytes() copies n + 1 bytes from its
+  * source and must not be handed a null pointer.
+  */
+ String& operator=(const String& other) {
+     if (this != &other) {
+         clear();
+         if (other.getUnwrapped()) {
+             appendBytes(other.getUnwrapped(), other.len());
+         }
+     }
+     return *this;
+ }
+
+ inline char* getUnwrapped() const {
return mpBase;
}
+ /**
+  * Reset the string to empty. The buffer is kept; its first byte is set
+  * to NUL when a buffer size has been recorded.
+  */
+ void clear() {
+     if (mSize != 0) {
+         *mpBase = 0;
+     }
+     mUsed = 0;
+ }
+
void appendCStr(const char* s) {
- int n = strlen(s);
+ appendBytes(s, strlen(s));
+ }
+
+ void appendBytes(const char* s, int n) {
memcpy(ensure(n), s, n + 1);
}
@@ -1134,6 +1341,14 @@ class Compiler : public ErrorSink {
* ensure(1) = c;
}
+ /**
+  * Give up the buffer: returns the current base pointer and leaves this
+  * String empty with a null base, so the destructor will not free it.
+  */
+ char* orphan() {
+     char* const released = mpBase;
+     mSize = 0;
+     mUsed = 0;
+     mpBase = 0;
+     return released;
+ }
+
void printf(const char* fmt,...) {
va_list ap;
va_start(ap, fmt);
@@ -1148,7 +1363,7 @@ class Compiler : public ErrorSink {
free(temp);
}
- size_t len() {
+ inline size_t len() const {
return mUsed;
}
@@ -1174,33 +1389,433 @@ class Compiler : public ErrorSink {
size_t mSize;
};
- String mErrorBuf;
+ /**
+ * Wrap an externally allocated string for use as a hash key.
+ *
+ * The base String is constructed in "adopt" mode, so it points at the
+ * caller's buffer instead of copying it. The destructor calls orphan()
+ * to null the base pointer before ~String() runs, so the borrowed
+ * buffer is not freed by this object.
+ */
+ class FakeString : public String {
+ public:
+ FakeString(const char* string, size_t length) :
+ String((char*) string, length, true) {} // adopt == true: borrow, do not copy
+
+ ~FakeString() {
+ orphan(); // detach the borrowed buffer so ~String() does not free it
+ }
+ };
+
+ /**
+  * Hash table from String keys to V* values, built on the cutils Hashmap.
+  *
+  * Keys and values are heap objects: freeKeyValue() deletes both, and
+  * put() deletes the passed key when the map reports a previous value.
+  * NOTE(review): clear() deletes every key/value through hashmapForEach();
+  * whether the entries themselves are also removed from the map is not
+  * visible here -- confirm before calling clear() anywhere but teardown.
+  */
+ template<class V> class StringTable {
+ public:
+     StringTable() {
+         init(10);
+     }
+
+     StringTable(size_t initialCapacity) {
+         init(initialCapacity);
+     }
+
+     ~StringTable() {
+         clear();
+         hashmapFree(mpMap);
+     }
+
+     /** Delete every stored key and value. */
+     void clear() {
+         hashmapForEach(mpMap, freeKeyValue, this);
+     }
+
+     bool contains(String* pKey) {
+         return hashmapContainsKey(mpMap, pKey);
+     }
+
+     V* get(String* pKey) {
+         return (V*) hashmapGet(mpMap, pKey);
+     }
- jmp_buf mErrorRecoveryJumpBuf;
+     V* remove(String* pKey) {
+         return (V*) hashmapRemove(mpMap, pKey);
+     }
+
+     /**
+      * Store value under pKey and return whatever hashmapPut() returns.
+      * When that is non-null the key was not adopted by the map, so the
+      * passed key object is deleted here.
+      */
+     V* put(String* pKey, V* value) {
+         V* const previous = (V*) hashmapPut(mpMap, pKey, value);
+         if (!previous) {
+             return previous;
+         }
+         // The key was not adopted by the map, so delete it here.
+         delete pKey;
+         return previous;
+     }
+
+     /** Invoke callback for each (key, value) pair via hashmapForEach(). */
+     void forEach(bool (*callback)(String* key, V* value, void* context),
+                  void* context) {
+         typedef bool (*RawCallback)(void*, void*, void*);
+         hashmapForEach(mpMap, (RawCallback) callback, context);
+     }
+
+ protected:
+
+     void init(size_t initialCapacity) {
+         mpMap = hashmapCreate(initialCapacity, hashFn, equalsFn);
+     }
+
+     /** Hash the key's characters. */
+     static int hashFn(void* pKey) {
+         String* const pText = (String*) pKey;
+         return hashmapHash(pText->getUnwrapped(), pText->len());
+     }
+
+     /** Keys are equal when lengths match and strcmp() reports equality. */
+     static bool equalsFn(void* keyA, void* keyB) {
+         String* const pLeft = (String*) keyA;
+         String* const pRight = (String*) keyB;
+         if (pLeft->len() != pRight->len()) {
+             return false;
+         }
+         return strcmp(pLeft->getUnwrapped(), pRight->getUnwrapped()) == 0;
+     }
+
+     /** hashmapForEach() callback: delete one key/value pair, keep going. */
+     static bool freeKeyValue(void* key, void* value, void* context) {
+         delete (String*) key;
+         delete (V*) value;
+         return true;
+     }
+
+     Hashmap* mpMap;
+ };
+
+ class MacroTable : public StringTable<String> { // macro name -> replacement text, both as String objects
+ public:
+ MacroTable() : StringTable<String>(10) {} // passes 10 as the table's initialCapacity
+ };
+
+ /**
+  * Maps C keyword spellings (plain C strings) to token values.
+  *
+  * Supported keywords map to their TOK_* value; keywords the compiler
+  * does not implement all map to TOK_UNSUPPORTED_KEYWORD. Keys are string
+  * literals and values are ints stored in the pointer slot, so nothing
+  * in the map needs to be freed individually.
+  */
+ class KeywordTable {
+ public:
+
+     KeywordTable(){
+         mpMap = hashmapCreate(40, hashFn, equalsFn);
+         put("int", TOK_INT);
+         put("char", TOK_CHAR);
+         put("void", TOK_VOID);
+         put("if", TOK_IF);
+         put("else", TOK_ELSE);
+         put("while", TOK_WHILE);
+         put("break", TOK_BREAK);
+         put("return", TOK_RETURN);
+         put("for", TOK_FOR);
+         // TODO: remove these preprocessor-specific keywords. You should
+         // be able to have symbols named pragma or define.
+         put("pragma", TOK_PRAGMA);
+         put("define", TOK_DEFINE);
+
+         // Null-terminated list of keywords that are recognized but
+         // not implemented.
+         const char* unsupported[] = {
+             "auto",
+             "case",
+             "const",
+             "continue",
+             "default",
+             "do",
+             "double",
+             "enum",
+             "extern",
+             "float",
+             "goto",
+             "long",
+             "register",
+             "short",
+             "signed",
+             "sizeof",
+             "static",
+             "struct",
+             "switch",
+             "typedef",
+             "union",
+             "unsigned",
+             "volatile",
+             "_Bool",
+             "_Complex",
+             "_Imaginary",
+             "inline",
+             "restrict",
+             0};
+
+         for(int i = 0; unsupported[i]; i++) {
+             put(unsupported[i], TOK_UNSUPPORTED_KEYWORD);
+         }
+     }
+
+     ~KeywordTable() {
+         hashmapFree(mpMap);
+     }
+
+     /**
+      * Look up the token value stored for key's text. The result of
+      * hashmapGet() is converted to int; callers treat 0 as "not a
+      * keyword".
+      */
+     int get(String* key) {
+         return (int) hashmapGet(mpMap, key->getUnwrapped());
+     }
+
+     /**
+      * Reverse lookup: return a keyword spelling whose stored token equals
+      * value, or 0 when no entry matches.
+      */
+     const char* lookupKeyFor(int value) {
+         FindValContext context;
+         context.key = 0;
+         // findKeyFn compares against context.value, so it must be set
+         // before iterating.
+         context.value = value;
+         hashmapForEach(mpMap, findKeyFn, &context);
+         return context.key;
+     }
+
+ private:
+     void put(const char* kw, int val) {
+         hashmapPut(mpMap, (void*) kw, (void*) val);
+     }
+
+     static int hashFn(void* pKey) {
+         char* pString = (char*) pKey;
+         return hashmapHash(pString, strlen(pString));
+     }
+
+     static bool equalsFn(void* keyA, void* keyB) {
+         const char* pStringA = (const char*) keyA;
+         const char* pStringB = (const char*) keyB;
+         return strcmp(pStringA, pStringB) == 0;
+     }
+
+     // Search state for lookupKeyFor(): value is the token being sought,
+     // key receives the matching spelling.
+     struct FindValContext {
+         char* key;
+         int value;
+     };
+
+     /** hashmapForEach() callback: stop (return false) at the first match. */
+     static bool findKeyFn(void* key, void* value, void* context) {
+         FindValContext* pContext = (FindValContext*) context;
+         if ((int) value == pContext->value) {
+             pContext->key = (char*) key;
+             return false;
+         }
+         return true;
+     }
+
+     Hashmap* mpMap;
+ };
+
+ /**
+  * Minimal growable array, also used as a stack (push/pop).
+  *
+  * Storage is managed with realloc()/free() and element destructors are
+  * never run, so E must be safe to move with a raw byte copy.
+  */
+ template<class E> class Array {
+ public:
+     Array() {
+         mpBase = 0;
+         mUsed = 0;
+         mSize = 0;
+     }
+
+     ~Array() {
+         if (mpBase) {
+             free(mpBase);
+         }
+     }
+
+     /**
+      * Return element i, or a default-constructed E when i is outside
+      * [0, len()). Index len() itself is out of range.
+      */
+     E get(int i) {
+         if (i < 0 || i >= (int) mUsed) {
+             // error("internal error: Index out of range");
+             return E();
+         }
+         return mpBase[i];
+     }
+
+     /** Overwrite element i; an out-of-range index is ignored. */
+     void set(int i, E val) {
+         if (i < 0 || i >= (int) mUsed) {
+             // error("internal error: Index out of range");
+             return;
+         }
+         mpBase[i] = val;
+     }
+
+     /** Drop the last element; popping an empty array is a no-op. */
+     void pop() {
+         if (mUsed > 0) {
+             mUsed -= 1;
+         } else {
+             // error("internal error: Popped empty stack.");
+         }
+     }
+
+     /** Append item at the end, growing the storage if needed. */
+     void push(E item) {
+         * ensure(1) = item;
+     }
+
+     /** Number of elements currently stored. */
+     size_t len() {
+         return mUsed;
+     }
+
+ private:
+     /**
+      * Reserve room for n more elements and return a pointer to the first
+      * new slot. Capacity grows to max(2 * old + 10, required).
+      */
+     E* ensure(int n) {
+         size_t newUsed = mUsed + n;
+         if (newUsed > mSize) {
+             size_t newSize = mSize * 2 + 10;
+             if (newSize < newUsed) {
+                 newSize = newUsed;
+             }
+             mpBase = (E*) realloc(mpBase, sizeof(E) * newSize);
+             mSize = newSize;
+         }
+         E* result = mpBase + mUsed;
+         mUsed = newUsed;
+         return result;
+     }
+
+     E* mpBase;     // element storage (realloc-managed), or 0
+     size_t mUsed;  // number of elements in use
+     size_t mSize;  // capacity, in elements
+ };
+
+ struct InputState { // element type of mInputStateStack
+ InputStream* pStream; // NOTE(review): presumably the input stream to resume -- not used in the visible code, confirm
+ int oldCh; // NOTE(review): presumably the saved value of ch for that stream -- confirm
+ };
+
+ struct VariableInfo { // per-symbol record stored in the symbol tables
+ VariableInfo() {
+ pAddress = 0;
+ pForward = 0;
+ }
+ void* pAddress; // Symbol's location: global storage, function entry point, or a frame offset stored as a pointer; 0 until assigned
+ void* pForward; // For a forward reference: head of the list to fix up (handed to gsym() once the function is defined)
+ };
+
+ typedef StringTable<VariableInfo> SymbolTable;
+
+ /**
+  * Scoped symbol table: a stack of SymbolTables, one per nesting level
+  * that actually declares something. Entry 0 holds the globals.
+  *
+  * Name keys passed to addLocal()/addGlobal() must be heap-allocated;
+  * ownership passes to this object whether or not the add succeeds.
+  * The object owns its tables and is not meant to be copied.
+  */
+ class SymbolStack {
+ public:
+     SymbolStack() {
+         mLevel = 0;
+         addEntry();
+     }
+
+     /** Free every table still on the stack (including the global one). */
+     ~SymbolStack() {
+         for (size_t i = 0; i < mStack.len(); i++) {
+             delete mStack.get((int) i).pTable;
+         }
+     }
+
+     /** Enter a nested scope. A table is created lazily by addLocal(). */
+     void pushLevel() {
+         mLevel++;
+     }
+
+     /**
+      * Leave the current scope, discarding its table if one was created.
+      */
+     void popLevel() {
+         mLevel--;
+         Entry e = mStack.get(mStack.len()-1);
+         if (mLevel < e.level) {
+             mStack.pop();
+             delete e.pTable;
+         }
+     }
+
+     /**
+      * Find pName, searching from the innermost table outward.
+      * Returns NULL when the name is not defined at any level.
+      */
+     VariableInfo* get(String* pName) {
+         int len = mStack.len();
+         VariableInfo* v = NULL;
+         int level = -1;
+         for (int i = len - 1; i >= 0; i--) {
+             Entry e = mStack.get(i);
+             v = e.pTable->get(pName);
+             if (v) {
+                 level = e.level;
+                 break;
+             }
+         }
+#if 0
+         fprintf(stderr, "Lookup %s %08x level %d\n", pName->getUnwrapped(), v, level);
+         if (v) {
+             fprintf(stderr, " %08x %08x\n", v->pAddress, v->pForward);
+         }
+#endif
+         return v;
+     }
+
+     /**
+      * Define pName in the current scope, creating that scope's table on
+      * first use. Returns NULL if the name already exists in that scope.
+      */
+     VariableInfo* addLocal(String* pName) {
+         int len = mStack.len();
+         if (mStack.get(len-1).level != mLevel) {
+             addEntry();
+             len++;
+         }
+         return addImp(len-1, pName);
+     }
+
+     /** Define pName in the global table; NULL if it already exists. */
+     VariableInfo* addGlobal(String* pName) {
+         return addImp(0, pName);
+     }
+
+     void forEachGlobal(
+         bool (*callback)(String* key, VariableInfo* value, void* context),
+         void* context) {
+         mStack.get(0).pTable->forEach(callback, context);
+     }
+
+ private:
+     VariableInfo* addImp(int entryIndex, String* pName) {
+         Entry e = mStack.get(entryIndex);
+         SymbolTable* pTable = e.pTable;
+         if (pTable->contains(pName)) {
+             // The table will not adopt the key, and callers hand over a
+             // freshly allocated String, so release it here.
+             delete pName;
+             return NULL;
+         }
+         VariableInfo* v = new VariableInfo();
+
+         delete pTable->put(pName, v);
+#if 0
+         fprintf(stderr, "Add \"%s\" %08x level %d\n", pName->getUnwrapped(), v, e.level);
+#endif
+         return v;
+     }
+
+     /** Push a new, empty table tagged with the current level. */
+     void addEntry() {
+         Entry e;
+         e.level = mLevel;
+         e.pTable = new SymbolTable();
+         mStack.push(e);
+     }
+
+     struct Entry {
+         Entry() {
+             level = 0;
+             pTable = NULL;
+         }
+         int level;            // nesting level this table belongs to
+         SymbolTable* pTable;  // owned by the enclosing SymbolStack
+     };
+
+     int mLevel;           // current nesting level (0 = global)
+     Array<Entry> mStack;  // innermost scope last
+ };
+
+ int ch; // Current input character, or EOF
+ intptr_t tok; // token
+ intptr_t tokc; // token extra info
+ int tokl; // token operator level
+ intptr_t rsym; // return symbol
+ intptr_t loc; // local variable index
+ char* glo; // global variable index
+ String mTokenString;
+ char* dptr; // Macro state: Points to macro text during macro playback.
+ int dch; // Macro state: Saves old value of ch during a macro playback.
+ char* pGlobalBase;
+ KeywordTable mKeywords;
+ SymbolStack mSymbolTable;
+ InputStream* file;
+
+ CodeBuf codeBuf;
+ CodeGenerator* pGen;
+
+ MacroTable mMacros;
+ Array<InputState> mInputStateStack;
+
+ String mErrorBuf;
String mPragmas;
int mPragmaStringCount;
static const int ALLOC_SIZE = 99999;
- // Indentifiers start at 0x100 and increase by # (chars + 1) * 8
- static const int TOK_IDENT = 0x100;
- static const int TOK_INT = 0x100;
- static const int TOK_CHAR = TOK_INT + 4*8;
- static const int TOK_VOID = TOK_CHAR + 5*8;
- static const int TOK_IF = TOK_VOID + 5*8;
- static const int TOK_ELSE = TOK_IF + 3*8;
- static const int TOK_WHILE = TOK_ELSE + 5*8;
- static const int TOK_BREAK = TOK_WHILE + 6*8;
- static const int TOK_RETURN = TOK_BREAK + 6*8;
- static const int TOK_FOR = TOK_RETURN + 7*8;
- static const int TOK_PRAGMA = TOK_FOR + 4*8;
- static const int TOK_DEFINE = TOK_PRAGMA + 7*8;
- static const int TOK_MAIN = TOK_DEFINE + 7*8;
-
static const int TOK_DUMMY = 1;
static const int TOK_NUM = 2;
+ // 3..255 are character and/or operators
+
+ // Keywords start at 0x100 and increase by 1
+ static const int TOK_KEYWORD = 0x100;
+ static const int TOK_INT = TOK_KEYWORD + 0;
+ static const int TOK_CHAR = TOK_KEYWORD + 1;
+ static const int TOK_VOID = TOK_KEYWORD + 2;
+ static const int TOK_IF = TOK_KEYWORD + 3;
+ static const int TOK_ELSE = TOK_KEYWORD + 4;
+ static const int TOK_WHILE = TOK_KEYWORD + 5;
+ static const int TOK_BREAK = TOK_KEYWORD + 6;
+ static const int TOK_RETURN = TOK_KEYWORD + 7;
+ static const int TOK_FOR = TOK_KEYWORD + 8;
+ static const int TOK_PRAGMA = TOK_KEYWORD + 9;
+ static const int TOK_DEFINE = TOK_KEYWORD + 10;
+ static const int TOK_UNSUPPORTED_KEYWORD = TOK_KEYWORD + 0xff;
+
+ static const int TOK_UNDEFINED_SYMBOL = 0x200;
+
+ // Symbols start at 0x300, but are really pointers to VariableInfo structs.
+ static const int TOK_SYMBOL = 0x300;
+
+
static const int LOCAL = 0x200;
static const int SYM_FORWARD = 0;
@@ -1208,7 +1823,6 @@ class Compiler : public ErrorSink {
/* tokens in string heap */
static const int TAG_TOK = ' ';
- static const int TAG_MACRO = 2;
static const int OP_INCREMENT = 0;
static const int OP_DECREMENT = 1;
@@ -1242,16 +1856,13 @@ class Compiler : public ErrorSink {
static const char operatorLevel[];
void pdef(int t) {
- if (dstk - sym_stk >= ALLOC_SIZE) {
- error("Symbol table exhausted");
- }
- *dstk++ = t;
+ mTokenString.append(t);
}
void inp() {
if (dptr) {
ch = *dptr++;
- if (ch == TAG_MACRO) {
+ if (ch == 0) {
dptr = 0;
ch = dch;
}
@@ -1266,13 +1877,92 @@ class Compiler : public ErrorSink {
return isalnum(ch) | (ch == '_');
}
- /* read a character constant */
- void getq() {
+ /* read a character constant, advances ch to after end of constant */
+ int getq() {
+ int val = ch;
if (ch == '\\') {
inp();
- if (ch == 'n')
- ch = '\n';
+ if (isoctal(ch)) {
+ // 1 to 3 octal characters.
+ val = 0;
+ for(int i = 0; i < 3; i++) {
+ if (isoctal(ch)) {
+ val = (val << 3) + ch - '0';
+ inp();
+ }
+ }
+ return val;
+ } else if (ch == 'x' || ch == 'X') {
+ // N hex chars
+ inp();
+ if (! isxdigit(ch)) {
+ error("'x' character escape requires at least one digit.");
+ } else {
+ val = 0;
+ while (isxdigit(ch)) {
+ int d = ch;
+ if (isdigit(d)) {
+ d -= '0';
+ } else if (d <= 'F') {
+ d = d - 'A' + 10;
+ } else {
+ d = d - 'a' + 10;
+ }
+ val = (val << 4) + d;
+ inp();
+ }
+ }
+ } else {
+ int val = ch;
+ switch (ch) {
+ case 'a':
+ val = '\a';
+ break;
+ case 'b':
+ val = '\b';
+ break;
+ case 'f':
+ val = '\f';
+ break;
+ case 'n':
+ val = '\n';
+ break;
+ case 'r':
+ val = '\r';
+ break;
+ case 't':
+ val = '\t';
+ break;
+ case 'v':
+ val = '\v';
+ break;
+ case '\\':
+ val = '\\';
+ break;
+ case '\'':
+ val = '\'';
+ break;
+ case '"':
+ val = '"';
+ break;
+ case '?':
+ val = '?';
+ break;
+ default:
+ error("Undefined character escape %c", ch);
+ break;
+ }
+ inp();
+ return val;
+ }
+ } else {
+ inp();
}
+ return val;
+ }
+
+ static bool isoctal(int ch) {
+ return ch >= '0' && ch <= '7';
}
void next() {
@@ -1283,22 +1973,13 @@ class Compiler : public ErrorSink {
inp();
next();
if (tok == TOK_DEFINE) {
- next();
- pdef(TAG_TOK); /* fill last ident tag */
- *(int *) tok = SYM_DEFINE;
- *(char* *) (tok + 4) = dstk; /* define stack */
- while (ch != '\n') {
- pdef(ch);
- inp();
- }
- pdef(ch);
- pdef(TAG_MACRO);
+ doDefine();
} else if (tok == TOK_PRAGMA) {
doPragma();
} else {
- error("Unsupported preprocessor directive \"%s\"", last_id);
+ error("Unsupported preprocessor directive \"%s\"",
+ mTokenString.getUnwrapped());
}
-
}
inp();
}
@@ -1306,37 +1987,35 @@ class Compiler : public ErrorSink {
tok = ch;
/* encode identifiers & numbers */
if (isid()) {
- pdef(TAG_TOK);
- last_id = dstk;
+ mTokenString.clear();
while (isid()) {
pdef(ch);
inp();
}
if (isdigit(tok)) {
- tokc = strtol(last_id, 0, 0);
+ tokc = strtol(mTokenString.getUnwrapped(), 0, 0);
tok = TOK_NUM;
} else {
- if (dstk - sym_stk + 1 > ALLOC_SIZE) {
- error("symbol stack overflow");
- }
- * dstk = TAG_TOK; /* no need to mark end of string (we
- suppose data is initialized to zero by calloc) */
- tok = (intptr_t) (strstr(sym_stk, (last_id - 1))
- - sym_stk);
- * dstk = 0; /* mark real end of ident for dlsym() */
- tok = tok * 8 + TOK_IDENT;
- if (tok > TOK_DEFINE) {
- if (tok + 8 > ALLOC_SIZE) {
- error("Variable Table overflow.");
- }
- tok = (intptr_t) (pVarsBase + tok);
- /* printf("tok=%s %x\n", last_id, tok); */
- /* define handling */
- if (*(int *) tok == SYM_DEFINE) {
- dptr = *(char* *) (tok + 4);
- dch = ch;
- inp();
- next();
+ // Is this a macro?
+ String* pValue = mMacros.get(&mTokenString);
+ if (pValue) {
+ // Yes, it is a macro
+ dptr = pValue->getUnwrapped();
+ dch = ch;
+ inp();
+ next();
+ } else {
+ // Is this a keyword?
+ int kwtok = mKeywords.get(&mTokenString);
+ if (kwtok) {
+ tok = kwtok;
+ // fprintf(stderr, "tok= keyword %s %x\n", last_id, tok);
+ } else {
+ tok = (intptr_t) mSymbolTable.get(&mTokenString);
+ if (!tok) {
+ tok = TOK_UNDEFINED_SYMBOL;
+ }
+ // fprintf(stderr, "tok= symbol %s %x\n", last_id, tok);
}
}
}
@@ -1344,24 +2023,29 @@ class Compiler : public ErrorSink {
inp();
if (tok == '\'') {
tok = TOK_NUM;
- getq();
- tokc = ch;
- inp();
- inp();
+ tokc = getq();
+ if (ch != '\'') {
+ error("Expected a ' character, got %c", ch);
+ } else {
+ inp();
+ }
} else if ((tok == '/') & (ch == '*')) {
inp();
- while (ch) {
- while (ch != '*')
+ while (ch && ch != EOF) {
+ while (ch != '*' && ch != EOF)
inp();
inp();
if (ch == '/')
ch = 0;
}
+ if (ch == EOF) {
+ error("End of file inside comment.");
+ }
inp();
next();
} else if ((tok == '/') & (ch == '/')) {
inp();
- while (ch && (ch != '\n')) {
+ while (ch && (ch != '\n') && (ch != EOF)) {
inp();
}
inp();
@@ -1394,15 +2078,19 @@ class Compiler : public ErrorSink {
}
#if 0
{
- char* p;
+ const char* p;
printf("tok=0x%x ", tok);
- if (tok >= TOK_IDENT) {
+ if (tok >= TOK_KEYWORD) {
printf("'");
- if (tok> TOK_DEFINE)
- p = sym_stk + 1 + ((char*) tok - pVarsBase - TOK_IDENT) / 8;
- else
- p = sym_stk + 1 + (tok - TOK_IDENT) / 8;
+ if (tok>= TOK_SYMBOL)
+ p = sym_stk + 1 + ((char*) tok - (char*) pVarsBase) / 8;
+ else {
+ p = mKeywords.lookupKeyFor(tok);
+ if (!p) {
+ p = "unknown keyword";
+ }
+ }
while (*p != TAG_TOK && *p)
printf("%c", *p++);
printf("'\n");
@@ -1415,6 +2103,31 @@ class Compiler : public ErrorSink {
#endif
}
+ /**
+  * Handle "#define NAME replacement-text".
+  *
+  * Reads the macro name and the rest of the line from the input and
+  * stores them in mMacros, replacing any previous definition of NAME.
+  * Function-like macros ("NAME(") and a missing name are reported as
+  * errors. On return ch is the terminating newline or EOF, which the
+  * caller consumes.
+  */
+ void doDefine() {
+     String* pName = new String();
+     // Skip blanks before the name, but never leave the directive's line.
+     while (isspace(ch) && ch != '\n') {
+         inp();
+     }
+     while (isid()) {
+         pName->append(ch);
+         inp();
+     }
+     if (ch == '(') {
+         delete pName;
+         error("Defines with arguments not supported");
+         return;
+     }
+     if (pName->len() == 0) {
+         delete pName;
+         error("Expected a macro name after #define");
+         return;
+     }
+     // Skip blanks before the body. Stopping at the newline keeps an
+     // empty definition ("#define FOO") from taking the next source line
+     // as its replacement text.
+     while (isspace(ch) && ch != '\n') {
+         inp();
+     }
+     String* pValue = new String();
+     while (ch != '\n' && ch != EOF) {
+         pValue->append(ch);
+         inp();
+     }
+     // put() returns the previous replacement text, if any; discard it.
+     delete mMacros.put(pName, pValue);
+ }
+
void doPragma() {
// # pragma name(val)
int state = 0;
@@ -1463,7 +2176,6 @@ class Compiler : public ErrorSink {
mErrorBuf.printf("%ld: ", file->getLine());
mErrorBuf.vprintf(fmt, ap);
mErrorBuf.printf("\n");
- longjmp(mErrorRecoveryJumpBuf, 1);
}
void skip(intptr_t c) {
@@ -1477,15 +2189,16 @@ class Compiler : public ErrorSink {
void unary(intptr_t l) {
intptr_t n, t, a;
int c;
+ String tString;
t = 0;
- n = 1; /* type of expression 0 = forward, 1 = value, other =
- lvalue */
+ n = 1; /* type of expression 0 = forward, 1 = value, other = lvalue */
if (tok == '\"') {
pGen->li((int) glo);
- while (ch != '\"') {
- getq();
- *allocGlobalSpace(1) = ch;
- inp();
+ while (ch != '\"' && ch != EOF) {
+ *allocGlobalSpace(1) = getq();
+ }
+ if (ch != '\"') {
+ error("Unterminated string constant.");
}
*glo = 0;
/* align heap */
@@ -1496,6 +2209,7 @@ class Compiler : public ErrorSink {
c = tokl;
a = tokc;
t = tok;
+ tString = mTokenString;
next();
if (t == TOK_NUM) {
pGen->li(a);
@@ -1538,11 +2252,23 @@ class Compiler : public ErrorSink {
} else if (t == '&') {
pGen->leaR0(*(int *) tok);
next();
+ } else if (t == EOF ) {
+ error("Unexpected EOF.");
+ } else if (!checkSymbol(t, &tString)) {
+ // Don't have to do anything special here, the error
+ // message was printed by checkSymbol() above.
} else {
- n = *(int *) t;
+ if (t == TOK_UNDEFINED_SYMBOL) {
+ t = (intptr_t) mSymbolTable.addGlobal(
+ new String(tString));
+ }
+
+ n = (intptr_t) ((VariableInfo*) t)->pAddress;
/* forward reference: try dlsym */
if (!n) {
- n = (intptr_t) dlsym(RTLD_DEFAULT, last_id);
+ n = (intptr_t) dlsym(RTLD_DEFAULT,
+ tString.getUnwrapped());
+ ((VariableInfo*) t)->pAddress = (void*) n;
}
if ((tok == '=') & l) {
/* assignment */
@@ -1551,6 +2277,9 @@ class Compiler : public ErrorSink {
pGen->storeR0(n);
} else if (tok != '(') {
/* variable */
+ if (!n) {
+ error("Undefined variable %s", tString.getUnwrapped());
+ }
pGen->loadR0(n, tokl == 11, tokc);
if (tokl == 11) {
next();
@@ -1568,7 +2297,7 @@ class Compiler : public ErrorSink {
a = pGen->beginFunctionCallArguments();
next();
l = 0;
- while (tok != ')') {
+ while (tok != ')' && tok != EOF) {
expr();
pGen->storeR0ToArg(l);
if (tok == ',')
@@ -1576,7 +2305,7 @@ class Compiler : public ErrorSink {
l = l + 4;
}
pGen->endFunctionCallArguments(a, l);
- next();
+ skip(')');
if (!n) {
/* forward reference */
t = t + 4;
@@ -1639,20 +2368,23 @@ class Compiler : public ErrorSink {
return pGen->gtst(0, 0);
}
- void block(intptr_t l) {
+ void block(intptr_t l, bool outermostFunctionBlock) {
intptr_t a, n, t;
- if (tok == TOK_IF) {
+ if (tok == TOK_INT || tok == TOK_CHAR) {
+ /* declarations */
+ localDeclarations();
+ } else if (tok == TOK_IF) {
next();
skip('(');
a = test_expr();
skip(')');
- block(l);
+ block(l, false);
if (tok == TOK_ELSE) {
next();
n = pGen->gjmp(0); /* jmp */
pGen->gsym(a);
- block(l);
+ block(l, false);
pGen->gsym(n); /* patch else jmp */
} else {
pGen->gsym(a); /* patch if test */
@@ -1682,16 +2414,20 @@ class Compiler : public ErrorSink {
}
}
skip(')');
- block((intptr_t) &a);
+ block((intptr_t) &a, false);
pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset()); /* jmp */
pGen->gsym(a);
} else if (tok == '{') {
+ if (! outermostFunctionBlock) {
+ mSymbolTable.pushLevel();
+ }
next();
- /* declarations */
- localDeclarations();
- while (tok != '}')
- block(l);
- next();
+ while (tok != '}' && tok != EOF)
+ block(l, false);
+ skip('}');
+ if (! outermostFunctionBlock) {
+ mSymbolTable.popLevel();
+ }
} else {
if (tok == TOK_RETURN) {
next();
@@ -1765,24 +2501,47 @@ class Compiler : public ErrorSink {
}
}
- void checkSymbol() {
- if (tok <= TOK_DEFINE) {
- error("Expected a symbol");
+ /**
+  * Define the current token text as a global symbol and store the
+  * resulting VariableInfo pointer in tok (0 if the add was refused, which
+  * reportIfDuplicate() then reports).
+  */
+ void addGlobalSymbol() {
+     String* pName = new String(mTokenString);
+     VariableInfo* pInfo = mSymbolTable.addGlobal(pName);
+     tok = (intptr_t) pInfo;
+     reportIfDuplicate();
+ }
+
+ void reportIfDuplicate() {
+ if (!tok) {
+ error("Duplicate definition of %s", mTokenString.getUnwrapped());
}
}
+ /**
+  * Define the current token text as a symbol in the current scope and
+  * store the resulting VariableInfo pointer in tok (0 if the add was
+  * refused, which reportIfDuplicate() then reports).
+  */
+ void addLocalSymbol() {
+     String* pName = new String(mTokenString);
+     VariableInfo* pInfo = mSymbolTable.addLocal(pName);
+     tok = (intptr_t) pInfo;
+     reportIfDuplicate();
+ }
+
void localDeclarations() {
intptr_t a;
Type base;
while (acceptType(base)) {
- while (tok != ';') {
+ while (tok != ';' && tok != EOF) {
Type t = acceptPointerDeclaration(t);
- checkSymbol();
- loc = loc + 4;
- *(int *) tok = -loc;
-
+ int variableAddress = 0;
+ if (checkSymbol()) {
+ addLocalSymbol();
+ if (tok) {
+ loc = loc + 4;
+ variableAddress = -loc;
+ ((VariableInfo*) tok)->pAddress = (void*) variableAddress;
+ }
+ }
next();
+ if (tok == '=') {
+ /* assignment */
+ next();
+ expr();
+ pGen->storeR0(variableAddress);
+ }
if (tok == ',')
next();
}
@@ -1790,56 +2549,112 @@ class Compiler : public ErrorSink {
}
}
+ /** Check the current token; see checkSymbol(int, String*). */
+ bool checkSymbol() {
+     return checkSymbol(tok, &mTokenString);
+ }
+
+ /**
+  * Decide whether token can be used as a symbol.
+  *
+  * Accepts values below EOF or at/above TOK_UNDEFINED_SYMBOL. Anything
+  * else is rejected with an "Expected symbol" error describing what was
+  * found; pText supplies the spelling for keyword tokens.
+  * Returns true when the token is acceptable.
+  */
+ bool checkSymbol(int token, String* pText) {
+     if (token < EOF || token >= TOK_UNDEFINED_SYMBOL) {
+         return true;
+     }
+
+     // Build a human-readable description of the offending token.
+     String description;
+     if (token == EOF ) {
+         description.printf("EOF");
+     } else if (token == TOK_NUM) {
+         description.printf("numeric constant");
+     } else if (token >= 0 && token < 256) {
+         description.printf("char \'%c\'", token);
+     } else if (token >= TOK_KEYWORD && token < TOK_UNSUPPORTED_KEYWORD) {
+         description.printf("keyword \"%s\"", pText->getUnwrapped());
+     } else {
+         description.printf("reserved keyword \"%s\"",
+                            pText->getUnwrapped());
+     }
+     error("Expected symbol. Got %s", description.getUnwrapped());
+     return false;
+ }
+
void globalDeclarations() {
while (tok != EOF) {
Type base;
expectType(base);
Type t = acceptPointerDeclaration(t);
- checkSymbol();
- int name = tok;
+ if (tok >= 0 && tok < TOK_UNDEFINED_SYMBOL) {
+ error("Unexpected token %d", tok);
+ break;
+ }
+ if (tok == TOK_UNDEFINED_SYMBOL) {
+ addGlobalSymbol();
+ }
+ VariableInfo* name = (VariableInfo*) tok;
+ if (name && name->pAddress) {
+ error("Already defined global %s",
+ mTokenString.getUnwrapped());
+ }
next();
- if (tok == ',' || tok == ';') {
+ if (tok == ',' || tok == ';' || tok == '=') {
// it's a variable declaration
for(;;) {
- *(int* *) name = (int*) allocGlobalSpace(4);
+ if (name) {
+ name->pAddress = (int*) allocGlobalSpace(4);
+ }
+ if (tok == '=') {
+ next();
+ if (tok == TOK_NUM) {
+ if (name) {
+ * (int*) name->pAddress = tokc;
+ }
+ next();
+ } else {
+ error("Expected an integer constant");
+ }
+ }
if (tok != ',') {
break;
}
- next();
+ skip(',');
t = acceptPointerDeclaration(t);
- checkSymbol();
- name = tok;
+ addGlobalSymbol();
+ name = (VariableInfo*) tok;
next();
}
skip(';');
} else {
- /* patch forward references (XXX: does not work for function
- pointers) */
- pGen->gsym(*(int *) (name + 4));
- /* put function address */
- *(int *) name = codeBuf.getPC();
+ if (name) {
+ /* patch forward references (XXX: does not work for function
+ pointers) */
+ pGen->gsym((int) name->pForward);
+ /* put function address */
+ name->pAddress = (void*) codeBuf.getPC();
+ }
skip('(');
+ mSymbolTable.pushLevel();
intptr_t a = 8;
int argCount = 0;
- while (tok != ')') {
+ while (tok != ')' && tok != EOF) {
Type aType;
expectType(aType);
aType = acceptPointerDeclaration(aType);
- checkSymbol();
- /* read param name and compute offset */
- *(int *) tok = a;
- a = a + 4;
+ if (checkSymbol()) {
+ addLocalSymbol();
+ if (tok) {
+ /* read param name and compute offset */
+ *(int *) tok = a;
+ a = a + 4;
+ }
+ }
next();
if (tok == ',')
next();
argCount++;
}
- skip(')'); /* skip ')' */
+ skip(')');
rsym = loc = 0;
a = pGen->functionEntry(argCount);
- block(0);
+ block(0, true);
pGen->gsym(rsym);
pGen->functionExit(argCount, a, loc);
+ mSymbolTable.popLevel();
}
}
}
@@ -1847,6 +2662,7 @@ class Compiler : public ErrorSink {
char* allocGlobalSpace(int bytes) {
if (glo - pGlobalBase + bytes > ALLOC_SIZE) {
error("Global space exhausted");
+ return NULL;
}
char* result = glo;
glo += bytes;
@@ -1854,18 +2670,10 @@ class Compiler : public ErrorSink {
}
void cleanup() {
- if (sym_stk != 0) {
- free(sym_stk);
- sym_stk = 0;
- }
if (pGlobalBase != 0) {
free(pGlobalBase);
pGlobalBase = 0;
}
- if (pVarsBase != 0) {
- free(pVarsBase);
- pVarsBase = 0;
- }
if (pGen) {
delete pGen;
pGen = 0;
@@ -1881,18 +2689,13 @@ class Compiler : public ErrorSink {
tokc = 0;
tokl = 0;
ch = 0;
- pVarsBase = 0;
rsym = 0;
loc = 0;
glo = 0;
- sym_stk = 0;
- dstk = 0;
dptr = 0;
dch = 0;
- last_id = 0;
file = 0;
pGlobalBase = 0;
- pVarsBase = 0;
pGen = 0;
mPragmaStringCount = 0;
}
@@ -1926,8 +2729,9 @@ class Compiler : public ErrorSink {
}
if (pGen == NULL) {
error("No code generator defined.");
+ } else {
+ pGen->setErrorSink(this);
}
- pGen->setErrorSink(this);
}
public:
@@ -1948,42 +2752,54 @@ public:
int compile(const char* text, size_t textLength) {
int result;
- if (! (result = setjmp(mErrorRecoveryJumpBuf))) {
- cleanup();
- clear();
- codeBuf.init(ALLOC_SIZE);
- setArchitecture(NULL);
- if (!pGen) {
- return -1;
- }
- pGen->init(&codeBuf);
- file = new TextInputStream(text, textLength);
- sym_stk = (char*) calloc(1, ALLOC_SIZE);
- static const char* predefinedSymbols =
- " int char void"
- " if else while break return for"
- " pragma define main ";
- dstk = strcpy(sym_stk, predefinedSymbols)
- + strlen(predefinedSymbols);
- pGlobalBase = (char*) calloc(1, ALLOC_SIZE);
- glo = pGlobalBase;
- pVarsBase = (char*) calloc(1, ALLOC_SIZE);
- inp();
- next();
- globalDeclarations();
- pGen->finishCompile();
+
+ cleanup();
+ clear();
+ codeBuf.init(ALLOC_SIZE);
+ setArchitecture(NULL);
+ if (!pGen) {
+ return -1;
+ }
+#ifdef PROVIDE_TRACE_CODEGEN
+ pGen = new TraceCodeGenerator(pGen);
+#endif
+ pGen->setErrorSink(this);
+ pGen->init(&codeBuf);
+ file = new TextInputStream(text, textLength);
+ pGlobalBase = (char*) calloc(1, ALLOC_SIZE);
+ glo = pGlobalBase;
+ inp();
+ next();
+ globalDeclarations();
+ checkForUndefinedForwardReferences();
+ result = pGen->finishCompile();
+ if (result == 0) {
+ if (mErrorBuf.len()) {
+ result = -2;
+ }
}
return result;
}
- int run(int argc, char** argv) {
- typedef int (*mainPtr)(int argc, char** argv);
- mainPtr aMain = (mainPtr) *(int*) (pVarsBase + TOK_MAIN);
- if (!aMain) {
- fprintf(stderr, "Could not find function \"main\".\n");
- return -1;
+ void checkForUndefinedForwardReferences() { // visits every global symbol via static_ufrcFn, with this compiler as context
+ mSymbolTable.forEachGlobal(static_ufrcFn, this);
+ }
+
+ /**
+  * forEachGlobal() callback: context is the Compiler instance; forwards
+  * to its undefinedForwardReferenceCheck() and returns that result.
+  */
+ static bool static_ufrcFn(String* key, VariableInfo* value,
+                           void* context) {
+     return ((Compiler*) context)->undefinedForwardReferenceCheck(key, value);
+ }
+
+ bool undefinedForwardReferenceCheck(String* key, VariableInfo* value) {
+#if 0
+ fprintf(stderr, "%s 0x%8x 0x%08x\n", key->getUnwrapped(),
+ value->pAddress, value->pForward);
+#endif
+ if (!value->pAddress && value->pForward) {
+ error("Undefined forward reference: %s", key->getUnwrapped());
}
- return aMain(argc, argv);
+ return true;
}
int dump(FILE* out) {
@@ -1999,30 +2815,10 @@ public:
* If found, return its value.
*/
void* lookup(const char* name) {
- if (!sym_stk) {
- return NULL;
- }
- size_t nameLen = strlen(name);
- char* pSym = sym_stk;
- char c;
- for(;;) {
- c = *pSym++;
- if (c == 0) {
- break;
- }
- if (c == TAG_TOK) {
- if (memcmp(pSym, name, nameLen) == 0
- && pSym[nameLen] == TAG_TOK) {
- int tok = pSym - 1 - sym_stk;
- tok = tok * 8 + TOK_IDENT;
- if (tok <= TOK_DEFINE) {
- return 0;
- } else {
- tok = (intptr_t) (pVarsBase + tok);
- return * (void**) tok;
- }
- }
- }
+ String string(name, -1, false);
+ VariableInfo* pVariableInfo = mSymbolTable.get(&string);
+ if (pVariableInfo) {
+ return pVariableInfo->pAddress;
}
return NULL;
}