diff options
Diffstat (limited to 'lib/Bytecode')
-rw-r--r-- | lib/Bytecode/Makefile | 5 | ||||
-rw-r--r-- | lib/Bytecode/Reader/ConstantReader.cpp | 218 | ||||
-rw-r--r-- | lib/Bytecode/Reader/InstructionReader.cpp | 213 | ||||
-rw-r--r-- | lib/Bytecode/Reader/Makefile | 7 | ||||
-rw-r--r-- | lib/Bytecode/Reader/Reader.cpp | 478 | ||||
-rw-r--r-- | lib/Bytecode/Reader/ReaderInternals.h | 146 | ||||
-rw-r--r-- | lib/Bytecode/Writer/ConstantWriter.cpp | 154 | ||||
-rw-r--r-- | lib/Bytecode/Writer/InstructionWriter.cpp | 184 | ||||
-rw-r--r-- | lib/Bytecode/Writer/Makefile | 7 | ||||
-rw-r--r-- | lib/Bytecode/Writer/SlotCalculator.cpp | 195 | ||||
-rw-r--r-- | lib/Bytecode/Writer/SlotCalculator.h | 96 | ||||
-rw-r--r-- | lib/Bytecode/Writer/Writer.cpp | 182 | ||||
-rw-r--r-- | lib/Bytecode/Writer/WriterInternals.h | 74 |
13 files changed, 1959 insertions, 0 deletions
diff --git a/lib/Bytecode/Makefile b/lib/Bytecode/Makefile
new file mode 100644
index 0000000..75d4f7c
--- /dev/null
+++ b/lib/Bytecode/Makefile
@@ -0,0 +1,5 @@
+LEVEL = ../..
+DIRS = Reader Writer
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Bytecode/Reader/ConstantReader.cpp b/lib/Bytecode/Reader/ConstantReader.cpp
new file mode 100644
index 0000000..b85bd88
--- /dev/null
+++ b/lib/Bytecode/Reader/ConstantReader.cpp
@@ -0,0 +1,218 @@
+//===- ReadConst.cpp - Code to read constants and constant pools ------------===
+//
+// This file implements functionality to deserialize constants and entire
+// constant pools.
+//
+// Note that this library should be as fast as possible, reentrant, and
+// threadsafe!!
+//
+//===------------------------------------------------------------------------===
+
+#include "llvm/Module.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/ConstPoolVals.h"
+#include "llvm/DerivedTypes.h"
+#include "ReaderInternals.h"
+// parseTypeConstant - Read one type constant into V; returns true on error.
+bool BytecodeParser::parseTypeConstant(const uchar *&Buf, const uchar *EndBuf,
+                                       ConstPoolVal *&V) {
+  const Type *Val = 0;
+
+  unsigned PrimType;
+  if (read_vbr(Buf, EndBuf, PrimType)) return true;
+
+  if ((Val = Type::getPrimitiveType((Type::PrimitiveID)PrimType))) {
+    V = new ConstPoolType(Val);    // It's just a primitive ID.
+    return false;
+  }
+
+  switch (PrimType) {
+  case Type::MethodTyID: {       // [return type][param types...][0]
+    unsigned Typ;
+    if (read_vbr(Buf, EndBuf, Typ)) return true;
+    const Type *RetType = getType(Typ);
+    if (RetType == 0) return true;
+
+    MethodType::ParamTypes Params;
+
+    if (read_vbr(Buf, EndBuf, Typ)) return true;
+    while (Typ) {         // List is terminated by void/0 typeid
+      const Type *Ty = getType(Typ);
+      if (Ty == 0) return true;
+      Params.push_back(Ty);
+
+      if (read_vbr(Buf, EndBuf, Typ)) return true;
+    }
+
+    Val = MethodType::getMethodType(RetType, Params);
+    break;
+  }
+  case Type::ArrayTyID: {        // [element type][number of elements]
+    unsigned ElTyp;
+    if (read_vbr(Buf, EndBuf, ElTyp)) return true;
+    const Type *ElementType = getType(ElTyp);
+    if (ElementType == 0) return true;
+
+    int NumElements;
+    if (read_vbr(Buf, EndBuf, NumElements)) return true;
+    Val = ArrayType::getArrayType(ElementType, NumElements);
+    break;
+  }
+  case Type::StructTyID: {       // [element types...][0]
+    unsigned Typ;
+    StructType::ElementTypes Elements;
+
+    if (read_vbr(Buf, EndBuf, Typ)) return true;
+    while (Typ) {         // List is terminated by void/0 typeid
+      const Type *Ty = getType(Typ);
+      if (Ty == 0) return true;
+      Elements.push_back(Ty);
+
+      if (read_vbr(Buf, EndBuf, Typ)) return true;
+    }
+
+    Val = StructType::getStructType(Elements);
+    break;
+  }
+  case Type::PointerTyID: {      // [pointee type]
+    unsigned ElTyp;
+    if (read_vbr(Buf, EndBuf, ElTyp)) return true;
+    const Type *ElementType = getType(ElTyp);
+    if (ElementType == 0) return true;
+    Val = PointerType::getPointerType(ElementType);
+    break;
+  }
+
+  default:
+    cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to deserialize"
+         << " primitive Type " << PrimType << "\n";
+    return true;
+  }
+
+  V = new ConstPoolType(Val);
+  return false;
+}
+// parseConstPoolValue - Read one constant of type Ty into V; true on error.
+bool BytecodeParser::parseConstPoolValue(const uchar *&Buf,
+                                         const uchar *EndBuf,
+                                         const Type *Ty, ConstPoolVal *&V) {
+  switch (Ty->getPrimitiveID()) {
+  case Type::BoolTyID: {
+    unsigned Val;
+    if (read_vbr(Buf, EndBuf, Val)) return true;
+    if (Val != 0 && Val != 1) return true;
+    V = new ConstPoolBool(Val == 1);
+    break;
+  }
+
+  case Type::UByteTyID:   // Unsigned integer types...
+  case Type::UShortTyID:
+  case Type::UIntTyID: {
+    unsigned Val;
+    if (read_vbr(Buf, EndBuf, Val)) return true;
+    if (!ConstPoolUInt::isValueValidForType(Ty, Val)) return true;
+    V = new ConstPoolUInt(Ty, Val);
+    break;
+  }
+
+  case Type::ULongTyID: {
+    uint64_t Val;
+    if (read_vbr(Buf, EndBuf, Val)) return true;
+    V = new ConstPoolUInt(Ty, Val);
+    break;
+  }
+
+  case Type::SByteTyID:   // Signed integer types...
+  case Type::ShortTyID:
+  case Type::IntTyID: {
+    int Val;
+    if (read_vbr(Buf, EndBuf, Val)) return true;
+    if (!ConstPoolSInt::isValueValidForType(Ty, Val)) return true; // Error!
+    V = new ConstPoolSInt(Ty, Val);
+    break;
+  }
+
+  case Type::LongTyID: {
+    int64_t Val;
+    if (read_vbr(Buf, EndBuf, Val)) return true;
+    V = new ConstPoolSInt(Ty, Val);
+    break;
+  }
+
+  case Type::TypeTyID:
+    if (parseTypeConstant(Buf, EndBuf, V)) return true;
+    break;
+
+  case Type::ArrayTyID: {
+    const ArrayType *AT = (const ArrayType*)Ty;
+    unsigned NumElements;
+    if (AT->isSized())          // Sized array, # elements stored in type!
+      NumElements = (unsigned)AT->getNumElements();
+    else                        // Unsized array, # elements stored in stream!
+      if (read_vbr(Buf, EndBuf, NumElements)) return true;
+
+    vector<ConstPoolVal *> Elements;
+    while (NumElements--) {   // Read all of the elements of the constant.
+      unsigned Slot;
+      if (read_vbr(Buf, EndBuf, Slot)) return true;
+      Value *Elt = getValue(AT->getElementType(), Slot, false);
+      if (!Elt || Elt->getValueType() != Value::ConstantVal)
+        return true;          // Elements must be already-read constants
+      Elements.push_back((ConstPoolVal*)Elt);
+    }
+    V = new ConstPoolArray(AT, Elements);
+    break;
+  }
+
+  case Type::StructTyID: {
+    const StructType *ST = (const StructType*)Ty;
+    const StructType::ElementTypes &ET = ST->getElementTypes();
+
+    vector<ConstPoolVal *> Elements;
+    for (unsigned i = 0; i < ET.size(); ++i) {
+      unsigned Slot;
+      if (read_vbr(Buf, EndBuf, Slot)) return true;
+      Value *Elt = getValue(ET[i], Slot, false);
+      if (!Elt || Elt->getValueType() != Value::ConstantVal)
+        return true;          // Elements must be already-read constants
+      Elements.push_back((ConstPoolVal*)Elt);
+    }
+
+    V = new ConstPoolStruct(ST, Elements);
+    break;
+  }
+
+  default:
+    cerr << __FILE__ << ":" << __LINE__
+         << ": Don't know how to deserialize constant value of type '"
+         << Ty->getName() << "'\n";
+    return true;
+  }
+  return false;
+}
+// ParseConstantPool - Read every constant in the block into both Tab and CP.
+bool BytecodeParser::ParseConstantPool(const uchar *&Buf, const uchar *EndBuf,
+                                       SymTabValue::ConstantPoolType &CP,
+                                       ValueTable &Tab) {
+  while (Buf < EndBuf) {
+    unsigned NumEntries, Typ;       // Group header: [num entries][type id]
+
+    if (read_vbr(Buf, EndBuf, NumEntries) ||
+        read_vbr(Buf, EndBuf, Typ)) return true;
+    const Type *Ty = getType(Typ);
+    if (Ty == 0) return true;
+
+    for (unsigned i = 0; i < NumEntries; i++) {
+      ConstPoolVal *I;
+      if (parseConstPoolValue(Buf, EndBuf, Ty, I)) return true;
+#if 0
+      cerr << "  Read const value: <" << I->getType()->getName()
+           << ">: " << I->getStrValue() << endl;
+#endif
+      if (insertValue(I, Tab)) return true;   // No slot for this type: error
+      CP.insert(I);
+    }
+  }
+
+  return Buf > EndBuf;                // Reading past the block is an error
+}
diff --git a/lib/Bytecode/Reader/InstructionReader.cpp b/lib/Bytecode/Reader/InstructionReader.cpp
new file mode 100644
index 0000000..667e144
--- /dev/null
+++ b/lib/Bytecode/Reader/InstructionReader.cpp
@@ -0,0 +1,213 @@
+//===- ReadInst.cpp - Code to read an instruction from bytecode -------------===
+//
+// This file defines the mechanism to read an instruction from
a bytecode
+// stream.
+//
+// Note that this library should be as fast as possible, reentrant, and
+// threadsafe!!
+//
+// TODO: Change from getValue(Raw.Arg1) etc, to getArg(Raw, 1)
+//       Make it check type, so that casts are checked.
+//
+//===------------------------------------------------------------------------===
+
+#include "llvm/iOther.h"
+#include "llvm/iTerminators.h"
+#include "llvm/iMemory.h"
+#include "llvm/DerivedTypes.h"
+#include "ReaderInternals.h"
+// ParseRawInst - Decode opcode, type and operand slots of one instruction.
+bool BytecodeParser::ParseRawInst(const uchar *&Buf, const uchar *EndBuf,
+                                  RawInst &Result) {
+  unsigned Op, Typ;
+  if (read(Buf, EndBuf, Op)) return true;
+
+  Result.NumOperands =  Op >> 30;         // Top two bits select the format
+  Result.Opcode      = (Op >> 24) & 63;
+
+  switch (Result.NumOperands) {
+  case 1:
+    Result.Ty   = getType((Op >> 12) & 4095);
+    Result.Arg1 = Op & 4095;
+    if (Result.Arg1 == 4095)    // Handle special encoding for 0 operands...
+      Result.NumOperands = 0;
+    break;
+  case 2:
+    Result.Ty   = getType((Op >> 16) & 255);
+    Result.Arg1 = (Op >> 8 ) & 255;
+    Result.Arg2 = (Op >> 0 ) & 255;
+    break;
+  case 3:
+    Result.Ty   = getType((Op >> 18) & 63);
+    Result.Arg1 = (Op >> 12) & 63;
+    Result.Arg2 = (Op >> 6 ) & 63;
+    Result.Arg3 = (Op >> 0 ) & 63;
+    break;
+  case 0:
+    Buf -= 4;  // Hrm, try this again...
+    if (read_vbr(Buf, EndBuf, Result.Opcode)) return true;
+    if (read_vbr(Buf, EndBuf, Typ)) return true;
+    Result.Ty = getType(Typ);
+    if (read_vbr(Buf, EndBuf, Result.NumOperands)) return true;
+
+    switch (Result.NumOperands) {
+    case 0:
+      cerr << "Zero Arg instr found!\n";
+      return true;  // This encoding is invalid!
+    case 1:
+      if (read_vbr(Buf, EndBuf, Result.Arg1)) return true;
+      break;
+    case 2:
+      if (read_vbr(Buf, EndBuf, Result.Arg1) ||
+          read_vbr(Buf, EndBuf, Result.Arg2)) return true;
+      break;
+    case 3:
+      if (read_vbr(Buf, EndBuf, Result.Arg1) ||
+          read_vbr(Buf, EndBuf, Result.Arg2) ||
+          read_vbr(Buf, EndBuf, Result.Arg3)) return true;
+      break;
+    default:
+      if (read_vbr(Buf, EndBuf, Result.Arg1) ||
+          read_vbr(Buf, EndBuf, Result.Arg2)) return true;
+
+      // Allocate a vector to hold arguments 3, 4, 5, 6 ...
+      Result.VarArgs = new vector<unsigned>(Result.NumOperands-2);
+      for (unsigned a = 0; a < Result.NumOperands-2; a++)
+        if (read_vbr(Buf, EndBuf, (*Result.VarArgs)[a])) return true;
+      break;
+    }
+    if (align32(Buf, EndBuf)) return true;
+    break;
+  }
+
+  //cerr << "NO: "  << Result.NumOperands   << " opcode: " << Result.Opcode
+  //     << " Ty: " << Result.Ty->getName() << " arg1: "   << Result.Arg1 << endl;
+  return false;
+}
+
+// ParseInstruction - Build the Instruction for the next raw encoding in Res.
+bool BytecodeParser::ParseInstruction(const uchar *&Buf, const uchar *EndBuf,
+                                      Instruction *&Res) {
+  RawInst Raw;
+  if (ParseRawInst(Buf, EndBuf, Raw)) return true;
+
+  if (Raw.Opcode >= Instruction::FirstUnaryOp &&
+      Raw.Opcode <  Instruction::NumUnaryOps  && Raw.NumOperands == 1) {
+    Res = Instruction::getUnaryOperator(Raw.Opcode, getValue(Raw.Ty, Raw.Arg1));
+    return false;
+  } else if (Raw.Opcode >= Instruction::FirstBinaryOp &&
+             Raw.Opcode <  Instruction::NumBinaryOps  && Raw.NumOperands == 2) {
+    Res = Instruction::getBinaryOperator(Raw.Opcode, getValue(Raw.Ty, Raw.Arg1),
+                                         getValue(Raw.Ty, Raw.Arg2));
+    return false;
+  } else if (Raw.Opcode == Instruction::PHINode) {
+    PHINode *PN = new PHINode(Raw.Ty);
+    switch (Raw.NumOperands) {
+    case 0: cerr << "Invalid phi node encountered!\n";
+            delete PN;
+            return true;
+    case 1: PN->addIncoming(getValue(Raw.Ty, Raw.Arg1)); break;
+    case 2: PN->addIncoming(getValue(Raw.Ty, Raw.Arg1));
+            PN->addIncoming(getValue(Raw.Ty, Raw.Arg2)); break;
+    case 3: PN->addIncoming(getValue(Raw.Ty, Raw.Arg1));
+            PN->addIncoming(getValue(Raw.Ty, Raw.Arg2));
+            PN->addIncoming(getValue(Raw.Ty, Raw.Arg3)); break;
+    default:
+      PN->addIncoming(getValue(Raw.Ty, Raw.Arg1));
+      PN->addIncoming(getValue(Raw.Ty, Raw.Arg2));
+      {
+        vector<unsigned> &args = *Raw.VarArgs;
+        for (unsigned i = 0; i < args.size(); i++)
+          PN->addIncoming(getValue(Raw.Ty, args[i]));
+      }
+      delete Raw.VarArgs;
+    }
+    Res = PN;
+    return false;
+  } else if (Raw.Opcode == Instruction::Ret) {
+    if (Raw.NumOperands == 0) {
+      Res = new ReturnInst(); return false;
+    } else if (Raw.NumOperands == 1) {
+      Res = new ReturnInst(getValue(Raw.Ty, Raw.Arg1)); return false;
+    }
+  } else if (Raw.Opcode == Instruction::Br) {
+    if (Raw.NumOperands == 1) {
+      Res = new BranchInst((BasicBlock*)getValue(Type::LabelTy, Raw.Arg1));
+      return false;
+    } else if (Raw.NumOperands == 3) {
+      Res = new BranchInst((BasicBlock*)getValue(Type::LabelTy, Raw.Arg1),
+                           (BasicBlock*)getValue(Type::LabelTy, Raw.Arg2),
+                                        getValue(Type::BoolTy , Raw.Arg3));
+      return false;
+    }
+  } else if (Raw.Opcode == Instruction::Switch) {
+    SwitchInst *I =
+      new SwitchInst(getValue(Raw.Ty, Raw.Arg1),
+                     (BasicBlock*)getValue(Type::LabelTy, Raw.Arg2));
+    Res = I;
+    if (Raw.NumOperands < 3) return false;  // No destinations?  Weird.
+
+    if (Raw.NumOperands == 3 || Raw.VarArgs->size() & 1) {
+      cerr << "Switch statement with odd number of arguments!\n";
+      delete I;
+      return true;
+    }
+
+    vector<unsigned> &args = *Raw.VarArgs;   // [value, destination] pairs
+    for (unsigned i = 0; i < args.size(); i += 2)
+      I->dest_push_back((ConstPoolVal*)getValue(Raw.Ty, args[i]),
+                        (BasicBlock*)getValue(Type::LabelTy, args[i+1]));
+
+    delete Raw.VarArgs;
+    return false;
+  } else if (Raw.Opcode == Instruction::Call) {
+    Method *M = (Method*)getValue(Raw.Ty, Raw.Arg1);
+    if (M == 0) return true;
+
+    const MethodType::ParamTypes &PL = M->getMethodType()->getParamTypes();
+    MethodType::ParamTypes::const_iterator It = PL.begin();
+    if (Raw.NumOperands > 1 && It == PL.end()) return true;  // Too many args
+    vector<Value *> Params;
+    switch (Raw.NumOperands) {
+    case 0: cerr << "Invalid call instruction encountered!\n";
+            return true;
+    case 1: break;
+    case 2: Params.push_back(getValue(*It++, Raw.Arg2)); break;
+    case 3: Params.push_back(getValue(*It++, Raw.Arg2));
+            if (It == PL.end()) return true;
+            Params.push_back(getValue(*It++, Raw.Arg3)); break;
+    default:
+      Params.push_back(getValue(*It++, Raw.Arg2));
+      {
+        vector<unsigned> &args = *Raw.VarArgs;
+        for (unsigned i = 0; i < args.size(); i++) {
+          if (It == PL.end()) return true;
+          Params.push_back(getValue(*It++, args[i]));
+        }
+      }
+      delete Raw.VarArgs;
+    }
+    if (It != PL.end()) return true;          // Too few arguments supplied
+
+    Res = new CallInst(M, Params);
+    return false;
+  } else if (Raw.Opcode == Instruction::Malloc) {
+    if (Raw.NumOperands > 2) return true;
+    Value *Sz = (Raw.NumOperands == 2) ? getValue(Type::UIntTy, Raw.Arg2) : 0;
+    Res = new MallocInst((ConstPoolType*)getValue(Type::TypeTy, Raw.Arg1), Sz);
+    return false;
+  } else if (Raw.Opcode == Instruction::Alloca) {
+    if (Raw.NumOperands > 2) return true;
+    Value *Sz = (Raw.NumOperands == 2) ? getValue(Type::UIntTy, Raw.Arg2) : 0;
+    Res = new AllocaInst((ConstPoolType*)getValue(Type::TypeTy, Raw.Arg1), Sz);
+    return false;
+  } else if (Raw.Opcode == Instruction::Free) {
+    Value *Val = getValue(Raw.Ty, Raw.Arg1);
+    if (!Val || !Val->getType()->isPointerType()) return true;
+    Res = new FreeInst(Val);
+    return false;
+  }
+
+  cerr << "Unrecognized instruction! " << Raw.Opcode << endl;
+  return true;
+}
diff --git a/lib/Bytecode/Reader/Makefile b/lib/Bytecode/Reader/Makefile
new file mode 100644
index 0000000..2c79d15
--- /dev/null
+++ b/lib/Bytecode/Reader/Makefile
@@ -0,0 +1,7 @@
+
+LEVEL = ../../..
+
+LIBRARYNAME = bcreader
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Bytecode/Reader/Reader.cpp b/lib/Bytecode/Reader/Reader.cpp
new file mode 100644
index 0000000..c3f4c90
--- /dev/null
+++ b/lib/Bytecode/Reader/Reader.cpp
@@ -0,0 +1,478 @@
+//===- Reader.cpp - Code to read bytecode files -----------------------------===
+//
+// This library implements the functionality defined in llvm/Bytecode/Reader.h
+//
+// Note that this library should be as fast as possible, reentrant, and
+// threadsafe!!
+//
+// TODO: Make error message outputs be configurable depending on an option?
+// TODO: Allow passing in an option to ignore the symbol table
+//
+//===------------------------------------------------------------------------===
+
+#include "llvm/Bytecode/Reader.h"
+#include "llvm/Bytecode/Format.h"
+#include "llvm/Module.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/ConstPoolVals.h"
+#include "llvm/iOther.h"
+#include "ReaderInternals.h"
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <algorithm>
+// getTypeSlot - Find the type plane number for Ty; true if Ty is unknown.
+bool BytecodeParser::getTypeSlot(const Type *Ty, unsigned &Slot) {
+  if (Ty->isPrimitiveType()) {
+    Slot = Ty->getPrimitiveID();
+  } else {
+    TypeMapType::iterator I = TypeMap.find(Ty);
+    if (I == TypeMap.end()) return true;   // Didn't find type!
+    Slot = I->second;
+  }
+  //cerr << "getTypeSlot '" << Ty->getName() << "' = " << Slot << endl;
+  return false;
+}
+// getType - Return the primitive or derived type numbered ID, or 0 if none.
+const Type *BytecodeParser::getType(unsigned ID) {
+  const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID);
+  if (T) return T;
+
+  //cerr << "Looking up Type ID: " << ID << endl;
+
+  const Value *D = getValue(Type::TypeTy, ID, false);
+  if (D == 0) return 0;
+
+  assert(D->getType() == Type::TypeTy &&
+         D->getValueType() == Value::ConstantVal);
+
+
+  return ((const ConstPoolType*)D)->getValue();
+}
+// insertValue - Append Def to its type plane in ValueTab; true on error.
+bool BytecodeParser::insertValue(Value *Def, vector<ValueList> &ValueTab) {
+  unsigned type;
+  if (getTypeSlot(Def->getType(), type)) return true;
+
+  if (ValueTab.size() <= type)
+    ValueTab.resize(type+1, ValueList());
+
+  //cerr << "insertValue Values[" << type << "][" << ValueTab[type].size()
+  //     << "] = " << Def << endl;
+
+  if (type == Type::TypeTyID && Def->getValueType() == Value::ConstantVal) {
+    const Type *Ty = ((const ConstPoolType*)Def)->getValue();
+    unsigned ValueOffset = FirstDerivedTyID;
+
+    if (&ValueTab == &Values)    // Take into consideration module level types
+      ValueOffset += ModuleValues[type].size();
+
+    if (TypeMap.find(Ty) == TypeMap.end())   // First sighting: record its slot
+      TypeMap[Ty] = ValueTab[type].size()+ValueOffset;
+  }
+
+  ValueTab[type].push_back(Def);
+
+  return false;
+}
+// getValue - Find slot oNum of Ty's plane, optionally making a placeholder.
+Value *BytecodeParser::getValue(const Type *Ty, unsigned oNum, bool Create) {
+  unsigned Num = oNum;
+  unsigned type;   // The type plane it lives in...
+
+  if (getTypeSlot(Ty, type)) return 0; // TODO: true
+
+  if (type == Type::TypeTyID) {  // The 'type' plane has implicit values
+    const Type *T = Type::getPrimitiveType((Type::PrimitiveID)Num);
+    if (T) return (Value*)T;   // Asked for a primitive type...
+
+    // Otherwise, derived types need offset...
+    Num -= FirstDerivedTyID;
+  }
+
+  if (ModuleValues.size() > type) {      // Module level values come first...
+    if (ModuleValues[type].size() > Num)
+      return ModuleValues[type][Num];
+    Num -= ModuleValues[type].size();
+  }
+
+  if (Values.size() > type && Values[type].size() > Num)
+    return Values[type][Num];
+
+  if (!Create) return 0;  // Do not create a placeholder?
+
+  Value *d = 0;
+  switch (Ty->getPrimitiveID()) {
+  case Type::LabelTyID: d = new    BBPHolder(Ty, oNum); break;
+  case Type::MethodTyID:
+    cerr << "Creating method pholder! : " << type << ":" << oNum << " "
+         << Ty->getName() << endl;
+    d = new MethPHolder(Ty, oNum);
+    insertValue(d, LateResolveModuleValues);
+    return d;
+  default:              d = new   DefPHolder(Ty, oNum); break;
+  }
+
+  assert(d != 0 && "How did we not make something?");
+  if (insertValue(d, LateResolveValues)) return 0;
+  return d;
+}
+// postResolveValues - Swap placeholders in ValTab for real values; true on error
+bool BytecodeParser::postResolveValues(ValueTable &ValTab) {
+  bool Error = false;
+  for (unsigned ty = 0; ty < ValTab.size(); ty++) {
+    ValueList &DL = ValTab[ty];
+    unsigned Size;
+    while ((Size = DL.size())) {
+      unsigned IDNumber = getValueIDNumberFromPlaceHolder(DL[Size-1]);
+
+      Value *D = DL[Size-1];
+      DL.pop_back();
+
+      Value *NewDef = getValue(D->getType(), IDNumber, false);
+      if (NewDef == 0) {
+        Error = true;  // Unresolved thinger
+        cerr << "Unresolvable reference found: <" << D->getType()->getName()
+             << ">:" << IDNumber << "!\n";
+      } else {
+        // Fixup all of the uses of this placeholder def...
+        D->replaceAllUsesWith(NewDef);
+
+        // Now that all the uses are gone, delete the placeholder...
+        // If we couldn't find a def (error case), then leak a little
+        delete D;  // memory, 'cause otherwise we can't remove all uses!
+      }
+    }
+  }
+
+  return Error;
+}
+// ParseBasicBlock - Read instructions up to EndBuf into a new BasicBlock.
+bool BytecodeParser::ParseBasicBlock(const uchar *&Buf, const uchar *EndBuf,
+                                     BasicBlock *&BB) {
+  BB = new BasicBlock();
+
+  while (Buf < EndBuf) {
+    Instruction *Def;
+    if (ParseInstruction(Buf, EndBuf, Def)) {
+      delete BB;
+      return true;
+    }
+
+    if (Def == 0) { delete BB; return true; }
+    if (insertValue(Def, Values)) { delete BB; return true; }
+
+    BB->getInstList().push_back(Def);
+  }
+
+  return false;
+}
+// ParseSymbolTable - Give the values named by a symbol table block their names.
+bool BytecodeParser::ParseSymbolTable(const uchar *&Buf, const uchar *EndBuf) {
+  while (Buf < EndBuf) {
+    // Symtab block header: [num entries][type id number]
+    unsigned NumEntries, Typ;
+    if (read_vbr(Buf, EndBuf, NumEntries) ||
+        read_vbr(Buf, EndBuf, Typ)) return true;
+    const Type *Ty = getType(Typ);
+    if (Ty == 0) return true;
+
+    for (unsigned i = 0; i < NumEntries; i++) {
+      // Symtab entry: [def slot #][name]
+      unsigned slot;
+      if (read_vbr(Buf, EndBuf, slot)) return true;
+      string Name;
+      if (read(Buf, EndBuf, Name, false))  // Not aligned...
+        return true;
+
+      Value *D = getValue(Ty, slot, false); // Find mapping...
+      if (D == 0) return true;
+      D->setName(Name);
+    }
+  }
+
+  return Buf > EndBuf;
+}
+
+// ParseMethod - Read one method body and swap it in for its placeholder.
+bool BytecodeParser::ParseMethod(const uchar *&Buf, const uchar *EndBuf,
+                                 Module *C) {
+  // Clear out the local values table...
+  Values.clear();
+  if (MethodSignatureList.empty()) return true;  // Unexpected method!
+
+  const MethodType *MTy = MethodSignatureList.front().first;
+  unsigned MethSlot = MethodSignatureList.front().second;
+  MethodSignatureList.pop_front();
+  Method *M = new Method(MTy);
+
+  const MethodType::ParamTypes &Params = MTy->getParamTypes();
+  for (MethodType::ParamTypes::const_iterator It = Params.begin();
+       It != Params.end(); It++) {
+    MethodArgument *MA = new MethodArgument(*It);
+    if (insertValue(MA, Values)) { delete MA; delete M; return true; }
+    M->getArgumentList().push_back(MA);
+  }
+
+  while (Buf < EndBuf) {
+    unsigned Type, Size;
+    const uchar *OldBuf = Buf;
+    if (readBlock(Buf, EndBuf, Type, Size)) { delete M; return true; }
+
+    switch (Type) {
+    case BytecodeFormat::ConstantPool:
+      if (ParseConstantPool(Buf, Buf+Size, M->getConstantPool(), Values)) {
+        cerr << "Error reading constant pool!\n";
+        delete M; return true;
+      }
+      break;
+
+    case BytecodeFormat::BasicBlock: {
+      BasicBlock *BB;
+      if (ParseBasicBlock(Buf, Buf+Size, BB) ||
+          insertValue(BB, Values)) {
+        cerr << "Error parsing basic block!\n";
+        delete M; return true;                       // Parse error... :(
+      }
+
+      M->getBasicBlocks().push_back(BB);
+      break;
+    }
+
+    case BytecodeFormat::SymbolTable:
+      if (ParseSymbolTable(Buf, Buf+Size)) {
+        cerr << "Error reading method symbol table!\n";
+        delete M; return true;
+      }
+      break;
+
+    default:                                // Skip blocks we don't understand
+      Buf += Size;
+      if (OldBuf > Buf) { delete M; return true; } // Wrap around!
+      break;
+    }
+    if (align32(Buf, EndBuf)) {
+      delete M;    // Malformed bc file, read past end of block.
+      return true;
+    }
+  }
+
+  if (postResolveValues(LateResolveValues) ||
+      postResolveValues(LateResolveModuleValues)) {
+    delete M; return true;     // Unresolvable references!
+  }
+
+  Value *MethPHolder = getValue(MTy, MethSlot, false);
+  assert(MethPHolder && "Something is broken no placeholder found!");
+  assert(MethPHolder->getValueType() == Value::MethodVal && "Not a method?");
+
+  unsigned type;  // Type slot
+  assert(!getTypeSlot(MTy, type) && "How can meth type not exist?");
+  getTypeSlot(MTy, type);      // Repeated so 'type' is set when asserts are off
+
+  C->getMethodList().push_back(M);
+
+  // Replace placeholder with the real method pointer...
+  ModuleValues[type][MethSlot] = M;
+
+  // If anyone is using the placeholder make them use the real method instead
+  MethPHolder->replaceAllUsesWith(M);
+
+  // We don't need the placeholder anymore!
+  delete MethPHolder;
+
+  return false;
+}
+// ParseModuleGlobalInfo - Read method signatures, reserving a slot for each.
+bool BytecodeParser::ParseModuleGlobalInfo(const uchar *&Buf, const uchar *End,
+                                           Module *C) {
+
+  if (!MethodSignatureList.empty()) return true;  // Two ModuleGlobal blocks?
+
+  // Read the method signatures for all of the methods that are coming, and
+  // create fillers in the Value tables.
+  unsigned MethSignature;
+  if (read_vbr(Buf, End, MethSignature)) return true;
+  while (MethSignature != Type::VoidTyID) { // List is terminated by Void
+    const Type *Ty = getType(MethSignature);
+    if (!Ty || !Ty->isMethodType()) {
+      cerr << "Method not meth type! ";
+      if (Ty) cerr << Ty->getName(); else cerr << MethSignature; cerr << endl;
+      return true;
+    }
+
+    // When the ModuleGlobalInfo section is read, we load the type of each method
+    // and the 'ModuleValues' slot that it lands in.  We then load a placeholder
+    // into its slot to reserve it.  When the method is loaded, this placeholder
+    // is replaced.
+
+    // Insert the placeholder...
+    Value *Def = new MethPHolder(Ty, 0);
+    if (insertValue(Def, ModuleValues)) { delete Def; return true; }
+
+    // Figure out which entry of its typeslot it went into...
+    unsigned TypeSlot;
+    if (getTypeSlot(Def->getType(), TypeSlot)) return true;
+
+    unsigned SlotNo = ModuleValues[TypeSlot].size()-1;
+
+    // Keep track of this information in a linked list that is emptied as
+    // methods are loaded...
+    //
+    MethodSignatureList.push_back(make_pair((const MethodType*)Ty, SlotNo));
+    if (read_vbr(Buf, End, MethSignature)) return true;
+  }
+
+  if (align32(Buf, End)) return true;
+
+  // This is for future proofing... in the future extra fields may be added that
+  // we don't understand, so we transparently ignore them.
+  //
+  Buf = End;
+  return false;
+}
+// ParseModule - Read the module block and all nested blocks; true on error.
+bool BytecodeParser::ParseModule(const uchar *Buf, const uchar *EndBuf,
+                                 Module *&C) {
+
+  unsigned Type, Size;
+  if (readBlock(Buf, EndBuf, Type, Size)) return true;
+  if (Type != BytecodeFormat::Module || Buf+Size != EndBuf)
+    return true;                      // Hrm, not a class?
+
+  MethodSignatureList.clear();                 // Just in case...
+
+  // Read into instance variables...
+  if (read_vbr(Buf, EndBuf, FirstDerivedTyID)) return true;
+  if (align32(Buf, EndBuf)) return true;
+
+  C = new Module();                   // Deleted again on every error path
+
+  while (Buf < EndBuf) {
+    const uchar *OldBuf = Buf;
+    if (readBlock(Buf, EndBuf, Type, Size)) { delete C; return true; }
+    switch (Type) {
+    case BytecodeFormat::ModuleGlobalInfo:
+      if (ParseModuleGlobalInfo(Buf, Buf+Size, C)) {
+        cerr << "Error reading class global info section!\n";
+        delete C; return true;
+      }
+      break;
+
+    case BytecodeFormat::ConstantPool:
+      if (ParseConstantPool(Buf, Buf+Size, C->getConstantPool(), ModuleValues)) {
+        cerr << "Error reading class constant pool!\n";
+        delete C; return true;
+      }
+      break;
+
+    case BytecodeFormat::Method: {
+      if (ParseMethod(Buf, Buf+Size, C)) {
+        delete C; return true;               // Error parsing method
+      }
+      break;
+    }
+
+    case BytecodeFormat::SymbolTable:
+      if (ParseSymbolTable(Buf, Buf+Size)) {
+        cerr << "Error reading class symbol table!\n";
+        delete C; return true;
+      }
+      break;
+
+    default:
+      cerr << "Unknown class block: " << Type << endl;
+      Buf += Size;
+      if (OldBuf > Buf) { delete C; return true; } // Wrap around!
+      break;
+    }
+    if (align32(Buf, EndBuf)) { delete C; return true; }
+  }
+
+  if (MethodSignatureList.empty()) return false;
+  delete C;                              // Expected more methods!
+  return true;
+}
+// ParseBytecode - Check the file signature, then parse the module; 0 on error.
+Module *BytecodeParser::ParseBytecode(const uchar *Buf, const uchar *EndBuf) {
+  LateResolveValues.clear();
+  unsigned Sig;
+  // Read and check signature...
+  if (read(Buf, EndBuf, Sig) ||
+      Sig != ('l' | ('l' << 8) | ('v' << 16) | 'm' << 24))
+    return 0;                          // Invalid signature!
+
+  Module *Result;
+  if (ParseModule(Buf, EndBuf, Result)) return 0;
+  return Result;
+}
+
+// ParseBytecodeBuffer - Parse a module held in memory; returns 0 on error.
+Module *ParseBytecodeBuffer(const uchar *Buffer, unsigned Length) {
+  BytecodeParser Parser;
+  return Parser.ParseBytecode(Buffer, Buffer+Length);
+}
+
+// ParseBytecodeFile - Parse and return the module stored in Filename, or read
+// from stdin if Filename is "-".  Returns 0 on any I/O or parse error.
+Module *ParseBytecodeFile(const string &Filename) {
+  struct stat StatBuf;
+  Module *Result = 0;
+
+  if (Filename != string("-")) {        // Read from a file...
+    int FD = open(Filename.c_str(), O_RDONLY);  // open() needs a C string
+    if (FD == -1) return 0;
+
+    if (fstat(FD, &StatBuf) == -1) { close(FD); return 0; }
+
+    int Length = StatBuf.st_size;
+    if (Length == 0) { close(FD); return 0; }
+    uchar *Buffer = (uchar*)mmap(0, Length, PROT_READ,
+                                 MAP_PRIVATE, FD, 0);
+    if (Buffer == (uchar*)-1) { close(FD); return 0; }
+
+    BytecodeParser Parser;
+    Result  = Parser.ParseBytecode(Buffer, Buffer+Length);
+
+    munmap((char*)Buffer, Length);
+    close(FD);
+  } else {                              // Read from stdin
+    size_t FileSize = 0;
+    int BlockSize;
+    uchar Buffer[4096], *FileData = 0;
+    while ((BlockSize = read(0, Buffer, sizeof(Buffer)))) {  // Fill the buffer
+      if (BlockSize == -1) { free(FileData); return 0; }
+
+      FileData = (uchar*)realloc(FileData, FileSize+BlockSize);
+      memcpy(FileData+FileSize, Buffer, BlockSize);
+      FileSize += BlockSize;
+    }
+
+    if (FileSize == 0) { free(FileData); return 0; }
+
+#define ALIGN_PTRS 1
+#if ALIGN_PTRS
+    uchar *Buf = (uchar*)mmap(0, FileSize, PROT_READ|PROT_WRITE,
+                              MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+    if (Buf == (uchar*)-1) { free(FileData); return 0; }  // mmap failed
+    memcpy(Buf, FileData, FileSize);   // Copy before releasing the source!
+    free(FileData);
+#else
+    uchar *Buf = FileData;
+#endif
+
+    BytecodeParser Parser;
+    Result = Parser.ParseBytecode(Buf, Buf+FileSize);
+
+#if ALIGN_PTRS
+    munmap((char*)Buf, FileSize);   // Free mmap'd data area
+#else
+    free(FileData);          // Free realloc'd block of memory
+#endif
+  }
+
+  return Result;
+}
diff --git a/lib/Bytecode/Reader/ReaderInternals.h b/lib/Bytecode/Reader/ReaderInternals.h
new file mode 100644
index 0000000..3bb0472
--- /dev/null
+++ b/lib/Bytecode/Reader/ReaderInternals.h
@@ -0,0 +1,146 @@
+//===-- ReaderInternals.h - Definitions internal to the reader ---*- C++ -*--=//
+//
+//  This header file defines various stuff that is used by the bytecode reader.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef READER_INTERNALS_H
+#define READER_INTERNALS_H
+
+#include "llvm/Bytecode/Primitives.h"
+#include "llvm/SymTabValue.h"
+#include "llvm/Method.h"
+#include "llvm/Instruction.h"
+#include <map>
+#include <utility>
+
+class BasicBlock;
+class Method;
+class Module;
+class Type;
+
+typedef unsigned char uchar;
+
+struct RawInst {       // The raw fields out of the bytecode stream...
+  unsigned NumOperands;
+  unsigned Opcode;
+  const Type *Ty;
+  unsigned Arg1, Arg2;
+  union {
+    unsigned Arg3;
+    vector<unsigned> *VarArgs; // Contains arg #3,4,5... if NumOperands > 3
+  };
+};
+// BytecodeParser - State and routines used to parse one bytecode module.
+class BytecodeParser {
+public:
+  BytecodeParser() {
+    // Define this in case we don't see a ModuleGlobalInfo block.
+    FirstDerivedTyID = Type::FirstDerivedTyID;
+  }
+
+  Module *ParseBytecode(const uchar *Buf, const uchar *EndBuf);
+private:          // All of this data is transient across calls to ParseBytecode
+  typedef vector<Value *> ValueList;
+  typedef vector<ValueList> ValueTable;
+  typedef map<const Type *, unsigned> TypeMapType;
+  ValueTable Values, LateResolveValues;
+  ValueTable ModuleValues, LateResolveModuleValues;
+  TypeMapType TypeMap;
+
+  // Information read from the ModuleGlobalInfo section of the file...
+  unsigned FirstDerivedTyID;
+
+  // When the ModuleGlobalInfo section is read, we load the type of each method
+  // and the 'ModuleValues' slot that it lands in.  We then load a placeholder
+  // into its slot to reserve it.  When the method is loaded, this placeholder
+  // is replaced.
+  //
+  list<pair<const MethodType *, unsigned> > MethodSignatureList;
+
+private:          // Each Parse* routine returns true on error
+  bool ParseModule          (const uchar * Buf, const uchar *End, Module *&);
+  bool ParseModuleGlobalInfo(const uchar *&Buf, const uchar *End, Module *);
+  bool ParseSymbolTable   (const uchar *&Buf, const uchar *End);
+  bool ParseMethod        (const uchar *&Buf, const uchar *End, Module *);
+  bool ParseBasicBlock    (const uchar *&Buf, const uchar *End, BasicBlock *&);
+  bool ParseInstruction   (const uchar *&Buf, const uchar *End, Instruction *&);
+  bool ParseRawInst       (const uchar *&Buf, const uchar *End, RawInst &);
+
+  bool ParseConstantPool(const uchar *&Buf, const uchar *EndBuf,
+                         SymTabValue::ConstantPoolType &CP, ValueTable &Tab);
+
+
+  bool parseConstPoolValue(const uchar *&Buf, const uchar *End,
+                           const Type *Ty, ConstPoolVal *&V);
+  bool parseTypeConstant  (const uchar *&Buf, const uchar *, ConstPoolVal *&);
+
+  Value      *getValue(const Type *Ty, unsigned num, bool Create = true);
+  const Type *getType(unsigned ID);
+
+  bool insertValue(Value *D, vector<ValueList> &ValueTab);
+  bool postResolveValues(ValueTable &ValTab);
+
+  bool getTypeSlot(const Type *Ty, unsigned &Slot);
+};
+// PlaceholderDef - Wraps a value class with the slot ID it is standing in for.
+template<class SuperType>
+class PlaceholderDef : public SuperType {
+  unsigned ID;
+public:
+  PlaceholderDef(const Type *Ty, unsigned id) : SuperType(Ty), ID(id) {}
+  unsigned getID() { return ID; }
+};
+// Instruction stand-in with no operands; base of the default placeholder.
+struct InstPlaceHolderHelper : public Instruction {
+  InstPlaceHolderHelper(const Type *Ty) : Instruction(Ty, UserOp1, "") {}
+  inline virtual void dropAllReferences() {}
+  virtual string getOpcode() const { return "placeholder"; }
+
+  virtual Instruction *clone() const { abort(); return 0; }
+
+  // No "operands"...
+  virtual Value *getOperand(unsigned i) { return 0; }
+  virtual const Value *getOperand(unsigned i) const { return 0; }
+  virtual bool setOperand(unsigned i, Value *Val) { return false; }
+  virtual unsigned getNumOperands() const { return 0; }
+};
+// BasicBlock stand-in; base of the placeholder used for label references.
+struct BBPlaceHolderHelper : public BasicBlock {
+  BBPlaceHolderHelper(const Type *Ty) : BasicBlock() {
+    assert(Ty->isLabelType());
+  }
+};
+// Method stand-in; base of the placeholder used until a method body is read.
+struct MethPlaceHolderHelper : public Method {
+  MethPlaceHolderHelper(const Type *Ty)
+    : Method((const MethodType*)Ty) {
+    assert(Ty->isMethodType() && "Method placeholders must be method types!");
+  }
+};
+
+typedef PlaceholderDef<InstPlaceHolderHelper>  DefPHolder;
+typedef PlaceholderDef<BBPlaceHolderHelper>    BBPHolder;
+typedef PlaceholderDef<MethPlaceHolderHelper>  MethPHolder;
+// Recover the slot ID stored in a placeholder, dispatching on its type.
+static inline unsigned getValueIDNumberFromPlaceHolder(Value *Def) {
+  switch (Def->getType()->getPrimitiveID()) {
+  case Type::LabelTyID:  return ((BBPHolder*)Def)->getID();
+  case Type::MethodTyID: return ((MethPHolder*)Def)->getID();
+  default:               return ((DefPHolder*)Def)->getID();
+  }
+}
+// readBlock - Read a block header: [block type][block size]; true on error.
+static inline bool readBlock(const uchar *&Buf, const uchar *EndBuf,
+                             unsigned &Type, unsigned &Size) {
+#if DEBUG_OUTPUT
+  bool Result = read(Buf, EndBuf, Type) || read(Buf, EndBuf, Size);
+  cerr << "StartLoc = " << ((unsigned)Buf & 4095)
+       << " Type = " << Type << " Size = " << Size << endl;
+  return Result;
+#else
+  return read(Buf, EndBuf, Type) || read(Buf, EndBuf, Size);
+#endif
+}
+
+#endif
diff --git a/lib/Bytecode/Writer/ConstantWriter.cpp b/lib/Bytecode/Writer/ConstantWriter.cpp
new file mode 100644
index 0000000..e0504a5
--- /dev/null
+++ b/lib/Bytecode/Writer/ConstantWriter.cpp
@@ -0,0 +1,154 @@
+//===-- WriteConst.cpp - Functions for writing constants ---------*- C++ -*--=//
+//
+// This file implements the routines for encoding constants to a bytecode
+// +// Note that the performance of this library is not terribly important, because +// it shouldn't be used by JIT type applications... so it is not a huge focus +// at least. :) +// +//===----------------------------------------------------------------------===// + +#include "WriterInternals.h" +#include "llvm/ConstPoolVals.h" +#include "llvm/SymbolTable.h" +#include "llvm/DerivedTypes.h" + +void BytecodeWriter::outputType(const Type *T) { + output_vbr((unsigned)T->getPrimitiveID(), Out); + + // That's all there is to handling primitive types... + if (T->isPrimitiveType()) + return; // We might do this if we alias a prim type: %x = type int + + switch (T->getPrimitiveID()) { // Handle derived types now. + case Type::MethodTyID: { + const MethodType *MT = (const MethodType*)T; + int Slot = Table.getValSlot(MT->getReturnType()); + assert(Slot != -1 && "Type used but not available!!"); + output_vbr((unsigned)Slot, Out); + + // Output all of the arguments... + MethodType::ParamTypes::const_iterator I = MT->getParamTypes().begin(); + for (; I != MT->getParamTypes().end(); I++) { + Slot = Table.getValSlot(*I); + assert(Slot != -1 && "Type used but not available!!"); + output_vbr((unsigned)Slot, Out); + } + + // Terminate list with VoidTy + output_vbr((unsigned)Type::VoidTy->getPrimitiveID(), Out); + break; + } + + case Type::ArrayTyID: { + const ArrayType *AT = (const ArrayType*)T; + int Slot = Table.getValSlot(AT->getElementType()); + assert(Slot != -1 && "Type used but not available!!"); + output_vbr((unsigned)Slot, Out); + //cerr << "Type slot = " << Slot << " Type = " << T->getName() << endl; + + output_vbr(AT->getNumElements(), Out); + break; + } + + case Type::StructTyID: { + const StructType *ST = (const StructType*)T; + + // Output all of the element types... 
+ StructType::ElementTypes::const_iterator I = ST->getElementTypes().begin(); + for (; I != ST->getElementTypes().end(); I++) { + int Slot = Table.getValSlot(*I); + assert(Slot != -1 && "Type used but not available!!"); + output_vbr((unsigned)Slot, Out); + } + + // Terminate list with VoidTy + output_vbr((unsigned)Type::VoidTy->getPrimitiveID(), Out); + break; + } + + case Type::PointerTyID: { + const PointerType *PT = (const PointerType*)T; + int Slot = Table.getValSlot(PT->getValueType()); + assert(Slot != -1 && "Type used but not available!!"); + output_vbr((unsigned)Slot, Out); + break; + } + + case Type::ModuleTyID: + case Type::PackedTyID: + default: + cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize" + << " Type '" << T->getName() << "'\n"; + break; + } +} + +bool BytecodeWriter::outputConstant(const ConstPoolVal *CPV) { + switch (CPV->getType()->getPrimitiveID()) { + case Type::BoolTyID: // Boolean Types + if (((const ConstPoolBool*)CPV)->getValue()) + output_vbr((unsigned)1, Out); + else + output_vbr((unsigned)0, Out); + break; + + case Type::UByteTyID: // Unsigned integer types... + case Type::UShortTyID: + case Type::UIntTyID: + case Type::ULongTyID: + output_vbr(((const ConstPoolUInt*)CPV)->getValue(), Out); + break; + + case Type::SByteTyID: // Signed integer types... + case Type::ShortTyID: + case Type::IntTyID: + case Type::LongTyID: + output_vbr(((const ConstPoolSInt*)CPV)->getValue(), Out); + break; + + case Type::TypeTyID: // Serialize type type + outputType(((const ConstPoolType*)CPV)->getValue()); + break; + + case Type::ArrayTyID: { + const ConstPoolArray *CPA = (const ConstPoolArray *)CPV; + unsigned size = CPA->getValues().size(); + if (!((const ArrayType *)CPA->getType())->isSized()) + output_vbr(size, Out); // Not for sized arrays!!! 
+ + for (unsigned i = 0; i < size; i++) { + int Slot = Table.getValSlot(CPA->getValues()[i]); + assert(Slot != -1 && "Constant used but not available!!"); + output_vbr((unsigned)Slot, Out); + } + break; + } + + case Type::StructTyID: { + const ConstPoolStruct *CPS = (const ConstPoolStruct*)CPV; + const vector<ConstPoolUse> &Vals = CPS->getValues(); + + for (unsigned i = 0; i < Vals.size(); ++i) { + int Slot = Table.getValSlot(Vals[i]); + assert(Slot != -1 && "Constant used but not available!!"); + output_vbr((unsigned)Slot, Out); + } + break; + } + + case Type::FloatTyID: // Floating point types... + case Type::DoubleTyID: + // TODO: Floating point type serialization + + + case Type::VoidTyID: + case Type::LabelTyID: + default: + cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize" + << " type '" << CPV->getType()->getName() << "'\n"; + break; + } + return false; +} diff --git a/lib/Bytecode/Writer/InstructionWriter.cpp b/lib/Bytecode/Writer/InstructionWriter.cpp new file mode 100644 index 0000000..c7c04ef --- /dev/null +++ b/lib/Bytecode/Writer/InstructionWriter.cpp @@ -0,0 +1,184 @@ +//===-- WriteInst.cpp - Functions for writing instructions -------*- C++ -*--=// +// +// This file implements the routines for encoding instruction opcodes to a +// bytecode stream. +// +// Note that the performance of this library is not terribly important, because +// it shouldn't be used by JIT type applications... so it is not a huge focus +// at least. :) +// +//===----------------------------------------------------------------------===// + +#include "WriterInternals.h" +#include "llvm/Module.h" +#include "llvm/Method.h" +#include "llvm/BasicBlock.h" +#include "llvm/Instruction.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Tools/DataTypes.h" +#include <algorithm> + +typedef unsigned char uchar; + +// outputInstructionFormat0 - Output those wierd instructions that have a large +// number of operands or have large operands themselves... 
+// +// Format: [opcode] [type] [numargs] [arg0] [arg1] ... [arg<numargs-1>] +// +static void outputInstructionFormat0(const Instruction *I, + const SlotCalculator &Table, + unsigned Type, vector<uchar> &Out) { + // Opcode must have top two bits clear... + output_vbr(I->getInstType(), Out); // Instruction Opcode ID + output_vbr(Type, Out); // Result type + + unsigned NumArgs; // Count the number of arguments to the instruction + for (NumArgs = 0; I->getOperand(NumArgs); NumArgs++) /*empty*/; + output_vbr(NumArgs, Out); + + for (unsigned i = 0; const Value *N = I->getOperand(i); i++) { + assert(i < NumArgs && "Count of arguments failed!"); + + int Slot = Table.getValSlot(N); + output_vbr((unsigned)Slot, Out); + } + align32(Out); // We must maintain correct alignment! +} + + +// outputInstructionFormat1 - Output one operand instructions, knowing that no +// operand index is >= 2^12. +// +static void outputInstructionFormat1(const Instruction *I, + const SlotCalculator &Table, int *Slots, + unsigned Type, vector<uchar> &Out) { + unsigned IType = I->getInstType(); // Instruction Opcode ID + + // bits Instruction format: + // -------------------------- + // 31-30: Opcode type, fixed to 1. + // 29-24: Opcode + // 23-12: Resulting type plane + // 11- 0: Operand #1 (if set to (2^12-1), then zero operands) + // + unsigned Opcode = (1 << 30) | (IType << 24) | (Type << 12) | Slots[0]; + // cerr << "1 " << IType << " " << Type << " " << Slots[0] << endl; + output(Opcode, Out); +} + + +// outputInstructionFormat2 - Output two operand instructions, knowing that no +// operand index is >= 2^8. +// +static void outputInstructionFormat2(const Instruction *I, + const SlotCalculator &Table, int *Slots, + unsigned Type, vector<uchar> &Out) { + unsigned IType = I->getInstType(); // Instruction Opcode ID + + // bits Instruction format: + // -------------------------- + // 31-30: Opcode type, fixed to 2. 
+ // 29-24: Opcode + // 23-16: Resulting type plane + // 15- 8: Operand #1 + // 7- 0: Operand #2 + // + unsigned Opcode = (2 << 30) | (IType << 24) | (Type << 16) | + (Slots[0] << 8) | (Slots[1] << 0); + // cerr << "2 " << IType << " " << Type << " " << Slots[0] << " " + // << Slots[1] << endl; + output(Opcode, Out); +} + + +// outputInstructionFormat3 - Output three operand instructions, knowing that no +// operand index is >= 2^6. +// +static void outputInstructionFormat3(const Instruction *I, + const SlotCalculator &Table, int *Slots, + unsigned Type, vector<uchar> &Out) { + unsigned IType = I->getInstType(); // Instruction Opcode ID + + // bits Instruction format: + // -------------------------- + // 31-30: Opcode type, fixed to 3 + // 29-24: Opcode + // 23-18: Resulting type plane + // 17-12: Operand #1 + // 11- 6: Operand #2 + // 5- 0: Operand #3 + // + unsigned Opcode = (3 << 30) | (IType << 24) | (Type << 18) | + (Slots[0] << 12) | (Slots[1] << 6) | (Slots[2] << 0); + // cerr << "3 " << IType << " " << Type << " " << Slots[0] << " " + // << Slots[1] << " " << Slots[2] << endl; + output(Opcode, Out); +} + +bool BytecodeWriter::processInstruction(const Instruction *I) { + assert(I->getInstType() < 64 && "Opcode too big???"); + + unsigned NumOperands = 0; + int MaxOpSlot = 0; + int Slots[3]; Slots[0] = (1 << 12)-1; + + const Value *Def; + while ((Def = I->getOperand(NumOperands))) { + int slot = Table.getValSlot(Def); + assert(slot != -1 && "Broken bytecode!"); + if (slot > MaxOpSlot) MaxOpSlot = slot; + if (NumOperands < 3) Slots[NumOperands] = slot; + NumOperands++; + } + + // Figure out which type to encode with the instruction. Typically we want + // the type of the first parameter, as opposed to the type of the instruction + // (for example, with setcc, we always know it returns bool, but the type of + // the first param is actually interesting). But if we have no arguments + // we take the type of the instruction itself. 
+ // + + const Type *Ty; + if (NumOperands) + Ty = I->getOperand(0)->getType(); + else + Ty = I->getType(); + + unsigned Type; + int Slot = Table.getValSlot(Ty); + assert(Slot != -1 && "Type not available!!?!"); + Type = (unsigned)Slot; + + + // Decide which instruction encoding to use. This is determined primarily by + // the number of operands, and secondarily by whether or not the max operand + // will fit into the instruction encoding. More operands == fewer bits per + // operand. + // + switch (NumOperands) { + case 0: + case 1: + if (MaxOpSlot < (1 << 12)-1) { // -1 because we use 4095 to indicate 0 ops + outputInstructionFormat1(I, Table, Slots, Type, Out); + return false; + } + break; + + case 2: + if (MaxOpSlot < (1 << 8)) { + outputInstructionFormat2(I, Table, Slots, Type, Out); + return false; + } + break; + + case 3: + if (MaxOpSlot < (1 << 6)) { + outputInstructionFormat3(I, Table, Slots, Type, Out); + return false; + } + break; + } + + outputInstructionFormat0(I, Table, Type, Out); + return false; +} diff --git a/lib/Bytecode/Writer/Makefile b/lib/Bytecode/Writer/Makefile new file mode 100644 index 0000000..c03db56 --- /dev/null +++ b/lib/Bytecode/Writer/Makefile @@ -0,0 +1,7 @@ + +LEVEL = ../../.. + +LIBRARYNAME = bcwriter + +include $(LEVEL)/Makefile.common + diff --git a/lib/Bytecode/Writer/SlotCalculator.cpp b/lib/Bytecode/Writer/SlotCalculator.cpp new file mode 100644 index 0000000..01fae37 --- /dev/null +++ b/lib/Bytecode/Writer/SlotCalculator.cpp @@ -0,0 +1,195 @@ +//===-- SlotCalculator.cpp - Calculate what slots values land in ------------=// +// +// This file implements a useful analysis step to figure out what numbered +// slots values in a program will land in (keeping track of per plane +// information as required. +// +// This is used primarily for when writing a file to disk, either in bytecode +// or source format. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/SlotCalculator.h" +#include "llvm/ConstantPool.h" +#include "llvm/Method.h" +#include "llvm/Module.h" +#include "llvm/BasicBlock.h" +#include "llvm/ConstPoolVals.h" +#include "llvm/iOther.h" +#include "llvm/DerivedTypes.h" + +SlotCalculator::SlotCalculator(const Module *M, bool IgnoreNamed) { + IgnoreNamedNodes = IgnoreNamed; + TheModule = M; + + // Preload table... Make sure that all of the primitive types are in the table + // and that their Primitive ID is equal to their slot # + // + for (unsigned i = 0; i < Type::FirstDerivedTyID; ++i) { + assert(Type::getPrimitiveType((Type::PrimitiveID)i)); + insertVal(Type::getPrimitiveType((Type::PrimitiveID)i)); + } + + if (M == 0) return; // Empty table... + + bool Result = processModule(M); + assert(Result == false && "Error in processModule!"); +} + +SlotCalculator::SlotCalculator(const Method *M, bool IgnoreNamed) { + IgnoreNamedNodes = IgnoreNamed; + TheModule = M ? M->getParent() : 0; + + // Preload table... Make sure that all of the primitive types are in the table + // and that their Primitive ID is equal to their slot # + // + for (unsigned i = 0; i < Type::FirstDerivedTyID; ++i) { + assert(Type::getPrimitiveType((Type::PrimitiveID)i)); + insertVal(Type::getPrimitiveType((Type::PrimitiveID)i)); + } + + if (TheModule == 0) return; // Empty table... + + bool Result = processModule(TheModule); + assert(Result == false && "Error in processModule!"); + + incorporateMethod(M); +} + +void SlotCalculator::incorporateMethod(const Method *M) { + assert(ModuleLevel.size() == 0 && "Module already incorporated!"); + + // Save the Table state before we process the method... 
+ for (unsigned i = 0; i < Table.size(); ++i) { + ModuleLevel.push_back(Table[i].size()); + } + + // Process the method to incorporate its values into our table + processMethod(M); +} + +void SlotCalculator::purgeMethod() { + assert(ModuleLevel.size() != 0 && "Module not incorporated!"); + unsigned NumModuleTypes = ModuleLevel.size(); + + // First, remove values from existing type planes + for (unsigned i = 0; i < NumModuleTypes; ++i) { + unsigned ModuleSize = ModuleLevel[i]; // Size of plane before method came + while (Table[i].size() != ModuleSize) { + NodeMap.erase(NodeMap.find(Table[i].back())); // Erase from nodemap + Table[i].pop_back(); // Shrink plane + } + } + + // We don't need this state anymore, free it up. + ModuleLevel.clear(); + + // Next, remove any type planes defined by the method... + while (NumModuleTypes != Table.size()) { + TypePlane &Plane = Table.back(); + while (Plane.size()) { + NodeMap.erase(NodeMap.find(Plane.back())); // Erase from nodemap + Plane.pop_back(); // Shrink plane + } + + Table.pop_back(); // Nuke the plane, we don't like it. + } +} + +bool SlotCalculator::processConstant(const ConstPoolVal *CPV) { + //cerr << "Inserting constant: '" << CPV->getStrValue() << endl; + insertVal(CPV); + return false; +} + +// processType - This callback occurs when an derived type is discovered +// at the class level. This activity occurs when processing a constant pool. +// +bool SlotCalculator::processType(const Type *Ty) { + //cerr << "processType: " << Ty->getName() << endl; + // TODO: Don't leak memory!!! Free this in the dtor! 
+ insertVal(new ConstPoolType(Ty)); + return false; +} + +bool SlotCalculator::visitMethod(const Method *M) { + //cerr << "visitMethod: '" << M->getType()->getName() << "'\n"; + insertVal(M); + return false; +} + +bool SlotCalculator::processMethodArgument(const MethodArgument *MA) { + insertVal(MA); + return false; +} + +bool SlotCalculator::processBasicBlock(const BasicBlock *BB) { + insertVal(BB); + ModuleAnalyzer::processBasicBlock(BB); // Lets visit the instructions too! + return false; +} + +bool SlotCalculator::processInstruction(const Instruction *I) { + insertVal(I); + return false; +} + +int SlotCalculator::getValSlot(const Value *D) const { + map<const Value*, unsigned>::const_iterator I = NodeMap.find(D); + if (I == NodeMap.end()) return -1; + + return (int)I->second; +} + +void SlotCalculator::insertVal(const Value *D) { + if (D == 0) return; + + // If this node does not contribute to a plane, or if the node has a + // name and we don't want names, then ignore the silly node... + // + if (D->getType() == Type::VoidTy || (IgnoreNamedNodes && D->hasName())) + return; + + const Type *Typ = D->getType(); + unsigned Ty = Typ->getPrimitiveID(); + if (Typ->isDerivedType()) { + int DefSlot = getValSlot(Typ); + if (DefSlot == -1) { // Have we already entered this type? + // This can happen if a type is first seen in an instruction. For + // example, if you say 'malloc uint', this defines a type 'uint*' that + // may be undefined at this point. + // + cerr << "SHOULDNT HAPPEN Adding Type ba: " << Typ->getName() << endl; + assert(0 && "SHouldn't this be taken care of by processType!?!?!"); + // Nope... add this to the Type plane now! + insertVal(Typ); + + DefSlot = getValSlot(Typ); + assert(DefSlot >= 0 && "Type didn't get inserted correctly!"); + } + Ty = (unsigned)DefSlot; + } + + if (Table.size() <= Ty) // Make sure we have the type plane allocated... + Table.resize(Ty+1, TypePlane()); + + // Insert node into table and NodeMap... 
+ NodeMap[D] = Table[Ty].size(); + + if (Typ == Type::TypeTy && // If it's a type constant, add the Type also + D->getValueType() != Value::TypeVal) { + assert(D->getValueType() == Value::ConstantVal && + "All Type instances should be constant types!"); + + const ConstPoolType *CPT = (const ConstPoolType*)D; + int Slot = getValSlot(CPT->getValue()); + if (Slot == -1) { + // Only add if it's not already here! + NodeMap[CPT->getValue()] = Table[Ty].size(); + } else if (!CPT->hasName()) { // If the type has no name... + NodeMap[D] = (unsigned)Slot; // Don't readd type, merge. + return; + } + } + Table[Ty].push_back(D); +} diff --git a/lib/Bytecode/Writer/SlotCalculator.h b/lib/Bytecode/Writer/SlotCalculator.h new file mode 100644 index 0000000..99e40cb --- /dev/null +++ b/lib/Bytecode/Writer/SlotCalculator.h @@ -0,0 +1,96 @@ +//===-- llvm/Analysis/SlotCalculator.h - Calculate value slots ---*- C++ -*-==// +// +// This ModuleAnalyzer subclass calculates the slots that values will land in. +// This is useful for when writing bytecode or assembly out, because you have +// to know these things. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_SLOTCALCULATOR_H +#define LLVM_ANALYSIS_SLOTCALCULATOR_H + +#include "llvm/Analysis/ModuleAnalyzer.h" +#include "llvm/SymTabValue.h" +#include <vector> +#include <map> + +class SlotCalculator : public ModuleAnalyzer { + const Module *TheModule; + bool IgnoreNamedNodes; // Shall we not count named nodes? + + typedef vector<const Value*> TypePlane; + vector <TypePlane> Table; + map<const Value *, unsigned> NodeMap; + + // ModuleLevel - Used to keep track of which values belong to the module, + // and which values belong to the currently incorporated method. 
+ // + vector <unsigned> ModuleLevel; + +public: + SlotCalculator(const Module *M, bool IgnoreNamed); + SlotCalculator(const Method *M, bool IgnoreNamed);// Start out in incorp state + inline ~SlotCalculator() {} + + // getValSlot returns < 0 on error! + int getValSlot(const Value *D) const; + + inline unsigned getNumPlanes() const { return Table.size(); } + inline unsigned getModuleLevel(unsigned Plane) const { + return Plane < ModuleLevel.size() ? ModuleLevel[Plane] : 0; + } + + inline const TypePlane &getPlane(unsigned Plane) const { + return Table[Plane]; + } + + // If you'd like to deal with a method, use these two methods to get its data + // into the SlotCalculator! + // + void incorporateMethod(const Method *M); + void purgeMethod(); + +protected: + // insertVal - Insert a value into the value table... + // + void insertVal(const Value *D); + + // visitMethod - This member is called after the constant pool has been + // processed. The default implementation of this is a noop. + // + virtual bool visitMethod(const Method *M); + + // processConstant is called once per each constant in the constant pool. It + // traverses the constant pool such that it visits each constant in the + // order of its type. Thus, all 'int' typed constants shall be visited + // sequentially, etc... + // + virtual bool processConstant(const ConstPoolVal *CPV); + + // processType - This callback occurs when an derived type is discovered + // at the class level. This activity occurs when processing a constant pool. + // + virtual bool processType(const Type *Ty); + + // processMethods - The default implementation of this method loops through + // all of the methods in the module and processModule's them. We don't want + // this (we want to explicitly visit them with incorporateMethod), so we + // disable it. + // + virtual bool processMethods(const Module *M) { return false; } + + // processMethodArgument - This member is called for every argument that + // is passed into the method. 
+ // + virtual bool processMethodArgument(const MethodArgument *MA); + + // processBasicBlock - This member is called for each basic block in a methd. + // + virtual bool processBasicBlock(const BasicBlock *BB); + + // processInstruction - This member is called for each Instruction in a methd. + // + virtual bool processInstruction(const Instruction *I); +}; + +#endif diff --git a/lib/Bytecode/Writer/Writer.cpp b/lib/Bytecode/Writer/Writer.cpp new file mode 100644 index 0000000..d03c945 --- /dev/null +++ b/lib/Bytecode/Writer/Writer.cpp @@ -0,0 +1,182 @@ +//===-- Writer.cpp - Library for writing VM bytecode files -------*- C++ -*--=// +// +// This library implements the functionality defined in llvm/Bytecode/Writer.h +// +// This library uses the Analysis library to figure out offsets for +// variables in the method tables... +// +// Note that this file uses an unusual technique of outputting all the bytecode +// to a vector of unsigned char's, then copies the vector to an ostream. The +// reason for this is that we must do "seeking" in the stream to do back- +// patching, and some very important ostreams that we want to support (like +// pipes) do not support seeking. :( :( :( +// +// The choice of the vector data structure is influenced by the extremely fast +// "append" speed, plus the free "seek"/replace in the middle of the stream. +// +// Note that the performance of this library is not terribly important, because +// it shouldn't be used by JIT type applications... so it is not a huge focus +// at least. 
:) +// +//===----------------------------------------------------------------------===// + +#include "WriterInternals.h" +#include "llvm/Module.h" +#include "llvm/Method.h" +#include "llvm/BasicBlock.h" +#include "llvm/ConstPoolVals.h" +#include "llvm/SymbolTable.h" +#include "llvm/DerivedTypes.h" +#include <string.h> +#include <algorithm> + +BytecodeWriter::BytecodeWriter(vector<unsigned char> &o, const Module *M) + : Out(o), Table(M, false) { + + outputSignature(); + + // Emit the top level CLASS block. + BytecodeBlock ModuleBlock(BytecodeFormat::Module, Out); + + // Output largest ID of first "primitive" type: + output_vbr((unsigned)Type::FirstDerivedTyID, Out); + align32(Out); + + // Do the whole module now! + processModule(M); + + // If needed, output the symbol table for the class... + if (M->hasSymbolTable()) + outputSymbolTable(*M->getSymbolTable()); +} + +// TODO: REMOVE +#include "llvm/Assembly/Writer.h" + +bool BytecodeWriter::processConstPool(const ConstantPool &CP, bool isMethod) { + BytecodeBlock *CPool = new BytecodeBlock(BytecodeFormat::ConstantPool, Out); + + unsigned NumPlanes = Table.getNumPlanes(); + + for (unsigned pno = 0; pno < NumPlanes; pno++) { + const vector<const Value*> &Plane = Table.getPlane(pno); + if (Plane.empty()) continue; // Skip empty type planes... + + unsigned ValNo = 0; // Don't reemit module constants + if (isMethod) ValNo = Table.getModuleLevel(pno); + + unsigned NumConstants = 0; + for (unsigned vn = ValNo; vn < Plane.size(); vn++) + if (Plane[vn]->getValueType() == Value::ConstantVal) + NumConstants++; + + if (NumConstants == 0) continue; // Skip empty type planes... + + // Output type header: [num entries][type id number] + // + output_vbr(NumConstants, Out); + + // Output the Type ID Number... 
+ int Slot = Table.getValSlot(Plane.front()->getType()); + assert (Slot != -1 && "Type in constant pool but not in method!!"); + output_vbr((unsigned)Slot, Out); + + //cerr << "NC: " << NumConstants << " Slot = " << hex << Slot << endl; + + for (; ValNo < Plane.size(); ValNo++) { + const Value *V = Plane[ValNo]; + if (V->getValueType() == Value::ConstantVal) { + //cerr << "Serializing value: <" << V->getType() << ">: " + // << ((const ConstPoolVal*)V)->getStrValue() << ":" + // << Out.size() << "\n"; + outputConstant((const ConstPoolVal*)V); + } + } + } + + delete CPool; // End bytecode block section! + + if (!isMethod) { // The ModuleInfoBlock follows directly after the c-pool + assert(CP.getParent()->getValueType() == Value::ModuleVal); + outputModuleInfoBlock((const Module*)CP.getParent()); + } + + return false; +} + +void BytecodeWriter::outputModuleInfoBlock(const Module *M) { + BytecodeBlock ModuleInfoBlock(BytecodeFormat::ModuleGlobalInfo, Out); + + // Output the types of the methods in this class + Module::MethodListType::const_iterator I = M->getMethodList().begin(); + while (I != M->getMethodList().end()) { + int Slot = Table.getValSlot((*I)->getType()); + assert(Slot != -1 && "Module const pool is broken!"); + assert(Slot >= Type::FirstDerivedTyID && "Derived type not in range!"); + output_vbr((unsigned)Slot, Out); + I++; + } + output_vbr((unsigned)Table.getValSlot(Type::VoidTy), Out); + align32(Out); +} + +bool BytecodeWriter::processMethod(const Method *M) { + BytecodeBlock MethodBlock(BytecodeFormat::Method, Out); + + Table.incorporateMethod(M); + + if (ModuleAnalyzer::processMethod(M)) return true; + + // If needed, output the symbol table for the method... 
+ if (M->hasSymbolTable()) + outputSymbolTable(*M->getSymbolTable()); + + Table.purgeMethod(); + return false; +} + + +bool BytecodeWriter::processBasicBlock(const BasicBlock *BB) { + BytecodeBlock MethodBlock(BytecodeFormat::BasicBlock, Out); + return ModuleAnalyzer::processBasicBlock(BB); +} + +void BytecodeWriter::outputSymbolTable(const SymbolTable &MST) { + BytecodeBlock MethodBlock(BytecodeFormat::SymbolTable, Out); + + for (SymbolTable::const_iterator TI = MST.begin(); TI != MST.end(); TI++) { + SymbolTable::type_const_iterator I = MST.type_begin(TI->first); + SymbolTable::type_const_iterator End = MST.type_end(TI->first); + int Slot; + + if (I == End) continue; // Don't mess with an absent type... + + // Symtab block header: [num entries][type id number] + output_vbr(MST.type_size(TI->first), Out); + + Slot = Table.getValSlot(TI->first); + assert(Slot != -1 && "Type in symtab, but not in table!"); + output_vbr((unsigned)Slot, Out); + + for (; I != End; I++) { + // Symtab entry: [def slot #][name] + Slot = Table.getValSlot(I->second); + assert (Slot != -1 && "Value in symtab but not in method!!"); + output_vbr((unsigned)Slot, Out); + output(I->first, Out, false); // Don't force alignment... + } + } +} + +void WriteBytecodeToFile(const Module *C, ostream &Out) { + assert(C && "You can't write a null class!!"); + + vector<unsigned char> Buffer; + + // This object populates buffer for us... + BytecodeWriter BCW(Buffer, C); + + // Okay, write the vector out to the ostream now... + Out.write(&Buffer[0], Buffer.size()); + Out.flush(); +} diff --git a/lib/Bytecode/Writer/WriterInternals.h b/lib/Bytecode/Writer/WriterInternals.h new file mode 100644 index 0000000..be9ccf9 --- /dev/null +++ b/lib/Bytecode/Writer/WriterInternals.h @@ -0,0 +1,74 @@ +//===-- WriterInternals.h - Data structures shared by the Writer -*- C++ -*--=// +// +// This header defines the interface used between components of the bytecode +// writer. 
+// +// Note that the performance of this library is not terribly important, because +// it shouldn't be used by JIT type applications... so it is not a huge focus +// at least. :) +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_BYTECODE_WRITER_WRITERINTERNALS_H +#define LLVM_LIB_BYTECODE_WRITER_WRITERINTERNALS_H + +#include "llvm/Bytecode/Writer.h" +#include "llvm/Bytecode/Format.h" +#include "llvm/Bytecode/Primitives.h" +#include "llvm/Analysis/SlotCalculator.h" +#include "llvm/Tools/DataTypes.h" +#include "llvm/Instruction.h" + +class BytecodeWriter : public ModuleAnalyzer { + vector<unsigned char> &Out; + SlotCalculator Table; +public: + BytecodeWriter(vector<unsigned char> &o, const Module *M); + +protected: + virtual bool processConstPool(const ConstantPool &CP, bool isMethod); + virtual bool processMethod(const Method *M); + virtual bool processBasicBlock(const BasicBlock *BB); + virtual bool processInstruction(const Instruction *I); + +private : + inline void outputSignature() { + static const unsigned char *Sig = (const unsigned char*)"llvm"; + Out.insert(Out.end(), Sig, Sig+4); // output the bytecode signature... + } + + void outputModuleInfoBlock(const Module *C); + void outputSymbolTable(const SymbolTable &ST); + bool outputConstant(const ConstPoolVal *CPV); + void outputType(const Type *T); +}; + + + + +// BytecodeBlock - Little helper class that helps us do backpatching of bytecode +// block sizes really easily. It backpatches when it goes out of scope. +// +class BytecodeBlock { + unsigned Loc; + vector<unsigned char> &Out; + + BytecodeBlock(const BytecodeBlock &); // do not implement + void operator=(const BytecodeBlock &); // do not implement +public: + inline BytecodeBlock(unsigned ID, vector<unsigned char> &o) : Out(o) { + output(ID, Out); + output((unsigned)0, Out); // Reserve the space for the block size... 
+ Loc = Out.size(); + } + + inline ~BytecodeBlock() { // Do backpatch when block goes out + // of scope... + // cerr << "OldLoc = " << Loc << " NewLoc = " << NewLoc << " diff = " << (NewLoc-Loc) << endl; + output((unsigned)(Out.size()-Loc), Out, (int)Loc-4); + align32(Out); // Blocks must ALWAYS be aligned + } +}; + + +#endif |