diff options
Diffstat (limited to 'lib/Bytecode')
-rw-r--r-- | lib/Bytecode/Analyzer/Analyzer.cpp | 242 | ||||
-rw-r--r-- | lib/Bytecode/Analyzer/AnalyzerInternals.h | 65 | ||||
-rw-r--r-- | lib/Bytecode/Analyzer/AnalyzerWrappers.cpp | 208 | ||||
-rw-r--r-- | lib/Bytecode/Analyzer/BytecodeHandler.cpp | 220 | ||||
-rw-r--r-- | lib/Bytecode/Analyzer/BytecodeHandler.h | 247 | ||||
-rw-r--r-- | lib/Bytecode/Analyzer/Dumper.cpp | 311 | ||||
-rw-r--r-- | lib/Bytecode/Analyzer/Makefile | 13 | ||||
-rw-r--r-- | lib/Bytecode/Analyzer/Parser.cpp | 877 | ||||
-rw-r--r-- | lib/Bytecode/Analyzer/Parser.h | 178 | ||||
-rw-r--r-- | lib/Bytecode/Analyzer/ReaderPrimitives.h | 101 | ||||
-rw-r--r-- | lib/Bytecode/Makefile | 2 | ||||
-rw-r--r-- | lib/Bytecode/Reader/Analyzer.cpp | 242 | ||||
-rw-r--r-- | lib/Bytecode/Reader/AnalyzerInternals.h | 65 | ||||
-rw-r--r-- | lib/Bytecode/Reader/AnalyzerWrappers.cpp | 208 | ||||
-rw-r--r-- | lib/Bytecode/Reader/Dumper.cpp | 311 | ||||
-rw-r--r-- | lib/Bytecode/Reader/Parser.cpp | 877 | ||||
-rw-r--r-- | lib/Bytecode/Reader/Parser.h | 178 |
17 files changed, 4344 insertions, 1 deletions
diff --git a/lib/Bytecode/Analyzer/Analyzer.cpp b/lib/Bytecode/Analyzer/Analyzer.cpp new file mode 100644 index 0000000..99c3e41 --- /dev/null +++ b/lib/Bytecode/Analyzer/Analyzer.cpp @@ -0,0 +1,242 @@ +//===-- BytecodeHandler.cpp - Parsing Handler -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Reid Spencer and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file defines the BytecodeHandler class that gets called by the +// AbstractBytecodeParser when parsing events occur. +// +//===----------------------------------------------------------------------===// + +#include "AnalyzerInternals.h" + +using namespace llvm; + + +namespace { + +class AnalyzerHandler : public BytecodeHandler { +public: + bool handleError(const std::string& str ) + { + return false; + } + + void handleStart() + { + } + + void handleFinish() + { + } + + void handleModuleBegin(const std::string& id) + { + } + + void handleModuleEnd(const std::string& id) + { + } + + void handleVersionInfo( + unsigned char RevisionNum, ///< Byte code revision number + Module::Endianness Endianness, ///< Endianness indicator + Module::PointerSize PointerSize ///< PointerSize indicator + ) + { + } + + void handleModuleGlobalsBegin() + { + } + + void handleGlobalVariable( + const Type* ElemType, ///< The type of the global variable + bool isConstant, ///< Whether the GV is constant or not + GlobalValue::LinkageTypes ///< The linkage type of the GV + ) + { + } + + void handleInitializedGV( + const Type* ElemType, ///< The type of the global variable + bool isConstant, ///< Whether the GV is constant or not + GlobalValue::LinkageTypes,///< The linkage type of the GV + unsigned initSlot ///< Slot number of GV's initializer + ) + { + } + + virtual void handleType( const Type* Ty ) + { + } + + void 
handleFunctionDeclaration( + const Type* FuncType ///< The type of the function + ) + { + } + + void handleModuleGlobalsEnd() + { + } + + void handleCompactionTableBegin() + { + } + + void handleCompactionTablePlane( + unsigned Ty, + unsigned NumEntries + ) + { + } + + void handleCompactionTableType( + unsigned i, + unsigned TypSlot, + const Type* + ) + { + } + + void handleCompactionTableValue( + unsigned i, + unsigned ValSlot, + const Type* + ) + { + } + + void handleCompactionTableEnd() + { + } + + void handleSymbolTableBegin() + { + } + + void handleSymbolTablePlane( + unsigned Ty, + unsigned NumEntries, + const Type* Typ + ) + { + } + + void handleSymbolTableType( + unsigned i, + unsigned slot, + const std::string& name + ) + { + } + + void handleSymbolTableValue( + unsigned i, + unsigned slot, + const std::string& name + ) + { + } + + void handleSymbolTableEnd() + { + } + + void handleFunctionBegin( + const Type* FType, + GlobalValue::LinkageTypes linkage + ) + { + } + + void handleFunctionEnd( + const Type* FType + ) + { + } + + void handleBasicBlockBegin( + unsigned blocknum + ) + { + } + + bool handleInstruction( + unsigned Opcode, + const Type* iType, + std::vector<unsigned>& Operands + ) + { + return false; + } + + void handleBasicBlockEnd(unsigned blocknum) + { + } + + void handleGlobalConstantsBegin() + { + } + + void handleConstantExpression( + unsigned Opcode, + const Type* Typ, + std::vector<std::pair<const Type*,unsigned> > ArgVec + ) + { + } + + void handleConstantValue( Constant * c ) + { + } + + void handleConstantArray( + const ArrayType* AT, + std::vector<unsigned>& Elements ) + { + } + + void handleConstantStruct( + const StructType* ST, + std::vector<unsigned>& ElementSlots) + { + } + + void handleConstantPointer( + const PointerType* PT, unsigned Slot) + { + } + + void handleConstantString( const ConstantArray* CA ) + { + } + + + void handleGlobalConstantsEnd() + { + } + +}; + +} + +void llvm::BytecodeAnalyzer::AnalyzeBytecode( + const 
unsigned char *Buf, + unsigned Length, + BytecodeAnalysis& bca, + const std::string &ModuleID +) +{ + AnalyzerHandler TheHandler; + AbstractBytecodeParser TheParser(&TheHandler); + TheParser.ParseBytecode( Buf, Length, ModuleID ); + TheParser.ParseAllFunctionBodies(); +} + +// vim: sw=2 diff --git a/lib/Bytecode/Analyzer/AnalyzerInternals.h b/lib/Bytecode/Analyzer/AnalyzerInternals.h new file mode 100644 index 0000000..d9a2e84 --- /dev/null +++ b/lib/Bytecode/Analyzer/AnalyzerInternals.h @@ -0,0 +1,65 @@ +//===-- ReaderInternals.h - Definitions internal to the reader --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file defines various stuff that is used by the bytecode reader. +// +//===----------------------------------------------------------------------===// + +#ifndef ANALYZER_INTERNALS_H +#define ANALYZER_INTERNALS_H + +#include "Parser.h" +#include "llvm/Bytecode/Analyzer.h" + +// Enable to trace to figure out what the heck is going on when parsing fails +//#define TRACE_LEVEL 10 +//#define DEBUG_OUTPUT + +#if TRACE_LEVEL // ByteCodeReading_TRACEr +#define BCR_TRACE(n, X) \ + if (n < TRACE_LEVEL) std::cerr << std::string(n*2, ' ') << X +#else +#define BCR_TRACE(n, X) +#endif + +namespace llvm { + +class BytecodeAnalyzer { + BytecodeAnalyzer(const BytecodeAnalyzer &); // DO NOT IMPLEMENT + void operator=(const BytecodeAnalyzer &); // DO NOT IMPLEMENT +public: + BytecodeAnalyzer() { } + ~BytecodeAnalyzer() { } + + void AnalyzeBytecode( + const unsigned char *Buf, + unsigned Length, + BytecodeAnalysis& bca, + const std::string &ModuleID + ); + + void DumpBytecode( + const unsigned char *Buf, + unsigned Length, + BytecodeAnalysis& bca, + const std::string &ModuleID + ); + + void dump() 
const { + std::cerr << "BytecodeParser instance!\n"; + } +private: + BytecodeAnalysis TheAnalysis; +}; + +} // End llvm namespace + +#endif + +// vim: sw=2 diff --git a/lib/Bytecode/Analyzer/AnalyzerWrappers.cpp b/lib/Bytecode/Analyzer/AnalyzerWrappers.cpp new file mode 100644 index 0000000..a0e4845 --- /dev/null +++ b/lib/Bytecode/Analyzer/AnalyzerWrappers.cpp @@ -0,0 +1,208 @@ +//===- AnalyzerWrappers.cpp - Analyze bytecode from file or buffer -------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Reid Spencer and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements loading and analysis of a bytecode file and analyzing a +// bytecode buffer. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Bytecode/Analyzer.h" +#include "AnalyzerInternals.h" +#include "Support/FileUtilities.h" +#include "Support/StringExtras.h" +#include "Config/unistd.h" +#include <cerrno> + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// BytecodeFileAnalyzer - Analyze from an mmap'able file descriptor. 
+// + +namespace { + /// BytecodeFileAnalyzer - parses a bytecode file from a file + class BytecodeFileAnalyzer : public BytecodeAnalyzer { + private: + unsigned char *Buffer; + unsigned Length; + + BytecodeFileAnalyzer(const BytecodeFileAnalyzer&); // Do not implement + void operator=(const BytecodeFileAnalyzer &BFR); // Do not implement + + public: + BytecodeFileAnalyzer(const std::string &Filename, BytecodeAnalysis& bca); + ~BytecodeFileAnalyzer(); + }; +} + +static std::string ErrnoMessage (int savedErrNum, std::string descr) { + return ::strerror(savedErrNum) + std::string(", while trying to ") + descr; +} + +BytecodeFileAnalyzer::BytecodeFileAnalyzer(const std::string &Filename, + BytecodeAnalysis& bca) { + Buffer = (unsigned char*)ReadFileIntoAddressSpace(Filename, Length); + if (Buffer == 0) + throw "Error reading file '" + Filename + "'."; + + try { + // Parse the bytecode we mmapped in + if ( bca.dumpBytecode ) + DumpBytecode(Buffer, Length, bca, Filename); + AnalyzeBytecode(Buffer, Length, bca, Filename); + } catch (...) { + UnmapFileFromAddressSpace(Buffer, Length); + throw; + } +} + +BytecodeFileAnalyzer::~BytecodeFileAnalyzer() { + // Unmmap the bytecode... 
+ UnmapFileFromAddressSpace(Buffer, Length); +} + +//===----------------------------------------------------------------------===// +// BytecodeBufferAnalyzer - Read from a memory buffer +// + +namespace { + /// BytecodeBufferAnalyzer - parses a bytecode file from a buffer + /// + class BytecodeBufferAnalyzer : public BytecodeAnalyzer { + private: + const unsigned char *Buffer; + bool MustDelete; + + BytecodeBufferAnalyzer(const BytecodeBufferAnalyzer&); // Do not implement + void operator=(const BytecodeBufferAnalyzer &BFR); // Do not implement + + public: + BytecodeBufferAnalyzer(const unsigned char *Buf, unsigned Length, + BytecodeAnalysis& bca, const std::string &ModuleID); + ~BytecodeBufferAnalyzer(); + + }; +} + +BytecodeBufferAnalyzer::BytecodeBufferAnalyzer(const unsigned char *Buf, + unsigned Length, + BytecodeAnalysis& bca, + const std::string &ModuleID) { + // If not aligned, allocate a new buffer to hold the bytecode... + const unsigned char *ParseBegin = 0; + if ((intptr_t)Buf & 3) { + Buffer = new unsigned char[Length+4]; + unsigned Offset = 4 - ((intptr_t)Buffer & 3); // Make sure it's aligned + ParseBegin = Buffer + Offset; + memcpy((unsigned char*)ParseBegin, Buf, Length); // Copy it over + MustDelete = true; + } else { + // If we don't need to copy it over, just use the caller's copy + ParseBegin = Buffer = Buf; + MustDelete = false; + } + try { + if ( bca.dumpBytecode ) + DumpBytecode(ParseBegin, Length, bca, ModuleID); + AnalyzeBytecode(ParseBegin, Length, bca, ModuleID); + } catch (...) 
{ + if (MustDelete) delete [] Buffer; + throw; + } +} + +BytecodeBufferAnalyzer::~BytecodeBufferAnalyzer() { + if (MustDelete) delete [] Buffer; +} + +//===----------------------------------------------------------------------===// +// BytecodeStdinAnalyzer - Read bytecode from Standard Input +// + +namespace { + /// BytecodeStdinAnalyzer - parses a bytecode file from stdin + /// + class BytecodeStdinAnalyzer : public BytecodeAnalyzer { + private: + std::vector<unsigned char> FileData; + unsigned char *FileBuf; + + BytecodeStdinAnalyzer(const BytecodeStdinAnalyzer&); // Do not implement + void operator=(const BytecodeStdinAnalyzer &BFR); // Do not implement + + public: + BytecodeStdinAnalyzer(BytecodeAnalysis& bca); + }; +} + +BytecodeStdinAnalyzer::BytecodeStdinAnalyzer(BytecodeAnalysis& bca ) { + int BlockSize; + unsigned char Buffer[4096*4]; + + // Read in all of the data from stdin, we cannot mmap stdin... + while ((BlockSize = ::read(0 /*stdin*/, Buffer, 4096*4))) { + if (BlockSize == -1) + throw ErrnoMessage(errno, "read from standard input"); + + FileData.insert(FileData.end(), Buffer, Buffer+BlockSize); + } + + if (FileData.empty()) + throw std::string("Standard Input empty!"); + + FileBuf = &FileData[0]; + if (bca.dumpBytecode) + DumpBytecode(&FileData[0], FileData.size(), bca, "<stdin>"); + AnalyzeBytecode(FileBuf, FileData.size(), bca, "<stdin>"); +} + +//===----------------------------------------------------------------------===// +// Wrapper functions +//===----------------------------------------------------------------------===// + +// AnalyzeBytecodeFile - analyze one file +void llvm::AnalyzeBytecodeFile(const std::string &Filename, + BytecodeAnalysis& bca, + std::string *ErrorStr) +{ + try { + if ( Filename != "-" ) + BytecodeFileAnalyzer bfa(Filename,bca); + else + BytecodeStdinAnalyzer bsa(bca); + } catch (std::string &err) { + if (ErrorStr) *ErrorStr = err; + } +} + +// AnalyzeBytecodeBuffer - analyze a buffer +void llvm::AnalyzeBytecodeBuffer( 
+ const unsigned char* Buffer, ///< Pointer to start of bytecode buffer + unsigned BufferSize, ///< Size of the bytecode buffer + BytecodeAnalysis& Results, ///< The results of the analysis + std::string* ErrorStr ///< Errors, if any. + ) +{ + try { + BytecodeBufferAnalyzer(Buffer, BufferSize, Results, "<buffer>" ); + } catch (std::string& err ) { + if ( ErrorStr) *ErrorStr = err; + } +} + + +/// This function prints the contents of the BytecodeAnalysis structure in +/// a human legible form. +/// @brief Print BytecodeAnalysis structure to an ostream +void llvm::PrintBytecodeAnalysis(BytecodeAnalysis& bca, std::ostream& Out ) +{ + Out << "Not Implemented Yet.\n"; +} + +// vim: sw=2 diff --git a/lib/Bytecode/Analyzer/BytecodeHandler.cpp b/lib/Bytecode/Analyzer/BytecodeHandler.cpp new file mode 100644 index 0000000..2415958 --- /dev/null +++ b/lib/Bytecode/Analyzer/BytecodeHandler.cpp @@ -0,0 +1,220 @@ +//===-- BytecodeHandler.cpp - Parsing Handler -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Reid Spencer and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file defines the BytecodeHandler class that gets called by the +// AbstractBytecodeParser when parsing events occur. 
+// +//===----------------------------------------------------------------------===// + +#include "BytecodeHandler.h" + +using namespace llvm; + +bool BytecodeHandler::handleError(const std::string& str ) +{ + return false; +} + +void BytecodeHandler::handleStart() +{ +} + +void BytecodeHandler::handleFinish() +{ +} + +void BytecodeHandler::handleModuleBegin(const std::string& id) +{ +} + +void BytecodeHandler::handleModuleEnd(const std::string& id) +{ +} + +void BytecodeHandler::handleVersionInfo( + unsigned char RevisionNum, ///< Byte code revision number + Module::Endianness Endianness, ///< Endianness indicator + Module::PointerSize PointerSize ///< PointerSize indicator +) +{ +} + +void BytecodeHandler::handleModuleGlobalsBegin() +{ +} + +void BytecodeHandler::handleGlobalVariable( + const Type* ElemType, ///< The type of the global variable + bool isConstant, ///< Whether the GV is constant or not + GlobalValue::LinkageTypes ///< The linkage type of the GV +) +{ +} + +void BytecodeHandler::handleInitializedGV( + const Type* ElemType, ///< The type of the global variable + bool isConstant, ///< Whether the GV is constant or not + GlobalValue::LinkageTypes,///< The linkage type of the GV + unsigned initSlot ///< Slot number of GV's initializer +) +{ +} + +void BytecodeHandler::handleType( const Type* Ty ) +{ +} + +void BytecodeHandler::handleFunctionDeclaration( + const Type* FuncType ///< The type of the function +) +{ +} + +void BytecodeHandler::handleModuleGlobalsEnd() +{ +} + +void BytecodeHandler::handleCompactionTableBegin() +{ +} + +void BytecodeHandler::handleCompactionTablePlane( + unsigned Ty, + unsigned NumEntries +) +{ +} + +void BytecodeHandler::handleCompactionTableType( + unsigned i, + unsigned TypSlot, + const Type* +) +{ +} + +void BytecodeHandler::handleCompactionTableValue( + unsigned i, + unsigned ValSlot, + const Type* +) +{ +} + +void BytecodeHandler::handleCompactionTableEnd() +{ +} + +void BytecodeHandler::handleSymbolTableBegin() +{ +} 
+ +void BytecodeHandler::handleSymbolTablePlane( + unsigned Ty, + unsigned NumEntries, + const Type* Typ +) +{ +} + +void BytecodeHandler::handleSymbolTableType( + unsigned i, + unsigned slot, + const std::string& name +) +{ +} + +void BytecodeHandler::handleSymbolTableValue( + unsigned i, + unsigned slot, + const std::string& name +) +{ +} + +void BytecodeHandler::handleSymbolTableEnd() +{ +} + +void BytecodeHandler::handleFunctionBegin( + const Type* FType, + GlobalValue::LinkageTypes linkage +) +{ +} + +void BytecodeHandler::handleFunctionEnd( + const Type* FType +) +{ +} + +void BytecodeHandler::handleBasicBlockBegin( + unsigned blocknum +) +{ +} + +bool BytecodeHandler::handleInstruction( + unsigned Opcode, + const Type* iType, + std::vector<unsigned>& Operands +) +{ + return false; +} + +void BytecodeHandler::handleBasicBlockEnd(unsigned blocknum) +{ +} + +void BytecodeHandler::handleGlobalConstantsBegin() +{ +} + +void BytecodeHandler::handleConstantExpression( + unsigned Opcode, + const Type* Typ, + std::vector<std::pair<const Type*,unsigned> > ArgVec + ) +{ +} + +void BytecodeHandler::handleConstantValue( Constant * c ) +{ +} + +void BytecodeHandler::handleConstantArray( + const ArrayType* AT, + std::vector<unsigned>& Elements ) +{ +} + +void BytecodeHandler::handleConstantStruct( + const StructType* ST, + std::vector<unsigned>& ElementSlots) +{ +} + +void BytecodeHandler::handleConstantPointer( + const PointerType* PT, unsigned Slot) +{ +} + +void BytecodeHandler::handleConstantString( const ConstantArray* CA ) +{ +} + + +void BytecodeHandler::handleGlobalConstantsEnd() +{ +} + +// vim: sw=2 diff --git a/lib/Bytecode/Analyzer/BytecodeHandler.h b/lib/Bytecode/Analyzer/BytecodeHandler.h new file mode 100644 index 0000000..2b03e2d --- /dev/null +++ b/lib/Bytecode/Analyzer/BytecodeHandler.h @@ -0,0 +1,247 @@ +//===-- BytecodeHandler.h - Parsing Handler ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was 
developed by Reid Spencer and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file defines the BytecodeHandler class that gets called by the +// AbstractBytecodeParser when parsing events occur. +// +//===----------------------------------------------------------------------===// + +#ifndef BYTECODE_HANDLER_H +#define BYTECODE_HANDLER_H + +#include "llvm/Module.h" +#include "llvm/GlobalValue.h" +#include <vector> + +namespace llvm { + +class ArrayType; +class StructType; +class PointerType; +class ConstantArray; + +/// This class provides the interface for handling bytecode events during +/// parsing. The methods on this interface are invoked by the +/// AbstractBytecodeParser as it discovers the content of a bytecode stream. +/// This class provides a clear separation of concerns between recognizing +/// the semantic units of a bytecode file and deciding what to do with them. +/// The AbstractBytecodeParser recognizes the content of the bytecode file and +/// calls the BytecodeHandler methods to determine what should be done. This +/// arrangement allows Bytecode files to be read and handled for a number of +/// purposes simply by creating a subclass of BytecodeHandler. None of the +/// parsing details need to be understood, only the meaning of the calls +/// made on this interface. +/// +/// Another paradigm that uses this design pattern is the XML SAX Parser. The +/// ContentHandler for SAX plays the same role as the BytecodeHandler here. 
+/// @brief Handle Bytecode Parsing Events +class BytecodeHandler { + +/// @name Constructors And Operators +/// @{ +public: + /// @brief Default constructor (empty) + BytecodeHandler() {} + /// @brief Virtual destructor (empty) + virtual ~BytecodeHandler() {} + +private: + BytecodeHandler(const BytecodeHandler &); // DO NOT IMPLEMENT + void operator=(const BytecodeHandler &); // DO NOT IMPLEMENT + +/// @} +/// @name Handler Methods +/// @{ +public: + + /// This method is called whenever the parser detects an error in the + /// bytecode formatting. Returning true will cause the parser to keep + /// going, however this is inadvisable in most cases. Returning false will + /// cause the parser to throw the message as a std::string. + /// @brief Handle parsing errors. + virtual bool handleError(const std::string& str ); + + /// This method is called at the beginning of a parse before anything is + /// read in order to give the handler a chance to initialize. + /// @brief Handle the start of a bytecode parse + virtual void handleStart(); + + /// This method is called at the end of a parse after everything has been + /// read in order to give the handler a chance to terminate. + /// @brief Handle the end of a bytecode parse + virtual void handleFinish(); + + /// This method is called at the start of a module to indicate that a + /// module is being parsed. + /// @brief Handle the start of a module. + virtual void handleModuleBegin(const std::string& id); + + /// This method is called at the end of a module to indicate that the module + /// previously being parsed has concluded. + /// @brief Handle the end of a module. + virtual void handleModuleEnd(const std::string& id); + + /// This method is called once the version information has been parsed. It + /// provides the information about the version of the bytecode file being + /// read. 
+ /// @brief Handle the bytecode prolog + virtual void handleVersionInfo( + unsigned char RevisionNum, ///< Byte code revision number + Module::Endianness Endianness, ///< Endianness indicator + Module::PointerSize PointerSize ///< PointerSize indicator + ); + + /// This method is called at the start of a module globals block which + /// contains the global variables and the function placeholders + virtual void handleModuleGlobalsBegin(); + + /// This method is called when a non-initialized global variable is + /// recognized. Its type, constness, and linkage type are provided. + /// @brief Handle a non-initialized global variable + virtual void handleGlobalVariable( + const Type* ElemType, ///< The type of the global variable + bool isConstant, ///< Whether the GV is constant or not + GlobalValue::LinkageTypes ///< The linkage type of the GV + ); + + /// This method is called when an initialized global variable is recognized. + /// Its type, constness, linkage type, and the slot number of the initializer + /// are provided. + /// @brief Handle an initialized global variable. + virtual void handleInitializedGV( + const Type* ElemType, ///< The type of the global variable + bool isConstant, ///< Whether the GV is constant or not + GlobalValue::LinkageTypes,///< The linkage type of the GV + unsigned initSlot ///< Slot number of GV's initializer + ); + + /// This method is called when a new type is recognized. The type is + /// converted from the bytecode and passed to this method. + /// @brief Handle a type + virtual void handleType( const Type* Ty ); + + /// This method is called when the function prototype for a function is + /// encountered in the module globals block. + virtual void handleFunctionDeclaration( + const Type* FuncType ///< The type of the function + ); + + /// This method is called at the end of the module globals block. + /// @brief Handle end of module globals block. 
+ virtual void handleModuleGlobalsEnd(); + + /// This method is called at the beginning of a compaction table. + /// @brief Handle start of compaction table. + virtual void handleCompactionTableBegin(); + virtual void handleCompactionTablePlane( + unsigned Ty, + unsigned NumEntries + ); + + virtual void handleCompactionTableType( + unsigned i, + unsigned TypSlot, + const Type* + ); + + virtual void handleCompactionTableValue( + unsigned i, + unsigned ValSlot, + const Type* + ); + + virtual void handleCompactionTableEnd(); + + virtual void handleSymbolTableBegin(); + + virtual void handleSymbolTablePlane( + unsigned Ty, + unsigned NumEntries, + const Type* Typ ///< Named Typ as in the definitions; a second 'Ty' would redeclare the first parameter + ); + + virtual void handleSymbolTableType( + unsigned i, + unsigned slot, + const std::string& name + ); + + virtual void handleSymbolTableValue( + unsigned i, + unsigned slot, + const std::string& name + ); + + virtual void handleSymbolTableEnd(); + + virtual void handleFunctionBegin( + const Type* FType, + GlobalValue::LinkageTypes linkage + ); + + virtual void handleFunctionEnd( + const Type* FType + ); + + virtual void handleBasicBlockBegin( + unsigned blocknum + ); + + /// This method is called for each instruction that is parsed. + /// @returns true if the instruction is a block terminating instruction + /// @brief Handle an instruction + virtual bool handleInstruction( + unsigned Opcode, + const Type* iType, + std::vector<unsigned>& Operands + ); + + /// This method is called for each block that is parsed. + virtual void handleBasicBlockEnd(unsigned blocknum); + /// This method is called at the start of the global constants block. + /// @brief Handle start of global constants block. 
+ virtual void handleGlobalConstantsBegin(); + + virtual void handleConstantExpression( + unsigned Opcode, + const Type* Typ, + std::vector<std::pair<const Type*,unsigned> > ArgVec + ); + + virtual void handleConstantArray( + const ArrayType* AT, + std::vector<unsigned>& ElementSlots + ); + + virtual void handleConstantStruct( + const StructType* ST, + std::vector<unsigned>& ElementSlots + ); + + virtual void handleConstantPointer( + const PointerType* PT, + unsigned Slot + ); + + virtual void handleConstantString( + const ConstantArray* CA + ); + + virtual void handleConstantValue( Constant * c ); + virtual void handleGlobalConstantsEnd(); + +/// @} + +}; + +} // End llvm namespace + +#endif + +// vim: sw=2 diff --git a/lib/Bytecode/Analyzer/Dumper.cpp b/lib/Bytecode/Analyzer/Dumper.cpp new file mode 100644 index 0000000..6ff4ea0 --- /dev/null +++ b/lib/Bytecode/Analyzer/Dumper.cpp @@ -0,0 +1,311 @@ +//===-- BytecodeDumper.cpp - Parsing Handler --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Reid Spencer and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file defines the BytecodeDumper class that gets called by the +// AbstractBytecodeParser when parsing events occur. It merely dumps the +// information presented to it from the parser. 
+// +//===----------------------------------------------------------------------===// + +#include "AnalyzerInternals.h" +#include "llvm/Constant.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instruction.h" +#include "llvm/Type.h" + +using namespace llvm; + +namespace { + +class BytecodeDumper : public llvm::BytecodeHandler { +public: + + virtual bool handleError(const std::string& str ) + { + std::cout << "ERROR: " << str << "\n"; + return true; + } + + virtual void handleStart() + { + std::cout << "Bytecode {\n"; + } + + virtual void handleFinish() + { + std::cout << "} End Bytecode\n"; + } + + virtual void handleModuleBegin(const std::string& id) + { + std::cout << " Module " << id << " {\n"; + } + + virtual void handleModuleEnd(const std::string& id) + { + std::cout << " } End Module " << id << "\n"; + } + + virtual void handleVersionInfo( + unsigned char RevisionNum, ///< Byte code revision number + Module::Endianness Endianness, ///< Endianness indicator + Module::PointerSize PointerSize ///< PointerSize indicator + ) + { + std::cout << " RevisionNum: " << int(RevisionNum) + << " Endianness: " << Endianness + << " PointerSize: " << PointerSize << "\n"; + } + + virtual void handleModuleGlobalsBegin() + { + std::cout << " BLOCK: ModuleGlobalInfo {\n"; + } + + virtual void handleGlobalVariable( + const Type* ElemType, ///< The type of the global variable + bool isConstant, ///< Whether the GV is constant or not + GlobalValue::LinkageTypes Linkage ///< The linkage type of the GV + ) + { + std::cout << " GV: Uninitialized, " + << ( isConstant? 
"Constant, " : "Variable, ") + << " Linkage=" << Linkage << " Type=" + << ElemType->getDescription() << "\n"; + } + + virtual void handleInitializedGV( + const Type* ElemType, ///< The type of the global variable + bool isConstant, ///< Whether the GV is constant or not + GlobalValue::LinkageTypes Linkage,///< The linkage type of the GV + unsigned initSlot ///< Slot number of GV's initializer + ) + { + std::cout << " GV: Initialized, " + << ( isConstant? "Constant, " : "Variable, ") + << " Linkage=" << Linkage << " Type=" + << ElemType->getDescription() + << " InitializerSlot=" << initSlot << "\n"; + } + + virtual void handleType( const Type* Ty ) + { + std::cout << " Type: " << Ty->getDescription() << "\n"; + } + + virtual void handleFunctionDeclaration( const Type* FuncType ) + { + std::cout << " Function: " << FuncType->getDescription() << "\n"; + } + + virtual void handleModuleGlobalsEnd() + { + std::cout << " } END BLOCK: ModuleGlobalInfo\n"; + } + + void handleCompactionTableBegin() + { + std::cout << " BLOCK: CompactionTable {\n"; + } + + virtual void handleCompactionTablePlane( unsigned Ty, unsigned NumEntries ) + { + std::cout << " Plane: Ty=" << Ty << " Size=" << NumEntries << "\n"; + } + + virtual void handleCompactionTableType( + unsigned i, + unsigned TypSlot, + const Type* Ty + ) + { + std::cout << " Type: " << i << " Slot:" << TypSlot + << " is " << Ty->getDescription() << "\n"; + } + + virtual void handleCompactionTableValue( + unsigned i, + unsigned ValSlot, + const Type* Ty + ) + { + std::cout << " Value: " << i << " Slot:" << ValSlot + << " is " << Ty->getDescription() << "\n"; + } + + virtual void handleCompactionTableEnd() + { + std::cout << " } END BLOCK: CompactionTable\n"; + } + + virtual void handleSymbolTableBegin() + { + std::cout << " BLOCK: SymbolTable {\n"; + } + + virtual void handleSymbolTablePlane( + unsigned Ty, + unsigned NumEntries, + const Type* Typ + ) + { + std::cout << " Plane: Ty=" << Ty << " Size=" << NumEntries + << " 
Type: " << Typ->getDescription() << "\n"; + } + + virtual void handleSymbolTableType( + unsigned i, + unsigned slot, + const std::string& name + ) + { + std::cout << " Type " << i << " Slot=" << slot + << " Name: " << name << "\n"; + } + + virtual void handleSymbolTableValue( + unsigned i, + unsigned slot, + const std::string& name + ) + { + std::cout << " Value " << i << " Slot=" << slot + << " Name: " << name << "\n"; + } + + virtual void handleSymbolTableEnd() + { + std::cout << " } END BLOCK: SymbolTable\n"; + } + + virtual void handleFunctionBegin( + const Type* FType, + GlobalValue::LinkageTypes linkage + ) + { + std::cout << " BLOCK: Function {\n"; + std::cout << " Linkage: " << linkage << "\n"; + std::cout << " Type: " << FType->getDescription() << "\n"; + } + + virtual void handleFunctionEnd( + const Type* FType + ) + { + std::cout << " } END BLOCK: Function\n"; + } + + virtual void handleBasicBlockBegin( + unsigned blocknum + ) + { + std::cout << " BLOCK: BasicBlock #" << blocknum << "{\n"; + } + + virtual bool handleInstruction( + unsigned Opcode, + const Type* iType, + std::vector<unsigned>& Operands + ) + { + std::cout << " INST: OpCode=" + << Instruction::getOpcodeName(Opcode) << " Type=" + << iType->getDescription() << "\n"; + for ( unsigned i = 0; i < Operands.size(); ++i ) + std::cout << " Op#" << i << " Slot=" << Operands[i] << "\n"; + + return Instruction::isTerminator(Opcode); + } + + virtual void handleBasicBlockEnd(unsigned blocknum) + { + std::cout << " } END BLOCK: BasicBlock #" << blocknum << "\n"; // closing line: no '{', matching the other END BLOCK messages + } + + virtual void handleGlobalConstantsBegin() + { + std::cout << " BLOCK: GlobalConstants {\n"; + } + + virtual void handleConstantExpression( + unsigned Opcode, + const Type* Typ, + std::vector<std::pair<const Type*,unsigned> > ArgVec + ) + { + std::cout << " EXPR: " << Instruction::getOpcodeName(Opcode) + << " Type=" << Typ->getDescription() << "\n"; + for ( unsigned i = 0; i < ArgVec.size(); ++i ) + std::cout << " Arg#" << i << " Type=" + 
<< ArgVec[i].first->getDescription() << " Slot=" + << ArgVec[i].second << "\n"; + } + + virtual void handleConstantValue( Constant * c ) + { + std::cout << " VALUE: "; + c->print(std::cout); + std::cout << "\n"; + } + + virtual void handleConstantArray( + const ArrayType* AT, + std::vector<unsigned>& Elements ) + { + std::cout << " ARRAY: " << AT->getDescription() << "\n"; + for ( unsigned i = 0; i < Elements.size(); ++i ) + std::cout << " #" << i << " Slot=" << Elements[i] << "\n"; + } + + virtual void handleConstantStruct( + const StructType* ST, + std::vector<unsigned>& Elements) + { + std::cout << " STRUC: " << ST->getDescription() << "\n"; + for ( unsigned i = 0; i < Elements.size(); ++i ) + std::cout << " #" << i << " Slot=" << Elements[i] << "\n"; + } + + virtual void handleConstantPointer( + const PointerType* PT, unsigned Slot) + { + std::cout << " POINT: " << PT->getDescription() + << " Slot=" << Slot << "\n"; + } + + virtual void handleConstantString( const ConstantArray* CA ) + { + std::cout << " STRNG: "; + CA->print(std::cout); + std::cout << "\n"; + } + + virtual void handleGlobalConstantsEnd() + { + std::cout << " } END BLOCK: GlobalConstants\n"; + } +}; + +} + +void BytecodeAnalyzer::DumpBytecode( + const unsigned char *Buf, + unsigned Length, + BytecodeAnalysis& bca, + const std::string &ModuleID + ) +{ + BytecodeDumper TheHandler; + AbstractBytecodeParser TheParser(&TheHandler); + TheParser.ParseBytecode( Buf, Length, ModuleID ); + TheParser.ParseAllFunctionBodies(); +} + +// vim: sw=2 diff --git a/lib/Bytecode/Analyzer/Makefile b/lib/Bytecode/Analyzer/Makefile new file mode 100644 index 0000000..f3327cd --- /dev/null +++ b/lib/Bytecode/Analyzer/Makefile @@ -0,0 +1,13 @@ +##===- lib/Bytecode/Reader/Makefile ------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file was developed by the LLVM research group and is distributed under +# the University of Illinois Open Source License. 
See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +LIBRARYNAME = bcanalyzer + +include $(LEVEL)/Makefile.common + diff --git a/lib/Bytecode/Analyzer/Parser.cpp b/lib/Bytecode/Analyzer/Parser.cpp new file mode 100644 index 0000000..d236b64 --- /dev/null +++ b/lib/Bytecode/Analyzer/Parser.cpp @@ -0,0 +1,877 @@ +//===- Reader.cpp - Code to read bytecode files ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This library implements the functionality defined in llvm/Bytecode/Reader.h +// +// Note that this library should be as fast as possible, reentrant, and +// threadsafe!! +// +// TODO: Allow passing in an option to ignore the symbol table +// +//===----------------------------------------------------------------------===// + +#include "AnalyzerInternals.h" +#include "llvm/Module.h" +#include "llvm/Bytecode/Format.h" +#include "Support/StringExtras.h" +#include <iostream> +#include <sstream> + +using namespace llvm; + +#define PARSE_ERROR(inserters) \ + { \ + std::ostringstream errormsg; \ + errormsg << inserters; \ + if ( ! handler->handleError( errormsg.str() ) ) \ + throw std::string(errormsg.str()); \ + } + +const Type *AbstractBytecodeParser::getType(unsigned ID) { + //cerr << "Looking up Type ID: " << ID << "\n"; + + if (ID < Type::FirstDerivedTyID) + if (const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID)) + return T; // Asked for a primitive type... + + // Otherwise, derived types need offset... 
+ ID -= Type::FirstDerivedTyID; + + if (!CompactionTypeTable.empty()) { + if (ID >= CompactionTypeTable.size()) + PARSE_ERROR("Type ID out of range for compaction table!"); + return CompactionTypeTable[ID]; + } + + // Is it a module-level type? + if (ID < ModuleTypes.size()) + return ModuleTypes[ID].get(); + + // Nope, is it a function-level type? + ID -= ModuleTypes.size(); + if (ID < FunctionTypes.size()) + return FunctionTypes[ID].get(); + + PARSE_ERROR("Illegal type reference!"); + return Type::VoidTy; +} + +bool AbstractBytecodeParser::ParseInstruction(BufPtr& Buf, BufPtr EndBuf, + std::vector<unsigned> &Operands) { + Operands.clear(); + unsigned iType = 0; + unsigned Opcode = 0; + unsigned Op = read(Buf, EndBuf); + + // bits Instruction format: Common to all formats + // -------------------------- + // 01-00: Opcode type, fixed to 1. + // 07-02: Opcode + Opcode = (Op >> 2) & 63; + Operands.resize((Op >> 0) & 03); + + switch (Operands.size()) { + case 1: + // bits Instruction format: + // -------------------------- + // 19-08: Resulting type plane + // 31-20: Operand #1 (if set to (2^12-1), then zero operands) + // + iType = (Op >> 8) & 4095; + Operands[0] = (Op >> 20) & 4095; + if (Operands[0] == 4095) // Handle special encoding for 0 operands... + Operands.resize(0); + break; + case 2: + // bits Instruction format: + // -------------------------- + // 15-08: Resulting type plane + // 23-16: Operand #1 + // 31-24: Operand #2 + // + iType = (Op >> 8) & 255; + Operands[0] = (Op >> 16) & 255; + Operands[1] = (Op >> 24) & 255; + break; + case 3: + // bits Instruction format: + // -------------------------- + // 13-08: Resulting type plane + // 19-14: Operand #1 + // 25-20: Operand #2 + // 31-26: Operand #3 + // + iType = (Op >> 8) & 63; + Operands[0] = (Op >> 14) & 63; + Operands[1] = (Op >> 20) & 63; + Operands[2] = (Op >> 26) & 63; + break; + case 0: + Buf -= 4; // Hrm, try this again... 
+ Opcode = read_vbr_uint(Buf, EndBuf); + Opcode >>= 2; + iType = read_vbr_uint(Buf, EndBuf); + + unsigned NumOperands = read_vbr_uint(Buf, EndBuf); + Operands.resize(NumOperands); + + if (NumOperands == 0) + PARSE_ERROR("Zero-argument instruction found; this is invalid."); + + for (unsigned i = 0; i != NumOperands; ++i) + Operands[i] = read_vbr_uint(Buf, EndBuf); + align32(Buf, EndBuf); + break; + } + + return handler->handleInstruction(Opcode, getType(iType), Operands); +} + +/// ParseBasicBlock - In LLVM 1.0 bytecode files, we used to output one +/// basicblock at a time. This method reads in one of the basicblock packets. +void AbstractBytecodeParser::ParseBasicBlock(BufPtr &Buf, + BufPtr EndBuf, + unsigned BlockNo) { + handler->handleBasicBlockBegin( BlockNo ); + + std::vector<unsigned> Args; + bool is_terminating = false; + while (Buf < EndBuf) + is_terminating = ParseInstruction(Buf, EndBuf, Args); + + if ( ! is_terminating ) + PARSE_ERROR( + "Failed to recognize instruction as terminating at end of block"); + + handler->handleBasicBlockEnd( BlockNo ); +} + + +/// ParseInstructionList - Parse all of the BasicBlock's & Instruction's in the +/// body of a function. In post 1.0 bytecode files, we no longer emit basic +/// block individually, in order to avoid per-basic-block overhead. 
+unsigned AbstractBytecodeParser::ParseInstructionList( BufPtr &Buf, BufPtr EndBuf) { + unsigned BlockNo = 0; + std::vector<unsigned> Args; + + while (Buf < EndBuf) { + handler->handleBasicBlockBegin( BlockNo ); + + // Read instructions into this basic block until we get to a terminator + bool is_terminating = false; + while (Buf < EndBuf && !is_terminating ) + is_terminating = ParseInstruction(Buf, EndBuf, Args ) ; + + if (!is_terminating) + PARSE_ERROR( "Non-terminated basic block found!"); + + handler->handleBasicBlockEnd( BlockNo ); + ++BlockNo; + } + return BlockNo; +} + +void AbstractBytecodeParser::ParseSymbolTable(BufPtr &Buf, BufPtr EndBuf) { + handler->handleSymbolTableBegin(); + + while (Buf < EndBuf) { + // Symtab block header: [num entries][type id number] + unsigned NumEntries = read_vbr_uint(Buf, EndBuf); + unsigned Typ = read_vbr_uint(Buf, EndBuf); + const Type *Ty = getType(Typ); + + handler->handleSymbolTablePlane( Typ, NumEntries, Ty ); + + for (unsigned i = 0; i != NumEntries; ++i) { + // Symtab entry: [def slot #][name] + unsigned slot = read_vbr_uint(Buf, EndBuf); + std::string Name = read_str(Buf, EndBuf); + + if (Typ == Type::TypeTyID) + handler->handleSymbolTableType( i, slot, Name ); + else + handler->handleSymbolTableValue( i, slot, Name ); + } + } + + if (Buf > EndBuf) + PARSE_ERROR("Tried to read past end of buffer while reading symbol table."); + + handler->handleSymbolTableEnd(); +} + +void AbstractBytecodeParser::ParseFunctionLazily(BufPtr &Buf, BufPtr EndBuf) { + if (FunctionSignatureList.empty()) + throw std::string("FunctionSignatureList empty!"); + + const Type *FType = FunctionSignatureList.back(); + FunctionSignatureList.pop_back(); + + // Save the information for future reading of the function + LazyFunctionLoadMap[FType] = LazyFunctionInfo(Buf, EndBuf); + // Pretend we've `parsed' this function + Buf = EndBuf; +} + +void AbstractBytecodeParser::ParseNextFunction(Type* FType) { + // Find {start, end} pointers and slot in the 
map. If not there, we're done. + LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.find(FType); + + // Make sure we found it + if ( Fi == LazyFunctionLoadMap.end() ) { + PARSE_ERROR("Unrecognized function of type " << FType->getDescription()); + return; + } + + BufPtr Buf = Fi->second.Buf; + BufPtr EndBuf = Fi->second.EndBuf; + assert(Fi->first == FType); + + LazyFunctionLoadMap.erase(Fi); + + this->ParseFunctionBody( FType, Buf, EndBuf ); +} + +void AbstractBytecodeParser::ParseFunctionBody(const Type* FType, + BufPtr &Buf, BufPtr EndBuf ) { + + GlobalValue::LinkageTypes Linkage = GlobalValue::ExternalLinkage; + + unsigned LinkageType = read_vbr_uint(Buf, EndBuf); + switch (LinkageType) { + case 0: Linkage = GlobalValue::ExternalLinkage; break; + case 1: Linkage = GlobalValue::WeakLinkage; break; + case 2: Linkage = GlobalValue::AppendingLinkage; break; + case 3: Linkage = GlobalValue::InternalLinkage; break; + case 4: Linkage = GlobalValue::LinkOnceLinkage; break; + default: + PARSE_ERROR("Invalid linkage type for Function."); + Linkage = GlobalValue::InternalLinkage; + break; + } + + handler->handleFunctionBegin(FType,Linkage); + + // Keep track of how many basic blocks we have read in... 
+ unsigned BlockNum = 0; + bool InsertedArguments = false; + + while (Buf < EndBuf) { + unsigned Type, Size; + BufPtr OldBuf = Buf; + readBlock(Buf, EndBuf, Type, Size); + + switch (Type) { + case BytecodeFormat::ConstantPool: + ParseConstantPool(Buf, Buf+Size, FunctionTypes ); + break; + + case BytecodeFormat::CompactionTable: + ParseCompactionTable(Buf, Buf+Size); + break; + + case BytecodeFormat::BasicBlock: + ParseBasicBlock(Buf, Buf+Size, BlockNum++); + break; + + case BytecodeFormat::InstructionList: + if (BlockNum) + PARSE_ERROR("InstructionList must come before basic blocks!"); + BlockNum = ParseInstructionList(Buf, Buf+Size); + break; + + case BytecodeFormat::SymbolTable: + ParseSymbolTable(Buf, Buf+Size ); + break; + + default: + Buf += Size; + if (OldBuf > Buf) + PARSE_ERROR("Wrapped around reading bytecode"); + break; + } + + // Malformed bc file if read past end of block. + align32(Buf, EndBuf); + } + + handler->handleFunctionEnd(FType); + + // Clear out function-level types... 
+ FunctionTypes.clear(); + CompactionTypeTable.clear(); +} + +void AbstractBytecodeParser::ParseAllFunctionBodies() { + LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.begin(); + LazyFunctionMap::iterator Fe = LazyFunctionLoadMap.end(); + + while ( Fi != Fe ) { + const Type* FType = Fi->first; + this->ParseFunctionBody(FType, Fi->second.Buf, Fi->second.EndBuf); + } +} + +void AbstractBytecodeParser::ParseCompactionTable(BufPtr &Buf, BufPtr End) { + + handler->handleCompactionTableBegin(); + + while (Buf != End) { + unsigned NumEntries = read_vbr_uint(Buf, End); + unsigned Ty; + + if ((NumEntries & 3) == 3) { + NumEntries >>= 2; + Ty = read_vbr_uint(Buf, End); + } else { + Ty = NumEntries >> 2; + NumEntries &= 3; + } + + handler->handleCompactionTablePlane( Ty, NumEntries ); + + if (Ty == Type::TypeTyID) { + for (unsigned i = 0; i != NumEntries; ++i) { + unsigned TypeSlot = read_vbr_uint(Buf,End); + const Type *Typ = getGlobalTableType(TypeSlot); + handler->handleCompactionTableType( i, TypeSlot, Typ ); + } + } else { + const Type *Typ = getType(Ty); + // Push the implicit zero + for (unsigned i = 0; i != NumEntries; ++i) { + unsigned ValSlot = read_vbr_uint(Buf, End); + handler->handleCompactionTableValue( i, ValSlot, Typ ); + } + } + } + handler->handleCompactionTableEnd(); +} + +const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf, + const unsigned char *EndBuf) { + unsigned PrimType = read_vbr_uint(Buf, EndBuf); + + const Type *Val = 0; + if ((Val = Type::getPrimitiveType((Type::PrimitiveID)PrimType))) + return Val; + + switch (PrimType) { + case Type::FunctionTyID: { + const Type *RetType = getType(read_vbr_uint(Buf, EndBuf)); + + unsigned NumParams = read_vbr_uint(Buf, EndBuf); + + std::vector<const Type*> Params; + while (NumParams--) + Params.push_back(getType(read_vbr_uint(Buf, EndBuf))); + + bool isVarArg = Params.size() && Params.back() == Type::VoidTy; + if (isVarArg) Params.pop_back(); + + Type* result = 
FunctionType::get(RetType, Params, isVarArg); + handler->handleType( result ); + return result; + } + case Type::ArrayTyID: { + unsigned ElTyp = read_vbr_uint(Buf, EndBuf); + const Type *ElementType = getType(ElTyp); + + unsigned NumElements = read_vbr_uint(Buf, EndBuf); + + BCR_TRACE(5, "Array Type Constant #" << ElTyp << " size=" + << NumElements << "\n"); + Type* result = ArrayType::get(ElementType, NumElements); + handler->handleType( result ); + return result; + } + case Type::StructTyID: { + std::vector<const Type*> Elements; + unsigned Typ = read_vbr_uint(Buf, EndBuf); + while (Typ) { // List is terminated by void/0 typeid + Elements.push_back(getType(Typ)); + Typ = read_vbr_uint(Buf, EndBuf); + } + + Type* result = StructType::get(Elements); + handler->handleType( result ); + return result; + } + case Type::PointerTyID: { + unsigned ElTyp = read_vbr_uint(Buf, EndBuf); + BCR_TRACE(5, "Pointer Type Constant #" << ElTyp << "\n"); + Type* result = PointerType::get(getType(ElTyp)); + handler->handleType( result ); + return result; + } + + case Type::OpaqueTyID: { + Type* result = OpaqueType::get(); + handler->handleType( result ); + return result; + } + + default: + PARSE_ERROR("Don't know how to deserialize primitive type" << PrimType << "\n"); + return Val; + } +} + +// ParseTypeConstants - We have to use this weird code to handle recursive +// types. We know that recursive types will only reference the current slab of +// values in the type plane, but they can forward reference types before they +// have been read. For example, Type #0 might be '{ Ty#1 }' and Type #1 might +// be 'Ty#0*'. When reading Type #0, type number one doesn't exist. To fix +// this ugly problem, we pessimistically insert an opaque type for each type we +// are about to read. This means that forward references will resolve to +// something and when we reread the type later, we can replace the opaque type +// with a new resolved concrete type. 
+// +void AbstractBytecodeParser::ParseTypeConstants(const unsigned char *&Buf, + const unsigned char *EndBuf, + TypeListTy &Tab, + unsigned NumEntries) { + assert(Tab.size() == 0 && "should not have read type constants in before!"); + + // Insert a bunch of opaque types to be resolved later... + Tab.reserve(NumEntries); + for (unsigned i = 0; i != NumEntries; ++i) + Tab.push_back(OpaqueType::get()); + + // Loop through reading all of the types. Forward types will make use of the + // opaque types just inserted. + // + for (unsigned i = 0; i != NumEntries; ++i) { + const Type *NewTy = ParseTypeConstant(Buf, EndBuf), *OldTy = Tab[i].get(); + if (NewTy == 0) throw std::string("Couldn't parse type!"); + BCR_TRACE(4, "#" << i << ": Read Type Constant: '" << NewTy << + "' Replacing: " << OldTy << "\n"); + + // Don't insertValue the new type... instead we want to replace the opaque + // type with the new concrete value... + // + + // Refine the abstract type to the new type. This causes all uses of the + // abstract type to use NewTy. This also will cause the opaque type to be + // deleted... + // + cast<DerivedType>(const_cast<Type*>(OldTy))->refineAbstractTypeTo(NewTy); + + // This should have replace the old opaque type with the new type in the + // value table... or with a preexisting type that was already in the system + assert(Tab[i] != OldTy && "refineAbstractType didn't work!"); + } + + BCR_TRACE(5, "Resulting types:\n"); + for (unsigned i = 0; i < NumEntries; ++i) { + BCR_TRACE(5, (void*)Tab[i].get() << " - " << Tab[i].get() << "\n"); + } +} + + +void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, + const unsigned char *EndBuf, + unsigned TypeID) { + + // We must check for a ConstantExpr before switching by type because + // a ConstantExpr can be of any type, and has no explicit value. 
+ // + // 0 if not expr; numArgs if is expr + unsigned isExprNumArgs = read_vbr_uint(Buf, EndBuf); + + if (isExprNumArgs) { + unsigned Opcode = read_vbr_uint(Buf, EndBuf); + const Type* Typ = getType(TypeID); + + // FIXME: Encoding of constant exprs could be much more compact! + std::vector<std::pair<const Type*,unsigned> > ArgVec; + ArgVec.reserve(isExprNumArgs); + + // Read the slot number and types of each of the arguments + for (unsigned i = 0; i != isExprNumArgs; ++i) { + unsigned ArgValSlot = read_vbr_uint(Buf, EndBuf); + unsigned ArgTypeSlot = read_vbr_uint(Buf, EndBuf); + BCR_TRACE(4, "CE Arg " << i << ": Type: '" << *getType(ArgTypeSlot) + << "' slot: " << ArgValSlot << "\n"); + + // Get the arg value from its slot if it exists, otherwise a placeholder + ArgVec.push_back(std::make_pair(getType(ArgTypeSlot), ArgValSlot)); + } + + handler->handleConstantExpression( Opcode, Typ, ArgVec ); + return; + } + + // Ok, not an ConstantExpr. We now know how to read the given type... + const Type *Ty = getType(TypeID); + switch (Ty->getPrimitiveID()) { + case Type::BoolTyID: { + unsigned Val = read_vbr_uint(Buf, EndBuf); + if (Val != 0 && Val != 1) + PARSE_ERROR("Invalid boolean value read."); + + handler->handleConstantValue( ConstantBool::get(Val == 1)); + break; + } + + case Type::UByteTyID: // Unsigned integer types... + case Type::UShortTyID: + case Type::UIntTyID: { + unsigned Val = read_vbr_uint(Buf, EndBuf); + if (!ConstantUInt::isValueValidForType(Ty, Val)) + throw std::string("Invalid unsigned byte/short/int read."); + handler->handleConstantValue( ConstantUInt::get(Ty, Val) ); + break; + } + + case Type::ULongTyID: { + handler->handleConstantValue( ConstantUInt::get(Ty, read_vbr_uint64(Buf, EndBuf)) ); + break; + } + + case Type::SByteTyID: // Signed integer types... 
+ case Type::ShortTyID: + case Type::IntTyID: { + case Type::LongTyID: + int64_t Val = read_vbr_int64(Buf, EndBuf); + if (!ConstantSInt::isValueValidForType(Ty, Val)) + throw std::string("Invalid signed byte/short/int/long read."); + handler->handleConstantValue( ConstantSInt::get(Ty, Val) ); + break; + } + + case Type::FloatTyID: { + float F; + input_data(Buf, EndBuf, &F, &F+1); + handler->handleConstantValue( ConstantFP::get(Ty, F) ); + break; + } + + case Type::DoubleTyID: { + double Val; + input_data(Buf, EndBuf, &Val, &Val+1); + handler->handleConstantValue( ConstantFP::get(Ty, Val) ); + break; + } + + case Type::TypeTyID: + PARSE_ERROR("Type constants shouldn't live in constant table!"); + break; + + case Type::ArrayTyID: { + const ArrayType *AT = cast<ArrayType>(Ty); + unsigned NumElements = AT->getNumElements(); + std::vector<unsigned> Elements; + Elements.reserve(NumElements); + while (NumElements--) // Read all of the elements of the constant. + Elements.push_back(read_vbr_uint(Buf, EndBuf)); + + handler->handleConstantArray( AT, Elements ); + break; + } + + case Type::StructTyID: { + const StructType *ST = cast<StructType>(Ty); + std::vector<unsigned> Elements; + Elements.reserve(ST->getNumElements()); + for (unsigned i = 0; i != ST->getNumElements(); ++i) + Elements.push_back(read_vbr_uint(Buf, EndBuf)); + + handler->handleConstantStruct( ST, Elements ); + } + + case Type::PointerTyID: { // ConstantPointerRef value... 
+ const PointerType *PT = cast<PointerType>(Ty); + unsigned Slot = read_vbr_uint(Buf, EndBuf); + handler->handleConstantPointer( PT, Slot ); + } + + default: + PARSE_ERROR("Don't know how to deserialize constant value of type '"+ + Ty->getDescription()); + } +} + +void AbstractBytecodeParser::ParseGlobalTypes(const unsigned char *&Buf, + const unsigned char *EndBuf) { + ParseConstantPool(Buf, EndBuf, ModuleTypes); +} + +void AbstractBytecodeParser::ParseStringConstants(const unsigned char *&Buf, + const unsigned char *EndBuf, + unsigned NumEntries ){ + for (; NumEntries; --NumEntries) { + unsigned Typ = read_vbr_uint(Buf, EndBuf); + const Type *Ty = getType(Typ); + if (!isa<ArrayType>(Ty)) + throw std::string("String constant data invalid!"); + + const ArrayType *ATy = cast<ArrayType>(Ty); + if (ATy->getElementType() != Type::SByteTy && + ATy->getElementType() != Type::UByteTy) + throw std::string("String constant data invalid!"); + + // Read character data. The type tells us how long the string is. + char Data[ATy->getNumElements()]; + input_data(Buf, EndBuf, Data, Data+ATy->getNumElements()); + + std::vector<Constant*> Elements(ATy->getNumElements()); + if (ATy->getElementType() == Type::SByteTy) + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) + Elements[i] = ConstantSInt::get(Type::SByteTy, (signed char)Data[i]); + else + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) + Elements[i] = ConstantUInt::get(Type::UByteTy, (unsigned char)Data[i]); + + // Create the constant, inserting it as needed. 
+ ConstantArray *C = cast<ConstantArray>( ConstantArray::get(ATy, Elements) ); + handler->handleConstantString( C ); + } +} + + +void AbstractBytecodeParser::ParseConstantPool(const unsigned char *&Buf, + const unsigned char *EndBuf, + TypeListTy &TypeTab) { + while (Buf < EndBuf) { + unsigned NumEntries = read_vbr_uint(Buf, EndBuf); + unsigned Typ = read_vbr_uint(Buf, EndBuf); + if (Typ == Type::TypeTyID) { + ParseTypeConstants(Buf, EndBuf, TypeTab, NumEntries); + } else if (Typ == Type::VoidTyID) { + ParseStringConstants(Buf, EndBuf, NumEntries); + } else { + BCR_TRACE(3, "Type: '" << *getType(Typ) << "' NumEntries: " + << NumEntries << "\n"); + + for (unsigned i = 0; i < NumEntries; ++i) { + ParseConstantValue(Buf, EndBuf, Typ); + } + } + } + + if (Buf > EndBuf) PARSE_ERROR("Read past end of buffer."); +} + +void AbstractBytecodeParser::ParseModuleGlobalInfo(BufPtr &Buf, BufPtr End) { + + handler->handleModuleGlobalsBegin(); + + // Read global variables... + unsigned VarType = read_vbr_uint(Buf, End); + while (VarType != Type::VoidTyID) { // List is terminated by Void + // VarType Fields: bit0 = isConstant, bit1 = hasInitializer, bit2,3,4 = + // Linkage, bit4+ = slot# + unsigned SlotNo = VarType >> 5; + unsigned LinkageID = (VarType >> 2) & 7; + bool isConstant = VarType & 1; + bool hasInitializer = VarType & 2; + GlobalValue::LinkageTypes Linkage; + + switch (LinkageID) { + case 0: Linkage = GlobalValue::ExternalLinkage; break; + case 1: Linkage = GlobalValue::WeakLinkage; break; + case 2: Linkage = GlobalValue::AppendingLinkage; break; + case 3: Linkage = GlobalValue::InternalLinkage; break; + case 4: Linkage = GlobalValue::LinkOnceLinkage; break; + default: + PARSE_ERROR("Unknown linkage type: " << LinkageID); + Linkage = GlobalValue::InternalLinkage; + break; + } + + const Type *Ty = getType(SlotNo); + if ( !Ty ) { + PARSE_ERROR("Global has no type! SlotNo=" << SlotNo); + } + + if ( !isa<PointerType>(Ty)) { + PARSE_ERROR("Global not a pointer type! 
Ty= " << Ty->getDescription()); + } + + const Type *ElTy = cast<PointerType>(Ty)->getElementType(); + + // Create the global variable... + if (hasInitializer) + handler->handleGlobalVariable( ElTy, isConstant, Linkage ); + else { + unsigned initSlot = read_vbr_uint(Buf,End); + handler->handleInitializedGV( ElTy, isConstant, Linkage, initSlot ); + } + + // Get next item + VarType = read_vbr_uint(Buf, End); + } + + // Read the function objects for all of the functions that are coming + unsigned FnSignature = read_vbr_uint(Buf, End); + while (FnSignature != Type::VoidTyID) { // List is terminated by Void + const Type *Ty = getType(FnSignature); + if (!isa<PointerType>(Ty) || + !isa<FunctionType>(cast<PointerType>(Ty)->getElementType())) { + PARSE_ERROR( "Function not a pointer to function type! Ty = " + + Ty->getDescription()); + // FIXME: what should Ty be if handler continues? + } + + // We create functions by passing the underlying FunctionType to create... + Ty = cast<PointerType>(Ty)->getElementType(); + + // Save this for later so we know type of lazily instantiated functions + FunctionSignatureList.push_back(Ty); + + handler->handleFunctionDeclaration(Ty); + + // Get Next function signature + FnSignature = read_vbr_uint(Buf, End); + } + + if (hasInconsistentModuleGlobalInfo) + align32(Buf, End); + + // This is for future proofing... in the future extra fields may be added that + // we don't understand, so we transparently ignore them. + // + Buf = End; + + handler->handleModuleGlobalsEnd(); +} + +void AbstractBytecodeParser::ParseVersionInfo(BufPtr &Buf, BufPtr EndBuf) { + unsigned Version = read_vbr_uint(Buf, EndBuf); + + // Unpack version number: low four bits are for flags, top bits = version + Module::Endianness Endianness; + Module::PointerSize PointerSize; + Endianness = (Version & 1) ? Module::BigEndian : Module::LittleEndian; + PointerSize = (Version & 2) ? 
Module::Pointer64 : Module::Pointer32; + + bool hasNoEndianness = Version & 4; + bool hasNoPointerSize = Version & 8; + + RevisionNum = Version >> 4; + + // Default values for the current bytecode version + hasInconsistentModuleGlobalInfo = false; + hasExplicitPrimitiveZeros = false; + hasRestrictedGEPTypes = false; + + switch (RevisionNum) { + case 0: // LLVM 1.0, 1.1 release version + // Base LLVM 1.0 bytecode format. + hasInconsistentModuleGlobalInfo = true; + hasExplicitPrimitiveZeros = true; + // FALL THROUGH + case 1: // LLVM 1.2 release version + // LLVM 1.2 added explicit support for emitting strings efficiently. + + // Also, it fixed the problem where the size of the ModuleGlobalInfo block + // included the size for the alignment at the end, where the rest of the + // blocks did not. + + // LLVM 1.2 and before required that GEP indices be ubyte constants for + // structures and longs for sequential types. + hasRestrictedGEPTypes = true; + + // FALL THROUGH + case 2: // LLVM 1.3 release version + break; + + default: + PARSE_ERROR("Unknown bytecode version number: " << RevisionNum); + } + + if (hasNoEndianness) Endianness = Module::AnyEndianness; + if (hasNoPointerSize) PointerSize = Module::AnyPointerSize; + + handler->handleVersionInfo(RevisionNum, Endianness, PointerSize ); +} + +void AbstractBytecodeParser::ParseModule(BufPtr &Buf, BufPtr EndBuf ) { + unsigned Type, Size; + readBlock(Buf, EndBuf, Type, Size); + if (Type != BytecodeFormat::Module || Buf+Size != EndBuf) + // Hrm, not a class? + PARSE_ERROR("Expected Module block! B: " << unsigned(intptr_t(Buf)) << + ", S: " << Size << " E: " << unsigned(intptr_t(EndBuf))); + + // Read into instance variables... 
+ ParseVersionInfo(Buf, EndBuf); + align32(Buf, EndBuf); + + bool SeenModuleGlobalInfo = false; + bool SeenGlobalTypePlane = false; + while (Buf < EndBuf) { + BufPtr OldBuf = Buf; + readBlock(Buf, EndBuf, Type, Size); + + switch (Type) { + + case BytecodeFormat::GlobalTypePlane: + if ( SeenGlobalTypePlane ) + PARSE_ERROR("Two GlobalTypePlane Blocks Encountered!"); + + ParseGlobalTypes(Buf, Buf+Size); + SeenGlobalTypePlane = true; + break; + + case BytecodeFormat::ModuleGlobalInfo: + if ( SeenModuleGlobalInfo ) + PARSE_ERROR("Two ModuleGlobalInfo Blocks Encountered!"); + ParseModuleGlobalInfo(Buf, Buf+Size); + SeenModuleGlobalInfo = true; + break; + + case BytecodeFormat::ConstantPool: + ParseConstantPool(Buf, Buf+Size, ModuleTypes); + break; + + case BytecodeFormat::Function: + ParseFunctionLazily(Buf, Buf+Size); + break; + + case BytecodeFormat::SymbolTable: + ParseSymbolTable(Buf, Buf+Size ); + break; + + default: + Buf += Size; + if (OldBuf > Buf) + { + PARSE_ERROR("Unexpected Block of Type" << Type << "encountered!" ); + } + break; + } + align32(Buf, EndBuf); + } +} + +void AbstractBytecodeParser::ParseBytecode( + BufPtr Buf, unsigned Length, + const std::string &ModuleID) { + + handler->handleStart(); + unsigned char *EndBuf = (unsigned char*)(Buf + Length); + + // Read and check signature... 
+ unsigned Sig = read(Buf, EndBuf); + if (Sig != ('l' | ('l' << 8) | ('v' << 16) | ('m' << 24))) { + PARSE_ERROR("Invalid bytecode signature: " << Sig); + } + + handler->handleModuleBegin(ModuleID); + + this->ParseModule(Buf, EndBuf); + + handler->handleModuleEnd(ModuleID); + + handler->handleFinish(); +} + +// vim: sw=2 diff --git a/lib/Bytecode/Analyzer/Parser.h b/lib/Bytecode/Analyzer/Parser.h new file mode 100644 index 0000000..027047b --- /dev/null +++ b/lib/Bytecode/Analyzer/Parser.h @@ -0,0 +1,178 @@ +//===-- Parser.h - Definitions internal to the reader -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Reid Spencer and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file defines the interface to the Bytecode Parser +// +//===----------------------------------------------------------------------===// + +#ifndef BYTECODE_PARSER_H +#define BYTECODE_PARSER_H + +#include "ReaderPrimitives.h" +#include "BytecodeHandler.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include <utility> +#include <vector> +#include <map> + +namespace llvm { + +struct LazyFunctionInfo { + const unsigned char *Buf, *EndBuf; + LazyFunctionInfo(const unsigned char *B = 0, const unsigned char *EB = 0) + : Buf(B), EndBuf(EB) {} +}; + +typedef std::map<const Type*, LazyFunctionInfo> LazyFunctionMap; + +class AbstractBytecodeParser { + AbstractBytecodeParser(const AbstractBytecodeParser &); // DO NOT IMPLEMENT + void operator=(const AbstractBytecodeParser &); // DO NOT IMPLEMENT +public: + AbstractBytecodeParser( BytecodeHandler* h ) { handler = h; } + ~AbstractBytecodeParser() { } + + void ParseBytecode(const unsigned char *Buf, unsigned Length, + const std::string &ModuleID); + + void dump() const { + std::cerr << "AbstractBytecodeParser instance!\n"; + } + 
+private: + // Information about the module, extracted from the bytecode revision number. + unsigned char RevisionNum; // The rev # itself + + // Flags to distinguish LLVM 1.0 & 1.1 bytecode formats (revision #0) + + // Revision #0 had an explicit alignment of data only for the ModuleGlobalInfo + // block. This was fixed to be like all other blocks in 1.2 + bool hasInconsistentModuleGlobalInfo; + + // Revision #0 also explicitly encoded zero values for primitive types like + // int/sbyte/etc. + bool hasExplicitPrimitiveZeros; + + // Flags to control features specific the LLVM 1.2 and before (revision #1) + + // LLVM 1.2 and earlier required that getelementptr structure indices were + // ubyte constants and that sequential type indices were longs. + bool hasRestrictedGEPTypes; + + + /// CompactionTable - If a compaction table is active in the current function, + /// this is the mapping that it contains. + std::vector<Type*> CompactionTypeTable; + + // ConstantFwdRefs - This maintains a mapping between <Type, Slot #>'s and + // forward references to constants. Such values may be referenced before they + // are defined, and if so, the temporary object that they represent is held + // here. + // + typedef std::map<std::pair<const Type*,unsigned>, Constant*> ConstantRefsType; + ConstantRefsType ConstantFwdRefs; + + // TypesLoaded - This vector mirrors the Values[TypeTyID] plane. It is used + // to deal with forward references to types. + // + typedef std::vector<PATypeHolder> TypeListTy; + TypeListTy ModuleTypes; + TypeListTy FunctionTypes; + + // When the ModuleGlobalInfo section is read, we create a FunctionType object + // for each function in the module. When the function is loaded, this type is + // used to instantiate the actual function object. + std::vector<const Type*> FunctionSignatureList; + + // Constant values are read in after global variables. 
Because of this, we + // must defer setting the initializers on global variables until after module + // level constants have been read. In the mean time, this list keeps track of + // what we must do. + // + std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits; + + // For lazy reading-in of functions, we need to save away several pieces of + // information about each function: its begin and end pointer in the buffer + // and its FunctionSlot. + // + LazyFunctionMap LazyFunctionLoadMap; + + /// The handler for parsing + BytecodeHandler* handler; + +private: + const Type *AbstractBytecodeParser::getType(unsigned ID); + /// getGlobalTableType - This is just like getType, but when a compaction + /// table is in use, it is ignored. Also, no forward references or other + /// fancy features are supported. + const Type *getGlobalTableType(unsigned Slot) { + if (Slot < Type::FirstDerivedTyID) { + const Type *Ty = Type::getPrimitiveType((Type::PrimitiveID)Slot); + assert(Ty && "Not a primitive type ID?"); + return Ty; + } + Slot -= Type::FirstDerivedTyID; + if (Slot >= ModuleTypes.size()) + throw std::string("Illegal compaction table type reference!"); + return ModuleTypes[Slot]; + } + + unsigned getGlobalTableTypeSlot(const Type *Ty) { + if (Ty->isPrimitiveType()) + return Ty->getPrimitiveID(); + TypeListTy::iterator I = find(ModuleTypes.begin(), + ModuleTypes.end(), Ty); + if (I == ModuleTypes.end()) + throw std::string("Didn't find type in ModuleTypes."); + return Type::FirstDerivedTyID + (&*I - &ModuleTypes[0]); + } + +public: + typedef const unsigned char* BufPtr; + void ParseModule (BufPtr &Buf, BufPtr End); + void ParseNextFunction (Type* FType) ; + void ParseAllFunctionBodies (); + +private: + void ParseVersionInfo (BufPtr &Buf, BufPtr End); + void ParseModuleGlobalInfo (BufPtr &Buf, BufPtr End); + void ParseSymbolTable (BufPtr &Buf, BufPtr End); + void ParseFunctionLazily (BufPtr &Buf, BufPtr End); + void ParseFunctionBody (const Type* FType, BufPtr &Buf, 
BufPtr EndBuf); + void ParseCompactionTable (BufPtr &Buf, BufPtr End); + void ParseGlobalTypes (BufPtr &Buf, BufPtr End); + + void ParseBasicBlock (BufPtr &Buf, BufPtr End, unsigned BlockNo); + unsigned ParseInstructionList(BufPtr &Buf, BufPtr End); + + bool ParseInstruction (BufPtr &Buf, BufPtr End, + std::vector<unsigned>& Args); + + void ParseConstantPool (BufPtr &Buf, BufPtr End, TypeListTy& List); + void ParseConstantValue (BufPtr &Buf, BufPtr End, unsigned TypeID); + void ParseTypeConstants (BufPtr &Buf, BufPtr End, TypeListTy &Tab, + unsigned NumEntries); + const Type *ParseTypeConstant(BufPtr &Buf, BufPtr End); + void ParseStringConstants (BufPtr &Buf, BufPtr End, unsigned NumEntries); + +}; + + +static inline void readBlock(const unsigned char *&Buf, + const unsigned char *EndBuf, + unsigned &Type, unsigned &Size) { + Type = read(Buf, EndBuf); + Size = read(Buf, EndBuf); +} + +} // End llvm namespace + +#endif +// vim: sw=2 diff --git a/lib/Bytecode/Analyzer/ReaderPrimitives.h b/lib/Bytecode/Analyzer/ReaderPrimitives.h new file mode 100644 index 0000000..496ab2a --- /dev/null +++ b/lib/Bytecode/Analyzer/ReaderPrimitives.h @@ -0,0 +1,101 @@ +//===-- ReaderPrimitives.h - Bytecode file format reading prims -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines some basic functions for reading basic primitive types +// from a bytecode stream. 
//
//===----------------------------------------------------------------------===//

#ifndef READERPRIMITIVES_H
#define READERPRIMITIVES_H

// Keep the project's portability header whenever it can be found; fall back to
// the standard headers so this file is also usable on its own.
#if defined(__has_include)
#  if __has_include("Support/DataTypes.h")
#    include "Support/DataTypes.h"
#  endif
#else
#  include "Support/DataTypes.h"
#endif
#include <stddef.h>
#include <stdint.h>
#include <algorithm>
#include <string>

namespace llvm {

  /// read - Read a fixed-width, little-endian 32-bit unsigned value and
  /// advance Buf past it.  Throws std::string if fewer than four bytes remain.
  static inline unsigned read(const unsigned char *&Buf,
                              const unsigned char *EndBuf) {
    // Compare sizes, not pointers: forming Buf+4 past the end is undefined.
    if (Buf > EndBuf || EndBuf - Buf < 4)
      throw std::string("Ran out of data!");
    // Widen each byte to unsigned before shifting so the top byte never
    // shifts into the sign bit of a promoted int.
    const unsigned Result = static_cast<unsigned>(Buf[0]) |
                            (static_cast<unsigned>(Buf[1]) << 8) |
                            (static_cast<unsigned>(Buf[2]) << 16) |
                            (static_cast<unsigned>(Buf[3]) << 24);
    Buf += 4;
    return Result;
  }


  /// read_vbr_uint - Read an unsigned integer encoded in variable bitrate
  /// format: seven payload bits per byte, least significant group first, with
  /// the high bit of each byte set while more bytes follow.  Throws
  /// std::string when the data runs out or the encoding is longer than an
  /// unsigned can hold.
  static inline unsigned read_vbr_uint(const unsigned char *&Buf,
                                       const unsigned char *EndBuf) {
    unsigned Result = 0;
    for (unsigned Shift = 0; ; Shift += 7) {
      if (Buf >= EndBuf) throw std::string("Ran out of data!");
      // Shifting by the full width of the type (or more) is undefined.
      if (Shift >= sizeof(unsigned) * 8)
        throw std::string("VBR value too large!");
      const unsigned char Byte = *Buf++;
      Result |= static_cast<unsigned>(Byte & 0x7F) << Shift;
      if (!(Byte & 0x80))
        return Result;
    }
  }

  /// read_vbr_uint64 - 64-bit counterpart of read_vbr_uint.
  static inline uint64_t read_vbr_uint64(const unsigned char *&Buf,
                                         const unsigned char *EndBuf) {
    uint64_t Result = 0;
    for (unsigned Shift = 0; ; Shift += 7) {
      if (Buf >= EndBuf) throw std::string("Ran out of data!");
      if (Shift >= 64)
        throw std::string("VBR value too large!");
      const unsigned char Byte = *Buf++;
      Result |= static_cast<uint64_t>(Byte & 0x7F) << Shift;
      if (!(Byte & 0x80))
        return Result;
    }
  }

  /// read_vbr_int64 - Read a signed VBR value.  The sign lives in bit 0 of the
  /// decoded unsigned value and the magnitude in the remaining bits.
  static inline int64_t read_vbr_int64(const unsigned char *&Buf,
                                       const unsigned char *EndBuf) {
    const uint64_t R = read_vbr_uint64(Buf, EndBuf);
    if ((R & 1) == 0)
      return static_cast<int64_t>(R >> 1);
    if (R != 1)
      return -static_cast<int64_t>(R >> 1);
    // There is no such thing as -0 with integers.  "-0" really means
    // 0x8000000000000000, spelled without shifting into the sign bit.
    return -9223372036854775807LL - 1;
  }

  /// align32 - Round Buf up to a multiple of 32 bits.  Throws std::string if
  /// the aligned position would lie beyond EndBuf (Buf is left untouched in
  /// that case).
  static inline void align32(const unsigned char *&Buf,
                             const unsigned char *EndBuf) {
    // uintptr_t is wide enough for a pointer on every data model; unsigned
    // long is not (e.g. 64-bit Windows).
    const uintptr_t Misalign = reinterpret_cast<uintptr_t>(Buf) & 3;
    const size_t Pad = Misalign ? 4 - Misalign : 0;
    if (Buf > EndBuf || static_cast<size_t>(EndBuf - Buf) < Pad)
      throw std::string("Ran out of data!");
    Buf += Pad;
  }

  /// read_str - Read a VBR length followed by that many raw bytes and return
  /// them as a std::string.  Throws std::string if the length is invalid.
  static inline std::string read_str(const unsigned char *&Buf,
                                     const unsigned char *EndBuf) {
    const unsigned Size = read_vbr_uint(Buf, EndBuf);
    // Validate before advancing so Buf never points past the buffer.
    if (Buf > EndBuf || static_cast<size_t>(EndBuf - Buf) < Size)
      throw std::string("Ran out of data reading a string!");
    const unsigned char *OldBuf = Buf;
    Buf += Size;
    return std::string(reinterpret_cast<const char*>(OldBuf), Size);
  }

  /// input_data - Copy raw bytes from the stream into [Ptr, End) and advance
  /// Buf past them.  Throws std::string if the stream holds fewer bytes.
  static inline void input_data(const unsigned char *&Buf,
                                const unsigned char *EndBuf,
                                void *Ptr, void *End) {
    unsigned char *Start = static_cast<unsigned char *>(Ptr);
    const size_t Amount =
      static_cast<size_t>(static_cast<unsigned char *>(End) - Start);
    if (Buf > EndBuf || static_cast<size_t>(EndBuf - Buf) < Amount)
      throw std::string("Ran out of data!");
    std::copy(Buf, Buf + Amount, Start);
    Buf += Amount;
  }

} // End llvm namespace

#endif
// diff --git a/lib/Bytecode/Makefile b/lib/Bytecode/Makefile
// index 92494b2..42daa9d 100644
// --- a/lib/Bytecode/Makefile
// +++ b/lib/Bytecode/Makefile
// @@ -7,7 +7,7 @@
//  #
//  ##===----------------------------------------------------------------------===##
//  LEVEL = ../..
// -DIRS = Reader Writer
// +DIRS = Analyzer Reader Writer
//  include $(LEVEL)/Makefile.common
// diff --git a/lib/Bytecode/Reader/Analyzer.cpp b/lib/Bytecode/Reader/Analyzer.cpp
// new file mode 100644
// index 0000000..99c3e41
// --- /dev/null
// +++ b/lib/Bytecode/Reader/Analyzer.cpp
// @@ -0,0 +1,242 @@
//===-- BytecodeHandler.cpp - Parsing Handler -------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Reid Spencer and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This header file defines the BytecodeHandler class that gets called by the
// AbstractBytecodeParser when parsing events occur.
+// +//===----------------------------------------------------------------------===// + +#include "AnalyzerInternals.h" + +using namespace llvm; + + +namespace { + +class AnalyzerHandler : public BytecodeHandler { +public: + bool handleError(const std::string& str ) + { + return false; + } + + void handleStart() + { + } + + void handleFinish() + { + } + + void handleModuleBegin(const std::string& id) + { + } + + void handleModuleEnd(const std::string& id) + { + } + + void handleVersionInfo( + unsigned char RevisionNum, ///< Byte code revision number + Module::Endianness Endianness, ///< Endianness indicator + Module::PointerSize PointerSize ///< PointerSize indicator + ) + { + } + + void handleModuleGlobalsBegin() + { + } + + void handleGlobalVariable( + const Type* ElemType, ///< The type of the global variable + bool isConstant, ///< Whether the GV is constant or not + GlobalValue::LinkageTypes ///< The linkage type of the GV + ) + { + } + + void handleInitializedGV( + const Type* ElemType, ///< The type of the global variable + bool isConstant, ///< Whether the GV is constant or not + GlobalValue::LinkageTypes,///< The linkage type of the GV + unsigned initSlot ///< Slot number of GV's initializer + ) + { + } + + virtual void handleType( const Type* Ty ) + { + } + + void handleFunctionDeclaration( + const Type* FuncType ///< The type of the function + ) + { + } + + void handleModuleGlobalsEnd() + { + } + + void handleCompactionTableBegin() + { + } + + void handleCompactionTablePlane( + unsigned Ty, + unsigned NumEntries + ) + { + } + + void handleCompactionTableType( + unsigned i, + unsigned TypSlot, + const Type* + ) + { + } + + void handleCompactionTableValue( + unsigned i, + unsigned ValSlot, + const Type* + ) + { + } + + void handleCompactionTableEnd() + { + } + + void handleSymbolTableBegin() + { + } + + void handleSymbolTablePlane( + unsigned Ty, + unsigned NumEntries, + const Type* Typ + ) + { + } + + void handleSymbolTableType( + unsigned i, + unsigned 
slot, + const std::string& name + ) + { + } + + void handleSymbolTableValue( + unsigned i, + unsigned slot, + const std::string& name + ) + { + } + + void handleSymbolTableEnd() + { + } + + void handleFunctionBegin( + const Type* FType, + GlobalValue::LinkageTypes linkage + ) + { + } + + void handleFunctionEnd( + const Type* FType + ) + { + } + + void handleBasicBlockBegin( + unsigned blocknum + ) + { + } + + bool handleInstruction( + unsigned Opcode, + const Type* iType, + std::vector<unsigned>& Operands + ) + { + return false; + } + + void handleBasicBlockEnd(unsigned blocknum) + { + } + + void handleGlobalConstantsBegin() + { + } + + void handleConstantExpression( + unsigned Opcode, + const Type* Typ, + std::vector<std::pair<const Type*,unsigned> > ArgVec + ) + { + } + + void handleConstantValue( Constant * c ) + { + } + + void handleConstantArray( + const ArrayType* AT, + std::vector<unsigned>& Elements ) + { + } + + void handleConstantStruct( + const StructType* ST, + std::vector<unsigned>& ElementSlots) + { + } + + void handleConstantPointer( + const PointerType* PT, unsigned Slot) + { + } + + void handleConstantString( const ConstantArray* CA ) + { + } + + + void handleGlobalConstantsEnd() + { + } + +}; + +} + +void llvm::BytecodeAnalyzer::AnalyzeBytecode( + const unsigned char *Buf, + unsigned Length, + BytecodeAnalysis& bca, + const std::string &ModuleID +) +{ + AnalyzerHandler TheHandler; + AbstractBytecodeParser TheParser(&TheHandler); + TheParser.ParseBytecode( Buf, Length, ModuleID ); + TheParser.ParseAllFunctionBodies(); +} + +// vim: sw=2 diff --git a/lib/Bytecode/Reader/AnalyzerInternals.h b/lib/Bytecode/Reader/AnalyzerInternals.h new file mode 100644 index 0000000..d9a2e84 --- /dev/null +++ b/lib/Bytecode/Reader/AnalyzerInternals.h @@ -0,0 +1,65 @@ +//===-- ReaderInternals.h - Definitions internal to the reader --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed 
under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file defines various stuff that is used by the bytecode reader. +// +//===----------------------------------------------------------------------===// + +#ifndef ANALYZER_INTERNALS_H +#define ANALYZER_INTERNALS_H + +#include "Parser.h" +#include "llvm/Bytecode/Analyzer.h" + +// Enable to trace to figure out what the heck is going on when parsing fails +//#define TRACE_LEVEL 10 +//#define DEBUG_OUTPUT + +#if TRACE_LEVEL // ByteCodeReading_TRACEr +#define BCR_TRACE(n, X) \ + if (n < TRACE_LEVEL) std::cerr << std::string(n*2, ' ') << X +#else +#define BCR_TRACE(n, X) +#endif + +namespace llvm { + +class BytecodeAnalyzer { + BytecodeAnalyzer(const BytecodeAnalyzer &); // DO NOT IMPLEMENT + void operator=(const BytecodeAnalyzer &); // DO NOT IMPLEMENT +public: + BytecodeAnalyzer() { } + ~BytecodeAnalyzer() { } + + void AnalyzeBytecode( + const unsigned char *Buf, + unsigned Length, + BytecodeAnalysis& bca, + const std::string &ModuleID + ); + + void DumpBytecode( + const unsigned char *Buf, + unsigned Length, + BytecodeAnalysis& bca, + const std::string &ModuleID + ); + + void dump() const { + std::cerr << "BytecodeParser instance!\n"; + } +private: + BytecodeAnalysis TheAnalysis; +}; + +} // End llvm namespace + +#endif + +// vim: sw=2 diff --git a/lib/Bytecode/Reader/AnalyzerWrappers.cpp b/lib/Bytecode/Reader/AnalyzerWrappers.cpp new file mode 100644 index 0000000..a0e4845 --- /dev/null +++ b/lib/Bytecode/Reader/AnalyzerWrappers.cpp @@ -0,0 +1,208 @@ +//===- AnalyzerWrappers.cpp - Analyze bytecode from file or buffer -------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Reid Spencer and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements loading and analysis of a bytecode file and analyzing a +// bytecode buffer. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Bytecode/Analyzer.h" +#include "AnalyzerInternals.h" +#include "Support/FileUtilities.h" +#include "Support/StringExtras.h" +#include "Config/unistd.h" +#include <cerrno> + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// BytecodeFileAnalyzer - Analyze from an mmap'able file descriptor. +// + +namespace { + /// BytecodeFileAnalyzer - parses a bytecode file from a file + class BytecodeFileAnalyzer : public BytecodeAnalyzer { + private: + unsigned char *Buffer; + unsigned Length; + + BytecodeFileAnalyzer(const BytecodeFileAnalyzer&); // Do not implement + void operator=(const BytecodeFileAnalyzer &BFR); // Do not implement + + public: + BytecodeFileAnalyzer(const std::string &Filename, BytecodeAnalysis& bca); + ~BytecodeFileAnalyzer(); + }; +} + +static std::string ErrnoMessage (int savedErrNum, std::string descr) { + return ::strerror(savedErrNum) + std::string(", while trying to ") + descr; +} + +BytecodeFileAnalyzer::BytecodeFileAnalyzer(const std::string &Filename, + BytecodeAnalysis& bca) { + Buffer = (unsigned char*)ReadFileIntoAddressSpace(Filename, Length); + if (Buffer == 0) + throw "Error reading file '" + Filename + "'."; + + try { + // Parse the bytecode we mmapped in + if ( bca.dumpBytecode ) + DumpBytecode(Buffer, Length, bca, Filename); + AnalyzeBytecode(Buffer, Length, bca, Filename); + } catch (...) { + UnmapFileFromAddressSpace(Buffer, Length); + throw; + } +} + +BytecodeFileAnalyzer::~BytecodeFileAnalyzer() { + // Unmmap the bytecode... 
+ UnmapFileFromAddressSpace(Buffer, Length); +} + +//===----------------------------------------------------------------------===// +// BytecodeBufferAnalyzer - Read from a memory buffer +// + +namespace { + /// BytecodeBufferAnalyzer - parses a bytecode file from a buffer + /// + class BytecodeBufferAnalyzer : public BytecodeAnalyzer { + private: + const unsigned char *Buffer; + bool MustDelete; + + BytecodeBufferAnalyzer(const BytecodeBufferAnalyzer&); // Do not implement + void operator=(const BytecodeBufferAnalyzer &BFR); // Do not implement + + public: + BytecodeBufferAnalyzer(const unsigned char *Buf, unsigned Length, + BytecodeAnalysis& bca, const std::string &ModuleID); + ~BytecodeBufferAnalyzer(); + + }; +} + +BytecodeBufferAnalyzer::BytecodeBufferAnalyzer(const unsigned char *Buf, + unsigned Length, + BytecodeAnalysis& bca, + const std::string &ModuleID) { + // If not aligned, allocate a new buffer to hold the bytecode... + const unsigned char *ParseBegin = 0; + if ((intptr_t)Buf & 3) { + Buffer = new unsigned char[Length+4]; + unsigned Offset = 4 - ((intptr_t)Buffer & 3); // Make sure it's aligned + ParseBegin = Buffer + Offset; + memcpy((unsigned char*)ParseBegin, Buf, Length); // Copy it over + MustDelete = true; + } else { + // If we don't need to copy it over, just use the caller's copy + ParseBegin = Buffer = Buf; + MustDelete = false; + } + try { + if ( bca.dumpBytecode ) + DumpBytecode(ParseBegin, Length, bca, ModuleID); + AnalyzeBytecode(ParseBegin, Length, bca, ModuleID); + } catch (...) 
{ + if (MustDelete) delete [] Buffer; + throw; + } +} + +BytecodeBufferAnalyzer::~BytecodeBufferAnalyzer() { + if (MustDelete) delete [] Buffer; +} + +//===----------------------------------------------------------------------===// +// BytecodeStdinAnalyzer - Read bytecode from Standard Input +// + +namespace { + /// BytecodeStdinAnalyzer - parses a bytecode file from stdin + /// + class BytecodeStdinAnalyzer : public BytecodeAnalyzer { + private: + std::vector<unsigned char> FileData; + unsigned char *FileBuf; + + BytecodeStdinAnalyzer(const BytecodeStdinAnalyzer&); // Do not implement + void operator=(const BytecodeStdinAnalyzer &BFR); // Do not implement + + public: + BytecodeStdinAnalyzer(BytecodeAnalysis& bca); + }; +} + +BytecodeStdinAnalyzer::BytecodeStdinAnalyzer(BytecodeAnalysis& bca ) { + int BlockSize; + unsigned char Buffer[4096*4]; + + // Read in all of the data from stdin, we cannot mmap stdin... + while ((BlockSize = ::read(0 /*stdin*/, Buffer, 4096*4))) { + if (BlockSize == -1) + throw ErrnoMessage(errno, "read from standard input"); + + FileData.insert(FileData.end(), Buffer, Buffer+BlockSize); + } + + if (FileData.empty()) + throw std::string("Standard Input empty!"); + + FileBuf = &FileData[0]; + if (bca.dumpBytecode) + DumpBytecode(&FileData[0], FileData.size(), bca, "<stdin>"); + AnalyzeBytecode(FileBuf, FileData.size(), bca, "<stdin>"); +} + +//===----------------------------------------------------------------------===// +// Wrapper functions +//===----------------------------------------------------------------------===// + +// AnalyzeBytecodeFile - analyze one file +void llvm::AnalyzeBytecodeFile(const std::string &Filename, + BytecodeAnalysis& bca, + std::string *ErrorStr) +{ + try { + if ( Filename != "-" ) + BytecodeFileAnalyzer bfa(Filename,bca); + else + BytecodeStdinAnalyzer bsa(bca); + } catch (std::string &err) { + if (ErrorStr) *ErrorStr = err; + } +} + +// AnalyzeBytecodeBuffer - analyze a buffer +void llvm::AnalyzeBytecodeBuffer( 
+ const unsigned char* Buffer, ///< Pointer to start of bytecode buffer + unsigned BufferSize, ///< Size of the bytecode buffer + BytecodeAnalysis& Results, ///< The results of the analysis + std::string* ErrorStr ///< Errors, if any. + ) +{ + try { + BytecodeBufferAnalyzer(Buffer, BufferSize, Results, "<buffer>" ); + } catch (std::string& err ) { + if ( ErrorStr) *ErrorStr = err; + } +} + + +/// This function prints the contents of rhe BytecodeAnalysis structure in +/// a human legible form. +/// @brief Print BytecodeAnalysis structure to an ostream +void llvm::PrintBytecodeAnalysis(BytecodeAnalysis& bca, std::ostream& Out ) +{ + Out << "Not Implemented Yet.\n"; +} + +// vim: sw=2 diff --git a/lib/Bytecode/Reader/Dumper.cpp b/lib/Bytecode/Reader/Dumper.cpp new file mode 100644 index 0000000..6ff4ea0 --- /dev/null +++ b/lib/Bytecode/Reader/Dumper.cpp @@ -0,0 +1,311 @@ +//===-- BytecodeDumper.cpp - Parsing Handler --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Reid Spencer and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file defines the BytecodeDumper class that gets called by the +// AbstractBytecodeParser when parsing events occur. It merely dumps the +// information presented to it from the parser. 
+// +//===----------------------------------------------------------------------===// + +#include "AnalyzerInternals.h" +#include "llvm/Constant.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instruction.h" +#include "llvm/Type.h" + +using namespace llvm; + +namespace { + +class BytecodeDumper : public llvm::BytecodeHandler { +public: + + virtual bool handleError(const std::string& str ) + { + std::cout << "ERROR: " << str << "\n"; + return true; + } + + virtual void handleStart() + { + std::cout << "Bytecode {\n"; + } + + virtual void handleFinish() + { + std::cout << "} End Bytecode\n"; + } + + virtual void handleModuleBegin(const std::string& id) + { + std::cout << " Module " << id << " {\n"; + } + + virtual void handleModuleEnd(const std::string& id) + { + std::cout << " } End Module " << id << "\n"; + } + + virtual void handleVersionInfo( + unsigned char RevisionNum, ///< Byte code revision number + Module::Endianness Endianness, ///< Endianness indicator + Module::PointerSize PointerSize ///< PointerSize indicator + ) + { + std::cout << " RevisionNum: " << int(RevisionNum) + << " Endianness: " << Endianness + << " PointerSize: " << PointerSize << "\n"; + } + + virtual void handleModuleGlobalsBegin() + { + std::cout << " BLOCK: ModuleGlobalInfo {\n"; + } + + virtual void handleGlobalVariable( + const Type* ElemType, ///< The type of the global variable + bool isConstant, ///< Whether the GV is constant or not + GlobalValue::LinkageTypes Linkage ///< The linkage type of the GV + ) + { + std::cout << " GV: Uninitialized, " + << ( isConstant? 
"Constant, " : "Variable, ") + << " Linkage=" << Linkage << " Type=" + << ElemType->getDescription() << "\n"; + } + + virtual void handleInitializedGV( + const Type* ElemType, ///< The type of the global variable + bool isConstant, ///< Whether the GV is constant or not + GlobalValue::LinkageTypes Linkage,///< The linkage type of the GV + unsigned initSlot ///< Slot number of GV's initializer + ) + { + std::cout << " GV: Initialized, " + << ( isConstant? "Constant, " : "Variable, ") + << " Linkage=" << Linkage << " Type=" + << ElemType->getDescription() + << " InitializerSlot=" << initSlot << "\n"; + } + + virtual void handleType( const Type* Ty ) + { + std::cout << " Type: " << Ty->getDescription() << "\n"; + } + + virtual void handleFunctionDeclaration( const Type* FuncType ) + { + std::cout << " Function: " << FuncType->getDescription() << "\n"; + } + + virtual void handleModuleGlobalsEnd() + { + std::cout << " } END BLOCK: ModuleGlobalInfo\n"; + } + + void handleCompactionTableBegin() + { + std::cout << " BLOCK: CompactionTable {\n"; + } + + virtual void handleCompactionTablePlane( unsigned Ty, unsigned NumEntries ) + { + std::cout << " Plane: Ty=" << Ty << " Size=" << NumEntries << "\n"; + } + + virtual void handleCompactionTableType( + unsigned i, + unsigned TypSlot, + const Type* Ty + ) + { + std::cout << " Type: " << i << " Slot:" << TypSlot + << " is " << Ty->getDescription() << "\n"; + } + + virtual void handleCompactionTableValue( + unsigned i, + unsigned ValSlot, + const Type* Ty + ) + { + std::cout << " Value: " << i << " Slot:" << ValSlot + << " is " << Ty->getDescription() << "\n"; + } + + virtual void handleCompactionTableEnd() + { + std::cout << " } END BLOCK: CompactionTable\n"; + } + + virtual void handleSymbolTableBegin() + { + std::cout << " BLOCK: SymbolTable {\n"; + } + + virtual void handleSymbolTablePlane( + unsigned Ty, + unsigned NumEntries, + const Type* Typ + ) + { + std::cout << " Plane: Ty=" << Ty << " Size=" << NumEntries + << " 
Type: " << Typ->getDescription() << "\n"; + } + + virtual void handleSymbolTableType( + unsigned i, + unsigned slot, + const std::string& name + ) + { + std::cout << " Type " << i << " Slot=" << slot + << " Name: " << name << "\n"; + } + + virtual void handleSymbolTableValue( + unsigned i, + unsigned slot, + const std::string& name + ) + { + std::cout << " Value " << i << " Slot=" << slot + << " Name: " << name << "\n"; + } + + virtual void handleSymbolTableEnd() + { + std::cout << " } END BLOCK: SymbolTable\n"; + } + + virtual void handleFunctionBegin( + const Type* FType, + GlobalValue::LinkageTypes linkage + ) + { + std::cout << " BLOCK: Function {\n"; + std::cout << " Linkage: " << linkage << "\n"; + std::cout << " Type: " << FType->getDescription() << "\n"; + } + + virtual void handleFunctionEnd( + const Type* FType + ) + { + std::cout << " } END BLOCK: Function\n"; + } + + virtual void handleBasicBlockBegin( + unsigned blocknum + ) + { + std::cout << " BLOCK: BasicBlock #" << blocknum << "{\n"; + } + + virtual bool handleInstruction( + unsigned Opcode, + const Type* iType, + std::vector<unsigned>& Operands + ) + { + std::cout << " INST: OpCode=" + << Instruction::getOpcodeName(Opcode) << " Type=" + << iType->getDescription() << "\n"; + for ( unsigned i = 0; i < Operands.size(); ++i ) + std::cout << " Op#" << i << " Slot=" << Operands[i] << "\n"; + + return Instruction::isTerminator(Opcode); + } + + virtual void handleBasicBlockEnd(unsigned blocknum) + { + std::cout << " } END BLOCK: BasicBlock #" << blocknum << "{\n"; + } + + virtual void handleGlobalConstantsBegin() + { + std::cout << " BLOCK: GlobalConstants {\n"; + } + + virtual void handleConstantExpression( + unsigned Opcode, + const Type* Typ, + std::vector<std::pair<const Type*,unsigned> > ArgVec + ) + { + std::cout << " EXPR: " << Instruction::getOpcodeName(Opcode) + << " Type=" << Typ->getDescription() << "\n"; + for ( unsigned i = 0; i < ArgVec.size(); ++i ) + std::cout << " Arg#" << i << " Type=" + 
<< ArgVec[i].first->getDescription() << " Slot=" + << ArgVec[i].second << "\n"; + } + + virtual void handleConstantValue( Constant * c ) + { + std::cout << " VALUE: "; + c->print(std::cout); + std::cout << "\n"; + } + + virtual void handleConstantArray( + const ArrayType* AT, + std::vector<unsigned>& Elements ) + { + std::cout << " ARRAY: " << AT->getDescription() << "\n"; + for ( unsigned i = 0; i < Elements.size(); ++i ) + std::cout << " #" << i << " Slot=" << Elements[i] << "\n"; + } + + virtual void handleConstantStruct( + const StructType* ST, + std::vector<unsigned>& Elements) + { + std::cout << " STRUC: " << ST->getDescription() << "\n"; + for ( unsigned i = 0; i < Elements.size(); ++i ) + std::cout << " #" << i << " Slot=" << Elements[i] << "\n"; + } + + virtual void handleConstantPointer( + const PointerType* PT, unsigned Slot) + { + std::cout << " POINT: " << PT->getDescription() + << " Slot=" << Slot << "\n"; + } + + virtual void handleConstantString( const ConstantArray* CA ) + { + std::cout << " STRNG: "; + CA->print(std::cout); + std::cout << "\n"; + } + + virtual void handleGlobalConstantsEnd() + { + std::cout << " } END BLOCK: GlobalConstants\n"; + } +}; + +} + +void BytecodeAnalyzer::DumpBytecode( + const unsigned char *Buf, + unsigned Length, + BytecodeAnalysis& bca, + const std::string &ModuleID + ) +{ + BytecodeDumper TheHandler; + AbstractBytecodeParser TheParser(&TheHandler); + TheParser.ParseBytecode( Buf, Length, ModuleID ); + TheParser.ParseAllFunctionBodies(); +} + +// vim: sw=2 diff --git a/lib/Bytecode/Reader/Parser.cpp b/lib/Bytecode/Reader/Parser.cpp new file mode 100644 index 0000000..d236b64 --- /dev/null +++ b/lib/Bytecode/Reader/Parser.cpp @@ -0,0 +1,877 @@ +//===- Reader.cpp - Code to read bytecode files ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This library implements the functionality defined in llvm/Bytecode/Reader.h +// +// Note that this library should be as fast as possible, reentrant, and +// threadsafe!! +// +// TODO: Allow passing in an option to ignore the symbol table +// +//===----------------------------------------------------------------------===// + +#include "AnalyzerInternals.h" +#include "llvm/Module.h" +#include "llvm/Bytecode/Format.h" +#include "Support/StringExtras.h" +#include <iostream> +#include <sstream> + +using namespace llvm; + +#define PARSE_ERROR(inserters) \ + { \ + std::ostringstream errormsg; \ + errormsg << inserters; \ + if ( ! handler->handleError( errormsg.str() ) ) \ + throw std::string(errormsg.str()); \ + } + +const Type *AbstractBytecodeParser::getType(unsigned ID) { + //cerr << "Looking up Type ID: " << ID << "\n"; + + if (ID < Type::FirstDerivedTyID) + if (const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID)) + return T; // Asked for a primitive type... + + // Otherwise, derived types need offset... + ID -= Type::FirstDerivedTyID; + + if (!CompactionTypeTable.empty()) { + if (ID >= CompactionTypeTable.size()) + PARSE_ERROR("Type ID out of range for compaction table!"); + return CompactionTypeTable[ID]; + } + + // Is it a module-level type? + if (ID < ModuleTypes.size()) + return ModuleTypes[ID].get(); + + // Nope, is it a function-level type? 
+ ID -= ModuleTypes.size(); + if (ID < FunctionTypes.size()) + return FunctionTypes[ID].get(); + + PARSE_ERROR("Illegal type reference!"); + return Type::VoidTy; +} + +bool AbstractBytecodeParser::ParseInstruction(BufPtr& Buf, BufPtr EndBuf, + std::vector<unsigned> &Operands) { + Operands.clear(); + unsigned iType = 0; + unsigned Opcode = 0; + unsigned Op = read(Buf, EndBuf); + + // bits Instruction format: Common to all formats + // -------------------------- + // 01-00: Opcode type, fixed to 1. + // 07-02: Opcode + Opcode = (Op >> 2) & 63; + Operands.resize((Op >> 0) & 03); + + switch (Operands.size()) { + case 1: + // bits Instruction format: + // -------------------------- + // 19-08: Resulting type plane + // 31-20: Operand #1 (if set to (2^12-1), then zero operands) + // + iType = (Op >> 8) & 4095; + Operands[0] = (Op >> 20) & 4095; + if (Operands[0] == 4095) // Handle special encoding for 0 operands... + Operands.resize(0); + break; + case 2: + // bits Instruction format: + // -------------------------- + // 15-08: Resulting type plane + // 23-16: Operand #1 + // 31-24: Operand #2 + // + iType = (Op >> 8) & 255; + Operands[0] = (Op >> 16) & 255; + Operands[1] = (Op >> 24) & 255; + break; + case 3: + // bits Instruction format: + // -------------------------- + // 13-08: Resulting type plane + // 19-14: Operand #1 + // 25-20: Operand #2 + // 31-26: Operand #3 + // + iType = (Op >> 8) & 63; + Operands[0] = (Op >> 14) & 63; + Operands[1] = (Op >> 20) & 63; + Operands[2] = (Op >> 26) & 63; + break; + case 0: + Buf -= 4; // Hrm, try this again... 
+ Opcode = read_vbr_uint(Buf, EndBuf); + Opcode >>= 2; + iType = read_vbr_uint(Buf, EndBuf); + + unsigned NumOperands = read_vbr_uint(Buf, EndBuf); + Operands.resize(NumOperands); + + if (NumOperands == 0) + PARSE_ERROR("Zero-argument instruction found; this is invalid."); + + for (unsigned i = 0; i != NumOperands; ++i) + Operands[i] = read_vbr_uint(Buf, EndBuf); + align32(Buf, EndBuf); + break; + } + + return handler->handleInstruction(Opcode, getType(iType), Operands); +} + +/// ParseBasicBlock - In LLVM 1.0 bytecode files, we used to output one +/// basicblock at a time. This method reads in one of the basicblock packets. +void AbstractBytecodeParser::ParseBasicBlock(BufPtr &Buf, + BufPtr EndBuf, + unsigned BlockNo) { + handler->handleBasicBlockBegin( BlockNo ); + + std::vector<unsigned> Args; + bool is_terminating = false; + while (Buf < EndBuf) + is_terminating = ParseInstruction(Buf, EndBuf, Args); + + if ( ! is_terminating ) + PARSE_ERROR( + "Failed to recognize instruction as terminating at end of block"); + + handler->handleBasicBlockEnd( BlockNo ); +} + + +/// ParseInstructionList - Parse all of the BasicBlock's & Instruction's in the +/// body of a function. In post 1.0 bytecode files, we no longer emit basic +/// block individually, in order to avoid per-basic-block overhead. 
+unsigned AbstractBytecodeParser::ParseInstructionList( BufPtr &Buf, BufPtr EndBuf) { + unsigned BlockNo = 0; + std::vector<unsigned> Args; + + while (Buf < EndBuf) { + handler->handleBasicBlockBegin( BlockNo ); + + // Read instructions into this basic block until we get to a terminator + bool is_terminating = false; + while (Buf < EndBuf && !is_terminating ) + is_terminating = ParseInstruction(Buf, EndBuf, Args ) ; + + if (!is_terminating) + PARSE_ERROR( "Non-terminated basic block found!"); + + handler->handleBasicBlockEnd( BlockNo ); + ++BlockNo; + } + return BlockNo; +} + +void AbstractBytecodeParser::ParseSymbolTable(BufPtr &Buf, BufPtr EndBuf) { + handler->handleSymbolTableBegin(); + + while (Buf < EndBuf) { + // Symtab block header: [num entries][type id number] + unsigned NumEntries = read_vbr_uint(Buf, EndBuf); + unsigned Typ = read_vbr_uint(Buf, EndBuf); + const Type *Ty = getType(Typ); + + handler->handleSymbolTablePlane( Typ, NumEntries, Ty ); + + for (unsigned i = 0; i != NumEntries; ++i) { + // Symtab entry: [def slot #][name] + unsigned slot = read_vbr_uint(Buf, EndBuf); + std::string Name = read_str(Buf, EndBuf); + + if (Typ == Type::TypeTyID) + handler->handleSymbolTableType( i, slot, Name ); + else + handler->handleSymbolTableValue( i, slot, Name ); + } + } + + if (Buf > EndBuf) + PARSE_ERROR("Tried to read past end of buffer while reading symbol table."); + + handler->handleSymbolTableEnd(); +} + +void AbstractBytecodeParser::ParseFunctionLazily(BufPtr &Buf, BufPtr EndBuf) { + if (FunctionSignatureList.empty()) + throw std::string("FunctionSignatureList empty!"); + + const Type *FType = FunctionSignatureList.back(); + FunctionSignatureList.pop_back(); + + // Save the information for future reading of the function + LazyFunctionLoadMap[FType] = LazyFunctionInfo(Buf, EndBuf); + // Pretend we've `parsed' this function + Buf = EndBuf; +} + +void AbstractBytecodeParser::ParseNextFunction(Type* FType) { + // Find {start, end} pointers and slot in the 
map. If not there, we're done. + LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.find(FType); + + // Make sure we found it + if ( Fi == LazyFunctionLoadMap.end() ) { + PARSE_ERROR("Unrecognized function of type " << FType->getDescription()); + return; + } + + BufPtr Buf = Fi->second.Buf; + BufPtr EndBuf = Fi->second.EndBuf; + assert(Fi->first == FType); + + LazyFunctionLoadMap.erase(Fi); + + this->ParseFunctionBody( FType, Buf, EndBuf ); +} + +void AbstractBytecodeParser::ParseFunctionBody(const Type* FType, + BufPtr &Buf, BufPtr EndBuf ) { + + GlobalValue::LinkageTypes Linkage = GlobalValue::ExternalLinkage; + + unsigned LinkageType = read_vbr_uint(Buf, EndBuf); + switch (LinkageType) { + case 0: Linkage = GlobalValue::ExternalLinkage; break; + case 1: Linkage = GlobalValue::WeakLinkage; break; + case 2: Linkage = GlobalValue::AppendingLinkage; break; + case 3: Linkage = GlobalValue::InternalLinkage; break; + case 4: Linkage = GlobalValue::LinkOnceLinkage; break; + default: + PARSE_ERROR("Invalid linkage type for Function."); + Linkage = GlobalValue::InternalLinkage; + break; + } + + handler->handleFunctionBegin(FType,Linkage); + + // Keep track of how many basic blocks we have read in... 
+ unsigned BlockNum = 0; + bool InsertedArguments = false; + + while (Buf < EndBuf) { + unsigned Type, Size; + BufPtr OldBuf = Buf; + readBlock(Buf, EndBuf, Type, Size); + + switch (Type) { + case BytecodeFormat::ConstantPool: + ParseConstantPool(Buf, Buf+Size, FunctionTypes ); + break; + + case BytecodeFormat::CompactionTable: + ParseCompactionTable(Buf, Buf+Size); + break; + + case BytecodeFormat::BasicBlock: + ParseBasicBlock(Buf, Buf+Size, BlockNum++); + break; + + case BytecodeFormat::InstructionList: + if (BlockNum) + PARSE_ERROR("InstructionList must come before basic blocks!"); + BlockNum = ParseInstructionList(Buf, Buf+Size); + break; + + case BytecodeFormat::SymbolTable: + ParseSymbolTable(Buf, Buf+Size ); + break; + + default: + Buf += Size; + if (OldBuf > Buf) + PARSE_ERROR("Wrapped around reading bytecode"); + break; + } + + // Malformed bc file if read past end of block. + align32(Buf, EndBuf); + } + + handler->handleFunctionEnd(FType); + + // Clear out function-level types... 
+ FunctionTypes.clear(); + CompactionTypeTable.clear(); +} + +void AbstractBytecodeParser::ParseAllFunctionBodies() { + LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.begin(); + LazyFunctionMap::iterator Fe = LazyFunctionLoadMap.end(); + + while ( Fi != Fe ) { + const Type* FType = Fi->first; + this->ParseFunctionBody(FType, Fi->second.Buf, Fi->second.EndBuf); + } +} + +void AbstractBytecodeParser::ParseCompactionTable(BufPtr &Buf, BufPtr End) { + + handler->handleCompactionTableBegin(); + + while (Buf != End) { + unsigned NumEntries = read_vbr_uint(Buf, End); + unsigned Ty; + + if ((NumEntries & 3) == 3) { + NumEntries >>= 2; + Ty = read_vbr_uint(Buf, End); + } else { + Ty = NumEntries >> 2; + NumEntries &= 3; + } + + handler->handleCompactionTablePlane( Ty, NumEntries ); + + if (Ty == Type::TypeTyID) { + for (unsigned i = 0; i != NumEntries; ++i) { + unsigned TypeSlot = read_vbr_uint(Buf,End); + const Type *Typ = getGlobalTableType(TypeSlot); + handler->handleCompactionTableType( i, TypeSlot, Typ ); + } + } else { + const Type *Typ = getType(Ty); + // Push the implicit zero + for (unsigned i = 0; i != NumEntries; ++i) { + unsigned ValSlot = read_vbr_uint(Buf, End); + handler->handleCompactionTableValue( i, ValSlot, Typ ); + } + } + } + handler->handleCompactionTableEnd(); +} + +const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf, + const unsigned char *EndBuf) { + unsigned PrimType = read_vbr_uint(Buf, EndBuf); + + const Type *Val = 0; + if ((Val = Type::getPrimitiveType((Type::PrimitiveID)PrimType))) + return Val; + + switch (PrimType) { + case Type::FunctionTyID: { + const Type *RetType = getType(read_vbr_uint(Buf, EndBuf)); + + unsigned NumParams = read_vbr_uint(Buf, EndBuf); + + std::vector<const Type*> Params; + while (NumParams--) + Params.push_back(getType(read_vbr_uint(Buf, EndBuf))); + + bool isVarArg = Params.size() && Params.back() == Type::VoidTy; + if (isVarArg) Params.pop_back(); + + Type* result = 
FunctionType::get(RetType, Params, isVarArg); + handler->handleType( result ); + return result; + } + case Type::ArrayTyID: { + unsigned ElTyp = read_vbr_uint(Buf, EndBuf); + const Type *ElementType = getType(ElTyp); + + unsigned NumElements = read_vbr_uint(Buf, EndBuf); + + BCR_TRACE(5, "Array Type Constant #" << ElTyp << " size=" + << NumElements << "\n"); + Type* result = ArrayType::get(ElementType, NumElements); + handler->handleType( result ); + return result; + } + case Type::StructTyID: { + std::vector<const Type*> Elements; + unsigned Typ = read_vbr_uint(Buf, EndBuf); + while (Typ) { // List is terminated by void/0 typeid + Elements.push_back(getType(Typ)); + Typ = read_vbr_uint(Buf, EndBuf); + } + + Type* result = StructType::get(Elements); + handler->handleType( result ); + return result; + } + case Type::PointerTyID: { + unsigned ElTyp = read_vbr_uint(Buf, EndBuf); + BCR_TRACE(5, "Pointer Type Constant #" << ElTyp << "\n"); + Type* result = PointerType::get(getType(ElTyp)); + handler->handleType( result ); + return result; + } + + case Type::OpaqueTyID: { + Type* result = OpaqueType::get(); + handler->handleType( result ); + return result; + } + + default: + PARSE_ERROR("Don't know how to deserialize primitive type" << PrimType << "\n"); + return Val; + } +} + +// ParseTypeConstants - We have to use this weird code to handle recursive +// types. We know that recursive types will only reference the current slab of +// values in the type plane, but they can forward reference types before they +// have been read. For example, Type #0 might be '{ Ty#1 }' and Type #1 might +// be 'Ty#0*'. When reading Type #0, type number one doesn't exist. To fix +// this ugly problem, we pessimistically insert an opaque type for each type we +// are about to read. This means that forward references will resolve to +// something and when we reread the type later, we can replace the opaque type +// with a new resolved concrete type. 
+// +void AbstractBytecodeParser::ParseTypeConstants(const unsigned char *&Buf, + const unsigned char *EndBuf, + TypeListTy &Tab, + unsigned NumEntries) { + assert(Tab.size() == 0 && "should not have read type constants in before!"); + + // Insert a bunch of opaque types to be resolved later... + Tab.reserve(NumEntries); + for (unsigned i = 0; i != NumEntries; ++i) + Tab.push_back(OpaqueType::get()); + + // Loop through reading all of the types. Forward types will make use of the + // opaque types just inserted. + // + for (unsigned i = 0; i != NumEntries; ++i) { + const Type *NewTy = ParseTypeConstant(Buf, EndBuf), *OldTy = Tab[i].get(); + if (NewTy == 0) throw std::string("Couldn't parse type!"); + BCR_TRACE(4, "#" << i << ": Read Type Constant: '" << NewTy << + "' Replacing: " << OldTy << "\n"); + + // Don't insertValue the new type... instead we want to replace the opaque + // type with the new concrete value... + // + + // Refine the abstract type to the new type. This causes all uses of the + // abstract type to use NewTy. This also will cause the opaque type to be + // deleted... + // + cast<DerivedType>(const_cast<Type*>(OldTy))->refineAbstractTypeTo(NewTy); + + // This should have replace the old opaque type with the new type in the + // value table... or with a preexisting type that was already in the system + assert(Tab[i] != OldTy && "refineAbstractType didn't work!"); + } + + BCR_TRACE(5, "Resulting types:\n"); + for (unsigned i = 0; i < NumEntries; ++i) { + BCR_TRACE(5, (void*)Tab[i].get() << " - " << Tab[i].get() << "\n"); + } +} + + +void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, + const unsigned char *EndBuf, + unsigned TypeID) { + + // We must check for a ConstantExpr before switching by type because + // a ConstantExpr can be of any type, and has no explicit value. 
+ // + // 0 if not expr; numArgs if is expr + unsigned isExprNumArgs = read_vbr_uint(Buf, EndBuf); + + if (isExprNumArgs) { + unsigned Opcode = read_vbr_uint(Buf, EndBuf); + const Type* Typ = getType(TypeID); + + // FIXME: Encoding of constant exprs could be much more compact! + std::vector<std::pair<const Type*,unsigned> > ArgVec; + ArgVec.reserve(isExprNumArgs); + + // Read the slot number and types of each of the arguments + for (unsigned i = 0; i != isExprNumArgs; ++i) { + unsigned ArgValSlot = read_vbr_uint(Buf, EndBuf); + unsigned ArgTypeSlot = read_vbr_uint(Buf, EndBuf); + BCR_TRACE(4, "CE Arg " << i << ": Type: '" << *getType(ArgTypeSlot) + << "' slot: " << ArgValSlot << "\n"); + + // Get the arg value from its slot if it exists, otherwise a placeholder + ArgVec.push_back(std::make_pair(getType(ArgTypeSlot), ArgValSlot)); + } + + handler->handleConstantExpression( Opcode, Typ, ArgVec ); + return; + } + + // Ok, not an ConstantExpr. We now know how to read the given type... + const Type *Ty = getType(TypeID); + switch (Ty->getPrimitiveID()) { + case Type::BoolTyID: { + unsigned Val = read_vbr_uint(Buf, EndBuf); + if (Val != 0 && Val != 1) + PARSE_ERROR("Invalid boolean value read."); + + handler->handleConstantValue( ConstantBool::get(Val == 1)); + break; + } + + case Type::UByteTyID: // Unsigned integer types... + case Type::UShortTyID: + case Type::UIntTyID: { + unsigned Val = read_vbr_uint(Buf, EndBuf); + if (!ConstantUInt::isValueValidForType(Ty, Val)) + throw std::string("Invalid unsigned byte/short/int read."); + handler->handleConstantValue( ConstantUInt::get(Ty, Val) ); + break; + } + + case Type::ULongTyID: { + handler->handleConstantValue( ConstantUInt::get(Ty, read_vbr_uint64(Buf, EndBuf)) ); + break; + } + + case Type::SByteTyID: // Signed integer types... 
+ case Type::ShortTyID: + case Type::IntTyID: { + case Type::LongTyID: + int64_t Val = read_vbr_int64(Buf, EndBuf); + if (!ConstantSInt::isValueValidForType(Ty, Val)) + throw std::string("Invalid signed byte/short/int/long read."); + handler->handleConstantValue( ConstantSInt::get(Ty, Val) ); + break; + } + + case Type::FloatTyID: { + float F; + input_data(Buf, EndBuf, &F, &F+1); + handler->handleConstantValue( ConstantFP::get(Ty, F) ); + break; + } + + case Type::DoubleTyID: { + double Val; + input_data(Buf, EndBuf, &Val, &Val+1); + handler->handleConstantValue( ConstantFP::get(Ty, Val) ); + break; + } + + case Type::TypeTyID: + PARSE_ERROR("Type constants shouldn't live in constant table!"); + break; + + case Type::ArrayTyID: { + const ArrayType *AT = cast<ArrayType>(Ty); + unsigned NumElements = AT->getNumElements(); + std::vector<unsigned> Elements; + Elements.reserve(NumElements); + while (NumElements--) // Read all of the elements of the constant. + Elements.push_back(read_vbr_uint(Buf, EndBuf)); + + handler->handleConstantArray( AT, Elements ); + break; + } + + case Type::StructTyID: { + const StructType *ST = cast<StructType>(Ty); + std::vector<unsigned> Elements; + Elements.reserve(ST->getNumElements()); + for (unsigned i = 0; i != ST->getNumElements(); ++i) + Elements.push_back(read_vbr_uint(Buf, EndBuf)); + + handler->handleConstantStruct( ST, Elements ); + } + + case Type::PointerTyID: { // ConstantPointerRef value... 
+ const PointerType *PT = cast<PointerType>(Ty); + unsigned Slot = read_vbr_uint(Buf, EndBuf); + handler->handleConstantPointer( PT, Slot ); + } + + default: + PARSE_ERROR("Don't know how to deserialize constant value of type '"+ + Ty->getDescription()); + } +} + +void AbstractBytecodeParser::ParseGlobalTypes(const unsigned char *&Buf, + const unsigned char *EndBuf) { + ParseConstantPool(Buf, EndBuf, ModuleTypes); +} + +void AbstractBytecodeParser::ParseStringConstants(const unsigned char *&Buf, + const unsigned char *EndBuf, + unsigned NumEntries ){ + for (; NumEntries; --NumEntries) { + unsigned Typ = read_vbr_uint(Buf, EndBuf); + const Type *Ty = getType(Typ); + if (!isa<ArrayType>(Ty)) + throw std::string("String constant data invalid!"); + + const ArrayType *ATy = cast<ArrayType>(Ty); + if (ATy->getElementType() != Type::SByteTy && + ATy->getElementType() != Type::UByteTy) + throw std::string("String constant data invalid!"); + + // Read character data. The type tells us how long the string is. + char Data[ATy->getNumElements()]; + input_data(Buf, EndBuf, Data, Data+ATy->getNumElements()); + + std::vector<Constant*> Elements(ATy->getNumElements()); + if (ATy->getElementType() == Type::SByteTy) + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) + Elements[i] = ConstantSInt::get(Type::SByteTy, (signed char)Data[i]); + else + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) + Elements[i] = ConstantUInt::get(Type::UByteTy, (unsigned char)Data[i]); + + // Create the constant, inserting it as needed. 
+ ConstantArray *C = cast<ConstantArray>( ConstantArray::get(ATy, Elements) ); + handler->handleConstantString( C ); + } +} + + +void AbstractBytecodeParser::ParseConstantPool(const unsigned char *&Buf, + const unsigned char *EndBuf, + TypeListTy &TypeTab) { + while (Buf < EndBuf) { + unsigned NumEntries = read_vbr_uint(Buf, EndBuf); + unsigned Typ = read_vbr_uint(Buf, EndBuf); + if (Typ == Type::TypeTyID) { + ParseTypeConstants(Buf, EndBuf, TypeTab, NumEntries); + } else if (Typ == Type::VoidTyID) { + ParseStringConstants(Buf, EndBuf, NumEntries); + } else { + BCR_TRACE(3, "Type: '" << *getType(Typ) << "' NumEntries: " + << NumEntries << "\n"); + + for (unsigned i = 0; i < NumEntries; ++i) { + ParseConstantValue(Buf, EndBuf, Typ); + } + } + } + + if (Buf > EndBuf) PARSE_ERROR("Read past end of buffer."); +} + +void AbstractBytecodeParser::ParseModuleGlobalInfo(BufPtr &Buf, BufPtr End) { + + handler->handleModuleGlobalsBegin(); + + // Read global variables... + unsigned VarType = read_vbr_uint(Buf, End); + while (VarType != Type::VoidTyID) { // List is terminated by Void + // VarType Fields: bit0 = isConstant, bit1 = hasInitializer, bit2,3,4 = + // Linkage, bit4+ = slot# + unsigned SlotNo = VarType >> 5; + unsigned LinkageID = (VarType >> 2) & 7; + bool isConstant = VarType & 1; + bool hasInitializer = VarType & 2; + GlobalValue::LinkageTypes Linkage; + + switch (LinkageID) { + case 0: Linkage = GlobalValue::ExternalLinkage; break; + case 1: Linkage = GlobalValue::WeakLinkage; break; + case 2: Linkage = GlobalValue::AppendingLinkage; break; + case 3: Linkage = GlobalValue::InternalLinkage; break; + case 4: Linkage = GlobalValue::LinkOnceLinkage; break; + default: + PARSE_ERROR("Unknown linkage type: " << LinkageID); + Linkage = GlobalValue::InternalLinkage; + break; + } + + const Type *Ty = getType(SlotNo); + if ( !Ty ) { + PARSE_ERROR("Global has no type! SlotNo=" << SlotNo); + } + + if ( !isa<PointerType>(Ty)) { + PARSE_ERROR("Global not a pointer type! 
Ty= " << Ty->getDescription()); + } + + const Type *ElTy = cast<PointerType>(Ty)->getElementType(); + + // Create the global variable... + if (hasInitializer) + handler->handleGlobalVariable( ElTy, isConstant, Linkage ); + else { + unsigned initSlot = read_vbr_uint(Buf,End); + handler->handleInitializedGV( ElTy, isConstant, Linkage, initSlot ); + } + + // Get next item + VarType = read_vbr_uint(Buf, End); + } + + // Read the function objects for all of the functions that are coming + unsigned FnSignature = read_vbr_uint(Buf, End); + while (FnSignature != Type::VoidTyID) { // List is terminated by Void + const Type *Ty = getType(FnSignature); + if (!isa<PointerType>(Ty) || + !isa<FunctionType>(cast<PointerType>(Ty)->getElementType())) { + PARSE_ERROR( "Function not a pointer to function type! Ty = " + + Ty->getDescription()); + // FIXME: what should Ty be if handler continues? + } + + // We create functions by passing the underlying FunctionType to create... + Ty = cast<PointerType>(Ty)->getElementType(); + + // Save this for later so we know type of lazily instantiated functions + FunctionSignatureList.push_back(Ty); + + handler->handleFunctionDeclaration(Ty); + + // Get Next function signature + FnSignature = read_vbr_uint(Buf, End); + } + + if (hasInconsistentModuleGlobalInfo) + align32(Buf, End); + + // This is for future proofing... in the future extra fields may be added that + // we don't understand, so we transparently ignore them. + // + Buf = End; + + handler->handleModuleGlobalsEnd(); +} + +void AbstractBytecodeParser::ParseVersionInfo(BufPtr &Buf, BufPtr EndBuf) { + unsigned Version = read_vbr_uint(Buf, EndBuf); + + // Unpack version number: low four bits are for flags, top bits = version + Module::Endianness Endianness; + Module::PointerSize PointerSize; + Endianness = (Version & 1) ? Module::BigEndian : Module::LittleEndian; + PointerSize = (Version & 2) ? 
Module::Pointer64 : Module::Pointer32; + + bool hasNoEndianness = Version & 4; + bool hasNoPointerSize = Version & 8; + + RevisionNum = Version >> 4; + + // Default values for the current bytecode version + hasInconsistentModuleGlobalInfo = false; + hasExplicitPrimitiveZeros = false; + hasRestrictedGEPTypes = false; + + switch (RevisionNum) { + case 0: // LLVM 1.0, 1.1 release version + // Base LLVM 1.0 bytecode format. + hasInconsistentModuleGlobalInfo = true; + hasExplicitPrimitiveZeros = true; + // FALL THROUGH + case 1: // LLVM 1.2 release version + // LLVM 1.2 added explicit support for emitting strings efficiently. + + // Also, it fixed the problem where the size of the ModuleGlobalInfo block + // included the size for the alignment at the end, where the rest of the + // blocks did not. + + // LLVM 1.2 and before required that GEP indices be ubyte constants for + // structures and longs for sequential types. + hasRestrictedGEPTypes = true; + + // FALL THROUGH + case 2: // LLVM 1.3 release version + break; + + default: + PARSE_ERROR("Unknown bytecode version number: " << RevisionNum); + } + + if (hasNoEndianness) Endianness = Module::AnyEndianness; + if (hasNoPointerSize) PointerSize = Module::AnyPointerSize; + + handler->handleVersionInfo(RevisionNum, Endianness, PointerSize ); +} + +void AbstractBytecodeParser::ParseModule(BufPtr &Buf, BufPtr EndBuf ) { + unsigned Type, Size; + readBlock(Buf, EndBuf, Type, Size); + if (Type != BytecodeFormat::Module || Buf+Size != EndBuf) + // Hrm, not a class? + PARSE_ERROR("Expected Module block! B: " << unsigned(intptr_t(Buf)) << + ", S: " << Size << " E: " << unsigned(intptr_t(EndBuf))); + + // Read into instance variables... 
+ ParseVersionInfo(Buf, EndBuf); + align32(Buf, EndBuf); + + bool SeenModuleGlobalInfo = false; + bool SeenGlobalTypePlane = false; + while (Buf < EndBuf) { + BufPtr OldBuf = Buf; + readBlock(Buf, EndBuf, Type, Size); + + switch (Type) { + + case BytecodeFormat::GlobalTypePlane: + if ( SeenGlobalTypePlane ) + PARSE_ERROR("Two GlobalTypePlane Blocks Encountered!"); + + ParseGlobalTypes(Buf, Buf+Size); + SeenGlobalTypePlane = true; + break; + + case BytecodeFormat::ModuleGlobalInfo: + if ( SeenModuleGlobalInfo ) + PARSE_ERROR("Two ModuleGlobalInfo Blocks Encountered!"); + ParseModuleGlobalInfo(Buf, Buf+Size); + SeenModuleGlobalInfo = true; + break; + + case BytecodeFormat::ConstantPool: + ParseConstantPool(Buf, Buf+Size, ModuleTypes); + break; + + case BytecodeFormat::Function: + ParseFunctionLazily(Buf, Buf+Size); + break; + + case BytecodeFormat::SymbolTable: + ParseSymbolTable(Buf, Buf+Size ); + break; + + default: + Buf += Size; + if (OldBuf > Buf) + { + PARSE_ERROR("Unexpected Block of Type" << Type << "encountered!" ); + } + break; + } + align32(Buf, EndBuf); + } +} + +void AbstractBytecodeParser::ParseBytecode( + BufPtr Buf, unsigned Length, + const std::string &ModuleID) { + + handler->handleStart(); + unsigned char *EndBuf = (unsigned char*)(Buf + Length); + + // Read and check signature... 
+ unsigned Sig = read(Buf, EndBuf); + if (Sig != ('l' | ('l' << 8) | ('v' << 16) | ('m' << 24))) { + PARSE_ERROR("Invalid bytecode signature: " << Sig); + } + + handler->handleModuleBegin(ModuleID); + + this->ParseModule(Buf, EndBuf); + + handler->handleModuleEnd(ModuleID); + + handler->handleFinish(); +} + +// vim: sw=2 diff --git a/lib/Bytecode/Reader/Parser.h b/lib/Bytecode/Reader/Parser.h new file mode 100644 index 0000000..027047b --- /dev/null +++ b/lib/Bytecode/Reader/Parser.h @@ -0,0 +1,178 @@ +//===-- Parser.h - Definitions internal to the reader -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Reid Spencer and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file defines the interface to the Bytecode Parser +// +//===----------------------------------------------------------------------===// + +#ifndef BYTECODE_PARSER_H +#define BYTECODE_PARSER_H + +#include "ReaderPrimitives.h" +#include "BytecodeHandler.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include <utility> +#include <vector> +#include <map> + +namespace llvm { + +struct LazyFunctionInfo { + const unsigned char *Buf, *EndBuf; + LazyFunctionInfo(const unsigned char *B = 0, const unsigned char *EB = 0) + : Buf(B), EndBuf(EB) {} +}; + +typedef std::map<const Type*, LazyFunctionInfo> LazyFunctionMap; + +class AbstractBytecodeParser { + AbstractBytecodeParser(const AbstractBytecodeParser &); // DO NOT IMPLEMENT + void operator=(const AbstractBytecodeParser &); // DO NOT IMPLEMENT +public: + AbstractBytecodeParser( BytecodeHandler* h ) { handler = h; } + ~AbstractBytecodeParser() { } + + void ParseBytecode(const unsigned char *Buf, unsigned Length, + const std::string &ModuleID); + + void dump() const { + std::cerr << "AbstractBytecodeParser instance!\n"; + } + +private: 
+ // Information about the module, extracted from the bytecode revision number. + unsigned char RevisionNum; // The rev # itself + + // Flags to distinguish LLVM 1.0 & 1.1 bytecode formats (revision #0) + + // Revision #0 had an explicit alignment of data only for the ModuleGlobalInfo + // block. This was fixed to be like all other blocks in 1.2 + bool hasInconsistentModuleGlobalInfo; + + // Revision #0 also explicitly encoded zero values for primitive types like + // int/sbyte/etc. + bool hasExplicitPrimitiveZeros; + + // Flags to control features specific the LLVM 1.2 and before (revision #1) + + // LLVM 1.2 and earlier required that getelementptr structure indices were + // ubyte constants and that sequential type indices were longs. + bool hasRestrictedGEPTypes; + + + /// CompactionTable - If a compaction table is active in the current function, + /// this is the mapping that it contains. + std::vector<Type*> CompactionTypeTable; + + // ConstantFwdRefs - This maintains a mapping between <Type, Slot #>'s and + // forward references to constants. Such values may be referenced before they + // are defined, and if so, the temporary object that they represent is held + // here. + // + typedef std::map<std::pair<const Type*,unsigned>, Constant*> ConstantRefsType; + ConstantRefsType ConstantFwdRefs; + + // TypesLoaded - This vector mirrors the Values[TypeTyID] plane. It is used + // to deal with forward references to types. + // + typedef std::vector<PATypeHolder> TypeListTy; + TypeListTy ModuleTypes; + TypeListTy FunctionTypes; + + // When the ModuleGlobalInfo section is read, we create a FunctionType object + // for each function in the module. When the function is loaded, this type is + // used to instantiate the actual function object. + std::vector<const Type*> FunctionSignatureList; + + // Constant values are read in after global variables. 
Because of this, we + // must defer setting the initializers on global variables until after module + // level constants have been read. In the mean time, this list keeps track of + // what we must do. + // + std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits; + + // For lazy reading-in of functions, we need to save away several pieces of + // information about each function: its begin and end pointer in the buffer + // and its FunctionSlot. + // + LazyFunctionMap LazyFunctionLoadMap; + + /// The handler for parsing + BytecodeHandler* handler; + +private: + const Type *AbstractBytecodeParser::getType(unsigned ID); + /// getGlobalTableType - This is just like getType, but when a compaction + /// table is in use, it is ignored. Also, no forward references or other + /// fancy features are supported. + const Type *getGlobalTableType(unsigned Slot) { + if (Slot < Type::FirstDerivedTyID) { + const Type *Ty = Type::getPrimitiveType((Type::PrimitiveID)Slot); + assert(Ty && "Not a primitive type ID?"); + return Ty; + } + Slot -= Type::FirstDerivedTyID; + if (Slot >= ModuleTypes.size()) + throw std::string("Illegal compaction table type reference!"); + return ModuleTypes[Slot]; + } + + unsigned getGlobalTableTypeSlot(const Type *Ty) { + if (Ty->isPrimitiveType()) + return Ty->getPrimitiveID(); + TypeListTy::iterator I = find(ModuleTypes.begin(), + ModuleTypes.end(), Ty); + if (I == ModuleTypes.end()) + throw std::string("Didn't find type in ModuleTypes."); + return Type::FirstDerivedTyID + (&*I - &ModuleTypes[0]); + } + +public: + typedef const unsigned char* BufPtr; + void ParseModule (BufPtr &Buf, BufPtr End); + void ParseNextFunction (Type* FType) ; + void ParseAllFunctionBodies (); + +private: + void ParseVersionInfo (BufPtr &Buf, BufPtr End); + void ParseModuleGlobalInfo (BufPtr &Buf, BufPtr End); + void ParseSymbolTable (BufPtr &Buf, BufPtr End); + void ParseFunctionLazily (BufPtr &Buf, BufPtr End); + void ParseFunctionBody (const Type* FType, BufPtr &Buf, 
BufPtr EndBuf); + void ParseCompactionTable (BufPtr &Buf, BufPtr End); + void ParseGlobalTypes (BufPtr &Buf, BufPtr End); + + void ParseBasicBlock (BufPtr &Buf, BufPtr End, unsigned BlockNo); + unsigned ParseInstructionList(BufPtr &Buf, BufPtr End); + + bool ParseInstruction (BufPtr &Buf, BufPtr End, + std::vector<unsigned>& Args); + + void ParseConstantPool (BufPtr &Buf, BufPtr End, TypeListTy& List); + void ParseConstantValue (BufPtr &Buf, BufPtr End, unsigned TypeID); + void ParseTypeConstants (BufPtr &Buf, BufPtr End, TypeListTy &Tab, + unsigned NumEntries); + const Type *ParseTypeConstant(BufPtr &Buf, BufPtr End); + void ParseStringConstants (BufPtr &Buf, BufPtr End, unsigned NumEntries); + +}; + + +static inline void readBlock(const unsigned char *&Buf, + const unsigned char *EndBuf, + unsigned &Type, unsigned &Size) { + Type = read(Buf, EndBuf); + Size = read(Buf, EndBuf); +} + +} // End llvm namespace + +#endif +// vim: sw=2 |