diff options
author | Reid Spencer <rspencer@reidspencer.com> | 2004-11-06 23:17:23 +0000 |
---|---|---|
committer | Reid Spencer <rspencer@reidspencer.com> | 2004-11-06 23:17:23 +0000 |
commit | 17f52c5c4617e6e1853fa7ac9335b277a90be7f4 (patch) | |
tree | d0afda75ef0a06af8201397ef86292e8b4480b7b | |
parent | 2334e6d908eccb00dcb5ef5f5a8b70a1b85525f1 (diff) | |
download | external_llvm-17f52c5c4617e6e1853fa7ac9335b277a90be7f4.zip external_llvm-17f52c5c4617e6e1853fa7ac9335b277a90be7f4.tar.gz external_llvm-17f52c5c4617e6e1853fa7ac9335b277a90be7f4.tar.bz2 |
Add support for compressed bytecode
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@17535 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/llvm/Bytecode/Writer.h | 3 | ||||
-rw-r--r-- | lib/Bytecode/Reader/Reader.cpp | 43 | ||||
-rw-r--r-- | lib/Bytecode/Reader/Reader.h | 17 | ||||
-rw-r--r-- | lib/Bytecode/Writer/Writer.cpp | 97 | ||||
-rw-r--r-- | tools/llvm-as/llvm-as.cpp | 5 |
5 files changed, 138 insertions, 27 deletions
diff --git a/include/llvm/Bytecode/Writer.h b/include/llvm/Bytecode/Writer.h index ae762b4..4a6f5f0 100644 --- a/include/llvm/Bytecode/Writer.h +++ b/include/llvm/Bytecode/Writer.h @@ -28,7 +28,8 @@ namespace llvm { class Module; - void WriteBytecodeToFile(const Module *M, std::ostream &Out); + void WriteBytecodeToFile(const Module *M, std::ostream &Out, + bool compress = false); } // End llvm namespace #endif diff --git a/lib/Bytecode/Reader/Reader.cpp b/lib/Bytecode/Reader/Reader.cpp index 3501d87..bedfa7e 100644 --- a/lib/Bytecode/Reader/Reader.cpp +++ b/lib/Bytecode/Reader/Reader.cpp @@ -24,6 +24,7 @@ #include "llvm/SymbolTable.h" #include "llvm/Bytecode/Format.h" #include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/Compressor.h" #include "llvm/ADT/StringExtras.h" #include <sstream> #include <algorithm> @@ -2152,6 +2153,22 @@ void BytecodeReader::ParseModule() { error("Function declared, but bytecode stream ended before definition"); } +static unsigned GetUncompressionBuffer(char*&buff, unsigned& sz, void* ctxt){ + BytecodeReader::BufferInfo* bi = + reinterpret_cast<BytecodeReader::BufferInfo*>(ctxt); + unsigned new_size = bi->size * 2; + if (bi->buff == 0 ) { + buff = bi->buff = (char*) malloc(new_size); + sz = new_size; + } else { + bi->buff = (char*) ::realloc(bi->buff, new_size); + buff = bi->buff + bi->size; + sz = bi->size; + } + bi->size = new_size; + return (bi->buff == 0 ? 1 : 0); +} + /// This function completely parses a bytecode buffer given by the \p Buf /// and \p Length parameters. void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length, @@ -2167,9 +2184,25 @@ void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length, if (Handler) Handler->handleStart(TheModule, Length); // Read and check signature... - unsigned Sig = read_uint(); - if (Sig != ('l' | ('l' << 8) | ('v' << 16) | ('m' << 24))) { - error("Invalid bytecode signature: " + utostr(Sig)); + bool compressed = + (Buf[0] == 0xEC && Buf[1] == 0xEC && Buf[2] == 0xF6 && Buf[3] == 0xED); + + if (compressed) { + bi.size = Length * 2;; + // Bytecode is compressed, have to decompress it first. + unsigned uncompressedLength = Compressor::decompress((char*)Buf+4,Length-4, + GetUncompressionBuffer, (void*) &bi); + + At = MemStart = BlockStart = Buf = (BufPtr) bi.buff; + MemEnd = BlockEnd = Buf + uncompressedLength; + + } else { + if (!(Buf[0] == 'l' && Buf[1] == 'l' && Buf[2] == 'v' && Buf[3] == 'm')) + error("Invalid bytecode signature: " + + utohexstr(Buf[0]) + utohexstr(Buf[1]) + utohexstr(Buf[2]) + + utohexstr(Buf[3])); + else + At += 4; // skip the bytes } // Tell the handler we're starting a module @@ -2215,6 +2248,8 @@ void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length, freeState(); delete TheModule; TheModule = 0; + if (bi.buff != 0 ) + ::free(bi.buff); throw; } catch (...) { std::string msg("Unknown Exception Occurred"); @@ -2222,6 +2257,8 @@ void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length, freeState(); delete TheModule; TheModule = 0; + if (bi.buff != 0 ) + ::free(bi.buff); throw msg; } } diff --git a/lib/Bytecode/Reader/Reader.h b/lib/Bytecode/Reader/Reader.h index 89d079d..49d8173 100644 --- a/lib/Bytecode/Reader/Reader.h +++ b/lib/Bytecode/Reader/Reader.h @@ -47,10 +47,14 @@ public: BytecodeReader( BytecodeHandler* h = 0 ) { - Handler = h; + Handler = h; } - ~BytecodeReader() { freeState(); } + ~BytecodeReader() { + freeState(); + if (bi.buff != 0) + ::free(bi.buff); + } /// @} /// @name Types @@ -63,6 +67,13 @@ public: /// @brief The type used for a vector of potentially abstract types typedef std::vector<PATypeHolder> TypeListTy; + /// @brief An internal buffer object used for handling decompression + struct BufferInfo { + char* buff; + unsigned size; + BufferInfo() { buff = 0; size = 0; } + }; + /// This type provides a vector of Value* via the User class for /// storage of Values that have been constructed when reading the /// bytecode. Because of forward referencing, constant replacement @@ -235,6 +246,8 @@ protected: /// @name Data /// @{ private: + BufferInfo bi; ///< Buffer info for decompression + BufPtr MemStart; ///< Start of the memory buffer BufPtr MemEnd; ///< End of the memory buffer BufPtr BlockStart; ///< Start of current block being parsed diff --git a/lib/Bytecode/Writer/Writer.cpp b/lib/Bytecode/Writer/Writer.cpp index 4d988f9..15d6051 100644 --- a/lib/Bytecode/Writer/Writer.cpp +++ b/lib/Bytecode/Writer/Writer.cpp @@ -25,6 +25,7 @@ #include "llvm/Module.h" #include "llvm/SymbolTable.h" #include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/Compressor.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include <cstring> @@ -1085,36 +1086,92 @@ void BytecodeWriter::outputSymbolTable(const SymbolTable &MST) { } } -void llvm::WriteBytecodeToFile(const Module *M, std::ostream &Out) { +struct CompressionContext { + char* chunk; + unsigned sz; + unsigned written; + std::ostream* Out; +}; + +static unsigned WriteCompressedData(char*&buffer, unsigned& size, void* context) { + CompressionContext* ctxt = reinterpret_cast<CompressionContext*>(context); + if (ctxt->chunk != 0 && ctxt->sz > 0 ) { + ctxt->Out->write(ctxt->chunk,ctxt->sz); + delete [] ctxt->chunk; + ctxt->written += ctxt->sz; + } + size = ctxt->sz = 1024*1024; + buffer = ctxt->chunk = new char [ctxt->sz]; + return (ctxt->chunk == 0 ? 1 : 0); +} + +void llvm::WriteBytecodeToFile(const Module *M, std::ostream &Out, + bool compress ) { assert(M && "You can't write a null module!!"); + // Create a vector of unsigned char for the bytecode output. We + // reserve 256KBytes of space in the vector so that we avoid doing + // lots of little allocations. 256KBytes is sufficient for a large + // proportion of the bytecode files we will encounter. Larger files + // will be automatically doubled in size as needed (std::vector + // behavior). std::vector<unsigned char> Buffer; - Buffer.reserve(64 * 1024); // avoid lots of little reallocs + Buffer.reserve(256 * 1024); - // This object populates buffer for us... + // The BytecodeWriter populates Buffer for us. BytecodeWriter BCW(Buffer, M); - // Keep track of how much we've written... + // Keep track of how much we've written BytesWritten += Buffer.size(); - // Okay, write the deque out to the ostream now... the deque is not - // sequential in memory, however, so write out as much as possible in big - // chunks, until we're done. - // - for (std::vector<unsigned char>::const_iterator I = Buffer.begin(), - E = Buffer.end(); I != E; ) { - // Scan to see how big this chunk is... - const unsigned char *ChunkPtr = &*I; - const unsigned char *LastPtr = ChunkPtr; - while (I != E) { - const unsigned char *ThisPtr = &*++I; - if (++LastPtr != ThisPtr) // Advanced by more than a byte of memory? - break; + // Determine start and end points of the Buffer + std::vector<unsigned char>::iterator I = Buffer.begin(); + const unsigned char *FirstByte = &(*I); + const unsigned char *LastByte = FirstByte + Buffer.size(); + + // If we're supposed to compress this mess ... + if (compress) { + + // We signal compression by using an alternate magic number for the + // file. The compressed bytecode file's magic number is the same as + // the uncompressed one but with the high bits set. So, "llvm", which + // is 0x6C 0x6C 0x76 0x6D becomes 0xEC 0xEC 0xF6 0xED + unsigned char compressed_magic[4]; + compressed_magic[0] = 0xEC; // 'l' + 0x80 + compressed_magic[1] = 0xEC; // 'l' + 0x80 + compressed_magic[2] = 0xF6; // 'v' + 0x80 + compressed_magic[3] = 0xED; // 'm' + 0x80 + + Out.write((char*)compressed_magic,4); + + // Do the compression, writing as we go. + CompressionContext ctxt; + ctxt.chunk = 0; + ctxt.sz = 0; + ctxt.written = 0; + ctxt.Out = &Out; + + // Compress everything after the magic number (which we'll alter) + uint64_t zipSize = Compressor::compress( + (char*)(FirstByte+4), // Skip the magic number + Buffer.size()-4, // Skip the magic number + WriteCompressedData, // use this function to allocate / write + Compressor::COMP_TYPE_BZIP2, // Try bzip2 compression first + (void*)&ctxt // Keep track of allocated memory + ); + + if (ctxt.chunk && ctxt.sz > 0) { + Out.write(ctxt.chunk, zipSize - ctxt.written); + delete [] ctxt.chunk; } - - // Write out the chunk... - Out.write((char*)ChunkPtr, unsigned(LastPtr-ChunkPtr)); + } else { + + // We're not compressing, so just write the entire block. + Out.write((char*)FirstByte, LastByte-FirstByte); + } + + // make sure it hits disk now Out.flush(); } diff --git a/tools/llvm-as/llvm-as.cpp b/tools/llvm-as/llvm-as.cpp index 1d48f7a..edefe69 100644 --- a/tools/llvm-as/llvm-as.cpp +++ b/tools/llvm-as/llvm-as.cpp @@ -40,6 +40,9 @@ Force("f", cl::desc("Overwrite output files")); static cl::opt<bool> DumpAsm("d", cl::desc("Print assembly as parsed"), cl::Hidden); +static cl::opt<bool> Compress("compress", cl::Optional, + cl::desc("Compress the generated bytecode")); + static cl::opt<bool> DisableVerify("disable-verify", cl::Hidden, cl::desc("Do not run verifier on input LLVM (dangerous!)")); @@ -119,7 +122,7 @@ int main(int argc, char **argv) { return 1; } - WriteBytecodeToFile(M.get(), *Out); + WriteBytecodeToFile(M.get(), *Out, Compress); } catch (const ParseException &E) { std::cerr << argv[0] << ": " << E.getMessage() << "\n"; return 1; |