about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Reid Spencer <rspencer@reidspencer.com> 2004-11-06 23:17:23 +0000
committer Reid Spencer <rspencer@reidspencer.com> 2004-11-06 23:17:23 +0000
commit 17f52c5c4617e6e1853fa7ac9335b277a90be7f4 (patch)
tree d0afda75ef0a06af8201397ef86292e8b4480b7b
parent 2334e6d908eccb00dcb5ef5f5a8b70a1b85525f1 (diff)
download external_llvm-17f52c5c4617e6e1853fa7ac9335b277a90be7f4.zip
external_llvm-17f52c5c4617e6e1853fa7ac9335b277a90be7f4.tar.gz
external_llvm-17f52c5c4617e6e1853fa7ac9335b277a90be7f4.tar.bz2
Add support for compressed bytecode
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@17535 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- include/llvm/Bytecode/Writer.h 3
-rw-r--r-- lib/Bytecode/Reader/Reader.cpp 43
-rw-r--r-- lib/Bytecode/Reader/Reader.h 17
-rw-r--r-- lib/Bytecode/Writer/Writer.cpp 97
-rw-r--r-- tools/llvm-as/llvm-as.cpp 5
5 files changed, 138 insertions, 27 deletions
diff --git a/include/llvm/Bytecode/Writer.h b/include/llvm/Bytecode/Writer.h
index ae762b4..4a6f5f0 100644
--- a/include/llvm/Bytecode/Writer.h
+++ b/include/llvm/Bytecode/Writer.h
@@ -28,7 +28,8 @@
namespace llvm {
class Module;
- void WriteBytecodeToFile(const Module *M, std::ostream &Out);
+ void WriteBytecodeToFile(const Module *M, std::ostream &Out,
+ bool compress = false);
} // End llvm namespace
#endif
diff --git a/lib/Bytecode/Reader/Reader.cpp b/lib/Bytecode/Reader/Reader.cpp
index 3501d87..bedfa7e 100644
--- a/lib/Bytecode/Reader/Reader.cpp
+++ b/lib/Bytecode/Reader/Reader.cpp
@@ -24,6 +24,7 @@
#include "llvm/SymbolTable.h"
#include "llvm/Bytecode/Format.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/Compressor.h"
#include "llvm/ADT/StringExtras.h"
#include <sstream>
#include <algorithm>
@@ -2152,6 +2153,22 @@ void BytecodeReader::ParseModule() {
error("Function declared, but bytecode stream ended before definition");
}
+static unsigned GetUncompressionBuffer(char*&buff, unsigned& sz, void* ctxt){
+ BytecodeReader::BufferInfo* bi =
+ reinterpret_cast<BytecodeReader::BufferInfo*>(ctxt);
+ unsigned new_size = bi->size * 2;
+ if (bi->buff == 0 ) {
+ buff = bi->buff = (char*) malloc(new_size);
+ sz = new_size;
+ } else {
+ bi->buff = (char*) ::realloc(bi->buff, new_size);
+ buff = bi->buff + bi->size;
+ sz = bi->size;
+ }
+ bi->size = new_size;
+ return (bi->buff == 0 ? 1 : 0);
+}
+
/// This function completely parses a bytecode buffer given by the \p Buf
/// and \p Length parameters.
void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length,
@@ -2167,9 +2184,25 @@ void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length,
if (Handler) Handler->handleStart(TheModule, Length);
// Read and check signature...
- unsigned Sig = read_uint();
- if (Sig != ('l' | ('l' << 8) | ('v' << 16) | ('m' << 24))) {
- error("Invalid bytecode signature: " + utostr(Sig));
+ bool compressed =
+ (Buf[0] == 0xEC && Buf[1] == 0xEC && Buf[2] == 0xF6 && Buf[3] == 0xED);
+
+ if (compressed) {
+ bi.size = Length * 2;;
+ // Bytecode is compressed, have to decompress it first.
+ unsigned uncompressedLength = Compressor::decompress((char*)Buf+4,Length-4,
+ GetUncompressionBuffer, (void*) &bi);
+
+ At = MemStart = BlockStart = Buf = (BufPtr) bi.buff;
+ MemEnd = BlockEnd = Buf + uncompressedLength;
+
+ } else {
+ if (!(Buf[0] == 'l' && Buf[1] == 'l' && Buf[2] == 'v' && Buf[3] == 'm'))
+ error("Invalid bytecode signature: " +
+ utohexstr(Buf[0]) + utohexstr(Buf[1]) + utohexstr(Buf[2]) +
+ utohexstr(Buf[3]));
+ else
+ At += 4; // skip the bytes
}
// Tell the handler we're starting a module
@@ -2215,6 +2248,8 @@ void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length,
freeState();
delete TheModule;
TheModule = 0;
+ if (bi.buff != 0 )
+ ::free(bi.buff);
throw;
} catch (...) {
std::string msg("Unknown Exception Occurred");
@@ -2222,6 +2257,8 @@ void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length,
freeState();
delete TheModule;
TheModule = 0;
+ if (bi.buff != 0 )
+ ::free(bi.buff);
throw msg;
}
}
diff --git a/lib/Bytecode/Reader/Reader.h b/lib/Bytecode/Reader/Reader.h
index 89d079d..49d8173 100644
--- a/lib/Bytecode/Reader/Reader.h
+++ b/lib/Bytecode/Reader/Reader.h
@@ -47,10 +47,14 @@ public:
BytecodeReader(
BytecodeHandler* h = 0
) {
- Handler = h;
+ Handler = h;
}
- ~BytecodeReader() { freeState(); }
+ ~BytecodeReader() {
+ freeState();
+ if (bi.buff != 0)
+ ::free(bi.buff);
+ }
/// @}
/// @name Types
@@ -63,6 +67,13 @@ public:
/// @brief The type used for a vector of potentially abstract types
typedef std::vector<PATypeHolder> TypeListTy;
+ /// @brief An internal buffer object used for handling decompression
+ struct BufferInfo {
+ char* buff;
+ unsigned size;
+ BufferInfo() { buff = 0; size = 0; }
+ };
+
/// This type provides a vector of Value* via the User class for
/// storage of Values that have been constructed when reading the
/// bytecode. Because of forward referencing, constant replacement
@@ -235,6 +246,8 @@ protected:
/// @name Data
/// @{
private:
+ BufferInfo bi; ///< Buffer info for decompression
+
BufPtr MemStart; ///< Start of the memory buffer
BufPtr MemEnd; ///< End of the memory buffer
BufPtr BlockStart; ///< Start of current block being parsed
diff --git a/lib/Bytecode/Writer/Writer.cpp b/lib/Bytecode/Writer/Writer.cpp
index 4d988f9..15d6051 100644
--- a/lib/Bytecode/Writer/Writer.cpp
+++ b/lib/Bytecode/Writer/Writer.cpp
@@ -25,6 +25,7 @@
#include "llvm/Module.h"
#include "llvm/SymbolTable.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/Compressor.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include <cstring>
@@ -1085,36 +1086,92 @@ void BytecodeWriter::outputSymbolTable(const SymbolTable &MST) {
}
}
-void llvm::WriteBytecodeToFile(const Module *M, std::ostream &Out) {
+struct CompressionContext {
+ char* chunk;
+ unsigned sz;
+ unsigned written;
+ std::ostream* Out;
+};
+
+static unsigned WriteCompressedData(char*&buffer, unsigned& size, void* context) {
+ CompressionContext* ctxt = reinterpret_cast<CompressionContext*>(context);
+ if (ctxt->chunk != 0 && ctxt->sz > 0 ) {
+ ctxt->Out->write(ctxt->chunk,ctxt->sz);
+ delete [] ctxt->chunk;
+ ctxt->written += ctxt->sz;
+ }
+ size = ctxt->sz = 1024*1024;
+ buffer = ctxt->chunk = new char [ctxt->sz];
+ return (ctxt->chunk == 0 ? 1 : 0);
+}
+
+void llvm::WriteBytecodeToFile(const Module *M, std::ostream &Out,
+ bool compress ) {
assert(M && "You can't write a null module!!");
+ // Create a vector of unsigned char for the bytecode output. We
+ // reserve 256KBytes of space in the vector so that we avoid doing
+ // lots of little allocations. 256KBytes is sufficient for a large
+ // proportion of the bytecode files we will encounter. Larger files
+ // will be automatically doubled in size as needed (std::vector
+ // behavior).
std::vector<unsigned char> Buffer;
- Buffer.reserve(64 * 1024); // avoid lots of little reallocs
+ Buffer.reserve(256 * 1024);
- // This object populates buffer for us...
+ // The BytecodeWriter populates Buffer for us.
BytecodeWriter BCW(Buffer, M);
- // Keep track of how much we've written...
+ // Keep track of how much we've written
BytesWritten += Buffer.size();
- // Okay, write the deque out to the ostream now... the deque is not
- // sequential in memory, however, so write out as much as possible in big
- // chunks, until we're done.
- //
- for (std::vector<unsigned char>::const_iterator I = Buffer.begin(),
- E = Buffer.end(); I != E; ) {
- // Scan to see how big this chunk is...
- const unsigned char *ChunkPtr = &*I;
- const unsigned char *LastPtr = ChunkPtr;
- while (I != E) {
- const unsigned char *ThisPtr = &*++I;
- if (++LastPtr != ThisPtr) // Advanced by more than a byte of memory?
- break;
+ // Determine start and end points of the Buffer
+ std::vector<unsigned char>::iterator I = Buffer.begin();
+ const unsigned char *FirstByte = &(*I);
+ const unsigned char *LastByte = FirstByte + Buffer.size();
+
+ // If we're supposed to compress this mess ...
+ if (compress) {
+
+ // We signal compression by using an alternate magic number for the
+ // file. The compressed bytecode file's magic number is the same as
+ // the uncompressed one but with the high bits set. So, "llvm", which
+ // is 0x6C 0x6C 0x76 0x6D becomes 0xEC 0xEC 0xF6 0xED
+ unsigned char compressed_magic[4];
+ compressed_magic[0] = 0xEC; // 'l' + 0x80
+ compressed_magic[1] = 0xEC; // 'l' + 0x80
+ compressed_magic[2] = 0xF6; // 'v' + 0x80
+ compressed_magic[3] = 0xED; // 'm' + 0x80
+
+ Out.write((char*)compressed_magic,4);
+
+ // Do the compression, writing as we go.
+ CompressionContext ctxt;
+ ctxt.chunk = 0;
+ ctxt.sz = 0;
+ ctxt.written = 0;
+ ctxt.Out = &Out;
+
+ // Compress everything after the magic number (which we'll alter)
+ uint64_t zipSize = Compressor::compress(
+ (char*)(FirstByte+4), // Skip the magic number
+ Buffer.size()-4, // Skip the magic number
+ WriteCompressedData, // use this function to allocate / write
+ Compressor::COMP_TYPE_BZIP2, // Try bzip2 compression first
+ (void*)&ctxt // Keep track of allocated memory
+ );
+
+ if (ctxt.chunk && ctxt.sz > 0) {
+ Out.write(ctxt.chunk, zipSize - ctxt.written);
+ delete [] ctxt.chunk;
}
-
- // Write out the chunk...
- Out.write((char*)ChunkPtr, unsigned(LastPtr-ChunkPtr));
+ } else {
+
+ // We're not compressing, so just write the entire block.
+ Out.write((char*)FirstByte, LastByte-FirstByte);
+
}
+
+ // make sure it hits disk now
Out.flush();
}
diff --git a/tools/llvm-as/llvm-as.cpp b/tools/llvm-as/llvm-as.cpp
index 1d48f7a..edefe69 100644
--- a/tools/llvm-as/llvm-as.cpp
+++ b/tools/llvm-as/llvm-as.cpp
@@ -40,6 +40,9 @@ Force("f", cl::desc("Overwrite output files"));
static cl::opt<bool>
DumpAsm("d", cl::desc("Print assembly as parsed"), cl::Hidden);
+static cl::opt<bool> Compress("compress", cl::Optional,
+ cl::desc("Compress the generated bytecode"));
+
static cl::opt<bool>
DisableVerify("disable-verify", cl::Hidden,
cl::desc("Do not run verifier on input LLVM (dangerous!)"));
@@ -119,7 +122,7 @@ int main(int argc, char **argv) {
return 1;
}
- WriteBytecodeToFile(M.get(), *Out);
+ WriteBytecodeToFile(M.get(), *Out, Compress);
} catch (const ParseException &E) {
std::cerr << argv[0] << ": " << E.getMessage() << "\n";
return 1;