diff options
-rw-r--r-- | docs/BitCodeFormat.html | 40 | ||||
-rw-r--r-- | include/llvm/Bitcode/BitstreamWriter.h | 14 | ||||
-rw-r--r-- | lib/Bitcode/Reader/BitcodeReader.cpp | 52 | ||||
-rw-r--r-- | lib/Bitcode/Writer/BitcodeWriter.cpp | 75 | ||||
-rw-r--r-- | lib/System/Path.cpp | 7 |
5 files changed, 179 insertions, 9 deletions
diff --git a/docs/BitCodeFormat.html b/docs/BitCodeFormat.html index 4adf75e..ed9bd08 100644 --- a/docs/BitCodeFormat.html +++ b/docs/BitCodeFormat.html @@ -22,6 +22,8 @@ <li><a href="#stdblocks">Standard Blocks</a></li> </ol> </li> + <li><a href="#wrapper">Bitcode Wrapper Format</a> + </li> <li><a href="#llvmir">LLVM IR Encoding</a> <ol> <li><a href="#basics">Basics</a></li> @@ -65,8 +67,12 @@ Unlike XML, the bitstream format is a binary encoding, and unlike XML it provides a mechanism for the file to self-describe "abbreviations", which are effectively size optimizations for the content.</p> -<p>This document first describes the LLVM bitstream format, then describes the -record structure used by LLVM IR files. +<p>LLVM IR files may be optionally embedded into a <a +href="#wrapper">wrapper</a> structure that makes it easy to embed extra data +along with LLVM IR files.</p> + +<p>This document first describes the LLVM bitstream format, describes the +wrapper format, then describes the record structure used by LLVM IR files. </p> </div> @@ -545,6 +551,36 @@ corresponding blocks. It is not safe to skip them. </div> <!-- *********************************************************************** --> +<div class="doc_section"> <a name="wrapper">Bitcode Wrapper Format</a></div> +<!-- *********************************************************************** --> + +<div class="doc_text"> + +<p>Bitcode files for LLVM IR may optionally be wrapped in a simple wrapper +structure. This structure contains a simple header that indicates the offset +and size of the embedded BC file. This allows additional information to be +stored alongside the BC file. The structure of this file header is: +</p> + +<p> +<pre> +[Magic<sub>32</sub>, + Version<sub>32</sub>, + Offset<sub>32</sub>, + Size<sub>32</sub>, + CPUType<sub>32</sub>] +</pre></p> + +<p>Each of the fields is a 32-bit field stored in little endian form (as with +the rest of the bitcode file fields). 
The Magic number is always +<tt>0x0B17C0DE</tt> and the version is currently always <tt>0</tt>. The Offset +field is the offset in bytes to the start of the bitcode stream in the file, and +the Size field is the size in bytes of the stream. CPUType is a target-specific +value that can be used to encode the CPU of the target.</p> +</div> + + +<!-- *********************************************************************** --> <div class="doc_section"> <a name="llvmir">LLVM IR Encoding</a></div> <!-- *********************************************************************** --> diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h index 3b7e405..f76bb88 100644 --- a/include/llvm/Bitcode/BitstreamWriter.h +++ b/include/llvm/Bitcode/BitstreamWriter.h @@ -157,6 +157,15 @@ public: Emit(Val, CurCodeSize); } + // BackpatchWord - Backpatch a 32-bit word in the output with the specified + // value. + void BackpatchWord(unsigned ByteNo, unsigned NewWord) { + Out[ByteNo++] = (unsigned char)(NewWord >> 0); + Out[ByteNo++] = (unsigned char)(NewWord >> 8); + Out[ByteNo++] = (unsigned char)(NewWord >> 16); + Out[ByteNo ] = (unsigned char)(NewWord >> 24); + } + //===--------------------------------------------------------------------===// // Block Manipulation //===--------------------------------------------------------------------===// @@ -227,10 +236,7 @@ public: unsigned ByteNo = B.StartSizeWord*4; // Update the block size field in the header of this sub-block. - Out[ByteNo++] = (unsigned char)(SizeInWords >> 0); - Out[ByteNo++] = (unsigned char)(SizeInWords >> 8); - Out[ByteNo++] = (unsigned char)(SizeInWords >> 16); - Out[ByteNo++] = (unsigned char)(SizeInWords >> 24); + BackpatchWord(ByteNo, SizeInWords); // Restore the inner block's code size and abbrev table. 
CurCodeSize = B.PrevCodeSize; diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 3fc6b17..a8c62be 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1184,6 +1184,47 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) { return Error("Premature end of bitstream"); } +/// SkipWrapperHeader - Some systems wrap bc files with a special header for +/// padding or other reasons. The format of this header is: +/// +/// struct bc_header { +/// uint32_t Magic; // 0x0B17C0DE +/// uint32_t Version; // Version, currently always 0. +/// uint32_t BitcodeOffset; // Offset to traditional bitcode file. +/// uint32_t BitcodeSize; // Size of traditional bitcode file. +/// ... potentially other gunk ... +/// }; +/// +/// This function is called when we find a file with a matching magic number. +/// In this case, skip down to the subsection of the file that is actually a BC +/// file. +static bool SkipWrapperHeader(unsigned char *&BufPtr, unsigned char *&BufEnd) { + enum { + KnownHeaderSize = 4*4, // Size of header we read. + OffsetField = 2*4, // Offset in bytes to Offset field. + SizeField = 3*4 // Offset in bytes to Size field. + }; + + + // Must contain the header! + if (BufEnd-BufPtr < KnownHeaderSize) return true; + + unsigned Offset = ( BufPtr[OffsetField ] | + (BufPtr[OffsetField+1] << 8) | + (BufPtr[OffsetField+2] << 16) | + (BufPtr[OffsetField+3] << 24)); + unsigned Size = ( BufPtr[SizeField ] | + (BufPtr[SizeField +1] << 8) | + (BufPtr[SizeField +2] << 16) | + (BufPtr[SizeField +3] << 24)); + + // Verify that Offset+Size fits in the file. 
+ if (Offset+Size > unsigned(BufEnd-BufPtr)) + return true; + BufPtr += Offset; + BufEnd = BufPtr+Size; + return false; +} bool BitcodeReader::ParseBitcode() { TheModule = 0; @@ -1192,7 +1233,16 @@ bool BitcodeReader::ParseBitcode() { return Error("Bitcode stream should be a multiple of 4 bytes in length"); unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart(); - Stream.init(BufPtr, BufPtr+Buffer->getBufferSize()); + unsigned char *BufEnd = BufPtr+Buffer->getBufferSize(); + + // If we have a wrapper header, parse it and ignore the non-bc file contents. + // The magic number is 0x0B17C0DE stored in little endian. + if (BufPtr != BufEnd && BufPtr[0] == 0xDE && BufPtr[1] == 0xC0 && + BufPtr[2] == 0x17 && BufPtr[3] == 0x0B) + if (SkipWrapperHeader(BufPtr, BufEnd)) + return Error("Invalid bitcode wrapper header"); + + Stream.init(BufPtr, BufEnd); // Sniff for the signature. if (Stream.Read(8) != 'B' || diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 0030aca..9794fac 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -1273,6 +1273,70 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) { Stream.ExitBlock(); } +/// EmitDarwinBCHeader - If generating a bc file on darwin, we have to emit a +/// header and trailer to make it compatible with the system archiver. To do +/// this we emit the following header, and then emit a trailer that pads the +/// file out to be a multiple of 16 bytes. +/// +/// struct bc_header { +/// uint32_t Magic; // 0x0B17C0DE +/// uint32_t Version; // Version, currently always 0. +/// uint32_t BitcodeOffset; // Offset to traditional bitcode file. +/// uint32_t BitcodeSize; // Size of traditional bitcode file. +/// uint32_t CPUType; // CPU specifier. +/// ... potentially more later ... +/// }; +enum { + DarwinBCSizeFieldOffset = 3*4, // Offset to bitcode_size. 
+ DarwinBCHeaderSize = 5*4 +}; + +static void EmitDarwinBCHeader(BitstreamWriter &Stream, + const std::string &TT) { + unsigned CPUType = ~0U; + + // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*. The CPUType is a + // magic number from /usr/include/mach/machine.h. It is ok to reproduce the + // specific constants here because they are implicitly part of the Darwin ABI. + enum { + DARWIN_CPU_ARCH_ABI64 = 0x01000000, + DARWIN_CPU_TYPE_X86 = 7, + DARWIN_CPU_TYPE_POWERPC = 18 + }; + + if (TT.find("x86_64-") == 0) + CPUType = DARWIN_CPU_TYPE_X86 | DARWIN_CPU_ARCH_ABI64; + else if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' && + TT[4] == '-' && TT[1] - '3' < 6) + CPUType = DARWIN_CPU_TYPE_X86; + else if (TT.find("powerpc-") == 0) + CPUType = DARWIN_CPU_TYPE_POWERPC; + else if (TT.find("powerpc64-") == 0) + CPUType = DARWIN_CPU_TYPE_POWERPC | DARWIN_CPU_ARCH_ABI64; + + // Traditional Bitcode starts after header. + unsigned BCOffset = DarwinBCHeaderSize; + + Stream.Emit(0x0B17C0DE, 32); + Stream.Emit(0 , 32); // Version. + Stream.Emit(BCOffset , 32); + Stream.Emit(0 , 32); // Filled in later. + Stream.Emit(CPUType , 32); +} + +/// EmitDarwinBCTrailer - Emit the darwin epilog after the bitcode file and +/// finalize the header. +static void EmitDarwinBCTrailer(BitstreamWriter &Stream, unsigned BufferSize) { + // Update the size field in the header. + Stream.BackpatchWord(DarwinBCSizeFieldOffset, BufferSize-DarwinBCHeaderSize); + + // If the file is not a multiple of 16 bytes, insert dummy padding. + while (BufferSize & 15) { + Stream.Emit(0, 8); + ++BufferSize; + } +} + /// WriteBitcodeToFile - Write the specified module to the specified output /// stream. @@ -1282,6 +1346,11 @@ void llvm::WriteBitcodeToFile(const Module *M, std::ostream &Out) { Buffer.reserve(256*1024); + // If this is darwin, emit a file header and trailer if needed. 
+ bool isDarwin = M->getTargetTriple().find("-darwin") != std::string::npos; + if (isDarwin) + EmitDarwinBCHeader(Stream, M->getTargetTriple()); + // Emit the file header. Stream.Emit((unsigned)'B', 8); Stream.Emit((unsigned)'C', 8); @@ -1292,10 +1361,14 @@ void llvm::WriteBitcodeToFile(const Module *M, std::ostream &Out) { // Emit the module. WriteModule(M, Stream); + + if (isDarwin) + EmitDarwinBCTrailer(Stream, Buffer.size()); + // If writing to stdout, set binary mode. if (llvm::cout == Out) - sys::Program::ChangeStdoutToBinary(); + sys::Program::ChangeStdoutToBinary(); // Write the generated bitstream to "Out". Out.write((char*)&Buffer.front(), Buffer.size()); diff --git a/lib/System/Path.cpp b/lib/System/Path.cpp index fbb6b66..88479fe 100644 --- a/lib/System/Path.cpp +++ b/lib/System/Path.cpp @@ -52,10 +52,15 @@ Path::GetLLVMConfigDir() { } LLVMFileType -sys::IdentifyFileType(const char*magic, unsigned length) { +sys::IdentifyFileType(const char *magic, unsigned length) { assert(magic && "Invalid magic number string"); assert(length >=4 && "Invalid magic number length"); switch (magic[0]) { + case 0xDE: // 0x0B17C0DE = BC wrapper + if (magic[1] == (char)0xC0 && magic[2] == (char)0x17 && + magic[3] == (char)0x0B) + return Bitcode_FileType; + break; case 'B': if (magic[1] == 'C' && magic[2] == (char)0xC0 && magic[3] == (char)0xDE) return Bitcode_FileType; |