aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--docs/BitCodeFormat.html40
-rw-r--r--include/llvm/Bitcode/BitstreamWriter.h14
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.cpp52
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp75
-rw-r--r--lib/System/Path.cpp7
5 files changed, 179 insertions, 9 deletions
diff --git a/docs/BitCodeFormat.html b/docs/BitCodeFormat.html
index 4adf75e..ed9bd08 100644
--- a/docs/BitCodeFormat.html
+++ b/docs/BitCodeFormat.html
@@ -22,6 +22,8 @@
<li><a href="#stdblocks">Standard Blocks</a></li>
</ol>
</li>
+ <li><a href="#wrapper">Bitcode Wrapper Format</a>
+ </li>
<li><a href="#llvmir">LLVM IR Encoding</a>
<ol>
<li><a href="#basics">Basics</a></li>
@@ -65,8 +67,12 @@ Unlike XML, the bitstream format is a binary encoding, and unlike XML it
provides a mechanism for the file to self-describe "abbreviations", which are
effectively size optimizations for the content.</p>
-<p>This document first describes the LLVM bitstream format, then describes the
-record structure used by LLVM IR files.
+<p>LLVM IR files may be optionally embedded into a <a
+href="#wrapper">wrapper</a> structure that makes it easy to embed extra data
+along with LLVM IR files.</p>
+
+<p>This document first describes the LLVM bitstream format, then the
+wrapper format, and finally the record structure used by LLVM IR files.
</p>
</div>
@@ -545,6 +551,36 @@ corresponding blocks. It is not safe to skip them.
</div>
<!-- *********************************************************************** -->
+<div class="doc_section"> <a name="wrapper">Bitcode Wrapper Format</a></div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>Bitcode files for LLVM IR may optionally be wrapped in a simple wrapper
+structure. This structure contains a simple header that indicates the offset
+and size of the embedded BC file. This allows additional information to be
+stored alongside the BC file. The structure of this file header is:
+</p>
+
+<p>
+<pre>
+[Magic<sub>32</sub>,
+ Version<sub>32</sub>,
+ Offset<sub>32</sub>,
+ Size<sub>32</sub>,
+ CPUType<sub>32</sub>]
+</pre></p>
+
+<p>Each field is a 32-bit value stored in little-endian form (as with
+the rest of the bitcode file fields). The Magic number is always
+<tt>0x0B17C0DE</tt> and the version is currently always <tt>0</tt>. The Offset
+field is the offset in bytes to the start of the bitcode stream in the file, and
+the Size field is the size in bytes of the stream. CPUType is a target-specific
+value that can be used to encode the CPU of the target.</p>
+</div>
+
+
+<!-- *********************************************************************** -->
<div class="doc_section"> <a name="llvmir">LLVM IR Encoding</a></div>
<!-- *********************************************************************** -->
diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h
index 3b7e405..f76bb88 100644
--- a/include/llvm/Bitcode/BitstreamWriter.h
+++ b/include/llvm/Bitcode/BitstreamWriter.h
@@ -157,6 +157,15 @@ public:
Emit(Val, CurCodeSize);
}
+  // BackpatchWord - Overwrite the four output bytes starting at ByteNo with
+  // NewWord, storing the least significant byte first.
+  void BackpatchWord(unsigned ByteNo, unsigned NewWord) {
+    for (unsigned Shift = 0; Shift != 32; Shift += 8)
+      Out[ByteNo++] = (unsigned char)(NewWord >> Shift);
+  }
+
//===--------------------------------------------------------------------===//
// Block Manipulation
//===--------------------------------------------------------------------===//
@@ -227,10 +236,7 @@ public:
unsigned ByteNo = B.StartSizeWord*4;
// Update the block size field in the header of this sub-block.
- Out[ByteNo++] = (unsigned char)(SizeInWords >> 0);
- Out[ByteNo++] = (unsigned char)(SizeInWords >> 8);
- Out[ByteNo++] = (unsigned char)(SizeInWords >> 16);
- Out[ByteNo++] = (unsigned char)(SizeInWords >> 24);
+ BackpatchWord(ByteNo, SizeInWords);
// Restore the inner block's code size and abbrev table.
CurCodeSize = B.PrevCodeSize;
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 3fc6b17..a8c62be 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -1184,6 +1184,47 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) {
return Error("Premature end of bitstream");
}
+/// SkipWrapperHeader - Some systems wrap bc files with a special header for
+/// padding or other reasons.  The format of this header is:
+///
+///   struct bc_header {
+///     uint32_t Magic;         // 0x0B17C0DE
+///     uint32_t Version;       // Version, currently always 0.
+///     uint32_t BitcodeOffset; // Offset to traditional bitcode file.
+///     uint32_t BitcodeSize;   // Size of traditional bitcode file.
+///     ... potentially other gunk ...
+///   };
+///
+/// This function is called when we find a file with a matching magic number;
+/// it does not look at the Magic or Version fields itself.  The Offset and
+/// Size fields are decoded as little-endian 32-bit values.
+///
+/// On success, BufPtr is advanced to the start of the embedded BC file, BufEnd
+/// is set to one past its last byte, and false is returned.  If the buffer is
+/// too small to hold the header, or the [Offset, Offset+Size) range does not
+/// fit inside the buffer, true is returned and both pointers are left
+/// unmodified.
+static bool SkipWrapperHeader(unsigned char *&BufPtr, unsigned char *&BufEnd) {
+  enum {
+    KnownHeaderSize = 4*4,  // Size of header we read.
+    OffsetField = 2*4,      // Offset in bytes to Offset field.
+    SizeField = 3*4         // Offset in bytes to Size field.
+  };
+
+  // Must contain the header!
+  if (BufEnd-BufPtr < KnownHeaderSize) return true;
+  unsigned BufSize = unsigned(BufEnd-BufPtr);
+
+  // Widen each byte to unsigned before shifting so that a top byte >= 0x80 is
+  // never shifted into the sign bit of a promoted int.
+  unsigned Offset = ( unsigned(BufPtr[OffsetField  ])        |
+                     (unsigned(BufPtr[OffsetField+1]) << 8)  |
+                     (unsigned(BufPtr[OffsetField+2]) << 16) |
+                     (unsigned(BufPtr[OffsetField+3]) << 24));
+  unsigned Size   = ( unsigned(BufPtr[SizeField    ])        |
+                     (unsigned(BufPtr[SizeField  +1]) << 8)  |
+                     (unsigned(BufPtr[SizeField  +2]) << 16) |
+                     (unsigned(BufPtr[SizeField  +3]) << 24));
+
+  // Verify that Offset+Size fits in the file.  The sum is deliberately never
+  // formed: it can wrap around in 32 bits (e.g. Offset = 0xFFFFFFFF, Size = 2)
+  // and slip past a naive "Offset+Size > BufSize" test.
+  if (Offset > BufSize || Size > BufSize - Offset)
+    return true;
+
+  BufPtr += Offset;
+  BufEnd = BufPtr+Size;
+  return false;
+}
bool BitcodeReader::ParseBitcode() {
TheModule = 0;
@@ -1192,7 +1233,16 @@ bool BitcodeReader::ParseBitcode() {
return Error("Bitcode stream should be a multiple of 4 bytes in length");
unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart();
- Stream.init(BufPtr, BufPtr+Buffer->getBufferSize());
+ unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
+
+ // If we have a wrapper header, parse it and ignore the non-bc file contents.
+ // The magic number is 0x0B17C0DE stored in little endian.
+ if (BufPtr != BufEnd && BufPtr[0] == 0xDE && BufPtr[1] == 0xC0 &&
+ BufPtr[2] == 0x17 && BufPtr[3] == 0x0B)
+ if (SkipWrapperHeader(BufPtr, BufEnd))
+ return Error("Invalid bitcode wrapper header");
+
+ Stream.init(BufPtr, BufEnd);
// Sniff for the signature.
if (Stream.Read(8) != 'B' ||
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 0030aca..9794fac 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1273,6 +1273,70 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) {
Stream.ExitBlock();
}
+/// EmitDarwinBCHeader - If generating a bc file on darwin, we have to emit a
+/// header and trailer to make it compatible with the system archiver.  To do
+/// this we emit the following header, and then emit a trailer that pads the
+/// file out to be a multiple of 16 bytes.
+///
+///   struct bc_header {
+///     uint32_t Magic;         // 0x0B17C0DE
+///     uint32_t Version;       // Version, currently always 0.
+///     uint32_t BitcodeOffset; // Offset to traditional bitcode file.
+///     uint32_t BitcodeSize;   // Size of traditional bitcode file.
+///     uint32_t CPUType;       // CPU specifier.
+///     ... potentially more later ...
+///   };
+enum {
+  DarwinBCSizeFieldOffset = 3*4, // Offset to bitcode_size.
+  DarwinBCHeaderSize = 5*4
+};
+
+/// Emits the five 32-bit header words to Stream.  TT is the target triple
+/// string; it is only used to pick the CPUType word, which is left as ~0U when
+/// the triple matches none of the recognized prefixes.  The BitcodeSize word
+/// is emitted as 0 and is meant to be backpatched once the size is known.
+static void EmitDarwinBCHeader(BitstreamWriter &Stream,
+                               const std::string &TT) {
+  unsigned CPUType = ~0U;
+
+  // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*.  The CPUType is a
+  // magic number from /usr/include/mach/machine.h.  It is ok to reproduce the
+  // specific constants here because they are implicitly part of the Darwin ABI.
+  enum {
+    DARWIN_CPU_ARCH_ABI64      = 0x01000000,
+    DARWIN_CPU_TYPE_X86        = 7,
+    DARWIN_CPU_TYPE_POWERPC    = 18
+  };
+
+  if (TT.find("x86_64-") == 0)
+    CPUType = DARWIN_CPU_TYPE_X86 | DARWIN_CPU_ARCH_ABI64;
+  // Check both ends of the digit range explicitly.  A subtract-and-compare
+  // such as "TT[1] - '3' < 6" is evaluated in signed int, so it accepts every
+  // character below '3' and also rejects '9'.
+  else if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' &&
+           TT[4] == '-' && TT[1] >= '3' && TT[1] <= '9')
+    CPUType = DARWIN_CPU_TYPE_X86;
+  else if (TT.find("powerpc-") == 0)
+    CPUType = DARWIN_CPU_TYPE_POWERPC;
+  else if (TT.find("powerpc64-") == 0)
+    CPUType = DARWIN_CPU_TYPE_POWERPC | DARWIN_CPU_ARCH_ABI64;
+
+  // Traditional Bitcode starts after header.
+  unsigned BCOffset = DarwinBCHeaderSize;
+
+  Stream.Emit(0x0B17C0DE, 32);
+  Stream.Emit(0         , 32);  // Version.
+  Stream.Emit(BCOffset  , 32);
+  Stream.Emit(0         , 32);  // Filled in later.
+  Stream.Emit(CPUType   , 32);
+}
+
+/// EmitDarwinBCTrailer - Finish a darwin-wrapped bitcode file: backpatch the
+/// header's size field, then append the zero-byte epilog.  BufferSize is the
+/// number of bytes written so far, header included.
+static void EmitDarwinBCTrailer(BitstreamWriter &Stream, unsigned BufferSize) {
+  // The size field records everything that follows the wrapper header.
+  const unsigned BitcodeSize = BufferSize-DarwinBCHeaderSize;
+  Stream.BackpatchWord(DarwinBCSizeFieldOffset, BitcodeSize);
+
+  // Append however many zero bytes are needed to reach a multiple of 16.
+  for (unsigned Pad = (16 - (BufferSize & 15)) & 15; Pad != 0; --Pad)
+    Stream.Emit(0, 8);
+}
+
/// WriteBitcodeToFile - Write the specified module to the specified output
/// stream.
@@ -1282,6 +1346,11 @@ void llvm::WriteBitcodeToFile(const Module *M, std::ostream &Out) {
Buffer.reserve(256*1024);
+ // If this is darwin, emit a file header and trailer if needed.
+ bool isDarwin = M->getTargetTriple().find("-darwin") != std::string::npos;
+ if (isDarwin)
+ EmitDarwinBCHeader(Stream, M->getTargetTriple());
+
// Emit the file header.
Stream.Emit((unsigned)'B', 8);
Stream.Emit((unsigned)'C', 8);
@@ -1292,10 +1361,14 @@ void llvm::WriteBitcodeToFile(const Module *M, std::ostream &Out) {
// Emit the module.
WriteModule(M, Stream);
+
+ if (isDarwin)
+ EmitDarwinBCTrailer(Stream, Buffer.size());
+
// If writing to stdout, set binary mode.
if (llvm::cout == Out)
- sys::Program::ChangeStdoutToBinary();
+ sys::Program::ChangeStdoutToBinary();
// Write the generated bitstream to "Out".
Out.write((char*)&Buffer.front(), Buffer.size());
diff --git a/lib/System/Path.cpp b/lib/System/Path.cpp
index fbb6b66..88479fe 100644
--- a/lib/System/Path.cpp
+++ b/lib/System/Path.cpp
@@ -52,10 +52,15 @@ Path::GetLLVMConfigDir() {
}
LLVMFileType
-sys::IdentifyFileType(const char*magic, unsigned length) {
+sys::IdentifyFileType(const char *magic, unsigned length) {
assert(magic && "Invalid magic number string");
assert(length >=4 && "Invalid magic number length");
switch (magic[0]) {
+ case 0xDE: // 0x0B17C0DE = BC wrapper
+ if (magic[1] == (char)0xC0 && magic[2] == (char)0x17 &&
+ magic[3] == (char)0x0B)
+ return Bitcode_FileType;
+ break;
case 'B':
if (magic[1] == 'C' && magic[2] == (char)0xC0 && magic[3] == (char)0xDE)
return Bitcode_FileType;