5 files changed, 179 insertions, 9 deletions
diff --git a/docs/BitCodeFormat.html b/docs/BitCodeFormat.html
index 4adf75e..ed9bd08 100644
--- a/docs/BitCodeFormat.html
+++ b/docs/BitCodeFormat.html
@@ -22,6 +22,8 @@
     <li><a href="#stdblocks">Standard Blocks</a></li>
     </ol>
   </li>
+  <li><a href="#wrapper">Bitcode Wrapper Format</a>
+  </li>
   <li><a href="#llvmir">LLVM IR Encoding</a>
     <ol>
     <li><a href="#basics">Basics</a></li>
@@ -65,8 +67,12 @@ Unlike XML, the bitstream format is a binary encoding, and unlike XML it
 provides a mechanism for the file to self-describe "abbreviations", which are
 effectively size optimizations for the content.</p>
 
-<p>This document first describes the LLVM bitstream format, then describes the
-record structure used by LLVM IR files.
+<p>LLVM IR files may be optionally embedded into a <a 
+href="#wrapper">wrapper</a> structure that makes it easy to embed extra data
+along with LLVM IR files.</p>
+
+<p>This document first describes the LLVM bitstream format, then the wrapper
+format, and finally the record structure used by LLVM IR files.
 </p>
 
 </div>
@@ -545,6 +551,36 @@ corresponding blocks.  It is not safe to skip them.
 </div>
 
 <!-- *********************************************************************** -->
+<div class="doc_section"> <a name="wrapper">Bitcode Wrapper Format</a></div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>Bitcode files for LLVM IR may optionally be wrapped in a simple wrapper
+structure.  This structure contains a simple header that indicates the offset
+and size of the embedded BC file.  This allows additional information to be
+stored alongside the BC file.  The structure of this file header is:
+</p>
+
+<p>
+<pre>
+[Magic<sub>32</sub>,
+ Version<sub>32</sub>,
+ Offset<sub>32</sub>,
+ Size<sub>32</sub>,
+ CPUType<sub>32</sub>]
+</pre></p>
+
+<p>Each of the fields is a 32-bit field stored in little endian form (as with
+the rest of the bitcode file fields).  The Magic number is always
+<tt>0x0B17C0DE</tt> and the version is currently always <tt>0</tt>.  The Offset
+field is the offset in bytes to the start of the bitcode stream in the file, and
+the Size field is the size in bytes of the stream. CPUType is a target-specific
+value that can be used to encode the CPU of the target.</p>
+</div>
+
+
+<!-- *********************************************************************** -->
 <div class="doc_section"> <a name="llvmir">LLVM IR Encoding</a></div>
 <!-- *********************************************************************** -->
 
diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h
index 3b7e405..f76bb88 100644
--- a/include/llvm/Bitcode/BitstreamWriter.h
+++ b/include/llvm/Bitcode/BitstreamWriter.h
@@ -157,6 +157,15 @@ public:
     Emit(Val, CurCodeSize);
   }
   
+  // BackpatchWord - Overwrite the four bytes of the output that start at
+  // byte offset ByteNo with NewWord, least significant byte first.  Used to
+  // fill in fields whose value is only known after later data is emitted.
+  void BackpatchWord(unsigned ByteNo, unsigned NewWord) {
+    // Shift steps through 0, 8, 16, 24: one output byte per iteration.
+    for (unsigned Shift = 0; Shift != 32; Shift += 8)
+      Out[ByteNo++] = (unsigned char)(NewWord >> Shift);
+  }
+  
   //===--------------------------------------------------------------------===//
   // Block Manipulation
   //===--------------------------------------------------------------------===//
@@ -227,10 +236,7 @@ public:
     unsigned ByteNo = B.StartSizeWord*4;
     
     // Update the block size field in the header of this sub-block.
-    Out[ByteNo++] = (unsigned char)(SizeInWords >>  0);
-    Out[ByteNo++] = (unsigned char)(SizeInWords >>  8);
-    Out[ByteNo++] = (unsigned char)(SizeInWords >> 16);
-    Out[ByteNo++] = (unsigned char)(SizeInWords >> 24);
+    BackpatchWord(ByteNo, SizeInWords);
     
     // Restore the inner block's code size and abbrev table.
     CurCodeSize = B.PrevCodeSize;
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 3fc6b17..a8c62be 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -1184,6 +1184,47 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) {
   return Error("Premature end of bitstream");
 }
 
+/// SkipWrapperHeader - Some systems wrap bc files with a special header for
+/// padding or other reasons.  The format of this header is:
+///
+/// struct bc_header {
+///   uint32_t Magic;         // 0x0B17C0DE
+///   uint32_t Version;       // Version, currently always 0.
+///   uint32_t BitcodeOffset; // Offset to traditional bitcode file.
+///   uint32_t BitcodeSize;   // Size of traditional bitcode file.
+///   ... potentially other gunk ...
+/// };
+///
+/// All fields are little endian.  This is called once the magic number has
+/// matched.  On success, narrows [BufPtr, BufEnd) to the embedded bitcode and
+/// returns false; returns true (leaving both untouched) for a bad header.
+static bool SkipWrapperHeader(unsigned char *&BufPtr, unsigned char *&BufEnd) {
+  enum {
+    KnownHeaderSize = 4*4,  // Size of header we read.
+    OffsetField = 2*4,      // Offset in bytes to Offset field.
+    SizeField = 3*4         // Offset in bytes to Size field.
+  };
+
+  // Must contain the header!
+  if (BufEnd-BufPtr < KnownHeaderSize) return true;
+  const unsigned FileSize = unsigned(BufEnd-BufPtr);
+
+  // Widen each byte before shifting so (byte << 24) cannot overflow an int.
+  unsigned Offset = ( unsigned(BufPtr[OffsetField  ])        |
+                     (unsigned(BufPtr[OffsetField+1]) << 8)  |
+                     (unsigned(BufPtr[OffsetField+2]) << 16) |
+                     (unsigned(BufPtr[OffsetField+3]) << 24));
+  unsigned Size   = ( unsigned(BufPtr[SizeField    ])        |
+                     (unsigned(BufPtr[SizeField  +1]) << 8)  |
+                     (unsigned(BufPtr[SizeField  +2]) << 16) |
+                     (unsigned(BufPtr[SizeField  +3]) << 24));
+
+  // Check Offset and Size separately; Offset+Size can wrap around 32 bits.
+  if (Offset > FileSize || Size > FileSize-Offset) return true;
+  BufPtr += Offset;
+  BufEnd = BufPtr+Size;
+  return false;
+}
 
 bool BitcodeReader::ParseBitcode() {
   TheModule = 0;
@@ -1192,7 +1233,16 @@ bool BitcodeReader::ParseBitcode() {
     return Error("Bitcode stream should be a multiple of 4 bytes in length");
   
   unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart();
-  Stream.init(BufPtr, BufPtr+Buffer->getBufferSize());
+  unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
+  
+  // If we have a wrapper header, parse it and ignore the non-bc file contents.
+  // The magic number is 0x0B17C0DE stored in little endian.
+  if (BufPtr != BufEnd && BufPtr[0] == 0xDE && BufPtr[1] == 0xC0 && 
+      BufPtr[2] == 0x17 && BufPtr[3] == 0x0B)
+    if (SkipWrapperHeader(BufPtr, BufEnd))
+      return Error("Invalid bitcode wrapper header");
+  
+  Stream.init(BufPtr, BufEnd);
   
   // Sniff for the signature.
   if (Stream.Read(8) != 'B' ||
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 0030aca..9794fac 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1273,6 +1273,70 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) {
   Stream.ExitBlock();
 }
 
+/// EmitDarwinBCHeader - If generating a bc file on darwin, we have to emit a
+/// header and trailer to make it compatible with the system archiver.  To do
+/// this we emit the following header, and then emit a trailer that pads the
+/// file out to be a multiple of 16 bytes.
+///
+/// struct bc_header {
+///   uint32_t Magic;         // 0x0B17C0DE
+///   uint32_t Version;       // Version, currently always 0.
+///   uint32_t BitcodeOffset; // Offset to traditional bitcode file.
+///   uint32_t BitcodeSize;   // Size of traditional bitcode file.
+///   uint32_t CPUType;       // CPU specifier.
+///   ... potentially more later ...
+/// };
+enum {
+  DarwinBCSizeFieldOffset = 3*4, // Offset to bitcode_size.
+  DarwinBCHeaderSize = 5*4       // Five 32-bit fields, see bc_header above.
+};
+
+static void EmitDarwinBCHeader(BitstreamWriter &Stream,
+                               const std::string &TT) {
+  unsigned CPUType = ~0U;  // Emitted unchanged if TT matches no case below.
+
+  // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*.  The CPUType is a
+  // magic number from /usr/include/mach/machine.h.  It is ok to reproduce the
+  // specific constants here because they are implicitly part of the Darwin ABI.
+  enum {
+    DARWIN_CPU_ARCH_ABI64      = 0x01000000,
+    DARWIN_CPU_TYPE_X86        = 7,
+    DARWIN_CPU_TYPE_POWERPC    = 18
+  };
+
+  if (TT.find("x86_64-") == 0)
+    CPUType = DARWIN_CPU_TYPE_X86 | DARWIN_CPU_ARCH_ABI64;
+  else if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' &&
+           TT[4] == '-' && TT[1] >= '3' && TT[1] <= '9')  // Both bounds.
+    CPUType = DARWIN_CPU_TYPE_X86;
+  else if (TT.find("powerpc-") == 0)
+    CPUType = DARWIN_CPU_TYPE_POWERPC;
+  else if (TT.find("powerpc64-") == 0)
+    CPUType = DARWIN_CPU_TYPE_POWERPC | DARWIN_CPU_ARCH_ABI64;
+
+  // Traditional Bitcode starts after header.
+  unsigned BCOffset = DarwinBCHeaderSize;
+
+  Stream.Emit(0x0B17C0DE, 32);
+  Stream.Emit(0         , 32);  // Version.
+  Stream.Emit(BCOffset  , 32);
+  Stream.Emit(0         , 32);  // Size placeholder, backpatched later.
+  Stream.Emit(CPUType   , 32);
+}
+
+/// EmitDarwinBCTrailer - Finish a darwin-wrapped file.  BufferSize is the
+/// number of bytes written so far, wrapper header included.  Records the
+/// bitcode size in the header, then zero-pads to a multiple of 16 bytes.
+static void EmitDarwinBCTrailer(BitstreamWriter &Stream, unsigned BufferSize) {
+  // The size field covers everything emitted after the wrapper header.
+  const unsigned BitcodeSize = BufferSize-DarwinBCHeaderSize;
+  Stream.BackpatchWord(DarwinBCSizeFieldOffset, BitcodeSize);
+
+  // Emit one zero byte at a time until the total is a multiple of 16.
+  for (unsigned Total = BufferSize; Total % 16 != 0; ++Total)
+    Stream.Emit(0, 8);
+}
+
 
 /// WriteBitcodeToFile - Write the specified module to the specified output
 /// stream.
@@ -1282,6 +1346,11 @@ void llvm::WriteBitcodeToFile(const Module *M, std::ostream &Out) {
   
   Buffer.reserve(256*1024);
   
+  // If this is darwin, emit a file header and trailer if needed.
+  bool isDarwin = M->getTargetTriple().find("-darwin") != std::string::npos;
+  if (isDarwin)
+    EmitDarwinBCHeader(Stream, M->getTargetTriple());
+  
   // Emit the file header.
   Stream.Emit((unsigned)'B', 8);
   Stream.Emit((unsigned)'C', 8);
@@ -1292,10 +1361,14 @@ void llvm::WriteBitcodeToFile(const Module *M, std::ostream &Out) {
 
   // Emit the module.
   WriteModule(M, Stream);
+
+  if (isDarwin)
+    EmitDarwinBCTrailer(Stream, Buffer.size());
+
   
   // If writing to stdout, set binary mode.
   if (llvm::cout == Out)
-      sys::Program::ChangeStdoutToBinary();
+    sys::Program::ChangeStdoutToBinary();
 
   // Write the generated bitstream to "Out".
   Out.write((char*)&Buffer.front(), Buffer.size());
diff --git a/lib/System/Path.cpp b/lib/System/Path.cpp
index fbb6b66..88479fe 100644
--- a/lib/System/Path.cpp
+++ b/lib/System/Path.cpp
@@ -52,10 +52,15 @@ Path::GetLLVMConfigDir() {
 }
 
 LLVMFileType
-sys::IdentifyFileType(const char*magic, unsigned length) {
+sys::IdentifyFileType(const char *magic, unsigned length) {
   assert(magic && "Invalid magic number string");
   assert(length >=4 && "Invalid magic number length");
   switch (magic[0]) {
+    case 0xDE:  // 0x0B17C0DE = BC wraper
+      if (magic[1] == (char)0xC0 && magic[2] == (char)0x17 &&
+          magic[3] == (char)0x0B)
+        return Bitcode_FileType;
+      break;
     case 'B':
       if (magic[1] == 'C' && magic[2] == (char)0xC0 && magic[3] == (char)0xDE)
         return Bitcode_FileType;