aboutsummaryrefslogtreecommitdiffstats
path: root/docs
diff options
context:
space:
mode:
authorDan Gohman <djg@cray.com>2007-07-18 16:29:46 +0000
committerDan Gohman <djg@cray.com>2007-07-18 16:29:46 +0000
commitf17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cc (patch)
treeebb79ea1ee5e3bc1fdf38541a811a8b804f0679a /docs
downloadexternal_llvm-f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cc.zip
external_llvm-f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cc.tar.gz
external_llvm-f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cc.tar.bz2
It's not necessary to do rounding for alloca operations when the requested
alignment is equal to the stack alignment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@40004 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'docs')
-rw-r--r--docs/AliasAnalysis.html965
-rw-r--r--docs/BitCodeFormat.html614
-rw-r--r--docs/Bugpoint.html244
-rw-r--r--docs/CFEBuildInstrs.html97
-rw-r--r--docs/CodeGenerator.html1962
-rw-r--r--docs/CodingStandards.html769
-rw-r--r--docs/CommandGuide/Makefile101
-rw-r--r--docs/CommandGuide/bugpoint.pod134
-rw-r--r--docs/CommandGuide/html/manpage.css256
-rw-r--r--docs/CommandGuide/index.html160
-rw-r--r--docs/CommandGuide/llc.pod191
-rw-r--r--docs/CommandGuide/lli.pod97
-rw-r--r--docs/CommandGuide/llvm-ar.pod406
-rw-r--r--docs/CommandGuide/llvm-as.pod77
-rw-r--r--docs/CommandGuide/llvm-bcanalyzer.pod315
-rw-r--r--docs/CommandGuide/llvm-config.pod131
-rw-r--r--docs/CommandGuide/llvm-db.pod16
-rw-r--r--docs/CommandGuide/llvm-dis.pod60
-rw-r--r--docs/CommandGuide/llvm-extract.pod63
-rw-r--r--docs/CommandGuide/llvm-ld.pod269
-rw-r--r--docs/CommandGuide/llvm-link.pod74
-rw-r--r--docs/CommandGuide/llvm-nm.pod122
-rw-r--r--docs/CommandGuide/llvm-prof.pod57
-rw-r--r--docs/CommandGuide/llvm-ranlib.pod52
-rw-r--r--docs/CommandGuide/llvm-upgrade.pod66
-rw-r--r--docs/CommandGuide/llvm2cpp.pod217
-rw-r--r--docs/CommandGuide/llvmc.pod431
-rw-r--r--docs/CommandGuide/llvmgcc.pod85
-rw-r--r--docs/CommandGuide/llvmgxx.pod85
-rw-r--r--docs/CommandGuide/manpage.css256
-rw-r--r--docs/CommandGuide/opt.pod138
-rw-r--r--docs/CommandGuide/stkrc.pod96
-rw-r--r--docs/CommandGuide/tblgen.pod115
-rw-r--r--docs/CommandLine.html1938
-rw-r--r--docs/CompilerDriver.html823
-rw-r--r--docs/CompilerWriterInfo.html261
-rw-r--r--docs/DeveloperPolicy.html504
-rw-r--r--docs/ExceptionHandling.html439
-rw-r--r--docs/ExtendingLLVM.html392
-rw-r--r--docs/FAQ.html713
-rw-r--r--docs/GarbageCollection.html534
-rw-r--r--docs/GetElementPtr.html311
-rw-r--r--docs/GettingStarted.html1640
-rw-r--r--docs/GettingStartedVS.html354
-rw-r--r--docs/HistoricalNotes/2000-11-18-EarlyDesignIdeas.txt74
-rw-r--r--docs/HistoricalNotes/2000-11-18-EarlyDesignIdeasResp.txt199
-rw-r--r--docs/HistoricalNotes/2000-12-06-EncodingIdea.txt30
-rw-r--r--docs/HistoricalNotes/2000-12-06-MeetingSummary.txt83
-rw-r--r--docs/HistoricalNotes/2001-01-31-UniversalIRIdea.txt39
-rw-r--r--docs/HistoricalNotes/2001-02-06-TypeNotationDebate.txt67
-rw-r--r--docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp1.txt75
-rw-r--r--docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp2.txt53
-rw-r--r--docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp4.txt89
-rw-r--r--docs/HistoricalNotes/2001-02-09-AdveComments.txt120
-rw-r--r--docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt245
-rw-r--r--docs/HistoricalNotes/2001-02-13-Reference-Memory.txt39
-rw-r--r--docs/HistoricalNotes/2001-02-13-Reference-MemoryResponse.txt47
-rw-r--r--docs/HistoricalNotes/2001-04-16-DynamicCompilation.txt49
-rw-r--r--docs/HistoricalNotes/2001-05-18-ExceptionHandling.txt202
-rw-r--r--docs/HistoricalNotes/2001-05-19-ExceptionResponse.txt45
-rw-r--r--docs/HistoricalNotes/2001-06-01-GCCOptimizations.txt63
-rw-r--r--docs/HistoricalNotes/2001-06-01-GCCOptimizations2.txt71
-rw-r--r--docs/HistoricalNotes/2001-06-20-.NET-Differences.txt30
-rw-r--r--docs/HistoricalNotes/2001-07-06-LoweringIRForCodeGen.txt31
-rw-r--r--docs/HistoricalNotes/2001-07-08-InstructionSelection.txt51
-rw-r--r--docs/HistoricalNotes/2001-07-08-InstructionSelection2.txt25
-rw-r--r--docs/HistoricalNotes/2001-09-18-OptimizeExceptions.txt56
-rw-r--r--docs/HistoricalNotes/2002-05-12-InstListChange.txt55
-rw-r--r--docs/HistoricalNotes/2002-06-25-MegaPatchInfo.txt72
-rw-r--r--docs/HistoricalNotes/2003-01-23-CygwinNotes.txt28
-rw-r--r--docs/HistoricalNotes/2003-06-25-Reoptimizer1.txt137
-rw-r--r--docs/HistoricalNotes/2003-06-26-Reoptimizer2.txt110
-rw-r--r--docs/HowToReleaseLLVM.html526
-rw-r--r--docs/HowToSubmitABug.html355
-rw-r--r--docs/LangRef.html4882
-rw-r--r--docs/Lexicon.html181
-rw-r--r--docs/LinkTimeOptimization.html392
-rw-r--r--docs/Makefile91
-rw-r--r--docs/MakefileGuide.html1046
-rw-r--r--docs/Passes.html1026
-rw-r--r--docs/ProgrammersManual.html3090
-rw-r--r--docs/Projects.html460
-rw-r--r--docs/ReleaseNotes.html861
-rw-r--r--docs/SourceLevelDebugging.html1782
-rw-r--r--docs/Stacker.html1418
-rw-r--r--docs/SystemLibrary.html344
-rw-r--r--docs/TableGenFundamentals.html646
-rw-r--r--docs/TestingGuide.html813
-rw-r--r--docs/UsingLibraries.html449
-rw-r--r--docs/WritingAnLLVMBackend.html260
-rw-r--r--docs/WritingAnLLVMPass.html1817
-rw-r--r--docs/doxygen.cfg.in1230
-rw-r--r--docs/doxygen.css378
-rw-r--r--docs/doxygen.footer13
-rw-r--r--docs/doxygen.header9
-rw-r--r--docs/doxygen.intro18
-rw-r--r--docs/img/Debugging.gifbin0 -> 20390 bytes
-rw-r--r--docs/img/libdeps.gifbin0 -> 52679 bytes
-rw-r--r--docs/img/lines.gifbin0 -> 91 bytes
-rw-r--r--docs/img/objdeps.gifbin0 -> 16201 bytes
-rw-r--r--docs/img/venusflytrap.jpgbin0 -> 56606 bytes
-rw-r--r--docs/index.html273
-rw-r--r--docs/llvm.css84
103 files changed, 41259 insertions, 0 deletions
diff --git a/docs/AliasAnalysis.html b/docs/AliasAnalysis.html
new file mode 100644
index 0000000..748b3cb
--- /dev/null
+++ b/docs/AliasAnalysis.html
@@ -0,0 +1,965 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>LLVM Alias Analysis Infrastructure</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<div class="doc_title">
+ LLVM Alias Analysis Infrastructure
+</div>
+
+<ol>
+ <li><a href="#introduction">Introduction</a></li>
+
+ <li><a href="#overview"><tt>AliasAnalysis</tt> Class Overview</a>
+ <ul>
+ <li><a href="#pointers">Representation of Pointers</a></li>
+ <li><a href="#alias">The <tt>alias</tt> method</a></li>
+ <li><a href="#ModRefInfo">The <tt>getModRefInfo</tt> methods</a></li>
+ <li><a href="#OtherItfs">Other useful <tt>AliasAnalysis</tt> methods</a></li>
+ </ul>
+ </li>
+
+ <li><a href="#writingnew">Writing a new <tt>AliasAnalysis</tt> Implementation</a>
+ <ul>
+ <li><a href="#passsubclasses">Different Pass styles</a></li>
+ <li><a href="#requiredcalls">Required initialization calls</a></li>
+ <li><a href="#interfaces">Interfaces which may be specified</a></li>
+ <li><a href="#chaining"><tt>AliasAnalysis</tt> chaining behavior</a></li>
+ <li><a href="#updating">Updating analysis results for transformations</a></li>
+ <li><a href="#implefficiency">Efficiency Issues</a></li>
+ </ul>
+ </li>
+
+ <li><a href="#using">Using alias analysis results</a>
+ <ul>
+ <li><a href="#loadvn">Using the <tt>-load-vn</tt> Pass</a></li>
+ <li><a href="#ast">Using the <tt>AliasSetTracker</tt> class</a></li>
+ <li><a href="#direct">Using the <tt>AliasAnalysis</tt> interface directly</a></li>
+ </ul>
+ </li>
+
+ <li><a href="#exist">Existing alias analysis implementations and clients</a>
+ <ul>
+ <li><a href="#impls">Available <tt>AliasAnalysis</tt> implementations</a></li>
+ <li><a href="#aliasanalysis-xforms">Alias analysis driven transformations</a></li>
+ <li><a href="#aliasanalysis-debug">Clients for debugging and evaluation of
+ implementations</a></li>
+ </ul>
+ </li>
+</ol>
+
+<div class="doc_author">
+ <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="introduction">Introduction</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>Alias Analysis (aka Pointer Analysis) is a class of techniques which attempt
+to determine whether or not two pointers ever can point to the same object in
+memory. There are many different algorithms for alias analysis and many
+different ways of classifying them: flow-sensitive vs flow-insensitive,
+context-sensitive vs context-insensitive, field-sensitive vs field-insensitive,
+unification-based vs subset-based, etc. Traditionally, alias analyses respond
+to a query with a <a href="#MustMayNo">Must, May, or No</a> alias response,
+indicating that two pointers always point to the same object, might point to the
+same object, or are known to never point to the same object.</p>
+
+<p>The LLVM <a
+href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html"><tt>AliasAnalysis</tt></a>
+class is the primary interface used by clients and implementations of alias
+analyses in the LLVM system. This class is the common interface between clients
+of alias analysis information and the implementations providing it, and is
+designed to support a wide range of implementations and clients (but currently
+all clients are assumed to be flow-insensitive). In addition to simple alias
+analysis information, this class exposes Mod/Ref information from those
+implementations which can provide it, allowing for powerful analyses and
+transformations to work well together.</p>
+
+<p>This document contains information necessary to successfully implement this
+interface, use it, and to test both sides. It also explains some of the finer
+points about what exactly results mean. If you feel that something is unclear
+or should be added, please <a href="mailto:sabre@nondot.org">let me
+know</a>.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="overview"><tt>AliasAnalysis</tt> Class Overview</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>The <a
+href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html"><tt>AliasAnalysis</tt></a>
+class defines the interface that the various alias analysis implementations
+should support. This class exports two important enums: <tt>AliasResult</tt>
+and <tt>ModRefResult</tt> which represent the result of an alias query or a
+mod/ref query, respectively.</p>
+
+<p>The <tt>AliasAnalysis</tt> interface exposes information about memory,
+represented in several different ways. In particular, memory objects are
+represented as a starting address and size, and function calls are represented
+as the actual <tt>call</tt> or <tt>invoke</tt> instructions that perform the
+call. The <tt>AliasAnalysis</tt> interface also exposes some helper methods
+which allow you to get mod/ref information for arbitrary instructions.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="pointers">Representation of Pointers</a>
+</div>
+
+<div class="doc_text">
+
+<p>Most importantly, the <tt>AliasAnalysis</tt> class provides several methods
+which are used to query whether or not two memory objects alias, whether
+function calls can modify or read a memory object, etc. For all of these
+queries, memory objects are represented as a pair of their starting address (a
+symbolic LLVM <tt>Value*</tt>) and a static size.</p>
+
+<p>Representing memory objects as a starting address and a size is critically
+important for correct Alias Analyses. For example, consider this (silly, but
+possible) C code:</p>
+
+<div class="doc_code">
+<pre>
+int i;
+char C[2];
+char A[10];
+/* ... */
+for (i = 0; i != 10; ++i) {
+ C[0] = A[i]; /* One byte store */
+ C[1] = A[9-i]; /* One byte store */
+}
+</pre>
+</div>
+
+<p>In this case, the <tt>basicaa</tt> pass will disambiguate the stores to
+<tt>C[0]</tt> and <tt>C[1]</tt> because they are accesses to two distinct
+locations one byte apart, and the accesses are each one byte. In this case, the
+LICM pass can use store motion to remove the stores from the loop. In
+contrast, the following code:</p>
+
+<div class="doc_code">
+<pre>
+int i;
+char C[2];
+char A[10];
+/* ... */
+for (i = 0; i != 10; ++i) {
+ ((short*)C)[0] = A[i]; /* Two byte store! */
+ C[1] = A[9-i]; /* One byte store */
+}
+</pre>
+</div>
+
+<p>In this case, the two stores to C do alias each other, because the access to
+the <tt>&amp;C[0]</tt> element is a two byte access. If size information wasn't
+available in the query, even the first case would have to conservatively assume
+that the accesses alias.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="alias">The <tt>alias</tt> method</a>
+</div>
+
+<div class="doc_text">
+The <tt>alias</tt> method is the primary interface used to determine whether or
+not two memory objects alias each other. It takes two memory objects as input
+and returns MustAlias, MayAlias, or NoAlias as appropriate.
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="MustMayNo">Must, May, and No Alias Responses</a>
+</div>
+
+<div class="doc_text">
+
+<p>An Alias Analysis implementation can return one of three responses:
+MustAlias, MayAlias, and NoAlias. The No and May alias results are obvious: if
+the two pointers can never equal each other, return NoAlias, if they might,
+return MayAlias.</p>
+
+<p>The MustAlias response is trickier though. In LLVM, the Must Alias response
+may only be returned if the two memory objects are guaranteed to always start at
+exactly the same location. If two memory objects overlap, but do not start at
+the same location, return MayAlias.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="ModRefInfo">The <tt>getModRefInfo</tt> methods</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>getModRefInfo</tt> methods return information about whether the
+execution of an instruction can read or modify a memory location. Mod/Ref
+information is always conservative: if an instruction <b>might</b> read or write
+a location, ModRef is returned.</p>
+
+<p>The <tt>AliasAnalysis</tt> class also provides a <tt>getModRefInfo</tt>
+method for testing dependencies between function calls. This method takes two
+call sites (CS1 &amp; CS2), returns NoModRef if the two calls refer to disjoint
+memory locations, Ref if CS1 reads memory written by CS2, Mod if CS1 writes to
+memory read or written by CS2, or ModRef if CS1 might read or write memory
+accessed by CS2. Note that this relation is not commutative. Clients that use
+this method should be predicated on the <tt>hasNoModRefInfoForCalls()</tt>
+method, which indicates whether or not an analysis can provide mod/ref
+information for function call pairs (most can not). If the analysis cannot
+provide this information, the client shouldn't waste analysis time querying the
+<tt>getModRefInfo</tt> method many times.</p>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="OtherItfs">Other useful <tt>AliasAnalysis</tt> methods</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+Several other tidbits of information are often collected by various alias
+analysis implementations and can be put to good use by various clients.
+</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ The <tt>getMustAliases</tt> method
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>getMustAliases</tt> method returns all values that are known to
+always must alias a pointer. This information can be provided in some cases for
+important objects like the null pointer and global values. Knowing that a
+pointer always points to a particular function allows indirect calls to be
+turned into direct calls, for example.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ The <tt>pointsToConstantMemory</tt> method
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>pointsToConstantMemory</tt> method returns true if and only if the
+analysis can prove that the pointer only points to unchanging memory locations
+(functions, constant global variables, and the null pointer). This information
+can be used to refine mod/ref information: it is impossible for an unchanging
+memory location to be modified.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="simplemodref">The <tt>doesNotAccessMemory</tt> and
+ <tt>onlyReadsMemory</tt> methods</a>
+</div>
+
+<div class="doc_text">
+
+<p>These methods are used to provide very simple mod/ref information for
+function calls. The <tt>doesNotAccessMemory</tt> method returns true for a
+function if the analysis can prove that the function never reads or writes to
+memory, or if the function only reads from constant memory. Functions with this
+property are side-effect free and only depend on their input arguments, allowing
+them to be eliminated if they form common subexpressions or be hoisted out of
+loops. Many common functions behave this way (e.g., <tt>sin</tt> and
+<tt>cos</tt>) but many others do not (e.g., <tt>acos</tt>, which modifies the
+<tt>errno</tt> variable).</p>
+
+<p>The <tt>onlyReadsMemory</tt> method returns true for a function if analysis
+can prove that (at most) the function only reads from non-volatile memory.
+Functions with this property are side-effect free, only depending on their input
+arguments and the state of memory when they are called. This property allows
+calls to these functions to be eliminated and moved around, as long as there is
+no store instruction that changes the contents of memory. Note that all
+functions that satisfy the <tt>doesNotAccessMemory</tt> method also satisfy
+<tt>onlyReadsMemory</tt>.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="writingnew">Writing a new <tt>AliasAnalysis</tt> Implementation</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>Writing a new alias analysis implementation for LLVM is quite
+straight-forward. There are already several implementations that you can use
+for examples, and the following information should help fill in any details.
+For examples, take a look at the <a href="#impls">various alias analysis
+implementations</a> included with LLVM.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="passsubclasses">Different Pass styles</a>
+</div>
+
+<div class="doc_text">
+
+<p>The first step is to determine what type of <a
+href="WritingAnLLVMPass.html">LLVM pass</a> you need to use for your Alias
+Analysis. As is the case with most other analyses and transformations, the
+answer should be fairly obvious from what type of problem you are trying to
+solve:</p>
+
+<ol>
+ <li>If you require interprocedural analysis, it should be a
+ <tt>Pass</tt>.</li>
+ <li>If you are a function-local analysis, subclass <tt>FunctionPass</tt>.</li>
+ <li>If you don't need to look at the program at all, subclass
+ <tt>ImmutablePass</tt>.</li>
+</ol>
+
+<p>In addition to the pass that you subclass, you should also inherit from the
+<tt>AliasAnalysis</tt> interface, of course, and use the
+<tt>RegisterAnalysisGroup</tt> template to register as an implementation of
+<tt>AliasAnalysis</tt>.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="requiredcalls">Required initialization calls</a>
+</div>
+
+<div class="doc_text">
+
+<p>Your subclass of <tt>AliasAnalysis</tt> is required to invoke two methods on
+the <tt>AliasAnalysis</tt> base class: <tt>getAnalysisUsage</tt> and
+<tt>InitializeAliasAnalysis</tt>. In particular, your implementation of
+<tt>getAnalysisUsage</tt> should explicitly call into the
+<tt>AliasAnalysis::getAnalysisUsage</tt> method in addition to declaring any
+pass dependencies your pass has. Thus you should have something
+like this:</p>
+
+<div class="doc_code">
+<pre>
+void getAnalysisUsage(AnalysisUsage &amp;AU) const {
+ AliasAnalysis::getAnalysisUsage(AU);
+ <i>// declare your dependencies here.</i>
+}
+</pre>
+</div>
+
+<p>Additionally, you must invoke the <tt>InitializeAliasAnalysis</tt> method
+from your analysis run method (<tt>run</tt> for a <tt>Pass</tt>,
+<tt>runOnFunction</tt> for a <tt>FunctionPass</tt>, or <tt>InitializePass</tt>
+for an <tt>ImmutablePass</tt>). For example (as part of a <tt>Pass</tt>):</p>
+
+<div class="doc_code">
+<pre>
+bool run(Module &amp;M) {
+ InitializeAliasAnalysis(this);
+ <i>// Perform analysis here...</i>
+ return false;
+}
+</pre>
+</div>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="interfaces">Interfaces which may be specified</a>
+</div>
+
+<div class="doc_text">
+
+<p>All of the <a
+href="/doxygen/classllvm_1_1AliasAnalysis.html"><tt>AliasAnalysis</tt></a>
+virtual methods default to providing <a href="#chaining">chaining</a> to another
+alias analysis implementation, which ends up returning conservatively correct
+information (returning "May" Alias and "Mod/Ref" for alias and mod/ref queries
+respectively). Depending on the capabilities of the analysis you are
+implementing, you just override the interfaces you can improve.</p>
+
+</div>
+
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="chaining"><tt>AliasAnalysis</tt> chaining behavior</a>
+</div>
+
+<div class="doc_text">
+
+<p>With only two special exceptions (the <tt><a
+href="#basic-aa">basicaa</a></tt> and <a href="#no-aa"><tt>no-aa</tt></a>
+passes) every alias analysis pass chains to another alias analysis
+implementation (for example, the user can specify "<tt>-basicaa -ds-aa
+-anders-aa -licm</tt>" to get the maximum benefit from the three alias
+analyses). The alias analysis class automatically takes care of most of this
+for methods that you don't override. For methods that you do override, in code
+paths that return a conservative MayAlias or Mod/Ref result, simply return
+whatever the superclass computes. For example:</p>
+
+<div class="doc_code">
+<pre>
+AliasAnalysis::AliasResult alias(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size) {
+ if (...)
+ return NoAlias;
+ ...
+
+ <i>// Couldn't determine a must or no-alias result.</i>
+ return AliasAnalysis::alias(V1, V1Size, V2, V2Size);
+}
+</pre>
+</div>
+
+<p>In addition to analysis queries, you must make sure to unconditionally pass
+LLVM <a href="#updating">update notification</a> methods to the superclass as
+well if you override them, which allows all alias analyses in a chain to be
+updated.</p>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="updating">Updating analysis results for transformations</a>
+</div>
+
+<div class="doc_text">
+<p>
+Alias analysis information is initially computed for a static snapshot of the
+program, but clients will use this information to make transformations to the
+code. All but the most trivial forms of alias analysis will need to have their
+analysis results updated to reflect the changes made by these transformations.
+</p>
+
+<p>
+The <tt>AliasAnalysis</tt> interface exposes two methods which are used to
+communicate program changes from the clients to the analysis implementations.
+Various alias analysis implementations should use these methods to ensure that
+their internal data structures are kept up-to-date as the program changes (for
+example, when an instruction is deleted), and clients of alias analysis must be
+sure to call these interfaces appropriately.
+</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">The <tt>deleteValue</tt> method</div>
+
+<div class="doc_text">
+The <tt>deleteValue</tt> method is called by transformations when they remove an
+instruction or any other value from the program (including values that do not
+use pointers). Typically alias analyses keep data structures that have entries
+for each value in the program. When this method is called, they should remove
+any entries for the specified value, if they exist.
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">The <tt>copyValue</tt> method</div>
+
+<div class="doc_text">
+The <tt>copyValue</tt> method is used when a new value is introduced into the
+program. There is no way to introduce a value into the program that did not
+exist before (this doesn't make sense for a safe compiler transformation), so
+this is the only way to introduce a new value. This method indicates that the
+new value has exactly the same properties as the value being copied.
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">The <tt>replaceWithNewValue</tt> method</div>
+
+<div class="doc_text">
+This method is a simple helper method that is provided to make clients easier to
+use. It is implemented by copying the old analysis information to the new
+value, then deleting the old value. This method cannot be overridden by alias
+analysis implementations.
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="implefficiency">Efficiency Issues</a>
+</div>
+
+<div class="doc_text">
+
+<p>From the LLVM perspective, the only thing you need to do to provide an
+efficient alias analysis is to make sure that alias analysis <b>queries</b> are
+serviced quickly. The actual calculation of the alias analysis results (the
+"run" method) is only performed once, but many (perhaps duplicate) queries may
+be performed. Because of this, try to move as much computation to the run
+method as possible (within reason).</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="using">Using alias analysis results</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>There are several different ways to use alias analysis results. In order of
+preference, these are...</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="loadvn">Using the <tt>-load-vn</tt> Pass</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>load-vn</tt> pass uses alias analysis to provide value numbering
+information for <tt>load</tt> instructions and pointer values. If your analysis
+or transformation can be modeled in a form that uses value numbering
+information, you don't have to do anything special to handle load instructions:
+just use the <tt>load-vn</tt> pass, which uses alias analysis.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="ast">Using the <tt>AliasSetTracker</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>Many transformations need information about alias <b>sets</b> that are active
+in some scope, rather than information about pairwise aliasing. The <tt><a
+href="/doxygen/classllvm_1_1AliasSetTracker.html">AliasSetTracker</a></tt> class
+is used to efficiently build these Alias Sets from the pairwise alias analysis
+information provided by the <tt>AliasAnalysis</tt> interface.</p>
+
+<p>First you initialize the AliasSetTracker by using the "<tt>add</tt>" methods
+to add information about various potentially aliasing instructions in the scope
+you are interested in. Once all of the alias sets are completed, your pass
+should simply iterate through the constructed alias sets, using the
+<tt>AliasSetTracker</tt> <tt>begin()</tt>/<tt>end()</tt> methods.</p>
+
+<p>The <tt>AliasSet</tt>s formed by the <tt>AliasSetTracker</tt> are guaranteed
+to be disjoint, calculate mod/ref information and volatility for the set, and
+keep track of whether or not all of the pointers in the set are Must aliases.
+The AliasSetTracker also makes sure that sets are properly folded due to call
+instructions, and can provide a list of pointers in each set.</p>
+
+<p>As an example user of this, the <a href="/doxygen/structLICM.html">Loop
+Invariant Code Motion</a> pass uses <tt>AliasSetTracker</tt>s to calculate alias
+sets for each loop nest. If an <tt>AliasSet</tt> in a loop is not modified,
+then all load instructions from that set may be hoisted out of the loop. If any
+alias sets are stored to <b>and</b> are must alias sets, then the stores may be
+sunk to outside of the loop, promoting the memory location to a register for the
+duration of the loop nest. Both of these transformations only apply if the
+pointer argument is loop-invariant.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ The AliasSetTracker implementation
+</div>
+
+<div class="doc_text">
+
+<p>The AliasSetTracker class is implemented to be as efficient as possible. It
+uses the union-find algorithm to efficiently merge AliasSets when a pointer is
+inserted into the AliasSetTracker that aliases multiple sets. The primary data
+structure is a hash table mapping pointers to the AliasSet they are in.</p>
+
+<p>The AliasSetTracker class must maintain a list of all of the LLVM Value*'s
+that are in each AliasSet. Since the hash table already has entries for each
+LLVM Value* of interest, the AliasSets thread the linked list through these
+hash-table nodes to avoid having to allocate memory unnecessarily, and to make
+merging alias sets extremely efficient (the linked list merge is constant time).
+</p>
+
+<p>You shouldn't need to understand these details if you are just a client of
+the AliasSetTracker, but if you look at the code, hopefully this brief
+description will help make sense of why things are designed the way they
+are.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="direct">Using the <tt>AliasAnalysis</tt> interface directly</a>
+</div>
+
+<div class="doc_text">
+
+<p>If neither of these utility classes is what your pass needs, you should use
+the interfaces exposed by the <tt>AliasAnalysis</tt> class directly. Try to use
+the higher-level methods when possible (e.g., use mod/ref information instead of
+the <a href="#alias"><tt>alias</tt></a> method directly if possible) to get the
+best precision and efficiency.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="exist">Existing alias analysis implementations and clients</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>If you're going to be working with the LLVM alias analysis infrastructure,
+you should know what clients and implementations of alias analysis are
+available. In particular, if you are implementing an alias analysis, you should
+be aware of <a href="#aliasanalysis-debug">the clients</a> that are useful
+for monitoring and evaluating different implementations.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="impls">Available <tt>AliasAnalysis</tt> implementations</a>
+</div>
+
+<div class="doc_text">
+
+<p>This section lists the various implementations of the <tt>AliasAnalysis</tt>
+interface. With the exception of the <a href="#no-aa"><tt>-no-aa</tt></a> and
+<a href="#basic-aa"><tt>-basicaa</tt></a> implementations, all of these <a
+href="#chaining">chain</a> to other alias analysis implementations.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="no-aa">The <tt>-no-aa</tt> pass</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>-no-aa</tt> pass is just like what it sounds: an alias analysis that
+never returns any useful information. This pass can be useful if you think that
+alias analysis is doing something wrong and are trying to narrow down a
+problem.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="basic-aa">The <tt>-basicaa</tt> pass</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>-basicaa</tt> pass is the default LLVM alias analysis. It is an
+aggressive local analysis that "knows" many important facts:</p>
+
+<ul>
+<li>Distinct globals, stack allocations, and heap allocations can never
+ alias.</li>
+<li>Globals, stack allocations, and heap allocations never alias the null
+ pointer.</li>
+<li>Different fields of a structure do not alias.</li>
+<li>Indexes into arrays with statically differing subscripts cannot alias.</li>
+<li>Many common standard C library functions <a
+ href="#simplemodref">never access memory or only read memory</a>.</li>
+<li>Pointers that obviously point to constant globals
+ "<tt>pointToConstantMemory</tt>".</li>
+<li>Function calls cannot modify or reference stack allocations if they never
+ escape from the function that allocates them (a common case for automatic
+ arrays).</li>
+</ul>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="globalsmodref">The <tt>-globalsmodref-aa</tt> pass</a>
+</div>
+
+<div class="doc_text">
+
+<p>This pass implements a simple context-sensitive mod/ref and alias analysis
+for internal global variables that don't "have their address taken". If a
+global does not have its address taken, the pass knows that no pointers alias
+the global. This pass also keeps track of functions that it knows never access
+memory or never read memory. This allows certain optimizations (e.g. GCSE) to
+eliminate call instructions entirely.
+</p>
+
+<p>The real power of this pass is that it provides context-sensitive mod/ref
+information for call instructions. This allows the optimizer to know that
+calls to a function do not clobber or read the value of the global, allowing
+loads and stores to be eliminated.</p>
+
+<p>Note that this pass is somewhat limited in its scope (it only supports
+non-address-taken globals), but is a very quick analysis.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="anders-aa">The <tt>-anders-aa</tt> pass</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>-anders-aa</tt> pass implements the well-known "Andersen's algorithm"
+for interprocedural alias analysis. This algorithm is a subset-based,
+flow-insensitive, context-insensitive, and field-insensitive alias analysis that
+is widely believed to be fairly precise. Unfortunately, this algorithm is also
+O(N<sup>3</sup>). The LLVM implementation currently does not implement any of
+the refinements (such as "online cycle elimination" or "offline variable
+substitution") to improve its efficiency, so it can be quite slow in common
+cases.
+</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="steens-aa">The <tt>-steens-aa</tt> pass</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>-steens-aa</tt> pass implements a variation on the well-known
+"Steensgaard's algorithm" for interprocedural alias analysis. Steensgaard's
+algorithm is a unification-based, flow-insensitive, context-insensitive, and
+field-insensitive alias analysis that is also very scalable (effectively linear
+time).</p>
+
+<p>The LLVM <tt>-steens-aa</tt> pass implements a "speculatively
+field-<b>sensitive</b>" version of Steensgaard's algorithm using the Data
+Structure Analysis framework. This gives it substantially more precision than
+the standard algorithm while maintaining excellent analysis scalability.</p>
+
+<p>Note that <tt>-steens-aa</tt> is available in the optional "poolalloc"
+module; it is not part of the LLVM core.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="ds-aa">The <tt>-ds-aa</tt> pass</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>-ds-aa</tt> pass implements the full Data Structure Analysis
+algorithm. Data Structure Analysis is a modular unification-based,
+flow-insensitive, context-<b>sensitive</b>, and speculatively
+field-<b>sensitive</b> alias analysis that is also quite scalable, usually at
+O(n*log(n)).</p>
+
+<p>This algorithm is capable of responding to a full variety of alias analysis
+queries, and can provide context-sensitive mod/ref information as well. The
+only major facility not implemented so far is support for must-alias
+information.</p>
+
+<p>Note that <tt>-ds-aa</tt> is available in the optional "poolalloc"
+module; it is not part of the LLVM core.</p>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="aliasanalysis-xforms">Alias analysis driven transformations</a>
+</div>
+
+<div class="doc_text">
+LLVM includes several alias-analysis driven transformations which can be used
+with any of the implementations above.
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="adce">The <tt>-adce</tt> pass</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>-adce</tt> pass, which implements Aggressive Dead Code Elimination,
+uses the <tt>AliasAnalysis</tt> interface to delete calls to functions that do
+not have side-effects and are not used.</p>
+
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="licm">The <tt>-licm</tt> pass</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>-licm</tt> pass implements various Loop Invariant Code Motion related
+transformations. It uses the <tt>AliasAnalysis</tt> interface for several
+different transformations:</p>
+
+<ul>
+<li>It uses mod/ref information to hoist or sink load instructions out of loops
+if there are no instructions in the loop that modify the memory loaded.</li>
+
+<li>It uses mod/ref information to hoist function calls out of loops that do not
+write to memory and are loop-invariant.</li>
+
+<li>It uses alias information to promote memory objects that are loaded and
+stored to in loops to live in a register instead. It can do this if there are
+no may aliases to the loaded/stored memory location.</li>
+</ul>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="argpromotion">The <tt>-argpromotion</tt> pass</a>
+</div>
+
+<div class="doc_text">
+<p>
+The <tt>-argpromotion</tt> pass promotes by-reference arguments to be passed in
+by-value instead. In particular, if pointer arguments are only loaded from, it
+passes in the value loaded instead of the address to the function. This pass
+uses alias information to make sure that the value loaded from the argument
+pointer is not modified between the entry of the function and any load of the
+pointer.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="gcseloadvn">The <tt>-load-vn</tt> &amp; <tt>-gcse</tt> passes</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>-load-vn</tt> pass uses alias analysis to "<a href="#loadvn">value
+number</a>" loads and pointer values, which is used by the GCSE pass to
+eliminate instructions. The <tt>-load-vn</tt> pass relies on alias information
+and must-alias information. This combination of passes can make the following
+transformations:</p>
+
+<ul>
+<li>Redundant load instructions are eliminated.</li>
+<li>Load instructions that follow a store to the same location are replaced with
+the stored value ("store forwarding").</li>
+<li>Pointer values (e.g. formal arguments) that must-alias simpler expressions
+(e.g. global variables or the null pointer) are replaced. Note that this
+implements transformations like "virtual method resolution", turning indirect
+calls into direct calls.</li>
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="aliasanalysis-debug">Clients for debugging and evaluation of
+ implementations</a>
+</div>
+
+<div class="doc_text">
+
+<p>These passes are useful for evaluating the various alias analysis
+implementations. You can use them with commands like '<tt>opt -anders-aa -ds-aa
+-aa-eval foo.bc -disable-output -stats</tt>'.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="print-alias-sets">The <tt>-print-alias-sets</tt> pass</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>-print-alias-sets</tt> pass is exposed as part of the
+<tt>opt</tt> tool to print out the Alias Sets formed by the <a
+href="#ast"><tt>AliasSetTracker</tt></a> class. This is useful if you're using
+the <tt>AliasSetTracker</tt> class. To use it, use something like:</p>
+
+<div class="doc_code">
+<pre>
+% opt -ds-aa -print-alias-sets -disable-output
+</pre>
+</div>
+
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="count-aa">The <tt>-count-aa</tt> pass</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>-count-aa</tt> pass is useful to see how many queries a particular
+pass is making and what responses are returned by the alias analysis. As an
+example,</p>
+
+<div class="doc_code">
+<pre>
+% opt -basicaa -count-aa -ds-aa -count-aa -licm
+</pre>
+</div>
+
+<p>will print out how many queries are made (and what responses are returned) by
+the <tt>-licm</tt> pass (of the <tt>-ds-aa</tt> pass) and how many queries are made
+of the <tt>-basicaa</tt> pass by the <tt>-ds-aa</tt> pass. This can be useful
+when debugging a transformation or an alias analysis implementation.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="aa-eval">The <tt>-aa-eval</tt> pass</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>-aa-eval</tt> pass simply iterates through all pairs of pointers in a
+function and asks an alias analysis whether or not the pointers alias. This
+gives an indication of the precision of the alias analysis. Statistics are
+printed indicating the percent of no/may/must aliases found (a more precise
+algorithm will have a lower number of may aliases).</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+ <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
+ <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+
+</body>
+</html>
diff --git a/docs/BitCodeFormat.html b/docs/BitCodeFormat.html
new file mode 100644
index 0000000..7194c7a
--- /dev/null
+++ b/docs/BitCodeFormat.html
@@ -0,0 +1,614 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+ <title>LLVM Bitcode File Format</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+<div class="doc_title"> LLVM Bitcode File Format </div>
+<ol>
+ <li><a href="#abstract">Abstract</a></li>
+ <li><a href="#overview">Overview</a></li>
+ <li><a href="#bitstream">Bitstream Format</a>
+ <ol>
+ <li><a href="#magic">Magic Numbers</a></li>
+ <li><a href="#primitives">Primitives</a></li>
+ <li><a href="#abbrevid">Abbreviation IDs</a></li>
+ <li><a href="#blocks">Blocks</a></li>
+ <li><a href="#datarecord">Data Records</a></li>
+ <li><a href="#abbreviations">Abbreviations</a></li>
+ <li><a href="#stdblocks">Standard Blocks</a></li>
+ </ol>
+ </li>
+ <li><a href="#llvmir">LLVM IR Encoding</a>
+ <ol>
+ <li><a href="#basics">Basics</a></li>
+ </ol>
+ </li>
+</ol>
+<div class="doc_author">
+ <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>.
+</p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"> <a name="abstract">Abstract</a></div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>This document describes the LLVM bitstream file format and the encoding of
+the LLVM IR into it.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"> <a name="overview">Overview</a></div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>
+What is commonly known as the LLVM bitcode file format (also, sometimes
+anachronistically known as bytecode) is actually two things: a <a
+href="#bitstream">bitstream container format</a>
+and an <a href="#llvmir">encoding of LLVM IR</a> into the container format.</p>
+
+<p>
+The bitstream format is an abstract encoding of structured data, very
+similar to XML in some ways. Like XML, bitstream files contain tags, and nested
+structures, and you can parse the file without having to understand the tags.
+Unlike XML, the bitstream format is a binary encoding, and unlike XML it
+provides a mechanism for the file to self-describe "abbreviations", which are
+effectively size optimizations for the content.</p>
+
+<p>This document first describes the LLVM bitstream format, then describes the
+record structure used by LLVM IR files.
+</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"> <a name="bitstream">Bitstream Format</a></div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>
+The bitstream format is literally a stream of bits, with a very simple
+structure. This structure consists of the following concepts:
+</p>
+
+<ul>
+<li>A "<a href="#magic">magic number</a>" that identifies the contents of
+ the stream.</li>
+<li>Encoding <a href="#primitives">primitives</a> like variable bit-rate
+ integers.</li>
+<li><a href="#blocks">Blocks</a>, which define nested content.</li>
+<li><a href="#datarecord">Data Records</a>, which describe entities within the
+ file.</li>
+<li>Abbreviations, which specify compression optimizations for the file.</li>
+</ul>
+
+<p>Note that the <a
+href="CommandGuide/html/llvm-bcanalyzer.html">llvm-bcanalyzer</a> tool can be
+used to dump and inspect arbitrary bitstreams, which is very useful for
+understanding the encoding.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="magic">Magic Numbers</a>
+</div>
+
+<div class="doc_text">
+
+<p>The first four bytes of the stream identify the encoding of the file. This
+is used by a reader to know what is contained in the file.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="primitives">Primitives</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+A bitstream literally consists of a stream of bits. This stream is made up of a
+number of primitive values that encode a stream of unsigned integer values.
+These
+integers are encoded in two ways: either as <a href="#fixedwidth">Fixed
+Width Integers</a> or as <a href="#variablewidth">Variable Width
+Integers</a>.
+</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="fixedwidth">Fixed Width Integers</a>
+</div>
+
+<div class="doc_text">
+
+<p>Fixed-width integer values have their low bits emitted directly to the file.
+ For example, a 3-bit integer value encodes 1 as 001. Fixed width integers
+ are used when there is a well-known number of options for a field. For
+ example, boolean values are usually encoded with a 1-bit wide integer.
+</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="variablewidth">Variable Width
+Integers</a></div>
+
+<div class="doc_text">
+
+<p>Variable-width integer (VBR) values encode values of arbitrary size,
+optimizing for the case where the values are small. Given a 4-bit VBR field,
+any 3-bit value (0 through 7) is encoded directly, with the high bit set to
+zero. Values larger than N-1 bits emit their bits in a series of N-1 bit
+chunks, where all but the last set the high bit.</p>
+
+<p>For example, the value 27 (0x1B) is encoded as 1011 0011 when emitted as a
+vbr4 value. The first set of four bits indicates the value 3 (011) with a
+continuation piece (indicated by a high bit of 1). The next word indicates a
+value of 24 (011 &lt;&lt; 3) with no continuation. The sum (3+24) yields the value
+27.
+</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="char6">6-bit characters</a></div>
+
+<div class="doc_text">
+
+<p>6-bit characters encode common characters into a fixed 6-bit field. They
+represent the following characters with the following 6-bit values:</p>
+
+<ul>
+<li>'a' .. 'z' - 0 .. 25</li>
+<li>'A' .. 'Z' - 26 .. 51</li>
+<li>'0' .. '9' - 52 .. 61</li>
+<li>'.' - 62</li>
+<li>'_' - 63</li>
+</ul>
+
+<p>This encoding is only suitable for encoding characters and strings that
+consist only of the above characters. It is completely incapable of encoding
+characters not in the set.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="wordalign">Word Alignment</a></div>
+
+<div class="doc_text">
+
+<p>Occasionally, it is useful to emit zero bits until the bitstream is a
+multiple of 32 bits. This ensures that the bit position in the stream can be
+represented as a multiple of 32-bit words.</p>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="abbrevid">Abbreviation IDs</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+A bitstream is a sequential series of <a href="#blocks">Blocks</a> and
+<a href="#datarecord">Data Records</a>. Both of these start with an
+abbreviation ID encoded as a fixed-bitwidth field. The width is specified by
+the current block, as described below. The value of the abbreviation ID
+specifies either a builtin ID (which have special meanings, defined below) or
+one of the abbreviation IDs defined by the stream itself.
+</p>
+
+<p>
+The set of builtin abbrev IDs is:
+</p>
+
+<ul>
+<li>0 - <a href="#END_BLOCK">END_BLOCK</a> - This abbrev ID marks the end of the
+ current block.</li>
+<li>1 - <a href="#ENTER_SUBBLOCK">ENTER_SUBBLOCK</a> - This abbrev ID marks the
+ beginning of a new block.</li>
+<li>2 - <a href="#DEFINE_ABBREV">DEFINE_ABBREV</a> - This defines a new
+ abbreviation.</li>
+<li>3 - <a href="#UNABBREV_RECORD">UNABBREV_RECORD</a> - This ID specifies the
+ definition of an unabbreviated record.</li>
+</ul>
+
+<p>Abbreviation IDs 4 and above are defined by the stream itself, and specify
+an <a href="#abbrev_records">abbreviated record encoding</a>.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="blocks">Blocks</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+Blocks in a bitstream denote nested regions of the stream, and are identified by
+a content-specific id number (for example, LLVM IR uses an ID of 12 to represent
+function bodies). Nested blocks capture the hierarchical structure of the data
+encoded in it, and various properties are associated with blocks as the file is
+parsed. Block definitions allow the reader to efficiently skip blocks
+in constant time if the reader wants a summary of blocks, or if it wants to
+efficiently skip data it does not understand. The LLVM IR reader uses this
+mechanism to skip function bodies, lazily reading them on demand.
+</p>
+
+<p>
+When reading and encoding the stream, several properties are maintained for the
+block. In particular, each block maintains:
+</p>
+
+<ol>
+<li>A current abbrev id width. This value starts at 2, and is set every time a
+ block record is entered. The block entry specifies the abbrev id width for
+ the body of the block.</li>
+
+<li>A set of abbreviations. Abbreviations may be defined within a block, or
+ they may be associated with all blocks of a particular ID.
+</li>
+</ol>
+
+<p>As sub blocks are entered, these properties are saved and the new sub-block
+has its own set of abbreviations, and its own abbrev id width. When a sub-block
+is popped, the saved values are restored.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="ENTER_SUBBLOCK">ENTER_SUBBLOCK
+Encoding</a></div>
+
+<div class="doc_text">
+
+<p><tt>[ENTER_SUBBLOCK, blockid<sub>vbr8</sub>, newabbrevlen<sub>vbr4</sub>,
+ &lt;align32bits&gt;, blocklen<sub>32</sub>]</tt></p>
+
+<p>
+The ENTER_SUBBLOCK abbreviation ID specifies the start of a new block record.
+The <tt>blockid</tt> value is encoded as an 8-bit VBR identifier, and indicates
+the type of block being entered (which is application specific). The
+<tt>newabbrevlen</tt> value is a 4-bit VBR which specifies the
+abbrev id width for the sub-block. The <tt>blocklen</tt> is a 32-bit aligned
+value that specifies the size of the subblock, in 32-bit words. This value
+allows the reader to skip over the entire block in one jump.
+</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="END_BLOCK">END_BLOCK
+Encoding</a></div>
+
+<div class="doc_text">
+
+<p><tt>[END_BLOCK, &lt;align32bits&gt;]</tt></p>
+
+<p>
+The END_BLOCK abbreviation ID specifies the end of the current block record.
+Its end is aligned to 32-bits to ensure that the size of the block is an even
+multiple of 32-bits.</p>
+
+</div>
+
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="datarecord">Data Records</a>
+</div>
+
+<div class="doc_text">
+<p>
+Data records consist of a record code and a number of (up to) 64-bit integer
+values. The interpretation of the code and values is application specific and
+there are multiple different ways to encode a record (with an unabbrev record
+or with an abbreviation). In the LLVM IR format, for example, there is a record
+which encodes the target triple of a module. The code is MODULE_CODE_TRIPLE,
+and the values of the record are the ascii codes for the characters in the
+string.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="UNABBREV_RECORD">UNABBREV_RECORD
+Encoding</a></div>
+
+<div class="doc_text">
+
+<p><tt>[UNABBREV_RECORD, code<sub>vbr6</sub>, numops<sub>vbr6</sub>,
+ op0<sub>vbr6</sub>, op1<sub>vbr6</sub>, ...]</tt></p>
+
+<p>An UNABBREV_RECORD provides a default fallback encoding, which is both
+completely general and also extremely inefficient. It can describe an arbitrary
+record, by emitting the code and operands as vbrs.</p>
+
+<p>For example, emitting an LLVM IR target triple as an unabbreviated record
+requires emitting the UNABBREV_RECORD abbrevid, a vbr6 for the
+MODULE_CODE_TRIPLE code, a vbr6 for the length of the string (which is equal to
+the number of operands), and a vbr6 for each character. Since there are no
+letters with value less than 32, each letter would need to be emitted as at
+least a two-part VBR, which means that each letter would require at least 12
+bits. This is not an efficient encoding, but it is fully general.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="abbrev_records">Abbreviated Record
+Encoding</a></div>
+
+<div class="doc_text">
+
+<p><tt>[&lt;abbrevid&gt;, fields...]</tt></p>
+
+<p>An abbreviated record is an abbreviation id followed by a set of fields that
+are encoded according to the <a href="#abbreviations">abbreviation
+definition</a>. This allows records to be encoded significantly more densely
+than records encoded with the <a href="#UNABBREV_RECORD">UNABBREV_RECORD</a>
+type, and allows the abbreviation types to be specified in the stream itself,
+which allows the files to be completely self describing. The actual encoding
+of abbreviations is defined below.
+</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="abbreviations">Abbreviations</a>
+</div>
+
+<div class="doc_text">
+<p>
+Abbreviations are an important form of compression for bitstreams. The idea is
+to specify a dense encoding for a class of records once, then use that encoding
+to emit many records. It takes space to emit the encoding into the file, but
+the space is recouped (hopefully plus some) when the records that use it are
+emitted.
+</p>
+
+<p>
+Abbreviations can be determined dynamically per client, per file. Since the
+abbreviations are stored in the bitstream itself, different streams of the same
+format can contain different sets of abbreviations if the specific stream does
+not need it. As a concrete example, LLVM IR files usually emit an abbreviation
+for binary operators. If a specific LLVM module contained no or few binary
+operators, the abbreviation does not need to be emitted.
+</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="DEFINE_ABBREV">DEFINE_ABBREV
+ Encoding</a></div>
+
+<div class="doc_text">
+
+<p><tt>[DEFINE_ABBREV, numabbrevops<sub>vbr5</sub>, abbrevop0, abbrevop1,
+ ...]</tt></p>
+
+<p>An abbreviation definition consists of the DEFINE_ABBREV abbrevid followed
+by a VBR that specifies the number of abbrev operands, then the abbrev
+operands themselves. Abbreviation operands come in three forms. They all start
+with a single bit that indicates whether the abbrev operand is a literal operand
+(when the bit is 1) or an encoding operand (when the bit is 0).</p>
+
+<ol>
+<li>Literal operands - <tt>[1<sub>1</sub>, litvalue<sub>vbr8</sub>]</tt> -
+Literal operands specify that the value in the result
+is always a single specific value. This specific value is emitted as a vbr8
+after the bit indicating that it is a literal operand.</li>
+<li>Encoding info without data - <tt>[0<sub>1</sub>, encoding<sub>3</sub>]</tt>
+ - Operand encodings that do not have extra data are just emitted as their code.
+</li>
+<li>Encoding info with data - <tt>[0<sub>1</sub>, encoding<sub>3</sub>,
+value<sub>vbr5</sub>]</tt> - Operand encodings that do have extra data are
+emitted as their code, followed by the extra data.
+</li>
+</ol>
+
+<p>The possible operand encodings are:</p>
+
+<ul>
+<li>1 - Fixed - The field should be emitted as a <a
+ href="#fixedwidth">fixed-width value</a>, whose width
+ is specified by the encoding operand.</li>
+<li>2 - VBR - The field should be emitted as a <a
+ href="#variablewidth">variable-width value</a>, whose width
+ is specified by the encoding operand.</li>
+<li>3 - Array - This field is an array of values. The element type of the array
+ is specified by the next encoding operand.</li>
+<li>4 - Char6 - This field should be emitted as a <a href="#char6">char6-encoded
+ value</a>.</li>
+</ul>
+
+<p>For example, target triples in LLVM modules are encoded as a record of the
+form <tt>[TRIPLE, 'a', 'b', 'c', 'd']</tt>. Consider if the bitstream emitted
+the following abbrev entry:</p>
+
+<ul>
+<li><tt>[0, Fixed, 4]</tt></li>
+<li><tt>[0, Array]</tt></li>
+<li><tt>[0, Char6]</tt></li>
+</ul>
+
+<p>When emitting a record with this abbreviation, the above entry would be
+emitted as:</p>
+
+<p><tt>[4<sub>abbrevwidth</sub>, 2<sub>4</sub>, 4<sub>vbr6</sub>,
+ 0<sub>6</sub>, 1<sub>6</sub>, 2<sub>6</sub>, 3<sub>6</sub>]</tt></p>
+
+<p>These values are:</p>
+
+<ol>
+<li>The first value, 4, is the abbreviation ID for this abbreviation.</li>
+<li>The second value, 2, is the code for TRIPLE in LLVM IR files.</li>
+<li>The third value, 4, is the length of the array.</li>
+<li>The rest of the values are the char6 encoded values for "abcd".</li>
+</ol>
+
+<p>With this abbreviation, the triple is emitted with only 37 bits (assuming an
+abbrev id width of 3). Without the abbreviation, significantly more space would
+be required to emit the target triple. Also, since the TRIPLE value is not
+emitted as a literal in the abbreviation, the abbreviation can also be used for
+any other string value.
+</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="stdblocks">Standard Blocks</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+In addition to the basic block structure and record encodings, the bitstream
+also defines specific builtin block types. These block types specify how the
+stream is to be decoded or other metadata. In the future, new standard blocks
+may be added.
+</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="BLOCKINFO">#0 - BLOCKINFO
+Block</a></div>
+
+<div class="doc_text">
+
+<p>The BLOCKINFO block allows the description of metadata for other blocks. The
+ currently specified records are:</p>
+
+<ul>
+<li><tt>[SETBID (#1), blockid]</tt></li>
+<li><tt>[DEFINE_ABBREV, ...]</tt></li>
+</ul>
+
+<p>
+The SETBID record indicates which block ID is being described. The standard
+DEFINE_ABBREV record specifies an abbreviation. The abbreviation is associated
+with the block ID, and any blocks with a matching ID automatically get the
+abbreviation.
+</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"> <a name="llvmir">LLVM IR Encoding</a></div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>LLVM IR is encoded into a bitstream by defining blocks and records. It uses
+blocks for things like constant pools, functions, symbol tables, etc. It uses
+records for things like instructions, global variable descriptors, type
+descriptions, etc. This document does not describe the set of abbreviations
+that the writer uses, as these are fully self-described in the file, and the
+reader is not allowed to build in any knowledge of this.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="basics">Basics</a>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="ir_magic">LLVM IR Magic Number</a></div>
+
+<div class="doc_text">
+
+<p>
+The magic number for LLVM IR files is:
+</p>
+
+<p><tt>['B'<sub>8</sub>, 'C'<sub>8</sub>, 0x0<sub>4</sub>, 0xC<sub>4</sub>,
+0xE<sub>4</sub>, 0xD<sub>4</sub>]</tt></p>
+
+<p>When viewed as bytes, this is "BC 0xC0DE".</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="ir_signed_vbr">Signed VBRs</a></div>
+
+<div class="doc_text">
+
+<p>
+<a href="#variablewidth">Variable Width Integers</a> are an efficient way to
+encode arbitrary sized unsigned values, but are an extremely inefficient way to
+encode signed values (as signed values are otherwise treated as maximally large
+unsigned values).</p>
+
+<p>As such, signed vbr values of a specific width are emitted as follows:</p>
+
+<ul>
+<li>Positive values are emitted as vbrs of the specified width, but with their
+ value shifted left by one.</li>
+<li>Negative values are emitted as vbrs of the specified width, but the negated
+ value is shifted left by one, and the low bit is set.</li>
+</ul>
+
+<p>With this encoding, small positive and small negative values can both be
+emitted efficiently.</p>
+
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="ir_blocks">LLVM IR Blocks</a></div>
+
+<div class="doc_text">
+
+<p>
+LLVM IR is defined with the following blocks:
+</p>
+
+<ul>
+<li>8 - MODULE_BLOCK - This is the top-level block that contains the
+ entire module, and describes a variety of per-module information.</li>
+<li>9 - PARAMATTR_BLOCK - This enumerates the parameter attributes.</li>
+<li>10 - TYPE_BLOCK - This describes all of the types in the module.</li>
+<li>11 - CONSTANTS_BLOCK - This describes constants for a module or
+ function.</li>
+<li>12 - FUNCTION_BLOCK - This describes a function body.</li>
+<li>13 - TYPE_SYMTAB_BLOCK - This describes the type symbol table.</li>
+<li>14 - VALUE_SYMTAB_BLOCK - This describes a value symbol table.</li>
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="MODULE_BLOCK">MODULE_BLOCK Contents</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+</p>
+
+</div>
+
+
+<!-- *********************************************************************** -->
+<hr>
+<address> <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+<a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+ <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
+<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
+Last modified: $Date$
+</address>
+</body>
+</html>
diff --git a/docs/Bugpoint.html b/docs/Bugpoint.html
new file mode 100644
index 0000000..bdfd313
--- /dev/null
+++ b/docs/Bugpoint.html
@@ -0,0 +1,244 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>LLVM bugpoint tool: design and usage</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+<div class="doc_title">
+ LLVM bugpoint tool: design and usage
+</div>
+
+<ul>
+ <li><a href="#desc">Description</a></li>
+ <li><a href="#design">Design Philosophy</a>
+ <ul>
+ <li><a href="#autoselect">Automatic Debugger Selection</a></li>
+ <li><a href="#crashdebug">Crash debugger</a></li>
+ <li><a href="#codegendebug">Code generator debugger</a></li>
+ <li><a href="#miscompilationdebug">Miscompilation debugger</a></li>
+ </ul></li>
+ <li><a href="#advice">Advice for using <tt>bugpoint</tt></a></li>
+</ul>
+
+<div class="doc_author">
+<p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+<a name="desc">Description</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p><tt>bugpoint</tt> narrows down the source of problems in LLVM tools and
+passes. It can be used to debug three types of failures: optimizer crashes,
+miscompilations by optimizers, or bad native code generation (including problems
+in the static and JIT compilers). It aims to reduce large test cases to small,
+useful ones. For example, if <tt>opt</tt> crashes while optimizing a
+file, it will identify the optimization (or combination of optimizations) that
+causes the crash, and reduce the file down to a small example which triggers the
+crash.</p>
+
+<p>For detailed case scenarios, such as debugging <tt>opt</tt>,
+<tt>llvm-ld</tt>, or one of the LLVM code generators, see the <a
+href="HowToSubmitABug.html">How To Submit a Bug Report</a> document.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+<a name="design">Design Philosophy</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p><tt>bugpoint</tt> is designed to be a useful tool without requiring any
+hooks into the LLVM infrastructure at all. It works with any and all LLVM
+passes and code generators, and does not need to "know" how they work. Because
+of this, it may appear to do stupid things or miss obvious
+simplifications. <tt>bugpoint</tt> is also designed to trade off programmer
+time for computer time in the compiler-debugging process; consequently, it may
+take a long period of (unattended) time to reduce a test case, but we feel it
+is still worth it. Note that <tt>bugpoint</tt> is generally very quick unless
+debugging a miscompilation where each test of the program (which requires
+executing it) takes a long time.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="autoselect">Automatic Debugger Selection</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>bugpoint</tt> reads each <tt>.bc</tt> or <tt>.ll</tt> file specified on
+the command line and links them together into a single module, called the test
+program. If any LLVM passes are specified on the command line, it runs these
+passes on the test program. If any of the passes crash, or if they produce
+malformed output (which causes the verifier to abort), <tt>bugpoint</tt> starts
+the <a href="#crashdebug">crash debugger</a>.</p>
+
+<p>Otherwise, if the <tt>-output</tt> option was not specified,
+<tt>bugpoint</tt> runs the test program with the C backend (which is assumed to
+generate good code) to generate a reference output. Once <tt>bugpoint</tt> has
+a reference output for the test program, it tries executing it with the
+selected code generator. If the selected code generator crashes,
+<tt>bugpoint</tt> starts the <a href="#crashdebug">crash debugger</a> on the
+code generator. Otherwise, if the resulting output differs from the reference
+output, it assumes the difference resulted from a code generator failure, and
+starts the <a href="#codegendebug">code generator debugger</a>.</p>
+
+<p>Finally, if the output of the selected code generator matches the reference
+output, <tt>bugpoint</tt> runs the test program after all of the LLVM passes
+have been applied to it. If its output differs from the reference output, it
+assumes the difference resulted from a failure in one of the LLVM passes, and
+enters the <a href="#miscompilationdebug">miscompilation debugger</a>.
+Otherwise, there is no problem <tt>bugpoint</tt> can debug.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="crashdebug">Crash debugger</a>
+</div>
+
+<div class="doc_text">
+
+<p>If an optimizer or code generator crashes, <tt>bugpoint</tt> will try as hard
+as it can to reduce the list of passes (for optimizer crashes) and the size of
+the test program. First, <tt>bugpoint</tt> figures out which combination of
+optimizer passes triggers the bug. This is useful when debugging a problem
+exposed by <tt>opt</tt>, for example, because it runs over 38 passes.</p>
+
+<p>Next, <tt>bugpoint</tt> tries removing functions from the test program, to
+reduce its size. Usually it is able to reduce a test program to a single
+function, when debugging intraprocedural optimizations. Once the number of
+functions has been reduced, it attempts to delete various edges in the control
+flow graph, to reduce the size of the function as much as possible. Finally,
+<tt>bugpoint</tt> deletes any individual LLVM instructions whose absence does
+not eliminate the failure. At the end, <tt>bugpoint</tt> should tell you what
+passes crash, give you a bitcode file, and give you instructions on how to
+reproduce the failure with <tt>opt</tt> or <tt>llc</tt>.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="codegendebug">Code generator debugger</a>
+</div>
+
+<div class="doc_text">
+
+<p>The code generator debugger attempts to narrow down the amount of code that
+is being miscompiled by the selected code generator. To do this, it takes the
+test program and partitions it into two pieces: one piece which it compiles
+with the C backend (into a shared object), and one piece which it runs with
+either the JIT or the static LLC compiler. It uses several techniques to
+reduce the amount of code pushed through the LLVM code generator, to reduce the
+potential scope of the problem. After it is finished, it emits two bitcode
+files (called "test" [to be compiled with the code generator] and "safe" [to be
+compiled with the C backend], respectively), and instructions for reproducing
+the problem. The code generator debugger assumes that the C backend produces
+good code.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="miscompilationdebug">Miscompilation debugger</a>
+</div>
+
+<div class="doc_text">
+
+<p>The miscompilation debugger works similarly to the code generator debugger.
+It works by splitting the test program into two pieces, running the
+optimizations specified on one piece, linking the two pieces back together, and
+then executing the result. It attempts to narrow down the list of passes to
+the one (or few) which are causing the miscompilation, then reduce the portion
+of the test program which is being miscompiled. The miscompilation debugger
+assumes that the selected code generator is working properly.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="advice">Advice for using bugpoint</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p><tt>bugpoint</tt> can be a remarkably useful tool, but it sometimes works in
+non-obvious ways. Here are some hints and tips:</p>
+
+<ol>
+<li>In the code generator and miscompilation debuggers, <tt>bugpoint</tt> only
+ works with programs that have deterministic output. Thus, if the program
+ outputs <tt>argv[0]</tt>, the date, time, or any other "random" data,
+ <tt>bugpoint</tt> may misinterpret differences in these data, when output,
+ as the result of a miscompilation. Programs should be temporarily modified
+ to disable outputs that are likely to vary from run to run.
+
+<li>In the code generator and miscompilation debuggers, debugging will go
+ faster if you manually modify the program or its inputs to reduce the
+ runtime, but still exhibit the problem.
+
+<li><tt>bugpoint</tt> is extremely useful when working on a new optimization:
+ it helps track down regressions quickly. To avoid having to relink
+ <tt>bugpoint</tt> every time you change your optimization, however, have
+ <tt>bugpoint</tt> dynamically load your optimization with the
+ <tt>-load</tt> option.
+
+<li><p><tt>bugpoint</tt> can generate a lot of output and run for a long period
+ of time. It is often useful to capture the output of the program to a file.
+ For example, in the C shell, you can run:</p>
+
+<div class="doc_code">
+<p><tt>bugpoint ... |&amp; tee bugpoint.log</tt></p>
+</div>
+
+ <p>to get a copy of <tt>bugpoint</tt>'s output in the file
+ <tt>bugpoint.log</tt>, as well as on your terminal.</p>
+
+<li><tt>bugpoint</tt> cannot debug problems with the LLVM linker. If
+ <tt>bugpoint</tt> crashes before you see its "All input ok" message,
+ you might try <tt>llvm-link -v</tt> on the same set of input files. If
+ that also crashes, you may be experiencing a linker bug.
+
+<li>If your program is <b>supposed</b> to crash, <tt>bugpoint</tt> will be
+ confused. One way to deal with this is to cause bugpoint to ignore the exit
+ code from your program, by giving it the <tt>-check-exit-code=false</tt>
+ option.
+
+<li><tt>bugpoint</tt> is useful for proactively finding bugs in LLVM.
+ Invoking <tt>bugpoint</tt> with the <tt>-find-bugs</tt> option will cause
+ the list of specified optimizations to be randomized and applied to the
+ program. This process will repeat until a bug is found or the user
+ kills <tt>bugpoint</tt>.
+
+</ol>
+
+</div>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+ <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
+ <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+
+</body>
+</html>
diff --git a/docs/CFEBuildInstrs.html b/docs/CFEBuildInstrs.html
new file mode 100644
index 0000000..65c375c
--- /dev/null
+++ b/docs/CFEBuildInstrs.html
@@ -0,0 +1,97 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+ <link rel="stylesheet" href="llvm.css" type="text/css" media="screen">
+ <title>Building the LLVM C/C++ Front-End</title>
+</head>
+<body>
+
+<div class="doc_title">
+ Building the LLVM C/C++ Front-End
+</div>
+
+<ol>
+ <li><a href="#instructions">Building llvm-gcc 4 from Source</a></li>
+ <li><a href="#license">License Information</a></li>
+</ol>
+
+<div class="doc_author">
+ <p>Written by the LLVM Team</p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="instructions">Building llvm-gcc 4 from Source</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>This section describes how to acquire and build llvm-gcc4, which is based on
+the GCC 4.0.1 front-end. This front-end supports C, C++, Objective-C, and
+Objective-C++. Note that the instructions for building this front-end are
+completely different (and much easier!) than those for building llvm-gcc3 in
+the past.</p>
+
+<ol>
+ <li><p>Retrieve the appropriate llvm-gcc4-x.y.source.tar.gz archive from the
+ <a href="http://llvm.org/releases/">llvm web site</a>.</p>
+
+ <p>It is also possible to download the sources of the llvm-gcc4 front end
+ from a read-only mirror using subversion. To check out the code the
+ first time use:</p>
+
+<div class="doc_code">
+<pre>
+svn co svn://anonsvn.opensource.apple.com/svn/llvm/trunk <i>dst-directory</i>
+</pre>
+</div>
+
+ <p>After that, the code can be updated in the destination directory
+ using:</p>
+
+<div class="doc_code">
+<pre>svn update</pre>
+</div>
+
+ <p>The mirror is brought up to date every evening.</p></li>
+
+ <li>Follow the directions in the top-level <tt>README.LLVM</tt> file for
+ up-to-date instructions on how to build llvm-gcc4.</li>
+</ol>
+
+</div>
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="license">License Information</a>
+</div>
+
+<div class="doc_text">
+<p>
+The LLVM GCC frontend is licensed to you under the GNU General Public License
+and the GNU Lesser General Public License. Please see the files COPYING and
+COPYING.LIB for more details.
+</p>
+
+<p>
+More information is <a href="FAQ.html#license">available in the FAQ</a>.
+</p>
+</div>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+ <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+
+</body>
+</html>
diff --git a/docs/CodeGenerator.html b/docs/CodeGenerator.html
new file mode 100644
index 0000000..bc82b46
--- /dev/null
+++ b/docs/CodeGenerator.html
@@ -0,0 +1,1962 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <meta http-equiv="content-type" content="text/html; charset=utf-8">
+ <title>The LLVM Target-Independent Code Generator</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<div class="doc_title">
+ The LLVM Target-Independent Code Generator
+</div>
+
+<ol>
+ <li><a href="#introduction">Introduction</a>
+ <ul>
+ <li><a href="#required">Required components in the code generator</a></li>
+ <li><a href="#high-level-design">The high-level design of the code
+ generator</a></li>
+ <li><a href="#tablegen">Using TableGen for target description</a></li>
+ </ul>
+ </li>
+ <li><a href="#targetdesc">Target description classes</a>
+ <ul>
+ <li><a href="#targetmachine">The <tt>TargetMachine</tt> class</a></li>
+ <li><a href="#targetdata">The <tt>TargetData</tt> class</a></li>
+ <li><a href="#targetlowering">The <tt>TargetLowering</tt> class</a></li>
+ <li><a href="#mregisterinfo">The <tt>MRegisterInfo</tt> class</a></li>
+ <li><a href="#targetinstrinfo">The <tt>TargetInstrInfo</tt> class</a></li>
+ <li><a href="#targetframeinfo">The <tt>TargetFrameInfo</tt> class</a></li>
+ <li><a href="#targetsubtarget">The <tt>TargetSubtarget</tt> class</a></li>
+ <li><a href="#targetjitinfo">The <tt>TargetJITInfo</tt> class</a></li>
+ </ul>
+ </li>
+ <li><a href="#codegendesc">Machine code description classes</a>
+ <ul>
+ <li><a href="#machineinstr">The <tt>MachineInstr</tt> class</a></li>
+ <li><a href="#machinebasicblock">The <tt>MachineBasicBlock</tt>
+ class</a></li>
+ <li><a href="#machinefunction">The <tt>MachineFunction</tt> class</a></li>
+ </ul>
+ </li>
+ <li><a href="#codegenalgs">Target-independent code generation algorithms</a>
+ <ul>
+ <li><a href="#instselect">Instruction Selection</a>
+ <ul>
+ <li><a href="#selectiondag_intro">Introduction to SelectionDAGs</a></li>
+ <li><a href="#selectiondag_process">SelectionDAG Code Generation
+ Process</a></li>
+ <li><a href="#selectiondag_build">Initial SelectionDAG
+ Construction</a></li>
+ <li><a href="#selectiondag_legalize">SelectionDAG Legalize Phase</a></li>
+ <li><a href="#selectiondag_optimize">SelectionDAG Optimization
+ Phase: the DAG Combiner</a></li>
+ <li><a href="#selectiondag_select">SelectionDAG Select Phase</a></li>
+ <li><a href="#selectiondag_sched">SelectionDAG Scheduling and Formation
+ Phase</a></li>
+ <li><a href="#selectiondag_future">Future directions for the
+ SelectionDAG</a></li>
+ </ul></li>
+ <li><a href="#liveintervals">Live Intervals</a>
+ <ul>
+ <li><a href="#livevariable_analysis">Live Variable Analysis</a></li>
+ <li><a href="#liveintervals_analysis">Live Intervals Analysis</a></li>
+ </ul></li>
+ <li><a href="#regalloc">Register Allocation</a>
+ <ul>
+ <li><a href="#regAlloc_represent">How registers are represented in
+ LLVM</a></li>
+ <li><a href="#regAlloc_howTo">Mapping virtual registers to physical
+ registers</a></li>
+ <li><a href="#regAlloc_twoAddr">Handling two address instructions</a></li>
+ <li><a href="#regAlloc_ssaDecon">The SSA deconstruction phase</a></li>
+ <li><a href="#regAlloc_fold">Instruction folding</a></li>
+ <li><a href="#regAlloc_builtIn">Built in register allocators</a></li>
+ </ul></li>
+ <li><a href="#codeemit">Code Emission</a>
+ <ul>
+ <li><a href="#codeemit_asm">Generating Assembly Code</a></li>
+ <li><a href="#codeemit_bin">Generating Binary Machine Code</a></li>
+ </ul></li>
+ </ul>
+ </li>
+ <li><a href="#targetimpls">Target-specific Implementation Notes</a>
+ <ul>
+ <li><a href="#x86">The X86 backend</a></li>
+ <li><a href="#ppc">The PowerPC backend</a>
+ <ul>
+ <li><a href="#ppc_abi">LLVM PowerPC ABI</a></li>
+ <li><a href="#ppc_frame">Frame Layout</a></li>
+ <li><a href="#ppc_prolog">Prolog/Epilog</a></li>
+ <li><a href="#ppc_dynamic">Dynamic Allocation</a></li>
+ </ul></li>
+ </ul></li>
+
+</ol>
+
+<div class="doc_author">
+ <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>,
+ <a href="mailto:isanbard@gmail.com">Bill Wendling</a>,
+ <a href="mailto:pronesto@gmail.com">Fernando Magno Quintao
+ Pereira</a> and
+ <a href="mailto:jlaskey@mac.com">Jim Laskey</a></p>
+</div>
+
+<div class="doc_warning">
+ <p>Warning: This is a work in progress.</p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="introduction">Introduction</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>The LLVM target-independent code generator is a framework that provides a
+suite of reusable components for translating the LLVM internal representation to
+the machine code for a specified target&mdash;either in assembly form (suitable
+for a static compiler) or in binary machine code format (usable for a JIT
+compiler). The LLVM target-independent code generator consists of five main
+components:</p>
+
+<ol>
+<li><a href="#targetdesc">Abstract target description</a> interfaces which
+capture important properties about various aspects of the machine, independently
+of how they will be used. These interfaces are defined in
+<tt>include/llvm/Target/</tt>.</li>
+
+<li>Classes used to represent the <a href="#codegendesc">machine code</a> being
+generated for a target. These classes are intended to be abstract enough to
+represent the machine code for <i>any</i> target machine. These classes are
+defined in <tt>include/llvm/CodeGen/</tt>.</li>
+
+<li><a href="#codegenalgs">Target-independent algorithms</a> used to implement
+various phases of native code generation (register allocation, scheduling, stack
+frame representation, etc). This code lives in <tt>lib/CodeGen/</tt>.</li>
+
+<li><a href="#targetimpls">Implementations of the abstract target description
+interfaces</a> for particular targets. These machine descriptions make use of
+the components provided by LLVM, and can optionally provide custom
+target-specific passes, to build complete code generators for a specific target.
+Target descriptions live in <tt>lib/Target/</tt>.</li>
+
+<li><a href="#jit">The target-independent JIT components</a>. The LLVM JIT is
+completely target independent (it uses the <tt>TargetJITInfo</tt> structure to
+interface for target-specific issues). The code for the target-independent
+JIT lives in <tt>lib/ExecutionEngine/JIT</tt>.</li>
+
+</ol>
+
+<p>
+Depending on which part of the code generator you are interested in working on,
+different pieces of this will be useful to you. In any case, you should be
+familiar with the <a href="#targetdesc">target description</a> and <a
+href="#codegendesc">machine code representation</a> classes. If you want to add
+a backend for a new target, you will need to <a href="#targetimpls">implement the
+target description</a> classes for your new target and understand the <a
+href="LangRef.html">LLVM code representation</a>. If you are interested in
+implementing a new <a href="#codegenalgs">code generation algorithm</a>, it
+should only depend on the target-description and machine code representation
+classes, ensuring that it is portable.
+</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="required">Required components in the code generator</a>
+</div>
+
+<div class="doc_text">
+
+<p>The two pieces of the LLVM code generator are the high-level interface to the
+code generator and the set of reusable components that can be used to build
+target-specific backends. The two most important interfaces (<a
+href="#targetmachine"><tt>TargetMachine</tt></a> and <a
+href="#targetdata"><tt>TargetData</tt></a>) are the only ones that are
+required to be defined for a backend to fit into the LLVM system, but the others
+must be defined if the reusable code generator components are going to be
+used.</p>
+
+<p>This design has two important implications. The first is that LLVM can
+support completely non-traditional code generation targets. For example, the C
+backend does not require register allocation, instruction selection, or any of
+the other standard components provided by the system. As such, it only
+implements these two interfaces, and does its own thing. Another example of a
+code generator like this is a (purely hypothetical) backend that converts LLVM
+to the GCC RTL form and uses GCC to emit machine code for a target.</p>
+
+<p>This design also implies that it is possible to design and
+implement radically different code generators in the LLVM system that do not
+make use of any of the built-in components. Doing so is not recommended at all,
+but could be required for radically different targets that do not fit into the
+LLVM machine description model: FPGAs for example.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="high-level-design">The high-level design of the code generator</a>
+</div>
+
+<div class="doc_text">
+
+<p>The LLVM target-independent code generator is designed to support efficient and
+quality code generation for standard register-based microprocessors. Code
+generation in this model is divided into the following stages:</p>
+
+<ol>
+<li><b><a href="#instselect">Instruction Selection</a></b> - This phase
+determines an efficient way to express the input LLVM code in the target
+instruction set.
+This stage produces the initial code for the program in the target instruction
+set, then makes use of virtual registers in SSA form and physical registers that
+represent any required register assignments due to target constraints or calling
+conventions. This step turns the LLVM code into a DAG of target
+instructions.</li>
+
+<li><b><a href="#selectiondag_sched">Scheduling and Formation</a></b> - This
+phase takes the DAG of target instructions produced by the instruction selection
+phase, determines an ordering of the instructions, then emits the instructions
+as <tt><a href="#machineinstr">MachineInstr</a></tt>s with that ordering. Note
+that we describe this in the <a href="#instselect">instruction selection
+section</a> because it operates on a <a
+href="#selectiondag_intro">SelectionDAG</a>.
+</li>
+
+<li><b><a href="#ssamco">SSA-based Machine Code Optimizations</a></b> - This
+optional stage consists of a series of machine-code optimizations that
+operate on the SSA-form produced by the instruction selector. Optimizations
+like modulo-scheduling or peephole optimization work here.
+</li>
+
+<li><b><a href="#regalloc">Register Allocation</a></b> - The
+target code is transformed from an infinite virtual register file in SSA form
+to the concrete register file used by the target. This phase introduces spill
+code and eliminates all virtual register references from the program.</li>
+
+<li><b><a href="#proepicode">Prolog/Epilog Code Insertion</a></b> - Once the
+machine code has been generated for the function and the amount of stack space
+required is known (used for LLVM alloca's and spill slots), the prolog and
+epilog code for the function can be inserted and "abstract stack location
+references" can be eliminated. This stage is responsible for implementing
+optimizations like frame-pointer elimination and stack packing.</li>
+
+<li><b><a href="#latemco">Late Machine Code Optimizations</a></b> - Optimizations
+that operate on "final" machine code can go here, such as spill code scheduling
+and peephole optimizations.</li>
+
+<li><b><a href="#codeemit">Code Emission</a></b> - The final stage actually
+puts out the code for the current function, either in the target assembler
+format or in machine code.</li>
+
+</ol>
+
+<p>The code generator is based on the assumption that the instruction selector
+will use an optimal pattern matching selector to create high-quality sequences of
+native instructions. Alternative code generator designs based on pattern
+expansion and aggressive iterative peephole optimization are much slower. This
+design permits efficient compilation (important for JIT environments) and
+aggressive optimization (used when generating code offline) by allowing
+components of varying levels of sophistication to be used for any step of
+compilation.</p>
+
+<p>In addition to these stages, target implementations can insert arbitrary
+target-specific passes into the flow. For example, the X86 target uses a
+special pass to handle the 80x87 floating point stack architecture. Other
+targets with unusual requirements can be supported with custom passes as
+needed.</p>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="tablegen">Using TableGen for target description</a>
+</div>
+
+<div class="doc_text">
+
+<p>The target description classes require a detailed description of the target
+architecture. These target descriptions often have a large amount of common
+information (e.g., an <tt>add</tt> instruction is almost identical to a
+<tt>sub</tt> instruction).
+In order to allow the maximum amount of commonality to be factored out, the LLVM
+code generator uses the <a href="TableGenFundamentals.html">TableGen</a> tool to
+describe big chunks of the target machine, which allows the use of
+domain-specific and target-specific abstractions to reduce the amount of
+repetition.</p>
+
+<p>As LLVM continues to be developed and refined, we plan to move more and more
+of the target description to the <tt>.td</tt> form. Doing so gives us a
+number of advantages. The most important is that it makes it easier to port
+LLVM because it reduces the amount of C++ code that has to be written, and the
+surface area of the code generator that needs to be understood before someone
+can get something working. Second, it makes it easier to change things. In
+particular, if tables and other things are all emitted by <tt>tblgen</tt>, we
+only need a change in one place (<tt>tblgen</tt>) to update all of the targets
+to a new interface.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="targetdesc">Target description classes</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>The LLVM target description classes (located in the
+<tt>include/llvm/Target</tt> directory) provide an abstract description of the
+target machine independent of any particular client. These classes are
+designed to capture the <i>abstract</i> properties of the target (such as the
+instructions and registers it has), and do not incorporate any particular pieces
+of code generation algorithms.</p>
+
+<p>All of the target description classes (except the <tt><a
+href="#targetdata">TargetData</a></tt> class) are designed to be subclassed by
+the concrete target implementation, and have virtual methods implemented. To
+get to these implementations, the <tt><a
+href="#targetmachine">TargetMachine</a></tt> class provides accessors that
+should be implemented by the target.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="targetmachine">The <tt>TargetMachine</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>TargetMachine</tt> class provides virtual methods that are used to
+access the target-specific implementations of the various target description
+classes via the <tt>get*Info</tt> methods (<tt>getInstrInfo</tt>,
+<tt>getRegisterInfo</tt>, <tt>getFrameInfo</tt>, etc.). This class is
+designed to be specialized by
+a concrete target implementation (e.g., <tt>X86TargetMachine</tt>) which
+implements the various virtual methods. The only required target description
+class is the <a href="#targetdata"><tt>TargetData</tt></a> class, but if the
+code generator components are to be used, the other interfaces should be
+implemented as well.</p>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="targetdata">The <tt>TargetData</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>TargetData</tt> class is the only required target description class,
+and it is the only class that is not extensible (you cannot derive a new
+class from it). <tt>TargetData</tt> specifies information about how the target
+lays out memory for structures, the alignment requirements for various data
+types, the size of pointers in the target, and whether the target is
+little-endian or big-endian.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="targetlowering">The <tt>TargetLowering</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>TargetLowering</tt> class is used by SelectionDAG based instruction
+selectors primarily to describe how LLVM code should be lowered to SelectionDAG
+operations. Among other things, this class indicates:</p>
+
+<ul>
+ <li>an initial register class to use for various <tt>ValueType</tt>s</li>
+ <li>which operations are natively supported by the target machine</li>
+ <li>the return type of <tt>setcc</tt> operations</li>
+ <li>the type to use for shift amounts</li>
+ <li>various high-level characteristics, like whether it is profitable to turn
+ division by a constant into a multiplication sequence</li>
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="mregisterinfo">The <tt>MRegisterInfo</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>MRegisterInfo</tt> class (which will eventually be renamed to
+<tt>TargetRegisterInfo</tt>) is used to describe the register file of the
+target and any interactions between the registers.</p>
+
+<p>Registers are represented in the code generator by
+unsigned integers. Physical registers (those that actually exist in the target
+description) are unique small numbers, and virtual registers are generally
+large. Note that register #0 is reserved as a flag value.</p>
+
+<p>Each register in the processor description has an associated
+<tt>TargetRegisterDesc</tt> entry, which provides a textual name for the
+register (used for assembly output and debugging dumps) and a set of aliases
+(used to indicate whether one register overlaps with another).
+</p>
+
+<p>In addition to the per-register description, the <tt>MRegisterInfo</tt> class
+exposes a set of processor specific register classes (instances of the
+<tt>TargetRegisterClass</tt> class). Each register class contains sets of
+registers that have the same properties (for example, they are all 32-bit
+integer registers). Each SSA virtual register created by the instruction
+selector has an associated register class. When the register allocator runs, it
+replaces virtual registers with a physical register in the set.</p>
+
+<p>
+The target-specific implementations of these classes are auto-generated from a <a
+href="TableGenFundamentals.html">TableGen</a> description of the register file.
+</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="targetinstrinfo">The <tt>TargetInstrInfo</tt> class</a>
+</div>
+
+<div class="doc_text">
+ <p>The <tt>TargetInstrInfo</tt> class is used to describe the machine
+ instructions supported by the target. It is essentially an array of
+ <tt>TargetInstrDescriptor</tt> objects, each of which describes one
+ instruction the target supports. Descriptors define things like the mnemonic
+ for the opcode, the number of operands, the list of implicit register uses
+ and defs, whether the instruction has certain target-independent properties
+ (accesses memory, is commutable, etc), and hold any target-specific
+ flags.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="targetframeinfo">The <tt>TargetFrameInfo</tt> class</a>
+</div>
+
+<div class="doc_text">
+ <p>The <tt>TargetFrameInfo</tt> class is used to provide information about the
+ stack frame layout of the target. It holds the direction of stack growth,
+ the known stack alignment on entry to each function, and the offset to the
+ local area. The offset to the local area is the offset from the stack
+ pointer on function entry to the first location where function data (local
+ variables, spill locations) can be stored.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="targetsubtarget">The <tt>TargetSubtarget</tt> class</a>
+</div>
+
+<div class="doc_text">
+ <p>The <tt>TargetSubtarget</tt> class is used to provide information about the
+ specific chip set being targeted. A sub-target informs code generation of
+ which instructions are supported, instruction latencies and instruction
+ execution itinerary; i.e., which processing units are used, in what order, and
+ for how long.</p>
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="targetjitinfo">The <tt>TargetJITInfo</tt> class</a>
+</div>
+
+<div class="doc_text">
+ <p>The <tt>TargetJITInfo</tt> class exposes an abstract interface used by the
+ Just-In-Time code generator to perform target-specific activities, such as
+ emitting stubs. If a <tt>TargetMachine</tt> supports JIT code generation, it
+ should provide one of these objects through the <tt>getJITInfo</tt>
+ method.</p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="codegendesc">Machine code description classes</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>At a high level, LLVM code is translated to a machine specific
+representation formed out of
+<a href="#machinefunction"><tt>MachineFunction</tt></a>,
+<a href="#machinebasicblock"><tt>MachineBasicBlock</tt></a>, and <a
+href="#machineinstr"><tt>MachineInstr</tt></a> instances
+(defined in <tt>include/llvm/CodeGen</tt>). This representation is completely
+target agnostic, representing instructions in their most abstract form: an
+opcode and a series of operands. This representation is designed to support
+both an SSA representation for machine code, as well as a register allocated,
+non-SSA form.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="machineinstr">The <tt>MachineInstr</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>Target machine instructions are represented as instances of the
+<tt>MachineInstr</tt> class. This class is an extremely abstract way of
+representing machine instructions. In particular, it only keeps track of
+an opcode number and a set of operands.</p>
+
+<p>The opcode number is a simple unsigned integer that only has meaning to a
+specific backend. All of the instructions for a target should be defined in
+the <tt>*InstrInfo.td</tt> file for the target. The opcode enum values
+are auto-generated from this description. The <tt>MachineInstr</tt> class does
+not have any information about how to interpret the instruction (i.e., what the
+semantics of the instruction are); for that you must refer to the
+<tt><a href="#targetinstrinfo">TargetInstrInfo</a></tt> class.</p>
+
+<p>The operands of a machine instruction can be of several different types:
+a register reference, a constant integer, a basic block reference, etc. In
+addition, a machine operand should be marked as a def or a use of the value
+(though only registers are allowed to be defs).</p>
+
+<p>By convention, the LLVM code generator orders instruction operands so that
+all register definitions come before the register uses, even on architectures
+that are normally printed in other orders. For example, the SPARC add
+instruction: "<tt>add %i1, %i2, %i3</tt>" adds the "%i1" and "%i2" registers
+and stores the result into the "%i3" register. In the LLVM code generator,
+the operands should be stored as "<tt>%i3, %i1, %i2</tt>": with the destination
+first.</p>
+
+<p>Keeping destination (definition) operands at the beginning of the operand
+list has several advantages. In particular, the debugging printer will print
+the instruction like this:</p>
+
+<div class="doc_code">
+<pre>
+%i3 = add %i1, %i2
+</pre>
+</div>
+
+<p>Also if the first operand is a def, it is easier to <a
+href="#buildmi">create instructions</a> whose only def is the first
+operand.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="buildmi">Using the <tt>MachineInstrBuilder.h</tt> functions</a>
+</div>
+
+<div class="doc_text">
+
+<p>Machine instructions are created by using the <tt>BuildMI</tt> functions,
+located in the <tt>include/llvm/CodeGen/MachineInstrBuilder.h</tt> file. The
+<tt>BuildMI</tt> functions make it easy to build arbitrary machine
+instructions. Usage of the <tt>BuildMI</tt> functions looks like this:</p>
+
+<div class="doc_code">
+<pre>
+// Create a 'DestReg = mov 42' (rendered in X86 assembly as 'mov DestReg, 42')
+// instruction. The '1' specifies how many operands will be added.
+MachineInstr *MI = BuildMI(X86::MOV32ri, 1, DestReg).addImm(42);
+
+// Create the same instr, but insert it at the end of a basic block.
+MachineBasicBlock &amp;MBB = ...
+BuildMI(MBB, X86::MOV32ri, 1, DestReg).addImm(42);
+
+// Create the same instr, but insert it before a specified iterator point.
+MachineBasicBlock::iterator MBBI = ...
+BuildMI(MBB, MBBI, X86::MOV32ri, 1, DestReg).addImm(42);
+
+// Create a 'cmp Reg, 0' instruction, no destination reg.
+MI = BuildMI(X86::CMP32ri, 2).addReg(Reg).addImm(0);
+// Create an 'sahf' instruction which takes no operands and stores nothing.
+MI = BuildMI(X86::SAHF, 0);
+
+// Create a self looping branch instruction.
+BuildMI(MBB, X86::JNE, 1).addMBB(&amp;MBB);
+</pre>
+</div>
+
+<p>The key thing to remember with the <tt>BuildMI</tt> functions is that you
+have to specify the number of operands that the machine instruction will take.
+This allows for efficient memory allocation. Also note that
+operands default to being uses of values, not definitions. If you need to add a
+definition operand (other than the optional destination register), you must
+explicitly mark it as such:</p>
+
+<div class="doc_code">
+<pre>
+MI.addReg(Reg, MachineOperand::Def);
+</pre>
+</div>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="fixedregs">Fixed (preassigned) registers</a>
+</div>
+
+<div class="doc_text">
+
+<p>One important issue that the code generator needs to be aware of is the
+presence of fixed registers. In particular, there are often places in the
+instruction stream where the register allocator <em>must</em> arrange for a
+particular value to be in a particular register. This can occur due to
+limitations of the instruction set (e.g., the X86 can only do a 32-bit divide
+with the <tt>EAX</tt>/<tt>EDX</tt> registers), or external factors like calling
+conventions. In any case, the instruction selector should emit code that
+copies a virtual register into or out of a physical register when needed.</p>
+
+<p>For example, consider this simple LLVM example:</p>
+
+<div class="doc_code">
+<pre>
+int %test(int %X, int %Y) {
+ %Z = div int %X, %Y
+ ret int %Z
+}
+</pre>
+</div>
+
+<p>The X86 instruction selector produces this machine code for the <tt>div</tt>
+and <tt>ret</tt> (use
+"<tt>llc X.bc -march=x86 -print-machineinstrs</tt>" to get this):</p>
+
+<div class="doc_code">
+<pre>
+;; Start of div
+%EAX = mov %reg1024 ;; Copy X (in reg1024) into EAX
+%reg1027 = sar %reg1024, 31
+%EDX = mov %reg1027 ;; Sign extend X into EDX
+idiv %reg1025 ;; Divide by Y (in reg1025)
+%reg1026 = mov %EAX ;; Read the result (Z) out of EAX
+
+;; Start of ret
+%EAX = mov %reg1026 ;; 32-bit return value goes in EAX
+ret
+</pre>
+</div>
+
+<p>By the end of code generation, the register allocator has coalesced
+the registers and deleted the resultant identity moves, producing the
+following code:</p>
+
+<div class="doc_code">
+<pre>
+;; X is in EAX, Y is in ECX
+mov %EAX, %EDX
+sar %EDX, 31
+idiv %ECX
+ret
+</pre>
+</div>
+
+<p>This approach is extremely general (if it can handle the X86 architecture,
+it can handle anything!) and allows all of the target specific
+knowledge about the instruction stream to be isolated in the instruction
+selector. Note that physical registers should have a short lifetime for good
+code generation, and all physical registers are assumed dead on entry to and
+exit from basic blocks (before register allocation). Thus, if you need a value
+to be live across basic block boundaries, it <em>must</em> live in a virtual
+register.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="ssa">Machine code in SSA form</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>MachineInstr</tt>s are initially selected in SSA-form, and
+are maintained in SSA-form until register allocation happens. For the most
+part, this is trivially simple since LLVM is already in SSA form; LLVM PHI nodes
+become machine code PHI nodes, and virtual registers are only allowed to have a
+single definition.</p>
+
+<p>After register allocation, machine code is no longer in SSA-form because there
+are no virtual registers left in the code.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="machinebasicblock">The <tt>MachineBasicBlock</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>MachineBasicBlock</tt> class contains a list of machine instructions
+(<tt><a href="#machineinstr">MachineInstr</a></tt> instances). It roughly
+corresponds to the LLVM code input to the instruction selector, but there can be
+a one-to-many mapping (i.e. one LLVM basic block can map to multiple machine
+basic blocks). The <tt>MachineBasicBlock</tt> class has a
+"<tt>getBasicBlock</tt>" method, which returns the LLVM basic block that it
+comes from.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="machinefunction">The <tt>MachineFunction</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>MachineFunction</tt> class contains a list of machine basic blocks
+(<tt><a href="#machinebasicblock">MachineBasicBlock</a></tt> instances). It
+corresponds one-to-one with the LLVM function input to the instruction selector.
+In addition to a list of basic blocks, the <tt>MachineFunction</tt> contains a
+<tt>MachineConstantPool</tt>, a <tt>MachineFrameInfo</tt>, a
+<tt>MachineFunctionInfo</tt>, a <tt>SSARegMap</tt>, and a set of live in and
+live out registers for the function. See
+<tt>include/llvm/CodeGen/MachineFunction.h</tt> for more information.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="codegenalgs">Target-independent code generation algorithms</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>This section documents the phases described in the <a
+href="#high-level-design">high-level design of the code generator</a>. It
+explains how they work and some of the rationale behind their design.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="instselect">Instruction Selection</a>
+</div>
+
+<div class="doc_text">
+<p>
+Instruction Selection is the process of translating LLVM code presented to the
+code generator into target-specific machine instructions. There are several
+well-known ways to do this in the literature. In LLVM there are two main forms:
+the SelectionDAG based instruction selector framework and an old-style 'simple'
+instruction selector, which effectively peephole selects each LLVM instruction
+into a series of machine instructions. We recommend that all targets use the
+SelectionDAG infrastructure.
+</p>
+
+<p>Portions of the DAG instruction selector are generated from the target
+description (<tt>*.td</tt>) files. Our goal is for the entire instruction
+selector to be generated from these <tt>.td</tt> files.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="selectiondag_intro">Introduction to SelectionDAGs</a>
+</div>
+
+<div class="doc_text">
+
+<p>The SelectionDAG provides an abstraction for code representation in a way
+that is amenable to instruction selection using automatic techniques
+(e.g. dynamic-programming based optimal pattern matching selectors). It is also
+well-suited to other phases of code generation; in particular,
+instruction scheduling (SelectionDAGs are very close to scheduling DAGs
+post-selection). Additionally, the SelectionDAG provides a host representation
+where a large variety of very-low-level (but target-independent)
+<a href="#selectiondag_optimize">optimizations</a> may be
+performed; ones which require extensive information about the instructions
+efficiently supported by the target.</p>
+
+<p>The SelectionDAG is a Directed-Acyclic-Graph whose nodes are instances of the
+<tt>SDNode</tt> class. The primary payload of the <tt>SDNode</tt> is its
+operation code (Opcode) that indicates what operation the node performs and
+the operands to the operation.
+The various operation node types are described at the top of the
+<tt>include/llvm/CodeGen/SelectionDAGNodes.h</tt> file.</p>
+
+<p>Although most operations define a single value, each node in the graph may
+define multiple values. For example, a combined div/rem operation will define
+both the quotient and the remainder. Many other situations require multiple
+values as well. Each node also has some number of operands, which are edges
+to the node defining the used value. Because nodes may define multiple values,
+edges are represented by instances of the <tt>SDOperand</tt> class, which is
+a <tt>&lt;SDNode, unsigned&gt;</tt> pair, indicating the node and result
+value being used, respectively. Each value produced by an <tt>SDNode</tt> has
+an associated <tt>MVT::ValueType</tt> indicating what type the value is.</p>
+
+<p>SelectionDAGs contain two different kinds of values: those that represent
+data flow and those that represent control flow dependencies. Data values are
+simple edges with an integer or floating point value type. Control edges are
+represented as "chain" edges which are of type <tt>MVT::Other</tt>. These edges
+provide an ordering between nodes that have side effects (such as
+loads, stores, calls, returns, etc). All nodes that have side effects should
+take a token chain as input and produce a new one as output. By convention,
+token chain inputs are always operand #0, and chain results are always the last
+value produced by an operation.</p>
+
+<p>A SelectionDAG has designated "Entry" and "Root" nodes. The Entry node is
+always a marker node with an Opcode of <tt>ISD::EntryToken</tt>. The Root node
+is the final side-effecting node in the token chain. For example, in a single
+basic block function it would be the return node.</p>
+
+<p>One important concept for SelectionDAGs is the notion of a "legal" vs.
+"illegal" DAG. A legal DAG for a target is one that only uses supported
+operations and supported types. On a 32-bit PowerPC, for example, a DAG with
+a value of type i1, i8, i16, or i64 would be illegal, as would a DAG that uses a
+SREM or UREM operation. The
+<a href="#selectiondag_legalize">legalize</a> phase is responsible for turning
+an illegal DAG into a legal DAG.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="selectiondag_process">SelectionDAG Instruction Selection Process</a>
+</div>
+
+<div class="doc_text">
+
+<p>SelectionDAG-based instruction selection consists of the following steps:</p>
+
+<ol>
+<li><a href="#selectiondag_build">Build initial DAG</a> - This stage
+ performs a simple translation from the input LLVM code to an illegal
+ SelectionDAG.</li>
+<li><a href="#selectiondag_optimize">Optimize SelectionDAG</a> - This stage
+ performs simple optimizations on the SelectionDAG to simplify it, and
+ recognize meta instructions (like rotates and <tt>div</tt>/<tt>rem</tt>
+ pairs) for targets that support these meta operations. This makes the
+ resultant code more efficient and the <a href="#selectiondag_select">select
+ instructions from DAG</a> phase (below) simpler.</li>
+<li><a href="#selectiondag_legalize">Legalize SelectionDAG</a> - This stage
+ converts the illegal SelectionDAG to a legal SelectionDAG by eliminating
+ unsupported operations and data types.</li>
+<li><a href="#selectiondag_optimize">Optimize SelectionDAG (#2)</a> - This
+ second run of the SelectionDAG optimizer cleans up the newly legalized DAG to
+ eliminate inefficiencies introduced by legalization.</li>
+<li><a href="#selectiondag_select">Select instructions from DAG</a> - Finally,
+ the target instruction selector matches the DAG operations to target
+ instructions. This process translates the target-independent input DAG into
+ another DAG of target instructions.</li>
+<li><a href="#selectiondag_sched">SelectionDAG Scheduling and Formation</a>
+ - The last phase assigns a linear order to the instructions in the
+ target-instruction DAG and emits them into the MachineFunction being
+ compiled. This step uses traditional prepass scheduling techniques.</li>
+</ol>
+
+<p>After all of these steps are complete, the SelectionDAG is destroyed and the
+rest of the code generation passes are run.</p>
+
+<p>One great way to visualize what is going on here is to take advantage of a
+few LLC command line options. In particular, the <tt>-view-isel-dags</tt>
+option pops up a window with the SelectionDAG input to the Select phase for all
+of the code compiled (if you only get errors printed to the console while using
+this, you probably <a href="ProgrammersManual.html#ViewGraph">need to configure
+your system</a> to add support for it). The <tt>-view-sched-dags</tt> option
+views the SelectionDAG output from the Select phase and input to the Scheduler
+phase.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="selectiondag_build">Initial SelectionDAG Construction</a>
+</div>
+
+<div class="doc_text">
+
+<p>The initial SelectionDAG is na&iuml;vely peephole expanded from the LLVM
+input by the <tt>SelectionDAGLowering</tt> class in the
+<tt>lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp</tt> file. The intent of this
+pass is to expose as many low-level, target-specific details to the SelectionDAG
+as possible. This pass is mostly hard-coded (e.g. an LLVM <tt>add</tt> turns
+into an <tt>SDNode add</tt> while a <tt>getelementptr</tt> is expanded into the
+obvious arithmetic). This pass requires target-specific hooks to lower calls,
+returns, varargs, etc. For these features, the
+<tt><a href="#targetlowering">TargetLowering</a></tt> interface is used.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="selectiondag_legalize">SelectionDAG Legalize Phase</a>
+</div>
+
+<div class="doc_text">
+
+<p>The Legalize phase is in charge of converting a DAG to only use the types and
+operations that are natively supported by the target. This involves two major
+tasks:</p>
+
+<ol>
+<li><p>Convert values of unsupported types to values of supported types.</p>
+ <p>There are two main ways of doing this: converting small types to
+ larger types ("promoting"), and breaking up large integer types
+ into smaller ones ("expanding"). For example, a target might require
+ that all f32 values are promoted to f64 and that all i1/i8/i16 values
+ are promoted to i32. The same target might require that all i64 values
+ be expanded into i32 values. These changes can insert sign and zero
+ extensions as needed to make sure that the final code has the same
+ behavior as the input.</p>
+ <p>A target implementation tells the legalizer which types are supported
+ (and which register class to use for them) by calling the
+ <tt>addRegisterClass</tt> method in its TargetLowering constructor.</p>
+</li>
+
+<li><p>Eliminate operations that are not supported by the target.</p>
+ <p>Targets often have weird constraints, such as not supporting every
+ operation on every supported datatype (e.g. X86 does not support byte
+ conditional moves and PowerPC does not support sign-extending loads from
+ a 16-bit memory location). Legalize takes care of this by open-coding
+ another sequence of operations to emulate the operation ("expansion"), by
+ promoting one type to a larger type that supports the operation
+ ("promotion"), or by using a target-specific hook to implement the
+ legalization ("custom").</p>
+ <p>A target implementation tells the legalizer which operations are not
+ supported (and which of the above three actions to take) by calling the
+ <tt>setOperationAction</tt> method in its <tt>TargetLowering</tt>
+ constructor.</p>
+</li>
+</ol>
+
+<p>Prior to the existence of the Legalize pass, we required that every target
+<a href="#selectiondag_optimize">selector</a> support and handle every
+operator and type even if they are not natively supported. The introduction of
+the Legalize phase allows all of the canonicalization patterns to be shared
+across targets, and makes it very easy to optimize the canonicalized code
+because it is still in the form of a DAG.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="selectiondag_optimize">SelectionDAG Optimization Phase: the DAG
+ Combiner</a>
+</div>
+
+<div class="doc_text">
+
+<p>The SelectionDAG optimization phase is run twice for code generation: once
+immediately after the DAG is built and once after legalization. The first run
+of the pass allows the initial code to be cleaned up (e.g. performing
+optimizations that depend on knowing that the operators have restricted type
+inputs). The second run of the pass cleans up the messy code generated by the
+Legalize pass, which allows Legalize to be very simple (it can focus on making
+code legal instead of focusing on generating <em>good</em> and legal code).</p>
+
+<p>One important class of optimizations performed is optimizing inserted sign
+and zero extension instructions. We currently use ad-hoc techniques, but could
+move to more rigorous techniques in the future. Here are some good papers on
+the subject:</p>
+
+<p>
+ "<a href="http://www.eecs.harvard.edu/~nr/pubs/widen-abstract.html">Widening
+ integer arithmetic</a>"<br>
+ Kevin Redwine and Norman Ramsey<br>
+ International Conference on Compiler Construction (CC) 2004
+</p>
+
+
+<p>
+ "<a href="http://portal.acm.org/citation.cfm?doid=512529.512552">Effective
+ sign extension elimination</a>"<br>
+ Motohiro Kawahito, Hideaki Komatsu, and Toshio Nakatani<br>
+ Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language Design
+ and Implementation.
+</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="selectiondag_select">SelectionDAG Select Phase</a>
+</div>
+
+<div class="doc_text">
+
+<p>The Select phase is the bulk of the target-specific code for instruction
+selection. This phase takes a legal SelectionDAG as input, pattern matches the
+instructions supported by the target to this DAG, and produces a new DAG of
+target code. For example, consider the following LLVM fragment:</p>
+
+<div class="doc_code">
+<pre>
+%t1 = add float %W, %X
+%t2 = mul float %t1, %Y
+%t3 = add float %t2, %Z
+</pre>
+</div>
+
+<p>This LLVM code corresponds to a SelectionDAG that looks basically like
+this:</p>
+
+<div class="doc_code">
+<pre>
+(fadd:f32 (fmul:f32 (fadd:f32 W, X), Y), Z)
+</pre>
+</div>
+
+<p>If a target supports floating point multiply-and-add (FMA) operations, one
+of the adds can be merged with the multiply. On the PowerPC, for example, the
+output of the instruction selector might look like this DAG:</p>
+
+<div class="doc_code">
+<pre>
+(FMADDS (FADDS W, X), Y, Z)
+</pre>
+</div>
+
+<p>The <tt>FMADDS</tt> instruction is a ternary instruction that multiplies its
+first two operands and adds the third (as single-precision floating-point
+numbers). The <tt>FADDS</tt> instruction is a simple binary single-precision
+add instruction. To perform this pattern match, the PowerPC backend includes
+the following instruction definitions:</p>
+
+<div class="doc_code">
+<pre>
+def FMADDS : AForm_1&lt;59, 29,
+ (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fmadds $FRT, $FRA, $FRC, $FRB",
+ [<b>(set F4RC:$FRT, (fadd (fmul F4RC:$FRA, F4RC:$FRC),
+ F4RC:$FRB))</b>]&gt;;
+def FADDS : AForm_2&lt;59, 21,
+ (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRB),
+ "fadds $FRT, $FRA, $FRB",
+ [<b>(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))</b>]&gt;;
+</pre>
+</div>
+
+<p>The portion of the instruction definition in bold indicates the pattern used
+to match the instruction. The DAG operators (like <tt>fmul</tt>/<tt>fadd</tt>)
+are defined in the <tt>lib/Target/TargetSelectionDAG.td</tt> file.
+"<tt>F4RC</tt>" is the register class of the input and result values.</p>
+
+<p>The TableGen DAG instruction selector generator reads the instruction
+patterns in the <tt>.td</tt> file and automatically builds parts of the pattern
+matching code for your target. It has the following strengths:</p>
+
+<ul>
+<li>At compiler-compiler time, it analyzes your instruction patterns and tells
+ you if your patterns make sense or not.</li>
+<li>It can handle arbitrary constraints on operands for the pattern match. In
+ particular, it is straight-forward to say things like "match any immediate
+ that is a 13-bit sign-extended value". For examples, see the
+ <tt>immSExt16</tt> and related <tt>tblgen</tt> classes in the PowerPC
+ backend.</li>
+<li>It knows several important identities for the patterns defined. For
+ example, it knows that addition is commutative, so it allows the
+ <tt>FMADDS</tt> pattern above to match "<tt>(fadd X, (fmul Y, Z))</tt>" as
+ well as "<tt>(fadd (fmul X, Y), Z)</tt>", without the target author having
+ to specially handle this case.</li>
+<li>It has a full-featured type-inferencing system. In particular, you should
+ rarely have to explicitly tell the system what type parts of your patterns
+ are. In the <tt>FMADDS</tt> case above, we didn't have to tell
+ <tt>tblgen</tt> that all of the nodes in the pattern are of type 'f32'. It
+ was able to infer and propagate this knowledge from the fact that
+ <tt>F4RC</tt> has type 'f32'.</li>
+<li>Targets can define their own (and rely on built-in) "pattern fragments".
+ Pattern fragments are chunks of reusable patterns that get inlined into your
+ patterns during compiler-compiler time. For example, the integer
+ "<tt>(not x)</tt>" operation is actually defined as a pattern fragment that
+ expands as "<tt>(xor x, -1)</tt>", since the SelectionDAG does not have a
+ native '<tt>not</tt>' operation. Targets can define their own short-hand
+ fragments as they see fit. See the definition of '<tt>not</tt>' and
+ '<tt>ineg</tt>' for examples.</li>
+<li>In addition to instructions, targets can specify arbitrary patterns that
+ map to one or more instructions using the 'Pat' class. For example,
+ the PowerPC has no way to load an arbitrary integer immediate into a
+ register in one instruction. To tell tblgen how to do this, it defines:
+ <br>
+ <br>
+ <div class="doc_code">
+ <pre>
+// Arbitrary immediate support. Implement in terms of LIS/ORI.
+def : Pat&lt;(i32 imm:$imm),
+ (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))&gt;;
+ </pre>
+ </div>
+ <br>
+ If none of the single-instruction patterns for loading an immediate into a
+ register match, this will be used. This rule says "match an arbitrary i32
+ immediate, turning it into an <tt>ORI</tt> ('or a 16-bit immediate') and an
+ <tt>LIS</tt> ('load 16-bit immediate, where the immediate is shifted to the
+ left 16 bits') instruction". To make this work, the
+ <tt>LO16</tt>/<tt>HI16</tt> node transformations are used to manipulate the
+ input immediate (in this case, take the high or low 16-bits of the
+ immediate).</li>
+<li>While the system does automate a lot, it still allows you to write custom
+ C++ code to match special cases if there is something that is hard to
+ express.</li>
+</ul>
+
+<p>While it has many strengths, the system currently has some limitations,
+primarily because it is a work in progress and is not yet finished:</p>
+
+<ul>
+<li>Overall, there is no way to define or match SelectionDAG nodes that define
+ multiple values (e.g. <tt>ADD_PARTS</tt>, <tt>LOAD</tt>, <tt>CALL</tt>,
+ etc). This is the biggest reason that you currently still <em>have to</em>
+ write custom C++ code for your instruction selector.</li>
+<li>There is no great way to support matching complex addressing modes yet. In
+ the future, we will extend pattern fragments to allow them to define
+ multiple values (e.g. the four operands of the <a href="#x86_memory">X86
+ addressing mode</a>). In addition, we'll extend fragments so that a
+ fragment can match multiple different patterns.</li>
+<li>We don't automatically infer flags like isStore/isLoad yet.</li>
+<li>We don't automatically generate the set of supported registers and
+ operations for the <a href="#selectiondag_legalize">Legalizer</a> yet.</li>
+<li>We don't have a way of tying in custom legalized nodes yet.</li>
+</ul>
+
+<p>Despite these limitations, the instruction selector generator is still quite
+useful for most of the binary and logical operations in typical instruction
+sets. If you run into any problems or can't figure out how to do something,
+please let Chris know!</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="selectiondag_sched">SelectionDAG Scheduling and Formation Phase</a>
+</div>
+
+<div class="doc_text">
+
+<p>The scheduling phase takes the DAG of target instructions from the selection
+phase and assigns an order. The scheduler can pick an order depending on
+various constraints of the machines (e.g., order for minimal register pressure or
+try to cover instruction latencies). Once an order is established, the DAG is
+converted to a list of <tt><a href="#machineinstr">MachineInstr</a></tt>s and
+the SelectionDAG is destroyed.</p>
+
+<p>Note that this phase is logically separate from the instruction selection
+phase, but is tied to it closely in the code because it operates on
+SelectionDAGs.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="selectiondag_future">Future directions for the SelectionDAG</a>
+</div>
+
+<div class="doc_text">
+
+<ol>
+<li>Optional function-at-a-time selection.</li>
+<li>Auto-generate entire selector from <tt>.td</tt> file.</li>
+</ol>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="ssamco">SSA-based Machine Code Optimizations</a>
+</div>
+<div class="doc_text"><p>To Be Written</p></div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="liveintervals">Live Intervals</a>
+</div>
+
+<div class="doc_text">
+
+<p>Live Intervals are the ranges (intervals) where a variable is <i>live</i>.
+They are used by some <a href="#regalloc">register allocator</a> passes to
+determine if two or more virtual registers which require the same physical
+register are live at the same point in the program (i.e., they conflict). When
+this situation occurs, one virtual register must be <i>spilled</i>.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="livevariable_analysis">Live Variable Analysis</a>
+</div>
+
+<div class="doc_text">
+
+<p>The first step in determining the live intervals of variables is to
+calculate the set of registers that are immediately dead after the
+instruction (i.e., the instruction calculates the value, but it is
+never used) and the set of registers that are used by the instruction,
+but are never used after the instruction (i.e., they are killed). Live
+variable information is computed for each <i>virtual</i> register and
+<i>register allocatable</i> physical register in the function. This
+is done in a very efficient manner because it uses SSA to sparsely
+compute lifetime information for virtual registers (which are in SSA
+form) and only has to track physical registers within a block. Before
+register allocation, LLVM can assume that physical registers are only
+live within a single basic block. This allows it to do a single,
+local analysis to resolve physical register lifetimes within each
+basic block. If a physical register is not register allocatable (e.g.,
+a stack pointer or condition codes), it is not tracked.</p>
+
+<p>Physical registers may be live in to or out of a function. Live in values
+are typically arguments in registers. Live out values are typically return
+values in registers. Live in values are marked as such, and are given a dummy
+"defining" instruction during live intervals analysis. If the last basic block
+of a function is a <tt>return</tt>, then it's marked as using all live out
+values in the function.</p>
+
+<p><tt>PHI</tt> nodes need to be handled specially, because the calculation
+of the live variable information from a depth first traversal of the CFG of
+the function won't guarantee that a virtual register used by the <tt>PHI</tt>
+node is defined before it's used. When a <tt>PHI</tt> node is encountered, only
+the definition is handled, because the uses will be handled in other basic
+blocks.</p>
+
+<p>For each <tt>PHI</tt> node of the current basic block, we simulate an
+assignment at the end of the current basic block and traverse the successor
+basic blocks. If a successor basic block has a <tt>PHI</tt> node and one of
+the <tt>PHI</tt> node's operands is coming from the current basic block,
+then the variable is marked as <i>alive</i> within the current basic block
+and all of its predecessor basic blocks, until the basic block with the
+defining instruction is encountered.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="liveintervals_analysis">Live Intervals Analysis</a>
+</div>
+
+<div class="doc_text">
+
+<p>We now have the information available to perform the live intervals analysis
+and build the live intervals themselves. We start off by numbering the basic
+blocks and machine instructions. We then handle the "live-in" values. These
+are in physical registers, so the physical register is assumed to be killed by
+the end of the basic block. Live intervals for virtual registers are computed
+for some ordering of the machine instructions <tt>[1, N]</tt>. A live interval
+is an interval <tt>[i, j)</tt>, where <tt>1 &lt;= i &lt;= j &lt; N</tt>, for which a
+variable is live.</p>
+
+<p><i><b>More to come...</b></i></p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="regalloc">Register Allocation</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <i>Register Allocation problem</i> consists in mapping a program
+<i>P<sub>v</sub></i>, that can use an unbounded number of virtual
+registers, to a program <i>P<sub>p</sub></i> that contains a finite
+(possibly small) number of physical registers. Each target architecture has
+a different number of physical registers. If the number of physical
+registers is not enough to accommodate all the virtual registers, some of
+them will have to be mapped into memory. These virtuals are called
+<i>spilled virtuals</i>.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+
+<div class="doc_subsubsection">
+ <a name="regAlloc_represent">How registers are represented in LLVM</a>
+</div>
+
+<div class="doc_text">
+
+<p>In LLVM, physical registers are denoted by integer numbers that
+normally range from 1 to 1023. To see how this numbering is defined
+for a particular architecture, you can read the
+<tt>GenRegisterNames.inc</tt> file for that architecture. For
+instance, by inspecting
+<tt>lib/Target/X86/X86GenRegisterNames.inc</tt> we see that the 32-bit
+register <tt>EAX</tt> is denoted by 15, and the MMX register
+<tt>MM0</tt> is mapped to 48.</p>
+
+<p>Some architectures contain registers that share the same physical
+location. A notable example is the X86 platform. For instance, in the
+X86 architecture, the registers <tt>EAX</tt>, <tt>AX</tt> and
+<tt>AL</tt> share the low eight bits. These physical registers are
+marked as <i>aliased</i> in LLVM. Given a particular architecture, you
+can check which registers are aliased by inspecting its
+<tt>RegisterInfo.td</tt> file. Moreover, the method
+<tt>MRegisterInfo::getAliasSet(p_reg)</tt> returns an array containing
+all the physical registers aliased to the register <tt>p_reg</tt>.</p>
+
+<p>Physical registers, in LLVM, are grouped in <i>Register Classes</i>.
+Elements in the same register class are functionally equivalent, and can
+be interchangeably used. Each virtual register can only be mapped to
+physical registers of a particular class. For instance, in the X86
+architecture, some virtuals can only be allocated to 8 bit registers.
+A register class is described by <tt>TargetRegisterClass</tt> objects.
+To discover if a virtual register is compatible with a given physical,
+this code can be used:
+</p>
+
+<div class="doc_code">
+<pre>
+bool RegMapping_Fer::compatible_class(MachineFunction &amp;mf,
+ unsigned v_reg,
+ unsigned p_reg) {
+ assert(MRegisterInfo::isPhysicalRegister(p_reg) &amp;&amp;
+ "Target register must be physical");
+ const TargetRegisterClass *trc = mf.getSSARegMap()->getRegClass(v_reg);
+ return trc->contains(p_reg);
+}
+</pre>
+</div>
+
+<p>Sometimes, mostly for debugging purposes, it is useful to change
+the number of physical registers available in the target
+architecture. This must be done statically, inside the
+<tt>TargetRegisterInfo.td</tt> file. Just <tt>grep</tt> for
+<tt>RegisterClass</tt>, the last parameter of which is a list of
+registers. Just commenting some out is one simple way to avoid them
+being used. A more polite way is to explicitly exclude some registers
+from the <i>allocation order</i>. See the definition of the
+<tt>GR</tt> register class in
+<tt>lib/Target/IA64/IA64RegisterInfo.td</tt> for an example of this
+(e.g., <tt>numReservedRegs</tt> registers are hidden.)</p>
+
+<p>Virtual registers are also denoted by integer numbers. Contrary to
+physical registers, different virtual registers never share the same
+number. The smallest virtual register is normally assigned the number
+1024. This may change, so, in order to know which is the first virtual
+register, you should access
+<tt>MRegisterInfo::FirstVirtualRegister</tt>. Any register whose
+number is greater than or equal to
+<tt>MRegisterInfo::FirstVirtualRegister</tt> is considered a virtual
+register. Whereas physical registers are statically defined in a
+<tt>TargetRegisterInfo.td</tt> file and cannot be created by the
+application developer, that is not the case with virtual registers.
+In order to create new virtual registers, use the method
+<tt>SSARegMap::createVirtualRegister()</tt>. This method will return a
+virtual register with the highest code.
+</p>
+
+<p>Before register allocation, the operands of an instruction are
+mostly virtual registers, although physical registers may also be
+used. In order to check if a given machine operand is a register, use
+the boolean function <tt>MachineOperand::isRegister()</tt>. To obtain
+the integer code of a register, use
+<tt>MachineOperand::getReg()</tt>. An instruction may define or use a
+register. For instance, <tt>ADD reg:1026 := reg:1025 reg:1024</tt>
+defines the register 1026, and uses registers 1025 and 1024. Given a
+register operand, the method <tt>MachineOperand::isUse()</tt> informs
+if that register is being used by the instruction. The method
+<tt>MachineOperand::isDef()</tt> informs if that register is being
+defined.</p>
+
+<p>We will call physical registers present in the LLVM bitcode before
+register allocation <i>pre-colored registers</i>. Pre-colored
+registers are used in many different situations, for instance, to pass
+parameters of function calls, and to store results of particular
+instructions. There are two types of pre-colored registers: the ones
+<i>implicitly</i> defined, and those <i>explicitly</i>
+defined. Explicitly defined registers are normal operands, and can be
+accessed with <tt>MachineInstr::getOperand(int)::getReg()</tt>. In
+order to check which registers are implicitly defined by an
+instruction, use the
+<tt>TargetInstrInfo::get(opcode)::ImplicitDefs</tt>, where
+<tt>opcode</tt> is the opcode of the target instruction. One important
+difference between explicit and implicit physical registers is that
+the latter are defined statically for each instruction, whereas the
+former may vary depending on the program being compiled. For example,
+an instruction that represents a function call will always implicitly
+define or use the same set of physical registers. To read the
+registers implicitly used by an instruction, use
+<tt>TargetInstrInfo::get(opcode)::ImplicitUses</tt>. Pre-colored
+registers impose constraints on any register allocation algorithm. The
+register allocator must make sure that none of them is
+overwritten by the values of virtual registers while still alive.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+
+<div class="doc_subsubsection">
+ <a name="regAlloc_howTo">Mapping virtual registers to physical registers</a>
+</div>
+
+<div class="doc_text">
+
+<p>There are two ways to map virtual registers to physical registers (or to
+memory slots). The first way, that we will call <i>direct mapping</i>,
+is based on the use of methods of the classes <tt>MRegisterInfo</tt>,
+and <tt>MachineOperand</tt>. The second way, that we will call
+<i>indirect mapping</i>, relies on the <tt>VirtRegMap</tt> class in
+order to insert loads and stores sending and getting values to and from
+memory.</p>
+
+<p>The direct mapping provides more flexibility to the developer of
+the register allocator; however, it is more error prone, and demands
+more implementation work. Basically, the programmer will have to
+specify where load and store instructions should be inserted in the
+target function being compiled in order to get and store values in
+memory. To assign a physical register to a virtual register present in
+a given operand, use <tt>MachineOperand::setReg(p_reg)</tt>. To insert
+a store instruction, use
+<tt>MRegisterInfo::storeRegToStackSlot(...)</tt>, and to insert a load
+instruction, use <tt>MRegisterInfo::loadRegFromStackSlot</tt>.</p>
+
+<p>The indirect mapping shields the application developer from the
+complexities of inserting load and store instructions. In order to map
+a virtual register to a physical one, use
+<tt>VirtRegMap::assignVirt2Phys(vreg, preg)</tt>. In order to map a
+certain virtual register to memory, use
+<tt>VirtRegMap::assignVirt2StackSlot(vreg)</tt>. This method will
+return the stack slot where <tt>vreg</tt>'s value will be located. If
+it is necessary to map another virtual register to the same stack
+slot, use <tt>VirtRegMap::assignVirt2StackSlot(vreg,
+stack_location)</tt>. One important point to consider when using the
+indirect mapping, is that even if a virtual register is mapped to
+memory, it still needs to be mapped to a physical register. This
+physical register is the location where the virtual register is
+supposed to be found before being stored or after being reloaded.</p>
+
+<p>If the indirect strategy is used, after all the virtual registers
+have been mapped to physical registers or stack slots, it is necessary
+to use a spiller object to place load and store instructions in the
+code. Every virtual that has been mapped to a stack slot will be
+stored to memory after being defined and will be loaded before being
+used. The implementation of the spiller tries to recycle load/store
+instructions, avoiding unnecessary instructions. For an example of how
+to invoke the spiller, see
+<tt>RegAllocLinearScan::runOnMachineFunction</tt> in
+<tt>lib/CodeGen/RegAllocLinearScan.cpp</tt>.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="regAlloc_twoAddr">Handling two address instructions</a>
+</div>
+
+<div class="doc_text">
+
+<p>With very rare exceptions (e.g., function calls), the LLVM machine
+code instructions are three address instructions. That is, each
+instruction is expected to define at most one register, and to use at
+most two registers. However, some architectures use two address
+instructions. In this case, the defined register is also one of the
+used registers. For instance, an instruction such as <tt>ADD %EAX,
+%EBX</tt>, in X86 is actually equivalent to <tt>%EAX = %EAX +
+%EBX</tt>.</p>
+
+<p>In order to produce correct code, LLVM must convert three address
+instructions that represent two address instructions into true two
+address instructions. LLVM provides the pass
+<tt>TwoAddressInstructionPass</tt> for this specific purpose. It must
+be run before register allocation takes place. After its execution,
+the resulting code may no longer be in SSA form. This happens, for
+instance, in situations where an instruction such as <tt>%a = ADD %b
+%c</tt> is converted to two instructions such as:</p>
+
+<div class="doc_code">
+<pre>
+%a = MOVE %b
+%a = ADD %a %c
+</pre>
+</div>
+
+<p>Notice that, internally, the second instruction is represented as
+<tt>ADD %a[def/use] %c</tt>. I.e., the register operand <tt>%a</tt> is
+both used and defined by the instruction.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="regAlloc_ssaDecon">The SSA deconstruction phase</a>
+</div>
+
+<div class="doc_text">
+
+<p>An important transformation that happens during register allocation is called
+the <i>SSA Deconstruction Phase</i>. The SSA form simplifies many
+analyses that are performed on the control flow graph of
+programs. However, traditional instruction sets do not implement
+PHI instructions. Thus, in order to generate executable code, compilers
+must replace PHI instructions with other instructions that preserve their
+semantics.</p>
+
+<p>There are many ways in which PHI instructions can safely be removed
+from the target code. The most traditional PHI deconstruction
+algorithm replaces PHI instructions with copy instructions. That is
+the strategy adopted by LLVM. The SSA deconstruction algorithm is
+implemented in <tt>lib/CodeGen/PHIElimination.cpp</tt>. In order to
+invoke this pass, the identifier <tt>PHIEliminationID</tt> must be
+marked as required in the code of the register allocator.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="regAlloc_fold">Instruction folding</a>
+</div>
+
+<div class="doc_text">
+
+<p><i>Instruction folding</i> is an optimization performed during
+register allocation that removes unnecessary copy instructions. For
+instance, a sequence of instructions such as:</p>
+
+<div class="doc_code">
+<pre>
+%EBX = LOAD %mem_address
+%EAX = COPY %EBX
+</pre>
+</div>
+
+<p>can be safely substituted by the single instruction:</p>
+
+<div class="doc_code">
+<pre>
+%EAX = LOAD %mem_address
+</pre>
+</div>
+
+<p>Instructions can be folded with the
+<tt>MRegisterInfo::foldMemoryOperand(...)</tt> method. Care must be
+taken when folding instructions; a folded instruction can be quite
+different from the original instruction. See
+<tt>LiveIntervals::addIntervalsForSpills</tt> in
+<tt>lib/CodeGen/LiveIntervalAnalysis.cpp</tt> for an example of its use.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+
+<div class="doc_subsubsection">
+ <a name="regAlloc_builtIn">Built in register allocators</a>
+</div>
+
+<div class="doc_text">
+
+<p>The LLVM infrastructure provides the application developer with
+three different register allocators:</p>
+
+<ul>
+ <li><i>Simple</i> - This is a very simple implementation that does
+ not keep values in registers across instructions. This register
+ allocator immediately spills every value right after it is
+ computed, and reloads all used operands from memory to temporary
+ registers before each instruction.</li>
+ <li><i>Local</i> - This register allocator is an improvement on the
+ <i>Simple</i> implementation. It allocates registers on a basic
+ block level, attempting to keep values in registers and reusing
+ registers as appropriate.</li>
+ <li><i>Linear Scan</i> - <i>The default allocator</i>. This is the
+ well-known linear scan register allocator. Whereas the
+ <i>Simple</i> and <i>Local</i> algorithms use a direct mapping
+ implementation technique, the <i>Linear Scan</i> implementation
+ uses a spiller in order to place loads and stores.</li>
+</ul>
+
+<p>The type of register allocator used in <tt>llc</tt> can be chosen with the
+command line option <tt>-regalloc=...</tt>:</p>
+
+<div class="doc_code">
+<pre>
+$ llc -f -regalloc=simple file.bc -o sp.s;
+$ llc -f -regalloc=local file.bc -o lc.s;
+$ llc -f -regalloc=linearscan file.bc -o ln.s;
+</pre>
+</div>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="proepicode">Prolog/Epilog Code Insertion</a>
+</div>
+<div class="doc_text"><p>To Be Written</p></div>
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="latemco">Late Machine Code Optimizations</a>
+</div>
+<div class="doc_text"><p>To Be Written</p></div>
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="codeemit">Code Emission</a>
+</div>
+<div class="doc_text"><p>To Be Written</p></div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="codeemit_asm">Generating Assembly Code</a>
+</div>
+<div class="doc_text"><p>To Be Written</p></div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="codeemit_bin">Generating Binary Machine Code</a>
+</div>
+
+<div class="doc_text">
+ <p>For the JIT or <tt>.o</tt> file writer</p>
+</div>
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="targetimpls">Target-specific Implementation Notes</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>This section of the document explains features or design decisions that
+are specific to the code generator for a particular target.</p>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="x86">The X86 backend</a>
+</div>
+
+<div class="doc_text">
+
+<p>The X86 code generator lives in the <tt>lib/Target/X86</tt> directory. This
+code generator currently targets a generic P6-like processor. As such, it
+produces a few P6-and-above instructions (like conditional moves), but it does
+not make use of newer features like MMX or SSE. In the future, the X86 backend
+will have sub-target support added for specific processor families and
+implementations.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="x86_tt">X86 Target Triples Supported</a>
+</div>
+
+<div class="doc_text">
+
+<p>The following are the known target triples that are supported by the X86
+backend. This is not an exhaustive list, and it would be useful to add those
+that people test.</p>
+
+<ul>
+<li><b>i686-pc-linux-gnu</b> - Linux</li>
+<li><b>i386-unknown-freebsd5.3</b> - FreeBSD 5.3</li>
+<li><b>i686-pc-cygwin</b> - Cygwin on Win32</li>
+<li><b>i686-pc-mingw32</b> - MingW on Win32</li>
+<li><b>i386-pc-mingw32msvc</b> - MingW crosscompiler on Linux</li>
+<li><b>i686-apple-darwin*</b> - Apple Darwin on X86</li>
+</ul>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="x86_cc">X86 Calling Conventions supported</a>
+</div>
+
+
+<div class="doc_text">
+
+<p>The following target-specific calling conventions are known to the backend:</p>
+
+<ul>
+<li><b>x86_StdCall</b> - stdcall calling convention seen on Microsoft Windows
+platform (CC ID = 64).</li>
+<li><b>x86_FastCall</b> - fastcall calling convention seen on Microsoft Windows
+platform (CC ID = 65).</li>
+</ul>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="x86_memory">Representing X86 addressing modes in MachineInstrs</a>
+</div>
+
+<div class="doc_text">
+
+<p>The x86 has a very flexible way of accessing memory. It is capable of
+forming memory addresses of the following expression directly in integer
+instructions (which use ModR/M addressing):</p>
+
+<div class="doc_code">
+<pre>
+Base + [1,2,4,8] * IndexReg + Disp32
+</pre>
+</div>
+
+<p>In order to represent this, LLVM tracks no less than 4 operands for each
+memory operand of this form. This means that the "load" form of '<tt>mov</tt>'
+has the following <tt>MachineOperand</tt>s in this order:</p>
+
+<pre>
+Index: 0 | 1 2 3 4
+Meaning: DestReg, | BaseReg, Scale, IndexReg, Displacement
+OperandTy: VirtReg, | VirtReg, UnsImm, VirtReg, SignExtImm
+</pre>
+
+<p>Stores, and all other instructions, treat the four memory operands in the
+same way and in the same order.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="x86_names">Instruction naming</a>
+</div>
+
+<div class="doc_text">
+
+<p>An instruction name consists of the base name, a default operand size, and a
+character per operand with an optional special size. For example:</p>
+
+<p>
+<tt>ADD8rr</tt> -&gt; add, 8-bit register, 8-bit register<br>
+<tt>IMUL16rmi</tt> -&gt; imul, 16-bit register, 16-bit memory, 16-bit immediate<br>
+<tt>IMUL16rmi8</tt> -&gt; imul, 16-bit register, 16-bit memory, 8-bit immediate<br>
+<tt>MOVSX32rm16</tt> -&gt; movsx, 32-bit register, 16-bit memory
+</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="ppc">The PowerPC backend</a>
+</div>
+
+<div class="doc_text">
+<p>The PowerPC code generator lives in the lib/Target/PowerPC directory. The
+code generation is retargetable to several variations or <i>subtargets</i> of
+the PowerPC ISA, including ppc32, ppc64 and altivec.
+</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="ppc_abi">LLVM PowerPC ABI</a>
+</div>
+
+<div class="doc_text">
+<p>LLVM follows the AIX PowerPC ABI, with two deviations. LLVM uses a PC
+relative (PIC) or static addressing for accessing global values, so no TOC (r2)
+is used. Second, r31 is used as a frame pointer to allow dynamic growth of a
+stack frame. LLVM takes advantage of having no TOC to provide space to save
+the frame pointer in the PowerPC linkage area of the caller frame. Other
+details of PowerPC ABI can be found at <a href=
+"http://developer.apple.com/documentation/DeveloperTools/Conceptual/LowLevelABI/Articles/32bitPowerPC.html"
+>PowerPC ABI.</a> Note: This link describes the 32 bit ABI. The
+64 bit ABI is similar except space for GPRs is 8 bytes wide (not 4) and r13 is
+reserved for system use.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="ppc_frame">Frame Layout</a>
+</div>
+
+<div class="doc_text">
+<p>The size of a PowerPC frame is usually fixed for the duration of a
+function&rsquo;s invocation. Since the frame is fixed size, all references into
+the frame can be accessed via fixed offsets from the stack pointer. The
+exception to this is when dynamic alloca or variable sized arrays are present,
+then a base pointer (r31) is used as a proxy for the stack pointer and stack
+pointer is free to grow or shrink. A base pointer is also used if llvm-gcc is
+not passed the -fomit-frame-pointer flag. The stack pointer is always aligned to
+16 bytes, so that space allocated for altivec vectors will be properly
+aligned.</p>
+<p>An invocation frame is laid out as follows (low memory at top):</p>
+</div>
+
+<div class="doc_text">
+<table class="layout">
+ <tr>
+ <td>Linkage<br><br></td>
+ </tr>
+ <tr>
+ <td>Parameter area<br><br></td>
+ </tr>
+ <tr>
+ <td>Dynamic area<br><br></td>
+ </tr>
+ <tr>
+ <td>Locals area<br><br></td>
+ </tr>
+ <tr>
+ <td>Saved registers area<br><br></td>
+ </tr>
+ <tr style="border-style: none hidden none hidden;">
+ <td><br></td>
+ </tr>
+ <tr>
+ <td>Previous Frame<br><br></td>
+ </tr>
+</table>
+</div>
+
+<div class="doc_text">
+<p>The <i>linkage</i> area is used by a callee to save special registers prior
+to allocating its own frame. Only three entries are relevant to LLVM. The
+first entry is the previous stack pointer (sp), aka link. This allows probing
+tools like gdb or exception handlers to quickly scan the frames in the stack. A
+function epilog can also use the link to pop the frame from the stack. The
+third entry in the linkage area is used to save the return address from the lr
+register. Finally, as mentioned above, the last entry is used to save the
+previous frame pointer (r31.) The entries in the linkage area are the size of a
+GPR, thus the linkage area is 24 bytes long in 32 bit mode and 48 bytes in 64
+bit mode.</p>
+</div>
+
+<div class="doc_text">
+<p>32 bit linkage area</p>
+<table class="layout">
+ <tr>
+ <td>0</td>
+ <td>Saved SP (r1)</td>
+ </tr>
+ <tr>
+ <td>4</td>
+ <td>Saved CR</td>
+ </tr>
+ <tr>
+ <td>8</td>
+ <td>Saved LR</td>
+ </tr>
+ <tr>
+ <td>12</td>
+ <td>Reserved</td>
+ </tr>
+ <tr>
+ <td>16</td>
+ <td>Reserved</td>
+ </tr>
+ <tr>
+ <td>20</td>
+ <td>Saved FP (r31)</td>
+ </tr>
+</table>
+</div>
+
+<div class="doc_text">
+<p>64 bit linkage area</p>
+<table class="layout">
+ <tr>
+ <td>0</td>
+ <td>Saved SP (r1)</td>
+ </tr>
+ <tr>
+ <td>8</td>
+ <td>Saved CR</td>
+ </tr>
+ <tr>
+ <td>16</td>
+ <td>Saved LR</td>
+ </tr>
+ <tr>
+ <td>24</td>
+ <td>Reserved</td>
+ </tr>
+ <tr>
+ <td>32</td>
+ <td>Reserved</td>
+ </tr>
+ <tr>
+ <td>40</td>
+ <td>Saved FP (r31)</td>
+ </tr>
+</table>
+</div>
+
+<div class="doc_text">
+<p>The <i>parameter area</i> is used to store arguments being passed to a callee
+function. Following the PowerPC ABI, the first few arguments are actually
+passed in registers, with the space in the parameter area unused. However, if
+there are not enough registers or the callee is a thunk or vararg function,
+these register arguments can be spilled into the parameter area. Thus, the
+parameter area must be large enough to store all the parameters for the largest
+call sequence made by the caller. The size must also be minimally large enough
+to spill registers r3-r10. This allows callees blind to the call signature,
+such as thunks and vararg functions, enough space to cache the argument
+registers. Therefore, the parameter area is minimally 32 bytes (64 bytes in 64
+bit mode.) Also note that since the parameter area is a fixed offset from the
+top of the frame, a callee can access its spilt arguments using fixed
+offsets from the stack pointer (or base pointer.)</p>
+</div>
+
+<div class="doc_text">
+<p>Combining the information about the linkage area, the parameter area and
+alignment, a stack frame is minimally 64 bytes in 32 bit mode and 128 bytes in 64 bit
+mode.</p>
+</div>
+
+<div class="doc_text">
+<p>The <i>dynamic area</i> starts out as size zero. If a function uses dynamic
+alloca then space is added to the stack, the linkage and parameter areas are
+shifted to top of stack, and the new space is available immediately below the
+linkage and parameter areas. The cost of shifting the linkage and parameter
+areas is minor since only the link value needs to be copied. The link value can
+be easily fetched by adding the original frame size to the base pointer. Note
+that allocations in the dynamic space need to observe 16 byte alignment.</p>
+</div>
+
+<div class="doc_text">
+<p>The <i>locals area</i> is where the llvm compiler reserves space for local
+variables.</p>
+</div>
+
+<div class="doc_text">
+<p>The <i>saved registers area</i> is where the llvm compiler spills callee saved
+registers on entry to the callee.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="ppc_prolog">Prolog/Epilog</a>
+</div>
+
+<div class="doc_text">
+<p>The llvm prolog and epilog are the same as described in the PowerPC ABI, with
+the following exceptions. Callee saved registers are spilled after the frame is
+created. This allows the llvm epilog/prolog support to be common with other
+targets. The base pointer callee saved register r31 is saved in the TOC slot of
+linkage area. This simplifies allocation of space for the base pointer and
+makes it convenient to locate programmatically and during debugging.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="ppc_dynamic">Dynamic Allocation</a>
+</div>
+
+<div class="doc_text">
+<p></p>
+</div>
+
+<div class="doc_text">
+<p><i>TODO - More to come.</i></p>
+</div>
+
+
+<!-- *********************************************************************** -->
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+ <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
+ <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+
+</body>
+</html>
diff --git a/docs/CodingStandards.html b/docs/CodingStandards.html
new file mode 100644
index 0000000..f631619
--- /dev/null
+++ b/docs/CodingStandards.html
@@ -0,0 +1,769 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+ <title>A Few Coding Standards</title>
+</head>
+<body>
+
+<div class="doc_title">
+ A Few Coding Standards
+</div>
+
+<ol>
+ <li><a href="#introduction">Introduction</a></li>
+ <li><a href="#mechanicalissues">Mechanical Source Issues</a>
+ <ol>
+ <li><a href="#sourceformating">Source Code Formatting</a>
+ <ol>
+ <li><a href="#scf_commenting">Commenting</a></li>
+ <li><a href="#scf_commentformat">Comment Formatting</a></li>
+ <li><a href="#scf_includes"><tt>#include</tt> Style</a></li>
+ <li><a href="#scf_codewidth">Source Code Width</a></li>
+ <li><a href="#scf_spacestabs">Use Spaces Instead of Tabs</a></li>
+ <li><a href="#scf_indentation">Indent Code Consistently</a></li>
+ </ol></li>
+ <li><a href="#compilerissues">Compiler Issues</a>
+ <ol>
+ <li><a href="#ci_warningerrors">Treat Compiler Warnings Like
+ Errors</a></li>
+ <li><a href="#ci_portable_code">Write Portable Code</a></li>
+ <li><a href="#ci_class_struct">Use of class/struct Keywords</a></li>
+ </ol></li>
+ </ol></li>
+ <li><a href="#styleissues">Style Issues</a>
+ <ol>
+ <li><a href="#macro">The High Level Issues</a>
+ <ol>
+ <li><a href="#hl_module">A Public Header File <b>is</b> a
+ Module</a></li>
+ <li><a href="#hl_dontinclude">#include as Little as Possible</a></li>
+ <li><a href="#hl_privateheaders">Keep "internal" Headers
+ Private</a></li>
+ <li><a href="#ll_iostream"><tt>#include &lt;iostream&gt;</tt> is
+ <em>forbidden</em></a></li>
+ </ol></li>
+ <li><a href="#micro">The Low Level Issues</a>
+ <ol>
+ <li><a href="#ll_assert">Assert Liberally</a></li>
+ <li><a href="#ll_ns_std">Do not use 'using namespace std'</a></li>
+ <li><a href="#ll_virtual_anch">Provide a virtual method anchor for
+ classes in headers</a></li>
+ <li><a href="#ll_preincrement">Prefer Preincrement</a></li>
+ <li><a href="#ll_avoidendl">Avoid <tt>std::endl</tt></a></li>
+ </ol></li>
+ </ol></li>
+ <li><a href="#seealso">See Also</a></li>
+</ol>
+
+<div class="doc_author">
+ <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a> and
+ <a href="mailto:void@nondot.org">Bill Wendling</a></p>
+</div>
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="introduction">Introduction</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>This document attempts to describe a few coding standards that are being used
+in the LLVM source tree. Although no coding standards should be regarded as
+absolute requirements to be followed in all instances, coding standards can be
+useful.</p>
+
+<p>This document intentionally does not prescribe fixed standards for religious
+issues such as brace placement and space usage. For issues like this, follow
+the golden rule:</p>
+
+<blockquote>
+
+<p><b><a name="goldenrule">If you are adding a significant body of source to a
+project, feel free to use whatever style you are most comfortable with. If you
+are extending, enhancing, or bug fixing already implemented code, use the style
+that is already being used so that the source is uniform and easy to
+follow.</a></b></p>
+
+</blockquote>
+
+<p>The ultimate goal of these guidelines is to increase the readability and
+maintainability of our common source base. If you have suggestions for topics to
+be included, please mail them to <a
+href="mailto:sabre@nondot.org">Chris</a>.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="mechanicalissues">Mechanical Source Issues</a>
+</div>
+<!-- *********************************************************************** -->
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="sourceformating">Source Code Formatting</a>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="scf_commenting">Commenting</a>
+</div>
+
+<div class="doc_text">
+
+<p>Comments are one critical part of readability and maintainability. Everyone
+knows they should comment, so should you. Although we all should probably
+comment our code more than we do, there are a few very critical places where
+documentation is very useful:</p>
+
+<b>File Headers</b>
+
+<p>Every source file should have a header on it that describes the basic
+purpose of the file. If a file does not have a header, it should not be
+checked into Subversion. Most source trees will probably have a standard
+file header format. The standard format for the LLVM source tree looks like
+this:</p>
+
+<div class="doc_code">
+<pre>
+//===-- llvm/Instruction.h - Instruction class definition -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by &lt;whoever started the file&gt; and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the Instruction class, which is the
+// base class for all of the VM instructions.
+//
+//===----------------------------------------------------------------------===//
+</pre>
+</div>
+
+<p>A few things to note about this particular format: The 'developed by' line
+should be the name of the person or organization who initially contributed the
+file. The "<tt>-*- C++
+-*-</tt>" string on the first line is there to tell Emacs that the source file
+is a C++ file, not a C file (Emacs assumes .h files are C files by default).
+Note that this tag is not necessary in .cpp files. The name of the file is also
+on the first line, along with a very short description of the purpose of the
+file. This is important when printing out code and flipping through lots of
+pages.</p>
+
+<p>The next section in the file is a concise note that defines the license that
+the file is released under. This makes it perfectly clear what terms the source
+code can be distributed under.</p>
+
+<p>The main body of the description does not have to be very long in most cases.
+Here it's only two lines. If an algorithm is being implemented or something
+tricky is going on, a reference to the paper where it is published should be
+included, as well as any notes or "gotchas" in the code to watch out for.</p>
+
+<b>Class overviews</b>
+
+<p>Classes are one fundamental part of a good object oriented design. As such,
+a class definition should have a comment block that explains what the class is
+used for... if it's not obvious. If it's so completely obvious your grandma
+could figure it out, it's probably safe to leave it out. Naming classes
+something sane goes a long way towards avoiding writing documentation.</p>
+
+
+<b>Method information</b>
+
+<p>Methods defined in a class (as well as any global functions) should also be
+documented properly. A quick note about what it does and a description of the
+borderline behaviour is all that is necessary here (unless something
+particularly tricky or insidious is going on). The hope is that people can
+figure out how to use your interfaces without reading the code itself... that is
+the goal metric.</p>
+
+<p>Good things to talk about here are what happens when something unexpected
+happens: does the method return null? Abort? Format your hard disk?</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="scf_commentformat">Comment Formatting</a>
+</div>
+
+<div class="doc_text">
+
+<p>In general, prefer C++ style (<tt>//</tt>) comments. They take less space,
+require less typing, don't have nesting problems, etc. There are a few cases
+when it is useful to use C style (<tt>/* */</tt>) comments however:</p>
+
+<ol>
+ <li>When writing C code: Obviously if you are writing C code, use C style
+ comments.</li>
+ <li>When writing a header file that may be <tt>#include</tt>d by a C source
+ file.</li>
+ <li>When writing a source file that is used by a tool that only accepts C
+ style comments.</li>
+</ol>
+
+<p>To comment out a large block of code, use <tt>#if 0</tt> and <tt>#endif</tt>.
+These nest properly and are better behaved in general than C style comments.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="scf_includes"><tt>#include</tt> Style</a>
+</div>
+
+<div class="doc_text">
+
+<p>Immediately after the <a href="#scf_commenting">header file comment</a> (and
+include guards if working on a header file), the <a
+href="#hl_dontinclude">minimal</a> list of <tt>#include</tt>s required by the
+file should be listed. We prefer these <tt>#include</tt>s to be listed in this
+order:</p>
+
+<ol>
+ <li><a href="#mmheader">Main Module header</a></li>
+ <li><a href="#hl_privateheaders">Local/Private Headers</a></li>
+ <li><tt>llvm/*</tt></li>
+ <li><tt>llvm/Analysis/*</tt></li>
+ <li><tt>llvm/Assembly/*</tt></li>
+ <li><tt>llvm/Bytecode/*</tt></li>
+ <li><tt>llvm/CodeGen/*</tt></li>
+ <li>...</li>
+ <li><tt>Support/*</tt></li>
+ <li><tt>Config/*</tt></li>
+ <li>System <tt>#includes</tt></li>
+</ol>
+
+<p>... and each category should be sorted by name.</p>
+
+<p><a name="mmheader">The "Main Module Header"</a> file applies to .cpp files
+which implement an interface defined by a .h file. This <tt>#include</tt>
+should always be included <b>first</b> regardless of where it lives on the file
+system. By including a header file first in the .cpp files that implement the
+interfaces, we ensure that the header does not have any hidden dependencies
+which are not explicitly #included in the header, but should be. It is also a
+form of documentation in the .cpp file to indicate where the interfaces it
+implements are defined.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="scf_codewidth">Source Code Width</a>
+</div>
+
+<div class="doc_text">
+
+<p>Write your code to fit within 80 columns of text. This helps those of us who
+like to print out code and look at your code in an xterm without resizing
+it.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="scf_spacestabs">Use Spaces Instead of Tabs</a>
+</div>
+
+<div class="doc_text">
+
+<p>In all cases, prefer spaces to tabs in source files. People have different
+preferred indentation levels, and different styles of indentation that they
+like... this is fine. What isn't is that different editors/viewers expand tabs
+out to different tab stops. This can cause your code to look completely
+unreadable, and it is not worth dealing with.</p>
+
+<p>As always, follow the <a href="#goldenrule">Golden Rule</a> above: follow the
+style of existing code if you are modifying and extending it. If you like four
+spaces of indentation, <b>DO NOT</b> do that in the middle of a chunk of code
+with two spaces of indentation. Also, do not reindent a whole source file: it
+makes for incredible diffs that are absolutely worthless.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="scf_indentation">Indent Code Consistently</a>
+</div>
+
+<div class="doc_text">
+
+<p>Okay, your first year of programming you were told that indentation is
+important. If you didn't believe and internalize this then, now is the time.
+Just do it.</p>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="compilerissues">Compiler Issues</a>
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="ci_warningerrors">Treat Compiler Warnings Like Errors</a>
+</div>
+
+<div class="doc_text">
+
+<p>If your code has compiler warnings in it, something is wrong: you aren't
+casting values correctly, you have "questionable" constructs in your code, or
+you are doing something legitimately wrong. Compiler warnings can cover up
+legitimate errors in output and make dealing with a translation unit
+difficult.</p>
+
+<p>It is not possible to prevent all warnings from all compilers, nor is it
+desirable. Instead, pick a standard compiler (like <tt>gcc</tt>) that provides
+a good thorough set of warnings, and stick to them. At least in the case of
+<tt>gcc</tt>, it is possible to work around any spurious errors by changing the
+syntax of the code slightly. For example, a warning that annoys me occurs when
+I write code like this:</p>
+
+<div class="doc_code">
+<pre>
+if (V = getValue()) {
+ ...
+}
+</pre>
+</div>
+
+<p><tt>gcc</tt> will warn me that I probably want to use the <tt>==</tt>
+operator, and that I probably mistyped it. In most cases, I haven't, and I
+really don't want the spurious errors. To fix this particular problem, I
+rewrite the code like this:</p>
+
+<div class="doc_code">
+<pre>
+if ((V = getValue())) {
+ ...
+}
+</pre>
+</div>
+
+<p>...which shuts <tt>gcc</tt> up. Any <tt>gcc</tt> warning that annoys you can
+be fixed by massaging the code appropriately.</p>
+
+<p>These are the <tt>gcc</tt> warnings that I prefer to enable: <tt>-Wall
+-Winline -W -Wwrite-strings -Wno-unused</tt></p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="ci_portable_code">Write Portable Code</a>
+</div>
+
+<div class="doc_text">
+
+<p>In almost all cases, it is possible and within reason to write completely
+portable code. If there are cases where it isn't possible to write portable
+code, isolate it behind a well defined (and well documented) interface.</p>
+
+<p>In practice, this means that you shouldn't assume much about the host
+compiler, including its support for "high tech" features like partial
+specialization of templates. In fact, Visual C++ 6 could be an important target
+for our work in the future, and we don't want to have to rewrite all of our code
+to support it.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+<a name="ci_class_struct">Use of <tt>class</tt> and <tt>struct</tt> Keywords</a>
+</div>
+<div class="doc_text">
+
+<p>In C++, the <tt>class</tt> and <tt>struct</tt> keywords can be used almost
+interchangeably. The only difference is when they are used to declare a class:
+<tt>class</tt> makes all members private by default while <tt>struct</tt> makes
+all members public by default.</p>
+
+<p>Unfortunately, not all compilers follow the rules and some will generate
+different symbols based on whether <tt>class</tt> or <tt>struct</tt> was used to
+declare the symbol. This can lead to problems at link time.</p>
+
+<p>So, the rule for LLVM is to always use the <tt>class</tt> keyword, unless
+<b>all</b> members are public, in which case <tt>struct</tt> is allowed.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="styleissues">Style Issues</a>
+</div>
+<!-- *********************************************************************** -->
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="macro">The High Level Issues</a>
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="hl_module">A Public Header File <b>is</b> a Module</a>
+</div>
+
+<div class="doc_text">
+
+<p>C++ doesn't do too well in the modularity department. There is no real
+encapsulation or data hiding (unless you use expensive protocol classes), but it
+is what we have to work with. When you write a public header file (in the LLVM
+source tree, they live in the top level "include" directory), you are defining a
+module of functionality.</p>
+
+<p>Ideally, modules should be completely independent of each other, and their
+header files should only include the absolute minimum number of headers
+possible. A module is not just a class, a function, or a namespace: <a
+href="http://www.cuj.com/articles/2000/0002/0002c/0002c.htm">it's a collection
+of these</a> that defines an interface. This interface may be several
+functions, classes or data structures, but the important issue is how they work
+together.</p>
+
+<p>In general, a module should be implemented with one or more <tt>.cpp</tt>
+files. Each of these <tt>.cpp</tt> files should include the header that defines
+their interface first. This ensures that all of the dependencies of the module
+header have been properly added to the module header itself, and are not
+implicit. System headers should be included after user headers for a
+translation unit.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="hl_dontinclude"><tt>#include</tt> as Little as Possible</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>#include</tt> hurts compile time performance. Don't do it unless you
+have to, especially in header files.</p>
+
+<p>But wait, sometimes you need to have the definition of a class to use it, or
+to inherit from it. In these cases go ahead and <tt>#include</tt> that header
+file. Be aware however that there are many cases where you don't need to have
+the full definition of a class. If you are using a pointer or reference to a
+class, you don't need the header file. If you are simply returning a class
+instance from a prototyped function or method, you don't need it. In fact, for
+most cases, you simply don't need the definition of a class... and not
+<tt>#include</tt>'ing speeds up compilation.</p>
+
+<p>It is easy to try to go overboard on this recommendation, however. You
+<b>must</b> include all of the header files that you are using -- you can
+include them either directly
+or indirectly (through another header file). To make sure that you don't
+accidentally forget to include a header file in your module header, make sure to
+include your module header <b>first</b> in the implementation file (as mentioned
+above). This way there won't be any hidden dependencies that you'll find out
+about later...</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="hl_privateheaders">Keep "internal" Headers Private</a>
+</div>
+
+<div class="doc_text">
+
+<p>Many modules have a complex implementation that causes them to use more than
+one implementation (<tt>.cpp</tt>) file. It is often tempting to put the
+internal communication interface (helper classes, extra functions, etc) in the
+public module header file. Don't do this.</p>
+
+<p>If you really need to do something like this, put a private header file in
+the same directory as the source files, and include it locally. This ensures
+that your private interface remains private and undisturbed by outsiders.</p>
+
+<p>Note however, that it's okay to put extra implementation methods in a public
+class itself... just make them private (or protected), and all is well.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="ll_iostream"><tt>#include &lt;iostream&gt;</tt> is forbidden</a>
+</div>
+
+<div class="doc_text">
+
+<p>The use of <tt>#include &lt;iostream&gt;</tt> in library files is
+hereby <b><em>forbidden</em></b>. The primary reason for doing this is to
+support clients using LLVM libraries as part of larger systems. In particular,
+we statically link LLVM into some dynamic libraries. Even if LLVM isn't used,
+the static c'tors are run whenever an application starts up that uses the dynamic
+library. There are two problems with this:</p>
+
+<ol>
+ <li>The time to run the static c'tors impacts startup time of
+ applications&mdash;a critical time for gui apps.</li>
+ <li>The static c'tors cause the app to pull many extra pages of memory off the
+ disk: both the code for the static c'tors in each .o file and the small
+ amount of data that gets touched. In addition, touched/dirty pages put
+ more pressure on the VM system on low-memory machines.</li>
+</ol>
+
+<table align="center">
+ <tbody>
+ <tr>
+ <th>Old Way</th>
+ <th>New Way</th>
+ </tr>
+ <tr>
+ <td align="left"><pre>#include &lt;iostream&gt;</pre></td>
+ <td align="left"><pre>#include "llvm/Support/Streams.h"</pre></td>
+ </tr>
+ <tr>
+ <td align="left"><pre>DEBUG(std::cerr &lt;&lt; ...);
+DEBUG(dump(std::cerr));</pre></td>
+ <td align="left"><pre>DOUT &lt;&lt; ...;
+dump(DOUT);</pre></td>
+ </tr>
+ <tr>
+ <td align="left"><pre>std::cerr &lt;&lt; "Hello world\n";</pre></td>
+ <td align="left"><pre>llvm::cerr &lt;&lt; "Hello world\n";</pre></td>
+ </tr>
+ <tr>
+ <td align="left"><pre>std::cout &lt;&lt; "Hello world\n";</pre></td>
+ <td align="left"><pre>llvm::cout &lt;&lt; "Hello world\n";</pre></td>
+ </tr>
+ <tr>
+ <td align="left"><pre>std::cin &gt;&gt; Var;</pre></td>
+ <td align="left"><pre>llvm::cin &gt;&gt; Var;</pre></td>
+ </tr>
+ <tr>
+ <td align="left"><pre>std::ostream</pre></td>
+ <td align="left"><pre>llvm::OStream</pre></td>
+ </tr>
+ <tr>
+ <td align="left"><pre>std::istream</pre></td>
+ <td align="left"><pre>llvm::IStream</pre></td>
+ </tr>
+ <tr>
+ <td align="left"><pre>std::stringstream</pre></td>
+ <td align="left"><pre>llvm::StringStream</pre></td>
+ </tr>
+ <tr>
+ <td align="left"><pre>void print(std::ostream &amp;Out);
+// ...
+print(std::cerr);</pre></td>
+ <td align="left"><pre>void print(std::ostream &amp;Out);
+void print(std::ostream *Out) { if (Out) print(*Out); }
+// ...
+print(llvm::cerr);</pre>
+
+<p><i>N.B.</i> The second <tt>print</tt> method is called by the <tt>print</tt>
+expression. It prevents the execution of the first <tt>print</tt> method if the
+stream is <tt>cnull</tt>.</p></td>
+ </tbody>
+</table>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="micro">The Low Level Issues</a>
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="ll_assert">Assert Liberally</a>
+</div>
+
+<div class="doc_text">
+
+<p>Use the "<tt>assert</tt>" function to its fullest. Check all of your
+preconditions and assumptions; you never know when a bug (not necessarily even
+yours) might be caught early by an assertion, which reduces debugging time
+dramatically. The "<tt>&lt;cassert&gt;</tt>" header file is probably already
+included by the header files you are using, so it doesn't cost anything to use
+it.</p>
+
+<p>To further assist with debugging, make sure to put some kind of error message
+in the assertion statement (which is printed if the assertion is tripped). This
+helps the poor debugger make sense of why an assertion is being made and
+enforced, and hopefully what to do about it. Here is one complete example:</p>
+
+<div class="doc_code">
+<pre>
+inline Value *getOperand(unsigned i) {
+ assert(i &lt; Operands.size() &amp;&amp; "getOperand() out of range!");
+ return Operands[i];
+}
+</pre>
+</div>
+
+<p>Here are some examples:</p>
+
+<div class="doc_code">
+<pre>
+assert(Ty-&gt;isPointerType() &amp;&amp; "Can't allocate a non pointer type!");
+
+assert((Opcode == Shl || Opcode == Shr) &amp;&amp; "ShiftInst Opcode invalid!");
+
+assert(idx &lt; getNumSuccessors() &amp;&amp; "Successor # out of range!");
+
+assert(V1.getType() == V2.getType() &amp;&amp; "Constant types must be identical!");
+
+assert(isa&lt;PHINode&gt;(Succ-&gt;front()) &amp;&amp; "Only works on PHId BBs!");
+</pre>
+</div>
+
+<p>You get the idea...</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="ll_ns_std">Do not use '<tt>using namespace std</tt>'</a>
+</div>
+
+<div class="doc_text">
+<p>In LLVM, we prefer to explicitly prefix all identifiers from the standard
+namespace with an "<tt>std::</tt>" prefix, rather than rely on
+"<tt>using namespace std;</tt>".</p>
+
+<p> In header files, adding a '<tt>using namespace XXX</tt>' directive pollutes
+the namespace of any source file that includes the header. This is clearly a
+bad thing.</p>
+
+<p>In implementation files (e.g. .cpp files), the rule is more of a stylistic
+rule, but is still important. Basically, using explicit namespace prefixes
+makes the code <b>clearer</b>, because it is immediately obvious what facilities
+are being used and where they are coming from, and <b>more portable</b>, because
+namespace clashes cannot occur between LLVM code and other namespaces. The
+portability rule is important because different standard library implementations
+expose different symbols (potentially ones they shouldn't), and future revisions
+to the C++ standard will add more symbols to the <tt>std</tt> namespace. As
+such, we never use '<tt>using namespace std;</tt>' in LLVM.</p>
+
+<p>The exception to the general rule (i.e. it's not an exception for
+the <tt>std</tt> namespace) is for implementation files. For example, all of
+the code in the LLVM project implements code that lives in the 'llvm' namespace.
+As such, it is ok, and actually clearer, for the .cpp files to have a '<tt>using
+namespace llvm</tt>' directive at their top, after the <tt>#include</tt>s. The
+general form of this rule is that any .cpp file that implements code in any
+namespace may use that namespace (and its parents'), but should not use any
+others.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="ll_virtual_anch">Provide a virtual method anchor for classes
+ in headers</a>
+</div>
+
+<div class="doc_text">
+
+<p>If a class is defined in a header file and has a v-table (either it has
+virtual methods or it derives from classes with virtual methods), it must
+always have at least one out-of-line virtual method in the class. Without
+this, the compiler will copy the vtable and RTTI into every .o file that
+#includes the header, bloating .o file sizes and increasing link times.
+</p>
+
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="ll_preincrement">Prefer Preincrement</a>
+</div>
+
+<div class="doc_text">
+
+<p>Hard fast rule: Preincrement (<tt>++X</tt>) may be no slower than
+postincrement (<tt>X++</tt>) and could very well be a lot faster than it. Use
+preincrementation whenever possible.</p>
+
+<p>The semantics of postincrement include making a copy of the value being
+incremented, returning it, and then preincrementing the "work value". For
+primitive types, this isn't a big deal... but for iterators, it can be a huge
+issue (for example, some iterators contain stack and set objects in them...
+copying an iterator could invoke the copy ctors of these as well). In general,
+get in the habit of always using preincrement, and you won't have a problem.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="ll_avoidendl">Avoid <tt>std::endl</tt></a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>std::endl</tt> modifier, when used with iostreams, outputs a newline
+to the output stream specified. In addition to doing this, however, it also
+flushes the output stream. In other words, these are equivalent:</p>
+
+<div class="doc_code">
+<pre>
+std::cout &lt;&lt; std::endl;
+std::cout &lt;&lt; '\n' &lt;&lt; std::flush;
+</pre>
+</div>
+
+<p>Most of the time, you probably have no reason to flush the output stream, so
+it's better to use a literal <tt>'\n'</tt>.</p>
+
+</div>
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="seealso">See Also</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>A lot of these comments and recommendations have been culled from other
+sources. Two particularly important books for our work are:</p>
+
+<ol>
+
+<li><a href="http://www.aw-bc.com/catalog/academic/product/0,1144,0201310155,00.html">Effective
+C++</a> by Scott Meyers. There is an online version of the book (only some
+chapters though) <a
+href="http://www.awlonline.com/cseng/meyerscddemo/">available as well</a>. Also
+interesting and useful are "More Effective C++" and "Effective STL" by the same
+author.</li>
+
+<li><a href="http://cseng.aw.com/book/0,3828,0201633620,00.html">Large-Scale C++
+Software Design</a> by John Lakos</li>
+
+</ol>
+
+<p>If you get some free time, and you haven't read them: do so, you might learn
+something.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+ <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
+ <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+
+</body>
+</html>
diff --git a/docs/CommandGuide/Makefile b/docs/CommandGuide/Makefile
new file mode 100644
index 0000000..440d538
--- /dev/null
+++ b/docs/CommandGuide/Makefile
@@ -0,0 +1,101 @@
+##===- docs/CommandGuide/Makefile --------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file was developed by the LLVM research group and is distributed under
+# the University of Illinois Open Source License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+ifdef BUILD_FOR_WEBSITE
+
+# This special case is for keeping the CommandGuide on the LLVM web site
+# up to date automatically as the documents are checked in. It must build
+# the POD files to HTML only and keep them in the src directories. It must also
+# build in an unconfigured tree, hence the ifdef. To use this, run
+# make -s BUILD_FOR_WEBSITE=1 inside the cvs commit script.
+
+POD := $(wildcard *.pod)
+HTML := $(patsubst %.pod, html/%.html, $(POD))
+MAN := $(patsubst %.pod, man/man1/%.1, $(POD))
+PS := $(patsubst %.pod, ps/%.ps, $(POD))
+
+all: $(HTML) $(MAN) $(PS)
+
+.SUFFIXES:
+.SUFFIXES: .html .pod .1 .ps
+
+html/%.html: %.pod
+ pod2html --css=manpage.css --htmlroot=. \
+ --podpath=. --noindex --infile=$< --outfile=$@ --title=$*
+
+man/man1/%.1: %.pod
+ pod2man --release=CVS --center="LLVM Command Guide" $< $@
+
+ps/%.ps: man/man1/%.1
+ groff -Tps -man $< > $@
+
+clean:
+ rm -f pod2htm*.*~~ $(HTML) $(MAN) $(PS)
+
+else
+
+LEVEL := ../..
+
+include $(LEVEL)/Makefile.common
+
+POD := $(wildcard $(PROJ_SRC_DIR)/*.pod)
+
+EXTRA_DIST := $(POD) index.html
+
+HTML = $(patsubst $(PROJ_SRC_DIR)/%.pod, $(PROJ_OBJ_DIR)/%.html, $(POD))
+MAN = $(patsubst $(PROJ_SRC_DIR)/%.pod, $(PROJ_OBJ_DIR)/%.1, $(POD))
+PS = $(patsubst $(PROJ_SRC_DIR)/%.pod, $(PROJ_OBJ_DIR)/%.ps, $(POD))
+
+.SUFFIXES:
+.SUFFIXES: .html .pod .1 .ps
+
+$(HTML) : html/.dir man/.dir man/man1/.dir ps/.dir
+
+html: $(HTML)
+
+$(PROJ_OBJ_DIR)/%.html: %.pod
+ $(POD2HTML) --css=manpage.css --htmlroot=. --podpath=. \
+ --noindex --infile=$< --outfile=$@ --title=$*
+
+$(PROJ_OBJ_DIR)/%.1: %.pod
+ $(POD2MAN) --release=$(LLVMVersion) \
+ --center="LLVM Command Guide" $< $@
+
+$(PROJ_OBJ_DIR)/%.ps: $(PROJ_OBJ_DIR)/%.1
+ $(GROFF) -Tps -man $< > $@
+
+clean-local::
+ $(Verb) $(RM) -f pod2htm*.*~~ $(HTML) $(MAN) $(PS)
+
+HTML_DIR := $(PROJ_docsdir)/html/CommandGuide
+MAN_DIR := $(PROJ_mandir)/man1
+PS_DIR := $(PROJ_docsdir)/ps
+
+install-local:: $(HTML) $(MAN) $(PS)
+ $(Echo) Installing HTML CommandGuide Documentation
+ $(Verb) $(MKDIR) $(HTML_DIR)
+ $(Verb) $(DataInstall) $(HTML) $(HTML_DIR)
+ $(Verb) $(DataInstall) $(PROJ_SRC_DIR)/index.html $(HTML_DIR)
+ $(Verb) $(DataInstall) $(PROJ_SRC_DIR)/manpage.css $(HTML_DIR)
+ $(Echo) Installing MAN CommandGuide Documentation
+ $(Verb) $(MKDIR) $(MAN_DIR)
+ $(Verb) $(DataInstall) $(MAN) $(MAN_DIR)
+ $(Echo) Installing PS CommandGuide Documentation
+ $(Verb) $(MKDIR) $(PS_DIR)
+ $(Verb) $(DataInstall) $(PS) $(PS_DIR)
+
+uninstall-local::
+ $(Echo) Uninstalling Documentation
+ $(Verb) $(RM) -rf $(LLVM_DOCSDIR)
+
+printvars::
+ $(Echo) "POD : " '$(POD)'
+ $(Echo) "HTML : " '$(HTML)'
+
+endif
diff --git a/docs/CommandGuide/bugpoint.pod b/docs/CommandGuide/bugpoint.pod
new file mode 100644
index 0000000..12e32fb
--- /dev/null
+++ b/docs/CommandGuide/bugpoint.pod
@@ -0,0 +1,134 @@
+=pod
+
+=head1 NAME
+
+bugpoint - automatic test case reduction tool
+
+=head1 SYNOPSIS
+
+B<bugpoint> [I<options>] [I<input LLVM ll/bc files>] [I<LLVM passes>] B<--args>
+I<program arguments>
+
+=head1 DESCRIPTION
+
+B<bugpoint> narrows down the source of problems in LLVM tools and passes. It
+can be used to debug three types of failures: optimizer crashes, miscompilations
+by optimizers, or bad native code generation (including problems in the static
+and JIT compilers). It aims to reduce large test cases to small, useful ones.
+For more information on the design and inner workings of B<bugpoint>, as well as
+advice for using bugpoint, see F<llvm/docs/Bugpoint.html> in the LLVM
+distribution.
+
+=head1 OPTIONS
+
+=over
+
+=item B<--additional-so> F<library>
+
+Load the dynamic shared object F<library> into the test program whenever it is
+run. This is useful if you are debugging programs which depend on non-LLVM
+libraries (such as the X or curses libraries) to run.
+
+=item B<--args> I<program args>
+
+Pass all arguments specified after --args to the test program whenever it runs.
+Note that if any of the I<program args> start with a '-', you should use:
+
+ bugpoint [bugpoint args] --args -- [program args]
+
+The "--" right after the B<--args> option tells B<bugpoint> to consider any
+options starting with C<-> to be part of the B<--args> option, not as options to
+B<bugpoint> itself.
+
+=item B<--tool-args> I<tool args>
+
+Pass all arguments specified after --tool-args to the LLVM tool under test
+(B<llc>, B<lli>, etc.) whenever it runs. You should use this option in the
+following way:
+
+ bugpoint [bugpoint args] --tool-args -- [tool args]
+
+The "--" right after the B<--tool-args> option tells B<bugpoint> to consider any
+options starting with C<-> to be part of the B<--tool-args> option, not as
+options to B<bugpoint> itself. (See B<--args>, above.)
+
+=item B<--check-exit-code>=I<{true,false}>
+
+Assume a non-zero exit code or core dump from the test program is a failure.
+Defaults to true.
+
+=item B<--disable-{dce,simplifycfg}>
+
+Do not run the specified passes to clean up and reduce the size of the test
+program. By default, B<bugpoint> uses these passes internally when attempting to
+reduce test programs. If you're trying to find a bug in one of these passes,
+B<bugpoint> may crash.
+
+=item B<--enable-valgrind>
+
+Use valgrind to find faults in the optimization phase. This will allow
+B<bugpoint> to find otherwise asymptomatic problems caused by memory
+mismanagement.
+
+=item B<-find-bugs>
+
+Continually randomize the specified passes and run them on the test program
+until a bug is found or the user kills B<bugpoint>.
+
+=item B<--help>
+
+Print a summary of command line options.
+
+=item B<--input> F<filename>
+
+Open F<filename> and redirect the standard input of the test program, whenever
+it runs, to come from that file.
+
+=item B<--load> F<plugin>
+
+Load the dynamic object F<plugin> into B<bugpoint> itself. This object should
+register new optimization passes. Once loaded, the object will add new command
+line options to enable various optimizations. To see the new complete list of
+optimizations, use the B<--help> and B<--load> options together; for example:
+
+ bugpoint --load myNewPass.so --help
+
+=item B<--mlimit> I<megabytes>
+
+Specifies an upper limit on memory usage of the optimization and codegen. Set
+to zero to disable the limit.
+
+=item B<--output> F<filename>
+
+Whenever the test program produces output on its standard output stream, it
+should match the contents of F<filename> (the "reference output"). If you
+do not use this option, B<bugpoint> will attempt to generate a reference output
+by compiling the program with the C backend and running it.
+
+=item B<--profile-info-file> F<filename>
+
+Profile file loaded by B<--profile-loader>.
+
+=item B<--run-{int,jit,llc,cbe}>
+
+Whenever the test program is compiled, B<bugpoint> should generate code for it
+using the specified code generator. These options allow you to choose the
+interpreter, the JIT compiler, the static native code compiler, or the C
+backend, respectively.
+
+=back
+
+=head1 EXIT STATUS
+
+If B<bugpoint> succeeds in finding a problem, it will exit with 0. Otherwise,
+if an error occurs, it will exit with a non-zero value.
+
+=head1 SEE ALSO
+
+L<opt|opt>
+
+=head1 AUTHOR
+
+Maintained by the LLVM Team (L<http://llvm.org>).
+
+=cut
diff --git a/docs/CommandGuide/html/manpage.css b/docs/CommandGuide/html/manpage.css
new file mode 100644
index 0000000..b200343
--- /dev/null
+++ b/docs/CommandGuide/html/manpage.css
@@ -0,0 +1,256 @@
+/* Based on http://www.perldoc.com/css/perldoc.css */
+
+@import url("../llvm.css");
+
+body { font-family: Arial,Helvetica; }
+
+blockquote { margin: 10pt; }
+
+h1, a { color: #336699; }
+
+
+/*** Top menu style ****/
+.mmenuon {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #ff6600; font-size: 10pt;
+ }
+.mmenuoff {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #ffffff; font-size: 10pt;
+}
+.cpyright {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #ffffff; font-size: xx-small;
+}
+.cpyrightText {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #ffffff; font-size: xx-small;
+}
+.sections {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #336699; font-size: 11pt;
+}
+.dsections {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #336699; font-size: 12pt;
+}
+.slink {
+ font-family: Arial,Helvetica; font-weight: normal; text-decoration: none;
+ color: #000000; font-size: 9pt;
+}
+
+.slink2 { font-family: Arial,Helvetica; text-decoration: none; color: #336699; }
+
+.maintitle {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #336699; font-size: 18pt;
+}
+.dblArrow {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #336699; font-size: small;
+}
+.menuSec {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #336699; font-size: small;
+}
+
+.newstext {
+ font-family: Arial,Helvetica; font-size: small;
+}
+
+.linkmenu {
+ font-family: Arial,Helvetica; color: #000000; font-weight: bold;
+ text-decoration: none;
+}
+
+P {
+ font-family: Arial,Helvetica;
+}
+
+PRE {
+ font-size: 10pt;
+}
+.quote {
+ font-family: Times; text-decoration: none;
+ color: #000000; font-size: 9pt; font-style: italic;
+}
+.smstd { font-family: Arial,Helvetica; color: #000000; font-size: x-small; }
+.std { font-family: Arial,Helvetica; color: #000000; }
+.meerkatTitle {
+ font-family: sans-serif; font-size: x-small; color: black; }
+
+.meerkatDescription { font-family: sans-serif; font-size: 10pt; color: black }
+.meerkatCategory {
+ font-family: sans-serif; font-size: 9pt; font-weight: bold; font-style: italic;
+ color: brown; }
+.meerkatChannel {
+ font-family: sans-serif; font-size: 9pt; font-style: italic; color: brown; }
+.meerkatDate { font-family: sans-serif; font-size: xx-small; color: #336699; }
+
+.tocTitle {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #333333; font-size: 10pt;
+}
+
+.toc-item {
+ font-family: Arial,Helvetica; font-weight: bold;
+ color: #336699; font-size: 10pt; text-decoration: underline;
+}
+
+.perlVersion {
+ font-family: Arial,Helvetica; font-weight: bold;
+ color: #336699; font-size: 10pt; text-decoration: none;
+}
+
+.podTitle {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #000000;
+}
+
+.docTitle {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #000000; font-size: 10pt;
+}
+.dotDot {
+ font-family: Arial,Helvetica; font-weight: bold;
+ color: #000000; font-size: 9pt;
+}
+
+.docSec {
+ font-family: Arial,Helvetica; font-weight: normal;
+ color: #333333; font-size: 9pt;
+}
+.docVersion {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #336699; font-size: 10pt;
+}
+
+.docSecs-on {
+ font-family: Arial,Helvetica; font-weight: normal; text-decoration: none;
+ color: #ff0000; font-size: 10pt;
+}
+.docSecs-off {
+ font-family: Arial,Helvetica; font-weight: normal; text-decoration: none;
+ color: #333333; font-size: 10pt;
+}
+
+h2 {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #336699; font-size: medium;
+}
+h1 {
+ font-family: Verdana,Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #336699; font-size: large;
+}
+
+DL {
+ font-family: Arial,Helvetica; font-weight: normal; text-decoration: none;
+ color: #333333; font-size: 10pt;
+}
+
+UL > LI > A {
+ font-family: Arial,Helvetica; font-weight: bold;
+ color: #336699; font-size: 10pt;
+}
+
+.moduleInfo {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #333333; font-size: 11pt;
+}
+
+.moduleInfoSec {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #336699; font-size: 10pt;
+}
+
+.moduleInfoVal {
+ font-family: Arial,Helvetica; font-weight: normal; text-decoration: underline;
+ color: #000000; font-size: 10pt;
+}
+
+.cpanNavTitle {
+ font-family: Arial,Helvetica; font-weight: bold;
+ color: #ffffff; font-size: 10pt;
+}
+.cpanNavLetter {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #333333; font-size: 9pt;
+}
+.cpanCat {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #336699; font-size: 9pt;
+}
+
+.bttndrkblue-bkgd-top {
+ background-color: #225688;
+ background-image: url(/global/mvc_objects/images/bttndrkblue_bgtop.gif);
+}
+.bttndrkblue-bkgd-left {
+ background-color: #225688;
+ background-image: url(/global/mvc_objects/images/bttndrkblue_bgleft.gif);
+}
+.bttndrkblue-bkgd {
+ padding-top: 0px;
+ padding-bottom: 0px;
+ margin-bottom: 0px;
+ margin-top: 0px;
+ background-repeat: no-repeat;
+ background-color: #225688;
+ background-image: url(/global/mvc_objects/images/bttndrkblue_bgmiddle.gif);
+ vertical-align: top;
+}
+.bttndrkblue-bkgd-right {
+ background-color: #225688;
+ background-image: url(/global/mvc_objects/images/bttndrkblue_bgright.gif);
+}
+.bttndrkblue-bkgd-bottom {
+ background-color: #225688;
+ background-image: url(/global/mvc_objects/images/bttndrkblue_bgbottom.gif);
+}
+.bttndrkblue-text a {
+ color: #ffffff;
+ text-decoration: none;
+}
+a.bttndrkblue-text:hover {
+ color: #ffDD3C;
+ text-decoration: none;
+}
+.bg-ltblue {
+ background-color: #f0f5fa;
+}
+
+.border-left-b {
+ background: #f0f5fa url(/i/corner-leftline.gif) repeat-y;
+}
+
+.border-right-b {
+ background: #f0f5fa url(/i/corner-rightline.gif) repeat-y;
+}
+
+.border-top-b {
+ background: #f0f5fa url(/i/corner-topline.gif) repeat-x;
+}
+
+.border-bottom-b {
+ background: #f0f5fa url(/i/corner-botline.gif) repeat-x;
+}
+
+.border-right-w {
+ background: #ffffff url(/i/corner-rightline.gif) repeat-y;
+}
+
+.border-top-w {
+ background: #ffffff url(/i/corner-topline.gif) repeat-x;
+}
+
+.border-bottom-w {
+ background: #ffffff url(/i/corner-botline.gif) repeat-x;
+}
+
+.bg-white {
+ background-color: #ffffff;
+}
+
+.border-left-w {
+ background: #ffffff url(/i/corner-leftline.gif) repeat-y;
+}
diff --git a/docs/CommandGuide/index.html b/docs/CommandGuide/index.html
new file mode 100644
index 0000000..bc18161
--- /dev/null
+++ b/docs/CommandGuide/index.html
@@ -0,0 +1,160 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>LLVM Command Guide</title>
+ <link rel="stylesheet" href="/docs/llvm.css" type="text/css">
+</head>
+<body>
+
+<div class="doc_title">
+ LLVM Command Guide
+</div>
+
+<div class="doc_text">
+
+<p>These documents are HTML versions of the <a href="man/man1/">man pages</a>
+for all of the LLVM tools. These pages describe how to use the LLVM commands
+and what their options are. Note that these pages do not describe all of the
+options available for all tools. To get a complete listing, pass the
+<tt>--help</tt> (general options) or <tt>--help-hidden</tt> (general+debugging
+options) arguments to the tool you are interested in.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="basic">Basic Commands</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<ul>
+
+<li><a href="html/llvm-as.html"><b>llvm-as</b></a> -
+ assemble a human-readable .ll file into bytecode</li>
+
+<li><a href="html/llvm-dis.html"><b>llvm-dis</b></a> -
+ disassemble a bytecode file into a human-readable .ll file</li>
+
+<li><a href="html/llvm-upgrade.html"><b>llvm-upgrade</b></a> -
+ upgrade LLVM assembly from previous version</li>
+
+<li><a href="html/opt.html"><b>opt</b></a> -
+ run a series of LLVM-to-LLVM optimizations on a bytecode file</li>
+
+<li><a href="html/llc.html"><b>llc</b></a> -
+ generate native machine code for a bytecode file</li>
+
+<li><a href="html/lli.html"><b>lli</b></a> -
+ directly run a program compiled to bytecode using a JIT compiler or
+ interpreter</li>
+
+<li><a href="html/llvm-link.html"><b>llvm-link</b></a> -
+ link several bytecode files into one</li>
+
+<li><a href="html/llvm-ar.html"><b>llvm-ar</b></a> -
+ archive bytecode files</li>
+
+<li><a href="html/llvm-ranlib.html"><b>llvm-ranlib</b></a> -
+ create an index for archives made with llvm-ar</li>
+
+<li><a href="html/llvm-nm.html"><b>llvm-nm</b></a> -
+ print out the names and types of symbols in a bytecode file</li>
+
+<li><a href="html/llvm-prof.html"><b>llvm-prof</b></a> -
+ format raw `<tt>llvmprof.out</tt>' data into a human-readable report</li>
+
+<li><a href="html/llvmc.html"><b>llvmc</b></a> -
+ generic and configurable compiler driver</li>
+
+<li><a href="html/llvm-ld.html"><b>llvm-ld</b></a> -
+ general purpose linker with loadable runtime optimization support</li>
+
+<li><a href="html/llvm-config.html"><b>llvm-config</b></a> -
+ print out LLVM compilation options, libraries, etc. as configured.</li>
+
+ <li><a href="html/llvm2cpp.html"><b>llvm2cpp</b></a> - convert LLVM assembly
+ into the corresponding LLVM C++ API calls to produce it</li>
+</ul>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="frontend">C, C++, and Stacker Front-end Commands</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+<ul>
+
+<li><a href="html/llvmgcc.html"><b>llvmgcc</b></a> -
+ GCC-based C front-end for LLVM</li>
+
+<li><a href="html/llvmgxx.html"><b>llvmg++</b></a> -
+ GCC-based C++ front-end for LLVM</li>
+
+<li><a href="html/stkrc.html"><b>stkrc</b></a> -
+ front-end compiler for the <a href="../Stacker.html">Stacker</a>
+ language</li>
+
+</ul>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="debug">Debugging Tools</a>
+</div>
+<!-- *********************************************************************** -->
+
+
+<div class="doc_text">
+
+<ul>
+
+<li><a href="html/bugpoint.html"><b>bugpoint</b></a> -
+ automatic test-case reducer</li>
+
+<li><a href="html/llvm-extract.html"><b>llvm-extract</b></a> -
+ extract a function from an LLVM bytecode file</li>
+
+<li><a href="html/llvm-bcanalyzer.html"><b>llvm-bcanalyzer</b></a> -
+ bytecode analyzer (analyzes the binary encoding itself, not the program it
+ represents)</li>
+
+</ul>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="internal">Internal Tools</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+<ul>
+
+<li><a href="html/tblgen.html"><b>tblgen</b></a> -
+ target description reader and generator</li>
+
+</ul>
+</div>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+ <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+
+</body>
+</html>
diff --git a/docs/CommandGuide/llc.pod b/docs/CommandGuide/llc.pod
new file mode 100644
index 0000000..f5fd34f
--- /dev/null
+++ b/docs/CommandGuide/llc.pod
@@ -0,0 +1,191 @@
+=pod
+
+=head1 NAME
+
+llc - LLVM static compiler
+
+=head1 SYNOPSIS
+
+B<llc> [I<options>] [I<filename>]
+
+=head1 DESCRIPTION
+
+The B<llc> command compiles LLVM bitcode into assembly language for a
+specified architecture. The assembly language output can then be passed through
+a native assembler and linker to generate a native executable.
+
+The choice of architecture for the output assembly code is automatically
+determined from the input bitcode file, unless the B<-march> option is used to
+override the default.
+
+=head1 OPTIONS
+
+If I<filename> is - or omitted, B<llc> reads LLVM bitcode from standard input.
+Otherwise, it will read LLVM bitcode from I<filename>.
+
+If the B<-o> option is omitted, then B<llc> will send its output to standard
+output if the input is from standard input. If the B<-o> option specifies -,
+then the output will also be sent to standard output.
+
+If no B<-o> option is specified and an input file other than - is specified,
+then B<llc> creates the output filename by taking the input filename,
+removing any existing F<.bc> extension, and adding a F<.s> suffix.
+
+Other B<llc> options are as follows:
+
+=head2 End-user Options
+
+=over
+
+=item B<--help>
+
+Print a summary of command line options.
+
+=item B<-f>
+
+Overwrite output files. By default, B<llc> will refuse to overwrite
+an output file which already exists.
+
+=item B<-mtriple>=I<target triple>
+
+Override the target triple specified in the input bitcode file with the
+specified string.
+
+=item B<-march>=I<arch>
+
+Specify the architecture for which to generate assembly, overriding the target
+encoded in the bitcode file. See the output of B<llc --help> for a list of
+valid architectures. By default this is inferred from the target triple or
+autodetected to the current architecture.
+
+=item B<-mcpu>=I<cpuname>
+
+Specify a specific chip in the current architecture to generate code for.
+By default this is inferred from the target triple or autodetected to
+the current architecture. For a list of available CPUs, use:
+B<llvm-as E<lt> /dev/null | llc -march=xyz -mcpu=help>
+
+=item B<-mattr>=I<a1,+a2,-a3,...>
+
+Override or control specific attributes of the target, such as whether SIMD
+operations are enabled or not. The default set of attributes is set by the
+current CPU. For a list of available attributes, use:
+B<llvm-as E<lt> /dev/null | llc -march=xyz -mattr=help>
+
+=item B<--disable-fp-elim>
+
+Disable frame pointer elimination optimization.
+
+=item B<--disable-excess-fp-precision>
+
+Disable optimizations that may produce excess precision for floating point.
+Note that this option can dramatically slow down code on some systems
+(e.g. X86).
+
+=item B<--enable-unsafe-fp-math>
+
+Enable optimizations that make unsafe assumptions about IEEE math (e.g. that
+addition is associative) or may not work for all input ranges. These
+optimizations allow the code generator to make use of some instructions which
+would otherwise not be usable (such as fsin on X86).
+
+=item B<--enable-correct-eh-support>
+
+Instruct the B<lowerinvoke> pass to insert code for correct exception handling
+support. This is expensive and is by default omitted for efficiency.
+
+=item B<--stats>
+
+Print statistics recorded by code-generation passes.
+
+=item B<--time-passes>
+
+Record the amount of time needed for each pass and print a report to standard
+error.
+
+=item B<--load>=F<dso_path>
+
+Dynamically load F<dso_path> (a path to a dynamically shared object) that
+implements an LLVM target. This will permit the target name to be used with the
+B<-march> option so that code can be generated for that target.
+
+=back
+
+=head2 Tuning/Configuration Options
+
+=over
+
+=item B<--print-machineinstrs>
+
+Print generated machine code between compilation phases (useful for debugging).
+
+=item B<--regalloc>=I<allocator>
+
+Specify the register allocator to use. The default I<allocator> is I<local>.
+Valid register allocators are:
+
+=over
+
+=item I<simple>
+
+Very simple "always spill" register allocator
+
+=item I<local>
+
+Local register allocator
+
+=item I<linearscan>
+
+Linear scan global register allocator
+
+=item I<iterativescan>
+
+Iterative scan global register allocator
+
+=back
+
+=item B<--spiller>=I<spiller>
+
+Specify the spiller to use for register allocators that support it. Currently
+this option is used only by the linear scan register allocator. The default
+I<spiller> is I<local>. Valid spillers are:
+
+=over
+
+=item I<simple>
+
+Simple spiller
+
+=item I<local>
+
+Local spiller
+
+=back
+
+=back
+
+=head2 Intel IA-32-specific Options
+
+=over
+
+=item B<--x86-asm-syntax=att|intel>
+
+Specify whether to emit assembly code in AT&T syntax (the default) or Intel
+syntax.
+
+=back
+
+=head1 EXIT STATUS
+
+If B<llc> succeeds, it will exit with 0. Otherwise, if an error occurs,
+it will exit with a non-zero value.
+
+=head1 SEE ALSO
+
+L<lli|lli>
+
+=head1 AUTHOR
+
+Maintained by the LLVM Team (L<http://llvm.org>).
+
+=cut
diff --git a/docs/CommandGuide/lli.pod b/docs/CommandGuide/lli.pod
new file mode 100644
index 0000000..1db47b7
--- /dev/null
+++ b/docs/CommandGuide/lli.pod
@@ -0,0 +1,97 @@
+=pod
+
+=head1 NAME
+
+lli - directly execute programs from LLVM bitcode
+
+=head1 SYNOPSIS
+
+B<lli> [I<options>] [I<filename>] [I<program args>]
+
+=head1 DESCRIPTION
+
+B<lli> directly executes programs in LLVM bitcode format. It takes a program
+in LLVM bitcode format and executes it using a just-in-time compiler, if one is
+available for the current architecture, or an interpreter. B<lli> takes all of
+the same code generator options as L<llc|llc>, but they are only effective when
+B<lli> is using the just-in-time compiler.
+
+If I<filename> is not specified, then B<lli> reads the LLVM bitcode for the
+program from standard input.
+
+The optional I<program args> specified on the command line are passed to the
+program as arguments.
+
+=head1 OPTIONS
+
+=over
+
+=item B<-help>
+
+Print a summary of command line options.
+
+=item B<-stats>
+
+Print statistics from the code-generation passes. This is only meaningful for
+the just-in-time compiler, at present.
+
+=item B<-time-passes>
+
+Record the amount of time needed for each code-generation pass and print it to
+standard error.
+
+=item B<-mtriple>=I<target triple>
+
+Override the target triple specified in the input bitcode file with the
+specified string. This may result in a crash if you pick an
+architecture which is not compatible with the current system.
+
+=item B<-march>=I<arch>
+
+Specify the architecture for which to generate assembly, overriding the target
+encoded in the bitcode file. See the output of B<llc --help> for a list of
+valid architectures. By default this is inferred from the target triple or
+autodetected to the current architecture.
+
+=item B<-mcpu>=I<cpuname>
+
+Specify a specific chip in the current architecture to generate code for.
+By default this is inferred from the target triple or autodetected to
+the current architecture. For a list of available CPUs, use:
+B<llvm-as E<lt> /dev/null | llc -march=xyz -mcpu=help>
+
+=item B<-mattr>=I<a1,+a2,-a3,...>
+
+Override or control specific attributes of the target, such as whether SIMD
+operations are enabled or not. The default set of attributes is set by the
+current CPU. For a list of available attributes, use:
+B<llvm-as E<lt> /dev/null | llc -march=xyz -mattr=help>
+
+=item B<-force-interpreter>=I<{false,true}>
+
+If set to true, use the interpreter even if a just-in-time compiler is available
+for this architecture. Defaults to false.
+
+=item B<-f>=I<name>
+
+Call the function named I<name> to start the program. Note: The
+function is assumed to have the C signature C<int> I<name> C<(int,
+char **, char **)>. If you try to use this option to call a function of
+incompatible type, undefined behavior may result. Defaults to C<main>.
+
+=back
+
+=head1 EXIT STATUS
+
+If B<lli> fails to load the program, it will exit with an exit code of 1.
+Otherwise, it will return the exit code of the program it executes.
+
+=head1 SEE ALSO
+
+L<llc|llc>
+
+=head1 AUTHOR
+
+Maintained by the LLVM Team (L<http://llvm.org>).
+
+=cut
diff --git a/docs/CommandGuide/llvm-ar.pod b/docs/CommandGuide/llvm-ar.pod
new file mode 100644
index 0000000..63ba43f
--- /dev/null
+++ b/docs/CommandGuide/llvm-ar.pod
@@ -0,0 +1,406 @@
+=pod
+
+=head1 NAME
+
+llvm-ar - LLVM archiver
+
+=head1 SYNOPSIS
+
+B<llvm-ar> [-]{dmpqrtx}[Rabfikouz] [relpos] [count] <archive> [files...]
+
+
+=head1 DESCRIPTION
+
+The B<llvm-ar> command is similar to the common Unix utility, C<ar>. It
+archives several files together into a single file. The intent for this is
+to produce archive libraries of LLVM bitcode that can be linked into an
+LLVM program. However, the archive can contain any kind of file. By default,
+B<llvm-ar> generates a symbol table that makes linking faster because
+only the symbol table needs to be consulted, not each individual file member
+of the archive.
+
+The B<llvm-ar> command can be used to I<read> both SVR4 and BSD style archive
+files. However, it cannot be used to write them. While the B<llvm-ar> command
+produces files that are I<almost> identical to the format used by other C<ar>
+implementations, it has two significant departures in order to make the
+archive appropriate for LLVM. The first departure is that B<llvm-ar> only
+uses BSD4.4 style long path names (stored immediately after the header) and
+never contains a string table for long names. The second departure is that the
+symbol table is formatted for efficient construction of an in-memory data
+structure that permits rapid (red-black tree) lookups. Consequently, archives
+produced with B<llvm-ar> usually won't be readable or editable with any
+C<ar> implementation or useful for linking. Using the C<f> modifier to flatten
+file names will make the archive readable by other C<ar> implementations
+but not for linking because the symbol table format for LLVM is unique. If an
+SVR4 or BSD style archive is used with the C<r> (replace) or C<q> (quick
+update) operations, the archive will be reconstructed in LLVM format. This
+means that the string table will be dropped (in deference to BSD 4.4 long names)
+and an LLVM symbol table will be added (by default). The system symbol table
+will be retained.
+
+Here's where B<llvm-ar> departs from previous C<ar> implementations:
+
+=over
+
+=item I<Symbol Table>
+
+Since B<llvm-ar> is intended to archive bitcode files, the symbol table
+won't make much sense to anything but LLVM. Consequently, the symbol table's
+format has been simplified. It consists simply of a sequence of pairs
+of a file member index number as an LSB 4-byte integer and a null-terminated
+string.
+
+=item I<Long Paths>
+
+Some C<ar> implementations (SVR4) use a separate file member to record long
+path names (> 15 characters). B<llvm-ar> takes the BSD 4.4 and Mac OS X
+approach which is to simply store the full path name immediately preceding
+the data for the file. The path name is null terminated and may contain the
+slash (/) character.
+
+=item I<Compression>
+
+B<llvm-ar> can compress the members of an archive to save space. The
+compression used depends on what's available on the platform and what choices
+the LLVM Compressor utility makes. It generally favors bzip2 but will select
+between "no compression" or bzip2 depending on what makes sense for the
+file's content.
+
+=item I<Directory Recursion>
+
+Most C<ar> implementations do not recurse through directories but simply
+ignore directories if they are presented to the program in the F<files>
+option. B<llvm-ar>, however, can recurse through directory structures and
+add all the files under a directory, if requested.
+
+=item I<TOC Verbose Output>
+
+When B<llvm-ar> prints out the verbose table of contents (C<tv> option), it
+precedes the usual output with a character indicating the basic kind of
+content in the file. A blank means the file is a regular file. A 'Z' means
+the file is compressed. A 'B' means the file is an LLVM bitcode file. An
+'S' means the file is the symbol table.
+
+=back
+
+=head1 OPTIONS
+
+The options to B<llvm-ar> are compatible with other C<ar> implementations.
+However, there are a few modifiers (F<zR>) that are not found in other
+C<ar>s. The options to B<llvm-ar> specify a single basic operation to
+perform on the archive, a variety of modifiers for that operation, the
+name of the archive file, and an optional list of file names. These options
+are used to determine how B<llvm-ar> should process the archive file.
+
+The Operations and Modifiers are explained in the sections below. The minimal
+set of options is at least one operator and the name of the archive. Typically
+archive files end with a C<.a> suffix, but this is not required. Following
+the F<archive-name> comes a list of F<files> that indicate the specific members
+of the archive to operate on. If the F<files> option is not specified, it
+generally means either "none" or "all" members, depending on the operation.
+
+=head2 Operations
+
+=over
+
+=item d
+
+Delete files from the archive. No modifiers are applicable to this operation.
+The F<files> options specify which members should be removed from the
+archive. It is not an error if a specified file does not appear in the archive.
+If no F<files> are specified, the archive is not modified.
+
+=item m[abi]
+
+Move files from one location in the archive to another. The F<a>, F<b>, and
+F<i> modifiers apply to this operation. The F<files> will all be moved
+to the location given by the modifiers. If no modifiers are used, the files
+will be moved to the end of the archive. If no F<files> are specified, the
+archive is not modified.
+
+=item p[k]
+
+Print files to the standard output. The F<k> modifier applies to this
+operation. This operation simply prints the F<files> indicated to the
+standard output. If no F<files> are specified, the entire archive is printed.
+Printing bitcode files is ill-advised as they might confuse your terminal
+settings. The F<p> operation never modifies the archive.
+
+=item q[Rfz]
+
+Quickly append files to the end of the archive. The F<R>, F<f>, and F<z>
+modifiers apply to this operation. This operation quickly adds the
+F<files> to the archive without checking for duplicates that should be
+removed first. If no F<files> are specified, the archive is not modified.
+Because of the way that B<llvm-ar> constructs the archive file, it's dubious
+whether the F<q> operation is any faster than the F<r> operation.
+
+=item r[Rabfuz]
+
+Replace or insert file members. The F<R>, F<a>, F<b>, F<f>, F<u>, and F<z>
+modifiers apply to this operation. This operation will replace existing
+F<files> or insert them at the end of the archive if they do not exist. If no
+F<files> are specified, the archive is not modified.
+
+=item t[v]
+
+Print the table of contents. Without any modifiers, this operation just prints
+the names of the members to the standard output. With the F<v> modifier,
+B<llvm-ar> also prints out the file type (B=bitcode, Z=compressed, S=symbol
+table, blank=regular file), the permission mode, the owner and group, the
+size, and the date. If any F<files> are specified, the listing is only for
+those files. If no F<files> are specified, the table of contents for the
+whole archive is printed.
+
+=item x[oP]
+
+Extract archive members back to files. The F<o> modifier applies to this
+operation. This operation retrieves the indicated F<files> from the archive
+and writes them back to the operating system's file system. If no
+F<files> are specified, the entire archive is extracted.
+
+=back
+
+=head2 Modifiers (operation specific)
+
+The modifiers below are specific to certain operations. See the Operations
+section (above) to determine which modifiers are applicable to which operations.
+
+=over
+
+=item [a]
+
+When inserting or moving member files, this option specifies the destination of
+the new files as being C<a>fter the F<relpos> member. If F<relpos> is not found,
+the files are placed at the end of the archive.
+
+=item [b]
+
+When inserting or moving member files, this option specifies the destination of
+the new files as being C<b>efore the F<relpos> member. If F<relpos> is not
+found, the files are placed at the end of the archive. This modifier is
+identical to the F<i> modifier.
+
+=item [f]
+
+Normally, B<llvm-ar> stores the full path name to a file as presented to it on
+the command line. With this option, truncated (15 characters max) names are
+used. This ensures name compatibility with older versions of C<ar> but may also
+thwart correct extraction of the files (duplicates may overwrite). If used with
+the F<R> option, the directory recursion will be performed but the file names
+will all be C<f>lattened to simple file names.
+
+=item [i]
+
+A synonym for the F<b> option.
+
+=item [k]
+
+Normally, B<llvm-ar> will not print the contents of bitcode files when the
+F<p> operation is used. This modifier defeats the default and allows the
+bitcode members to be printed.
+
+=item [N]
+
+This option is ignored by B<llvm-ar> but provided for compatibility.
+
+=item [o]
+
+When extracting files, this option will cause B<llvm-ar> to preserve the
+original modification times of the files it writes.
+
+=item [P]
+
+Use full path names when matching.
+
+=item [R]
+
+This modifier instructs the F<r> option to recursively process directories.
+Without F<R>, directories are ignored and only those F<files> that refer to
+files will be added to the archive. When F<R> is used, any directories specified
+with F<files> will be scanned (recursively) to find files to be added to the
+archive. Any file whose name begins with a dot will not be added.
+
+=item [u]
+
+When replacing existing files in the archive, only replace those files that have
+a time stamp newer than the time stamp of the member in the archive.
+
+=item [z]
+
+When inserting or replacing any file in the archive, compress the file first.
+This
+modifier is safe to use when (previously) compressed bitcode files are added to
+the archive; the compressed bitcode files will not be doubly compressed.
+
+=back
+
+=head2 Modifiers (generic)
+
+The modifiers below may be applied to any operation.
+
+=over
+
+=item [c]
+
+For all operations, B<llvm-ar> will always create the archive if it doesn't
+exist. Normally, B<llvm-ar> will print a warning message indicating that the
+archive is being created. Using this modifier turns off that warning.
+
+=item [s]
+
+This modifier requests that an archive index (or symbol table) be added to the
+archive. This is the default mode of operation. The symbol table will contain
+all the externally visible functions and global variables defined by all the
+bitcode files in the archive. Using this modifier is more efficient than using
+L<llvm-ranlib|llvm-ranlib> which also creates the symbol table.
+
+=item [S]
+
+This modifier is the opposite of the F<s> modifier. It instructs B<llvm-ar> to
+not build the symbol table. If both F<s> and F<S> are used, the last modifier to
+occur in the options will prevail.
+
+=item [v]
+
+This modifier instructs B<llvm-ar> to be verbose about what it is doing. Each
+editing operation taken against the archive will produce a line of output saying
+what is being done.
+
+=back
+
+=head1 STANDARDS
+
+The B<llvm-ar> utility is intended to provide a superset of the IEEE Std 1003.2
+(POSIX.2) functionality for C<ar>. B<llvm-ar> can read both SVR4 and BSD4.4 (or
+Mac OS X) archives. If the C<f> modifier is given to the C<x> or C<r> operations
+then B<llvm-ar> will write SVR4 compatible archives. Without this modifier,
+B<llvm-ar> will write BSD4.4 compatible archives that have long names
+immediately after the header and indicated using the "#1/ddd" notation for the
+name in the header.
+
+=head1 FILE FORMAT
+
+The file format for LLVM Archive files is similar to that of BSD 4.4 or Mac OSX
+archive files. In fact, except for the symbol table, the C<ar> commands on those
+operating systems should be able to read LLVM archive files. The details of the
+file format follow.
+
+Each archive begins with the archive magic number which is the eight printable
+characters "!<arch>\n" where \n represents the newline character (0x0A).
+Following the magic number, the file is composed of even length members that
+begin with an archive header and end with a \n padding character if necessary
+(to make the length even). Each file member is composed of a header (defined
+below), an optional newline-terminated "long file name" and the contents of
+the file.
+
+The fields of the header are described in the items below. All fields of the
+header contain only ASCII characters, are left justified and are right padded
+with space characters.
+
+=over
+
+=item name - char[16]
+
+This field of the header provides the name of the archive member. If the name is
+longer than 15 characters or contains a slash (/) character, then this field
+contains C<#1/nnn> where C<nnn> provides the length of the name and the C<#1/>
+is literal. In this case, the actual name of the file is provided in the C<nnn>
+bytes immediately following the header. If the name is 15 characters or less, it
+is contained directly in this field and terminated with a slash (/) character.
+
+=item date - char[12]
+
+This field provides the date of modification of the file in the form of a
+decimal encoded number that provides the number of seconds since the epoch
+(since 00:00:00 Jan 1, 1970) per Posix specifications.
+
+=item uid - char[6]
+
+This field provides the user id of the file encoded as a decimal ASCII string.
+This field might not make much sense on non-Unix systems. On Unix, it is the
+same value as the st_uid field of the stat structure returned by the stat(2)
+operating system call.
+
+=item gid - char[6]
+
+This field provides the group id of the file encoded as a decimal ASCII string.
+This field might not make much sense on non-Unix systems. On Unix, it is the
+same value as the st_gid field of the stat structure returned by the stat(2)
+operating system call.
+
+=item mode - char[8]
+
+This field provides the access mode of the file encoded as an octal ASCII
+string. This field might not make much sense on non-Unix systems. On Unix, it
+is the same value as the st_mode field of the stat structure returned by the
+stat(2) operating system call.
+
+=item size - char[10]
+
+This field provides the size of the file, in bytes, encoded as a decimal ASCII
+string. If the size field is negative (starts with a minus sign, 0x2D), then
+the archive member is stored in compressed form. The first byte of the archive
+member's data indicates the compression type used. A value of 0 (0x30) indicates
+that no compression was used. A value of 2 (0x32) indicates that bzip2
+compression was used.
+
+=item fmag - char[2]
+
+This field is the archive file member magic number. Its content is always the
+two characters back tick (0x60) and newline (0x0A). This provides some measure
+of utility in identifying archive files that have been corrupted.
+
+=back
+
+The LLVM symbol table has the special name "#_LLVM_SYM_TAB_#". It is presumed
+that no regular archive member file will want this name. The LLVM symbol table
+is simply composed of a sequence of triplets: byte offset, length of symbol,
+and the symbol itself. Symbols are not null or newline terminated. Here are
+the details on each of these items:
+
+=over
+
+=item offset - vbr encoded 32-bit integer
+
+The offset item provides the offset into the archive file where the bitcode
+member is stored that is associated with the symbol. The offset value is 0
+based at the start of the first "normal" file member. To derive the actual
+file offset of the member, you must add the number of bytes occupied by the file
+signature (8 bytes) and the symbol tables. The value of this item is encoded
+using variable bit rate encoding to reduce the size of the symbol table.
+Variable bit rate encoding uses the high bit (0x80) of each byte to indicate
+if there are more bytes to follow. The remaining 7 bits in each byte carry bits
+from the value. The final byte does not have the high bit set.
+
+=item length - vbr encoded 32-bit integer
+
+The length item provides the length of the symbol that follows. Like the
+I<offset> item, the length is variable bit rate encoded.
+
+=item symbol - character array
+
+The symbol item provides the text of the symbol that is associated with the
+I<offset>. The symbol is not terminated by any character. Its length is provided
+by the I<length> field. Note that it is allowed (but unwise) to use non-printing
+characters (even 0x00) in the symbol. This allows for multiple encodings of
+symbol names.
+
+=back
+
+=head1 EXIT STATUS
+
+If B<llvm-ar> succeeds, it will exit with 0. A usage error results
+in an exit code of 1. A hard (file system typically) error results in an
+exit code of 2. Miscellaneous or unknown errors result in an
+exit code of 3.
+
+=head1 SEE ALSO
+
+L<llvm-ranlib|llvm-ranlib>, ar(1)
+
+=head1 AUTHORS
+
+Maintained by the LLVM Team (L<http://llvm.org>).
+
+=cut
diff --git a/docs/CommandGuide/llvm-as.pod b/docs/CommandGuide/llvm-as.pod
new file mode 100644
index 0000000..2befed1
--- /dev/null
+++ b/docs/CommandGuide/llvm-as.pod
@@ -0,0 +1,77 @@
+=pod
+
+=head1 NAME
+
+llvm-as - LLVM assembler
+
+=head1 SYNOPSIS
+
+B<llvm-as> [I<options>] [I<filename>]
+
+=head1 DESCRIPTION
+
+B<llvm-as> is the LLVM assembler. It reads a file containing human-readable
+LLVM assembly language, translates it to LLVM bitcode, and writes the result
+into a file or to standard output.
+
+If F<filename> is omitted or is C<->, then B<llvm-as> reads its input from
+standard input.
+
+If an output file is not specified with the B<-o> option, then
+B<llvm-as> sends its output to a file or standard output by following
+these rules:
+
+=over
+
+=item *
+
+If the input is standard input, then the output is standard output.
+
+=item *
+
+If the input is a file that ends with C<.ll>, then the output file is of
+the same name, except that the suffix is changed to C<.bc>.
+
+=item *
+
+If the input is a file that does not end with the C<.ll> suffix, then the
+output file has the same name as the input file, except that the C<.bc>
+suffix is appended.
+
+=back
+
+=head1 OPTIONS
+
+=over
+
+=item B<-f>
+
+Force overwrite. Normally, B<llvm-as> will refuse to overwrite an
+output file that already exists. With this option, B<llvm-as>
+will overwrite the output file and replace it with new bitcode.
+
+=item B<--help>
+
+Print a summary of command line options.
+
+=item B<-o> F<filename>
+
+Specify the output file name. If F<filename> is C<->, then B<llvm-as>
+sends its output to standard output.
+
+=back
+
+=head1 EXIT STATUS
+
+If B<llvm-as> succeeds, it will exit with 0. Otherwise, if an error
+occurs, it will exit with a non-zero value.
+
+=head1 SEE ALSO
+
+L<llvm-dis|llvm-dis>, L<gccas|gccas>
+
+=head1 AUTHORS
+
+Maintained by the LLVM Team (L<http://llvm.org>).
+
+=cut
diff --git a/docs/CommandGuide/llvm-bcanalyzer.pod b/docs/CommandGuide/llvm-bcanalyzer.pod
new file mode 100644
index 0000000..e789c98
--- /dev/null
+++ b/docs/CommandGuide/llvm-bcanalyzer.pod
@@ -0,0 +1,315 @@
+=pod
+
+=head1 NAME
+
+llvm-bcanalyzer - LLVM bitcode analyzer
+
+=head1 SYNOPSIS
+
+B<llvm-bcanalyzer> [I<options>] [F<filename>]
+
+=head1 DESCRIPTION
+
+The B<llvm-bcanalyzer> command is a small utility for analyzing bitcode files.
+The tool reads a bitcode file (such as generated with the B<llvm-as> tool) and
+produces a statistical report on the contents of the bitcode file. The tool
+can also dump a low level but human readable version of the bitcode file.
+This tool is probably not of much interest or utility except for those working
+directly with the bitcode file format. Most LLVM users can just ignore
+this tool.
+
+If F<filename> is omitted or is C<->, then B<llvm-bcanalyzer> reads its input
+from standard input. This is useful for combining the tool into a pipeline.
+Output is written to the standard output.
+
+=head1 OPTIONS
+
+=over
+
+=item B<-nodetails>
+
+Causes B<llvm-bcanalyzer> to abbreviate its output by writing out only a module
+level summary. The details for individual functions are not displayed.
+
+=item B<-dump>
+
+Causes B<llvm-bcanalyzer> to dump the bitcode in a human readable format. This
+format is significantly different from LLVM assembly and provides details about
+the encoding of the bitcode file.
+
+=item B<-verify>
+
+Causes B<llvm-bcanalyzer> to verify the module produced by reading the
+bitcode. This ensures that the statistics generated are based on a consistent
+module.
+
+=item B<--help>
+
+Print a summary of command line options.
+
+=back
+
+=head1 EXIT STATUS
+
+If B<llvm-bcanalyzer> succeeds, it will exit with 0. Otherwise, if an error
+occurs, it will exit with a non-zero value, usually 1.
+
+=head1 SUMMARY OUTPUT DEFINITIONS
+
+The following items are always printed by llvm-bcanalyzer. They comprise the
+summary output.
+
+=over
+
+=item B<Bitcode Analysis Of Module>
+
+This just provides the name of the module for which bitcode analysis is being
+generated.
+
+=item B<Bitcode Version Number>
+
+The bitcode version (not LLVM version) of the file read by the analyzer.
+
+=item B<File Size>
+
+The size, in bytes, of the entire bitcode file.
+
+=item B<Module Bytes>
+
+The size, in bytes, of the module block. Percentage is relative to File Size.
+
+=item B<Function Bytes>
+
+The size, in bytes, of all the function blocks. Percentage is relative to File
+Size.
+
+=item B<Global Types Bytes>
+
+The size, in bytes, of the Global Types Pool. Percentage is relative to File
+Size. This is the size of the definitions of all types in the bitcode file.
+
+=item B<Constant Pool Bytes>
+
+The size, in bytes, of the Constant Pool Blocks. Percentage is relative to File
+Size.
+
+=item B<Module Globals Bytes>
+
+The size, in bytes, of the Global Variable Definitions and their initializers.
+Percentage is relative to File Size.
+
+=item B<Instruction List Bytes>
+
+The size, in bytes, of all the instruction lists in all the functions.
+Percentage is relative to File Size. Note that this value is also included in
+the Function Bytes.
+
+=item B<Compaction Table Bytes>
+
+The size, in bytes, of all the compaction tables in all the functions.
+Percentage is relative to File Size. Note that this value is also included in
+the Function Bytes.
+
+=item B<Symbol Table Bytes>
+
+The size, in bytes, of all the symbol tables in all the functions. Percentage is
+relative to File Size. Note that this value is also included in the Function
+Bytes.
+
+=item B<Dependent Libraries Bytes>
+
+The size, in bytes, of the list of dependent libraries in the module. Percentage
+is relative to File Size. Note that this value is also included in the Module
+Global Bytes.
+
+=item B<Number Of Bitcode Blocks>
+
+The total number of blocks of any kind in the bitcode file.
+
+=item B<Number Of Functions>
+
+The total number of function definitions in the bitcode file.
+
+=item B<Number Of Types>
+
+The total number of types defined in the Global Types Pool.
+
+=item B<Number Of Constants>
+
+The total number of constants (of any type) defined in the Constant Pool.
+
+=item B<Number Of Basic Blocks>
+
+The total number of basic blocks defined in all functions in the bitcode file.
+
+=item B<Number Of Instructions>
+
+The total number of instructions defined in all functions in the bitcode file.
+
+=item B<Number Of Long Instructions>
+
+The total number of long instructions defined in all functions in the bitcode
+file. Long instructions are those taking greater than 4 bytes. Typically long
+instructions are GetElementPtr with several indices, PHI nodes, and calls to
+functions with large numbers of arguments.
+
+=item B<Number Of Operands>
+
+The total number of operands used in all instructions in the bitcode file.
+
+=item B<Number Of Compaction Tables>
+
+The total number of compaction tables in all functions in the bitcode file.
+
+=item B<Number Of Symbol Tables>
+
+The total number of symbol tables in all functions in the bitcode file.
+
+=item B<Number Of Dependent Libs>
+
+The total number of dependent libraries found in the bitcode file.
+
+=item B<Total Instruction Size>
+
+The total size of the instructions in all functions in the bitcode file.
+
+=item B<Average Instruction Size>
+
+The average number of bytes per instruction across all functions in the bitcode
+file. This value is computed by dividing Total Instruction Size by Number Of
+Instructions.
+
+=item B<Maximum Type Slot Number>
+
+The maximum value used for a type's slot number. Larger slot number values take
+more bytes to encode.
+
+=item B<Maximum Value Slot Number>
+
+The maximum value used for a value's slot number. Larger slot number values take
+more bytes to encode.
+
+=item B<Bytes Per Value>
+
+The average size of a Value definition (of any type). This is computed by
+dividing File Size by the total number of values of any type.
+
+=item B<Bytes Per Global>
+
+The average size of a global definition (constants and global variables).
+
+=item B<Bytes Per Function>
+
+The average number of bytes per function definition. This is computed by
+dividing Function Bytes by Number Of Functions.
+
+=item B<# of VBR 32-bit Integers>
+
+The total number of 32-bit integers encoded using the Variable Bit Rate
+encoding scheme.
+
+=item B<# of VBR 64-bit Integers>
+
+The total number of 64-bit integers encoded using the Variable Bit Rate encoding
+scheme.
+
+=item B<# of VBR Compressed Bytes>
+
+The total number of bytes consumed by the 32-bit and 64-bit integers that use
+the Variable Bit Rate encoding scheme.
+
+=item B<# of VBR Expanded Bytes>
+
+The total number of bytes that would have been consumed by the 32-bit and 64-bit
+integers had they not been compressed with the Variable Bit Rate encoding
+scheme.
+
+=item B<Bytes Saved With VBR>
+
+The total number of bytes saved by using the Variable Bit Rate encoding scheme.
+The percentage is relative to # of VBR Expanded Bytes.
+
+=back
+
+=head1 DETAILED OUTPUT DEFINITIONS
+
+The following definitions occur only if the -nodetails option was not given.
+The detailed output provides additional information on a per-function basis.
+
+=over
+
+=item B<Type>
+
+The type signature of the function.
+
+=item B<Byte Size>
+
+The total number of bytes in the function's block.
+
+=item B<Basic Blocks>
+
+The number of basic blocks defined by the function.
+
+=item B<Instructions>
+
+The number of instructions defined by the function.
+
+=item B<Long Instructions>
+
+The number of instructions using the long instruction format in the function.
+
+=item B<Operands>
+
+The number of operands used by all instructions in the function.
+
+=item B<Instruction Size>
+
+The number of bytes consumed by instructions in the function.
+
+=item B<Average Instruction Size>
+
+The average number of bytes consumed by the instructions in the function. This
+value is computed by dividing Instruction Size by Instructions.
+
+=item B<Bytes Per Instruction>
+
+The average number of bytes used by the function per instruction. This value is
+computed by dividing Byte Size by Instructions. Note that this is not the same
+as Average Instruction Size. It computes a number relative to the total function
+size not just the size of the instruction list.
+
+=item B<Number of VBR 32-bit Integers>
+
+The total number of 32-bit integers found in this function (for any use).
+
+=item B<Number of VBR 64-bit Integers>
+
+The total number of 64-bit integers found in this function (for any use).
+
+=item B<Number of VBR Compressed Bytes>
+
+The total number of bytes in this function consumed by the 32-bit and 64-bit
+integers that use the Variable Bit Rate encoding scheme.
+
+=item B<Number of VBR Expanded Bytes>
+
+The total number of bytes in this function that would have been consumed by
+the 32-bit and 64-bit integers had they not been compressed with the Variable
+Bit Rate encoding scheme.
+
+=item B<Bytes Saved With VBR>
+
+The total number of bytes saved in this function by using the Variable Bit
+Rate encoding scheme. The percentage is relative to # of VBR Expanded Bytes.
+
+=back
+
+=head1 SEE ALSO
+
+L<llvm-dis|llvm-dis>, L<http://llvm.org/docs/BitcodeFormat.html>
+
+=head1 AUTHORS
+
+Maintained by the LLVM Team (L<http://llvm.org>).
+
+=cut
diff --git a/docs/CommandGuide/llvm-config.pod b/docs/CommandGuide/llvm-config.pod
new file mode 100644
index 0000000..06f10de
--- /dev/null
+++ b/docs/CommandGuide/llvm-config.pod
@@ -0,0 +1,131 @@
+=pod
+
+=head1 NAME
+
+llvm-config - Print LLVM compilation options
+
+=head1 SYNOPSIS
+
+B<llvm-config> I<option> [I<components>...]
+
+=head1 DESCRIPTION
+
+B<llvm-config> makes it easier to build applications that use LLVM. It can
+print the compiler flags, linker flags and object libraries needed to link
+against LLVM.
+
+=head1 EXAMPLES
+
+To link against the JIT:
+
+ g++ `llvm-config --cxxflags` -o HowToUseJIT.o -c HowToUseJIT.cpp
+ g++ `llvm-config --ldflags` -o HowToUseJIT HowToUseJIT.o \
+ `llvm-config --libs engine bcreader scalaropts`
+
+=head1 OPTIONS
+
+=over
+
+=item B<--version>
+
+Print the version number of LLVM.
+
+=item B<--help>
+
+Print a summary of B<llvm-config> arguments.
+
+=item B<--prefix>
+
+Print the installation prefix for LLVM.
+
+=item B<--src-root>
+
+Print the source root from which LLVM was built.
+
+=item B<--obj-root>
+
+Print the object root used to build LLVM.
+
+=item B<--bindir>
+
+Print the installation directory for LLVM binaries.
+
+=item B<--includedir>
+
+Print the installation directory for LLVM headers.
+
+=item B<--libdir>
+
+Print the installation directory for LLVM libraries.
+
+=item B<--cxxflags>
+
+Print the C++ compiler flags needed to use LLVM headers.
+
+=item B<--ldflags>
+
+Print the flags needed to link against LLVM libraries.
+
+=item B<--libs>
+
+Print all the libraries needed to link against the specified LLVM
+I<components>, including any dependencies.
+
+=item B<--libnames>
+
+Similar to B<--libs>, but prints the bare filenames of the libraries
+without B<-l> or pathnames. Useful for linking against a not-yet-installed
+copy of LLVM.
+
+=item B<--libfiles>
+
+Similar to B<--libs>, but print the full path to each library file. This is
+useful when creating makefile dependencies, to ensure that a tool is relinked if
+any library it uses changes.
+
+=item B<--components>
+
+Print all valid component names.
+
+=item B<--targets-built>
+
+Print the component names for all targets supported by this copy of LLVM.
+
+=item B<--build-mode>
+
+Print the build mode used when LLVM was built (e.g. Debug or Release).
+
+=back
+
+=head1 COMPONENTS
+
+To print a list of all available components, run B<llvm-config
+--components>. In most cases, components correspond directly to LLVM
+libraries. Useful "virtual" components include:
+
+=over
+
+=item B<all>
+
+Includes all LLVM libraries. The default if no components are specified.
+
+=item B<backend>
+
+Includes either a native backend or the C backend.
+
+=item B<engine>
+
+Includes either a native JIT or the bitcode interpreter.
+
+=back
+
+=head1 EXIT STATUS
+
+If B<llvm-config> succeeds, it will exit with 0. Otherwise, if an error
+occurs, it will exit with a non-zero value.
+
+=head1 AUTHORS
+
+Maintained by the LLVM Team (L<http://llvm.org>).
+
+=cut
diff --git a/docs/CommandGuide/llvm-db.pod b/docs/CommandGuide/llvm-db.pod
new file mode 100644
index 0000000..1324176
--- /dev/null
+++ b/docs/CommandGuide/llvm-db.pod
@@ -0,0 +1,16 @@
+=pod
+
+=head1 NAME
+
+llvm-db - LLVM debugger (alpha)
+
+=head1 SYNOPSIS
+
+Details coming soon. Please see
+L<http://llvm.org/docs/SourceLevelDebugging.html> in the meantime.
+
+=head1 AUTHORS
+
+Maintained by the LLVM Team (L<http://llvm.org>).
+
+=cut
diff --git a/docs/CommandGuide/llvm-dis.pod b/docs/CommandGuide/llvm-dis.pod
new file mode 100644
index 0000000..8df382d
--- /dev/null
+++ b/docs/CommandGuide/llvm-dis.pod
@@ -0,0 +1,60 @@
+=pod
+
+=head1 NAME
+
+llvm-dis - LLVM disassembler
+
+=head1 SYNOPSIS
+
+B<llvm-dis> [I<options>] [I<filename>]
+
+=head1 DESCRIPTION
+
+The B<llvm-dis> command is the LLVM disassembler. It takes an LLVM
+bitcode file and converts it into human-readable LLVM assembly language.
+
+If filename is omitted or specified as C<->, B<llvm-dis> reads its
+input from standard input.
+
+If the input is being read from standard input, then B<llvm-dis>
+will send its output to standard output by default. Otherwise, the
+output will be written to a file named after the input file, with
+a C<.ll> suffix added (any existing C<.bc> suffix will first be
+removed). You can override the choice of output file using the
+B<-o> option.
+
+=head1 OPTIONS
+
+=over
+
+=item B<-f>
+
+Force overwrite. Normally, B<llvm-dis> will refuse to overwrite
+an output file that already exists. With this option, B<llvm-dis>
+will overwrite the output file.
+
+=item B<--help>
+
+Print a summary of command line options.
+
+=item B<-o> F<filename>
+
+Specify the output file name. If F<filename> is -, then the output is sent
+to standard output.
+
+=back
+
+=head1 EXIT STATUS
+
+If B<llvm-dis> succeeds, it will exit with 0. Otherwise, if an error
+occurs, it will exit with a non-zero value.
+
+=head1 SEE ALSO
+
+L<llvm-as|llvm-as>
+
+=head1 AUTHORS
+
+Maintained by the LLVM Team (L<http://llvm.org>).
+
+=cut
diff --git a/docs/CommandGuide/llvm-extract.pod b/docs/CommandGuide/llvm-extract.pod
new file mode 100644
index 0000000..d916612
--- /dev/null
+++ b/docs/CommandGuide/llvm-extract.pod
@@ -0,0 +1,63 @@
+=pod
+
+=head1 NAME
+
+llvm-extract - extract a function from an LLVM module
+
+=head1 SYNOPSIS
+
+B<llvm-extract> [I<options>] B<--func> I<function-name> [I<filename>]
+
+=head1 DESCRIPTION
+
+The B<llvm-extract> command takes the name of a function and extracts it from
+the specified LLVM bitcode file. It is primarily used as a debugging tool to
+reduce test cases from larger programs that are triggering a bug.
+
+In addition to extracting the bitcode of the specified function,
+B<llvm-extract> will also remove unreachable global variables, prototypes, and
+unused types.
+
+The B<llvm-extract> command reads its input from standard input if filename is
+omitted or if filename is -. The output is always written to standard output,
+unless the B<-o> option is specified (see below).
+
+=head1 OPTIONS
+
+=over
+
+=item B<-f>
+
+Force overwrite. Normally, B<llvm-extract> will refuse to overwrite an
+output file that already exists. With this option, B<llvm-extract>
+will overwrite the output file and replace it with new bitcode.
+
+=item B<--func> I<function-name>
+
+Extract the function named I<function-name> from the LLVM bitcode.
+
+=item B<--help>
+
+Print a summary of command line options.
+
+=item B<-o> I<filename>
+
+Specify the output filename. If filename is "-" (the default), then
+B<llvm-extract> sends its output to standard output.
+
+=back
+
+=head1 EXIT STATUS
+
+If B<llvm-extract> succeeds, it will exit with 0. Otherwise, if an error
+occurs, it will exit with a non-zero value.
+
+=head1 SEE ALSO
+
+L<bugpoint|bugpoint>
+
+=head1 AUTHORS
+
+Maintained by the LLVM Team (L<http://llvm.org>).
+
+=cut
diff --git a/docs/CommandGuide/llvm-ld.pod b/docs/CommandGuide/llvm-ld.pod
new file mode 100644
index 0000000..d655737
--- /dev/null
+++ b/docs/CommandGuide/llvm-ld.pod
@@ -0,0 +1,269 @@
+=pod
+
+=head1 NAME
+
+llvm-ld - LLVM linker
+
+=head1 SYNOPSIS
+
+B<llvm-ld> <options> <files>
+
+=head1 DESCRIPTION
+
+The B<llvm-ld> tool takes a set of LLVM bitcode files and links them
+together into a single LLVM bitcode file. The output bitcode file can be
+another bitcode file or an executable bitcode program. Using additional
+options, B<llvm-ld> is able to produce native code executables.
+
+The B<llvm-ld> tool is the main linker for LLVM. It is used to link together
+the output of LLVM front-end compilers and run "link time" optimizations (mostly
+the inter-procedural kind).
+
+The B<llvm-ld> tool attempts to mimic the interface provided by the default
+system linker so that it can act as a I<drop-in> replacement.
+
+=head2 Search Order
+
+When looking for objects specified on the command line, B<llvm-ld> will search
+for the object first in the current directory and then in the directory
+specified by the B<LLVM_LIB_SEARCH_PATH> environment variable. If it cannot
+find the object, it fails.
+
+When looking for a library specified with the B<-l> option, B<llvm-ld> first
+attempts to load a file with that name from the current directory. If that
+fails, it looks for libI<library>.bc, libI<library>.a, or libI<library>.I<shared
+library extension>, in that order, in each directory added to the library search
+path with the B<-L> option. These directories are searched in the order they
+are specified. If the library cannot be located, then B<llvm-ld> looks in the
+directory specified by the B<LLVM_LIB_SEARCH_PATH> environment variable. If it
+does not find a library there, it fails.
+
+The I<shared library extension> may be I<.so>, I<.dyld>, I<.dll>, or something
+different, depending upon the system.
+
+The B<-L> option is global. It does not matter where it is specified in the
+list of command line arguments; the directory is simply added to the search path
+and is applied to all libraries, preceding or succeeding, in the command line.
+
+=head2 Link order
+
+All object and bitcode files are linked first in the order they were
+specified on the command line. All library files are linked next.
+Some libraries may not be linked into the object program; see below.
+
+=head2 Library Linkage
+
+Object files and static bitcode objects are always linked into the output
+file. Library archives (.a files) load only the objects within the archive
+that define symbols needed by the output file. Hence, libraries should be
+listed after the object files and libraries which need them; otherwise, the
+library may not be linked in, and the dependent library will not have its
+undefined symbols defined.
+
+=head2 Native code generation
+
+The B<llvm-ld> program has limited support for native code generation, when
+using the B<-native> or B<-native-cbe> options. Native code generation is
+performed by converting the linked bitcode into native assembly (.s) or C code
+and running the system compiler (typically gcc) on the result.
+
+=head1 OPTIONS
+
+=head2 General Options
+
+=over
+
+=item B<-help>
+
+Print a summary of command line options.
+
+=item B<-v>
+
+Specifies verbose mode. In this mode the linker will print additional
+information about the actions it takes, programs it executes, etc.
+
+=item B<-stats>
+
+Print statistics.
+
+=item B<-time-passes>
+
+Record the amount of time needed for each pass and print it to standard
+error.
+
+=back
+
+=head2 Input/Output Options
+
+=over
+
+=item B<-o> F<filename>
+
+This overrides the default output file and specifies the name of the file that
+should be generated by the linker. By default, B<llvm-ld> generates a file named
+F<a.out> for compatibility with B<ld>. The output will be written to
+F<filename>.
+
+=item B<-l>F<name>
+
+This option specifies the F<name> of a library to search when resolving symbols
+for the program. Only the base name should be specified as F<name>, without a
+F<lib> prefix or any suffix.
+
+=item B<-L>F<Path>
+
+This option tells B<llvm-ld> to look in F<Path> to find any library subsequently
+specified with the B<-l> option. The paths will be searched in the order in
+which they are specified on the command line. If the library is still not found,
+a small set of system specific directories will also be searched. Note that
+libraries specified with the B<-l> option that occur I<before> any B<-L> options
+will not search the paths given by the B<-L> options following it.
+
+=item B<-link-as-library>
+
+Link the bitcode files together as a library, not an executable. In this mode,
+undefined symbols will be permitted.
+
+=item B<-r>
+
+An alias for -link-as-library.
+
+=item B<-march=>C<target>
+
+Specifies the kind of machine for which code or assembly should be generated.
+
+=item B<-native>
+
+Generate a native machine code executable.
+
+When generating native executables, B<llvm-ld> first checks for a bitcode
+version of the library and links it in, if necessary. If the library is
+missing, B<llvm-ld> skips it. Then, B<llvm-ld> links in the same
+libraries as native code.
+
+In this way, B<llvm-ld> should be able to link in optimized bitcode
+subsets of common libraries and then link in any part of the library that
+hasn't been converted to bitcode.
+
+=item B<-native-cbe>
+
+Generate a native machine code executable with the LLVM C backend.
+
+This option is identical to the B<-native> option, but uses the
+C backend to generate code for the program instead of an LLVM native
+code generator.
+
+=back
+
+=head2 Optimization Options
+
+=over
+
+=item B<-O0>
+
+An alias for the -O1 option.
+
+=item B<-O1>
+
+Optimize for linking speed, not execution speed. The optimizer will attempt to
+reduce the size of the linked program to reduce I/O but will not otherwise
+perform any link-time optimizations.
+
+=item B<-O2>
+
+Perform only the minimal or required set of scalar optimizations.
+
+=item B<-O3>
+
+An alias for the -O2 option.
+
+=item B<-O4>
+
+Perform the standard link time inter-procedural optimizations. This will
+attempt to optimize the program taking the entire program into consideration.
+
+=item B<-O5>
+
+Perform aggressive link time optimizations. This is the same as -O4 but works
+more aggressively to optimize the program.
+
+=item B<-disable-inlining>
+
+Do not run the inlining pass. Functions will not be inlined into other
+functions.
+
+=item B<-disable-opt>
+
+Completely disable optimization. The various B<-On> options will be ignored and
+no link time optimization passes will be run.
+
+=item B<-disable-internalize>
+
+Do not mark all symbols as internal.
+
+=item B<-verify-each>
+
+Run the verification pass after each of the passes to verify intermediate
+results.
+
+=item B<-strip-all>
+
+Strip all debug and symbol information from the executable to make it smaller.
+
+=item B<-strip-debug>
+
+Strip all debug information from the executable to make it smaller.
+
+=item B<-s>
+
+An alias for B<-strip-all>.
+
+=item B<-S>
+
+An alias for B<-strip-debug>.
+
+=item B<-export-dynamic>
+
+An alias for B<-disable-internalize>.
+
+=item B<-load> F<module>
+
+Load an optimization module, F<module>, which is expected to be a dynamic
+library that provides the function name C<RunOptimizations>. This function will
+be passed the PassManager, and the optimization level (values 0-5 based on the
+B<-On> option). This function may add passes to the PassManager that should be
+run. This feature allows the optimization passes of B<llvm-ld> to be extended.
+
+=item B<-post-link-opt>F<Path>
+
+Run post-link optimization program. After linking is completed a bitcode file
+will be generated. It will be passed to the program specified by F<Path> as the
+first argument. The second argument to the program will be the name of a
+temporary file into which the program should place its optimized output. For
+example, the "no-op optimization" would be a simple shell script:
+
+ #!/bin/bash
+ cp $1 $2
+
+=back
+
+=head1 EXIT STATUS
+
+If B<llvm-ld> succeeds, it will exit with 0 return code. If an error occurs,
+it will exit with a non-zero return code.
+
+=head1 ENVIRONMENT
+
+The C<LLVM_LIB_SEARCH_PATH> environment variable is used to find bitcode
+libraries. Any paths specified in this variable will be searched after the C<-L>
+options.
+
+=head1 SEE ALSO
+
+L<llvm-link|llvm-link>
+
+=head1 AUTHORS
+
+Maintained by the LLVM Team (L<http://llvm.org>).
+
+=cut
diff --git a/docs/CommandGuide/llvm-link.pod b/docs/CommandGuide/llvm-link.pod
new file mode 100644
index 0000000..5f4dcb6
--- /dev/null
+++ b/docs/CommandGuide/llvm-link.pod
@@ -0,0 +1,74 @@
+=pod
+
+=head1 NAME
+
+llvm-link - LLVM linker
+
+=head1 SYNOPSIS
+
+B<llvm-link> [I<options>] I<filename ...>
+
+=head1 DESCRIPTION
+
+B<llvm-link> takes several LLVM bitcode files and links them together into a
+single LLVM bitcode file. It writes the output file to standard output, unless
+the B<-o> option is used to specify a filename.
+
+B<llvm-link> attempts to load the input files from the current directory. If
+that fails, it looks for each file in each of the directories specified by the
+B<-L> options on the command line. The library search paths are global; each
+one is searched for every input file if necessary. The directories are searched
+in the order they were specified on the command line.
+
+=head1 OPTIONS
+
+=over
+
+=item B<-L> F<directory>
+
+Add the specified F<directory> to the library search path. When looking for
+libraries, B<llvm-link> will look in F<directory> for them. This option can be
+specified multiple times; B<llvm-link> will search inside these directories in
+the order in which they were specified on the command line.
+
+=item B<-f>
+
+Overwrite output files. By default, B<llvm-link> will not overwrite an output
+file if it already exists.
+
+=item B<-o> F<filename>
+
+Specify the output file name. If F<filename> is C<->, then B<llvm-link> will
+write its output to standard output.
+
+=item B<-d>
+
+If specified, B<llvm-link> prints a human-readable version of the output
+bitcode file to standard error.
+
+=item B<--help>
+
+Print a summary of command line options.
+
+=item B<-v>
+
+Verbose mode. Print information about what B<llvm-link> is doing. This
+typically includes a message for each bitcode file linked in and for each
+library found.
+
+=back
+
+=head1 EXIT STATUS
+
+If B<llvm-link> succeeds, it will exit with 0. Otherwise, if an error
+occurs, it will exit with a non-zero value.
+
+=head1 SEE ALSO
+
+L<gccld|gccld>
+
+=head1 AUTHORS
+
+Maintained by the LLVM Team (L<http://llvm.org>).
+
+=cut
diff --git a/docs/CommandGuide/llvm-nm.pod b/docs/CommandGuide/llvm-nm.pod
new file mode 100644
index 0000000..995ac08
--- /dev/null
+++ b/docs/CommandGuide/llvm-nm.pod
@@ -0,0 +1,122 @@
+=pod
+
+=head1 NAME
+
+llvm-nm - list LLVM bitcode file's symbol table
+
+=head1 SYNOPSIS
+
+B<llvm-nm> [I<options>] [I<filenames...>]
+
+=head1 DESCRIPTION
+
+The B<llvm-nm> utility lists the names of symbols from the LLVM bitcode files,
+or B<ar> archives containing LLVM bitcode files, named on the command line.
+Each symbol is listed along with some simple information about its provenance.
+If no file name is specified, or I<-> is used as a file name, B<llvm-nm> will
+process a bitcode file on its standard input stream.
+
+B<llvm-nm>'s default output format is the traditional BSD B<nm> output format.
+Each such output record consists of an (optional) 8-digit hexadecimal address,
+followed by a type code character, followed by a name, for each symbol. One
+record is printed per line; fields are separated by spaces. When the address is
+omitted, it is replaced by 8 spaces.
+
+Type code characters currently supported, and their meanings, are as follows:
+
+=over
+
+=item U
+
+Named object is referenced but undefined in this bitcode file
+
+=item C
+
+Common (multiple definitions link together into one def)
+
+=item W
+
+Weak reference (multiple definitions link together into zero or one definitions)
+
+=item t
+
+Local function (text) object
+
+=item T
+
+Global function (text) object
+
+=item d
+
+Local data object
+
+=item D
+
+Global data object
+
+=item ?
+
+Something unrecognizable
+
+=back
+
+Because LLVM bitcode files typically contain objects that are not considered to
+have addresses until they are linked into an executable image or dynamically
+compiled "just-in-time", B<llvm-nm> does not print an address for any symbol,
+even symbols which are defined in the bitcode file.
+
+=head1 OPTIONS
+
+=over
+
+=item B<-P>
+
+Use POSIX.2 output format. Alias for B<--format=posix>.
+
+=item B<-B> (default)
+
+Use BSD output format. Alias for B<--format=bsd>.
+
+=item B<--help>
+
+Print a summary of command-line options and their meanings.
+
+=item B<--defined-only>
+
+Print only symbols defined in this bitcode file (as opposed to
+symbols which may be referenced by objects in this file, but not
+defined in this file.)
+
+=item B<--extern-only>, B<-g>
+
+Print only symbols whose definitions are external; that is, accessible
+from other bitcode files.
+
+=item B<--undefined-only>, B<-u>
+
+Print only symbols referenced but not defined in this bitcode file.
+
+=item B<--format=>I<fmt>, B<-f>
+
+Select an output format; I<fmt> may be I<sysv>, I<posix>, or I<bsd>. The
+default is I<bsd>.
+
+=back
+
+=head1 BUGS
+
+B<llvm-nm> cannot demangle C++ mangled names, like GNU B<nm> can.
+
+=head1 EXIT STATUS
+
+B<llvm-nm> exits with an exit code of zero.
+
+=head1 SEE ALSO
+
+L<llvm-dis|llvm-dis>, ar(1), nm(1)
+
+=head1 AUTHOR
+
+Maintained by the LLVM Team (L<http://llvm.org>).
+
+=cut
diff --git a/docs/CommandGuide/llvm-prof.pod b/docs/CommandGuide/llvm-prof.pod
new file mode 100644
index 0000000..381387d
--- /dev/null
+++ b/docs/CommandGuide/llvm-prof.pod
@@ -0,0 +1,57 @@
+=pod
+
+=head1 NAME
+
+llvm-prof - print execution profile of LLVM program
+
+=head1 SYNOPSIS
+
+B<llvm-prof> [I<options>] [I<bitcode file>] [I<llvmprof.out>]
+
+=head1 DESCRIPTION
+
+The B<llvm-prof> tool reads in an F<llvmprof.out> file (which can
+optionally use a specific file with the third program argument), a bitcode file
+for the program, and produces a human readable report, suitable for determining
+where the program hotspots are.
+
+This program is often used in conjunction with the F<utils/profile.pl>
+script. This script automatically instruments a program, runs it with the JIT,
+then runs B<llvm-prof> to format a report. To get more information about
+F<utils/profile.pl>, execute it with the B<--help> option.
+
+=head1 OPTIONS
+
+=over
+
+=item B<--annotated-llvm> or B<-A>
+
+In addition to the normal report printed, print out the code for the
+program, annotated with execution frequency information. This can be
+particularly useful when trying to visualize how frequently basic blocks
+are executed. This is most useful with basic block profiling
+information or better.
+
+=item B<--print-all-code>
+
+Using this option enables the B<--annotated-llvm> option, but it
+prints the entire module, instead of just the most commonly executed
+functions.
+
+=item B<--time-passes>
+
+Record the amount of time needed for each pass and print it to standard
+error.
+
+=back
+
+=head1 EXIT STATUS
+
+B<llvm-prof> returns 1 if it cannot load the bitcode file or the profile
+information. Otherwise, it exits with zero.
+
+=head1 AUTHOR
+
+B<llvm-prof> is maintained by the LLVM Team (L<http://llvm.org>).
+
+=cut
diff --git a/docs/CommandGuide/llvm-ranlib.pod b/docs/CommandGuide/llvm-ranlib.pod
new file mode 100644
index 0000000..130edb0
--- /dev/null
+++ b/docs/CommandGuide/llvm-ranlib.pod
@@ -0,0 +1,52 @@
+=pod
+
+=head1 NAME
+
+llvm-ranlib - Generate index for LLVM archive
+
+=head1 SYNOPSIS
+
+B<llvm-ranlib> [--version] [--help] <archive-file>
+
+=head1 DESCRIPTION
+
+The B<llvm-ranlib> command is similar to the common Unix utility, C<ranlib>. It
+adds or updates the symbol table in an LLVM archive file. Note that using the
+B<llvm-ar> modifier F<s> is usually more efficient than running B<llvm-ranlib>
+which is provided only for completeness and compatibility. Unlike other
+implementations of C<ranlib>, B<llvm-ranlib> indexes LLVM bitcode files, not
+native object modules. You can list the contents of the symbol table with the
+C<llvm-nm -s> command.
+
+=head1 OPTIONS
+
+=over
+
+=item F<archive-file>
+
+Specifies the archive-file to which the symbol table is added or updated.
+
+=item B<--version>
+
+Print the version of B<llvm-ranlib> and exit without building a symbol table.
+
+=item B<--help>
+
+Print usage help for B<llvm-ranlib> and exit without building a symbol table.
+
+=back
+
+=head1 EXIT STATUS
+
+If B<llvm-ranlib> succeeds, it will exit with 0. If an error occurs, a non-zero
+exit code will be returned.
+
+=head1 SEE ALSO
+
+L<llvm-ar|llvm-ar>, ranlib(1)
+
+=head1 AUTHORS
+
+Maintained by the LLVM Team (L<http://llvm.org>).
+
+=cut
diff --git a/docs/CommandGuide/llvm-upgrade.pod b/docs/CommandGuide/llvm-upgrade.pod
new file mode 100644
index 0000000..fd84eb4
--- /dev/null
+++ b/docs/CommandGuide/llvm-upgrade.pod
@@ -0,0 +1,66 @@
+=pod
+
+=head1 NAME
+
+llvm-upgrade - LLVM assembly upgrade tool
+
+=head1 SYNOPSIS
+
+B<llvm-upgrade> [I<options>] [I<filename>]
+
+=head1 DESCRIPTION
+
+B<llvm-upgrade> is the LLVM assembly upgrade tool. It reads a file containing
+human-readable LLVM assembly language, and upgrades that assembly to the current
+version of LLVM. If the input is in the form currently accepted by LLVM, then
+no upgrades are performed.
+
+The expected usage of this tool is as a filter, like this:
+
+=over
+
+B<llvm-1.9/bin/llvm-dis < 1.9.bc | llvm-upgrade | llvm-2.0/bin/llvm-as -o 2.0.bc>
+
+=back
+
+If F<filename> is omitted or is C<->, then B<llvm-upgrade> reads its input from
+standard input.
+
+If an output file is not specified with the B<-o> option, then
+B<llvm-upgrade> sends its output to standard output.
+
+=head1 OPTIONS
+
+=over
+
+=item B<-f>
+
+Force overwrite. Normally, B<llvm-upgrade> will refuse to overwrite an
+output file that already exists. With this option, B<llvm-upgrade>
+will overwrite the output file.
+
+=item B<--help>
+
+Print a summary of command line options.
+
+=item B<-o> F<filename>
+
+Specify the output file name. If F<filename> is C<->, then B<llvm-upgrade>
+sends its output to standard output.
+
+=back
+
+=head1 EXIT STATUS
+
+If B<llvm-upgrade> succeeds, it will exit with 0. Otherwise, if an error
+occurs, it will exit with a non-zero value.
+
+=head1 SEE ALSO
+
+L<llvm-as|llvm-as>, L<llvm-dis|llvm-dis>
+
+=head1 AUTHORS
+
+Maintained by the LLVM Team (L<http://llvm.org>).
+
+=cut
diff --git a/docs/CommandGuide/llvm2cpp.pod b/docs/CommandGuide/llvm2cpp.pod
new file mode 100644
index 0000000..4b86ae0
--- /dev/null
+++ b/docs/CommandGuide/llvm2cpp.pod
@@ -0,0 +1,217 @@
+=pod
+
+=head1 NAME
+
+llvm2cpp - LLVM bitcode to LLVM C++ IR translator
+
+=head1 SYNOPSIS
+
+B<llvm2cpp> [I<options>] [I<filename>]
+
+=head1 DESCRIPTION
+
+B<llvm2cpp> translates from LLVM bitcode (.bc files) to a
+corresponding C++ source file that will make calls against the LLVM C++ API to
+build the same module as the input. By default, the C++ output is a complete
+program that builds the module, verifies it and then emits the module as
+LLVM assembly. This technique assists with testing because the input to
+B<llvm2cpp> and the output of the generated C++ program should be identical.
+
+If F<filename> is omitted or is C<->, then B<llvm2cpp> reads its input from
+standard input.
+
+If an output file is not specified with the B<-o> option, then
+B<llvm2cpp> sends its output to a file or standard output by following
+these rules:
+
+=over
+
+=item *
+
+If the input is standard input, then the output is standard output.
+
+=item *
+
+If the input is a file that ends with C<.bc>, then the output file is of
+the same name, except that the suffix is changed to C<.cpp>.
+
+=item *
+
+If the input is a file that does not end with the C<.bc> suffix, then the
+output file has the same name as the input file, except that the C<.cpp>
+suffix is appended.
+
+=back
+
+=head1 OPTIONS
+
+=over
+
+=item B<-f>
+
+Force overwrite. Normally, B<llvm2cpp> will refuse to overwrite an
+output file that already exists. With this option, B<llvm2cpp>
+will overwrite the output file and replace it with new C++ source code.
+
+=item B<--help>
+
+Print a summary of command line options.
+
+=item B<-f>
+
+Normally, B<llvm2cpp> will not overwrite an existing output file. With this
+option, that default behavior is changed and the program will overwrite existing
+output files.
+
+=item B<-o> F<filename>
+
+Specify the output file name. If F<filename> is C<->, then B<llvm2cpp>
+sends its output to standard output.
+
+=item B<-funcname> F<functionName>
+
+Specify the name of the function to be generated. The generated code contains a
+single function that produces the input module. By default its name is
+I<makeLLVMModule>. The B<-funcname> option overrides this default and allows
+you to control the name of the generated function. This is handy in conjunction
+with the B<-fragment> option when you only want B<llvm2cpp> to generate a
+single function that produces the module. With both options, such generated code
+could be I<#included> into another program.
+
+=item B<-for>
+
+Specify the name of the thing for which C++ code should be generated. By default
+the entire input module is re-generated. However, use of the various B<-gen-*>
+options can restrict what is produced. This option indicates what that
+restriction is.
+
+=item B<-gen-program>
+
+Specify that the output should be a complete program. Such program will recreate
+B<llvm2cpp>'s input as an LLVM module, verify that module, and then write out
+the module in LLVM assembly format. This is useful for doing identity tests
+where the output of the generated program is identical to the input to
+B<llvm2cpp>. The LLVM DejaGnu test suite can make use of this fact. This is the
+default form of generated output.
+
+If the B<-for> option is given with this option, it specifies the module
+identifier to use for the module created.
+
+=item B<-gen-module>
+
+Specify that the output should be a function that regenerates the module. It is
+assumed that this output will be #included into another program that has already
+arranged for the correct header files to be #included. The function generated
+takes no arguments and returns a I<Module*>.
+
+If the B<-for> option is given with this option, it specifies the module
+identifier to use in creating the module returned by the generated function.
+
+=item B<-gen-contents>
+
+Specify that the output should be a function that adds the contents of the input
+module to another module. It is assumed that the output will be #included into
+another program that has already arranged for the correct header files to be
+#included. The function generated takes a single argument of type I<Module*> and
+returns that argument. Note that Module level attributes such as endianness,
+pointer size, target triple and inline asm are not passed on from the input
+module to the destination module. Only the sub-elements of the module (types,
+constants, functions, global variables) will be added to the input module.
+
+If the B<-for> option is given with this option, it specifies the module
+identifier to set in the input module by the generated function.
+
+=item B<-gen-function>
+
+Specify that the output should be a function that produces the definitions
+necessary for a specific function to be added to a module. It is assumed that
+the output will be #included into another program that has already arranged
+for the correct header files to be #included. The function generated takes a
+single argument of type I<Module*> and returns the I<Function*> that it added to
+the module. Note that only those things (types, constants, etc.) directly
+needed in the definition of the function will be placed in the generated
+function.
+
+The B<-for> option must be given with this option or an error will be produced.
+The value of the option must be the name of a function in the input module for
+which code should be generated. If the named function does not exist an error
+will be produced.
+
+=item B<-gen-inline>
+
+This option is analogous to B<-gen-function> except that the generated
+function will not re-produce the target function's definition. Instead, the body
+of the target function is inserted into some other function passed as an
+argument to the generated function. Similarly any arguments to the function must
+be passed to the generated function. The result of the generated function is the
+first basic block of the target function.
+
+The B<-for> option works the same way as it does for B<-gen-function>.
+
+=item B<-gen-variable>
+
+Specify that the output should be a function that produces the definitions
+necessary for a specific global variable to be added to a module. It is assumed
+that the output will be #included into another program that has already arranged
+for the correct header files to be #included. The function generated takes a
+single argument of type I<Module*> and returns the I<GlobalVariable*> that it
+added to the module. Note that only those things (types, constants, etc.)
+directly needed in the definition of the global variable will be placed in the
+generated function.
+
+The B<-for> option must be given with this option or an error will be produced.
+The value of the option must be the name of a global variable in the input
+module for which code should be generated. If the named global variable does not
+exist an error will be produced.
+
+=item B<-gen-type>
+
+Specify that the output should be a function that produces the definitions
+necessary for a specific type to be added to a module. It is assumed that the
+output will be #included into another program that has already arranged for the
+correct header files to be #included. The function generated takes a single
+argument of type I<Module*> and returns the I<Type*> that it added to the
+module. Note that the generated function will only add the necessary type
+definitions to (possibly recursively) define the requested type.
+
+The B<-for> option must be given with this option or an error will be produced.
+The value of the option must be the name of a global type in the input module
+for which code should be generated. If the named type does not exist an error
+will be produced.
+
+=item B<-stats>
+
+Show pass statistics (not interesting in this program).
+
+=item B<-time-passes>
+
+Show pass timing statistics (not interesting in this program).
+
+=item B<-version>
+
+Show the version number of this program.
+
+=back
+
+
+=head1 EXIT STATUS
+
+If B<llvm2cpp> succeeds, it will exit with 0. Otherwise, if an error
+occurs, it will exit with a non-zero value.
+
+=head1 SEE ALSO
+
+L<llvm-as|llvm-as>, L<tblgen|tblgen>
+
+=head1 NOTES
+
+This tool may be removed from a future version of LLVM. Instead, its
+functionality may be incorporated into the llc tool. It would then act similarly
+to other targets except its output would be C++ source that could be compiled to
+construct the input program.
+
+=head1 AUTHORS
+
+Written by Reid Spencer (L<http://hlvm.org>).
+
+=cut
diff --git a/docs/CommandGuide/llvmc.pod b/docs/CommandGuide/llvmc.pod
new file mode 100644
index 0000000..5e6cc95
--- /dev/null
+++ b/docs/CommandGuide/llvmc.pod
@@ -0,0 +1,431 @@
+=pod
+
+=head1 NAME
+
+llvmc - The LLVM Compiler Driver (experimental)
+
+=head1 SYNOPSIS
+
+B<llvmc> [I<options>] [I<filenames>...]
+
+=head1 DESCRIPTION
+
+B<llvmc> is a configurable driver for invoking other LLVM (and non-LLVM) tools
+in order to compile, optimize and link software for multiple languages. For
+those familiar with FSF's B<gcc> tool, it is very similar. Please note that
+B<llvmc> is considered an experimental tool. B<llvmc> has the following goals:
+
+=over
+
+=item * provide a single point of access to the LLVM tool set,
+
+=item * hide the complexities of the LLVM tools through a single interface,
+
+=item * make integration of existing non-LLVM tools simple,
+
+=item * extend the capabilities of minimal front ends, and
+
+=item * make the interface for compiling consistent for all languages.
+
+=back
+
+The tool itself does nothing with a user's program. It merely invokes other
+tools to get the compilation tasks done.
+
+The options supported by B<llvmc> generalize the compilation process and
+provide a consistent and simple interface for multiple programming languages.
+This makes it easier for developers to get their software compiled with LLVM.
+Without B<llvmc>, developers would need to understand how to invoke the
+front-end compiler, optimizer, assembler, and linker in order to compile their
+programs. B<llvmc>'s sole mission is to trivialize that process.
+
+=head2 Basic Operation
+
+B<llvmc> always takes the following basic actions:
+
+=over
+
+=item * Command line options and filenames are collected.
+
+The command line options provide the marching orders to B<llvmc> on what actions
+it should perform. This is the I<request> the user is making of B<llvmc> and it
+is interpreted first.
+
+=item * Configuration files are read.
+
+Based on the options and the suffixes of the filenames presented, a set of
+configuration files are read to configure the actions B<llvmc> will take.
+Configuration files are provided by either LLVM or the front end compiler tools
+that B<llvmc> invokes. Users generally don't need to be concerned with the
+contents of the configuration files.
+
+=item * Determine actions to take.
+
+The tool chain needed to complete the task is determined. This is the primary
+work of B<llvmc>. It breaks the request specified by the command line options
+into a set of basic actions to be done:
+
+=over
+
+=item * Pre-processing: gathering/filtering compiler input (optional).
+
+=item * Translation: source language to bitcode conversion.
+
+=item * Assembly: bitcode to native code conversion.
+
+=item * Optimization: conversion of bitcode to something that runs faster.
+
+=item * Linking: combining multiple bitcode files to produce executable program.
+
+=back
+
+=item * Execute actions.
+
+The actions determined previously are executed sequentially and then
+B<llvmc> terminates.
+
+=back
+
+=head1 OPTIONS
+
+=head2 Control Options
+
+Control options tell B<llvmc> what to do at a high level. The
+following control options are defined:
+
+=over
+
+=item B<-c> or B<--compile>
+
+This option specifies that the linking phase is not to be run. All
+previous phases, if applicable, will run. This is generally how a given
+bitcode file is compiled and optimized for a source language module.
+
+=item B<-k> or B<--link> or default
+
+This option (or the lack of any control option) specifies that all stages
+of compilation, optimization, and linking should be attempted. Source files
+specified on the command line will be compiled and linked with objects and
+libraries also specified.
+
+=item B<-S>
+
+This option specifies that compilation should end in the creation of
+an LLVM assembly file that can be later converted to an LLVM object
+file.
+
+=item B<-E>
+
+This option specifies that no compilation or linking should be
+performed. Only pre-processing, if applicable to the language being
+compiled, is performed. For languages that support it, this will
+result in the output containing the raw input to the compiler.
+
+=back
+
+=head2 Optimization Options
+
+Optimization with B<llvmc> is based on goals and specified with
+the following -O options. The specific details of which
+optimizations run are controlled by the configuration files because
+each source language will have different needs.
+
+=over
+
+=item B<-O1> or B<-O0> (default, fast compilation)
+
+Only those optimizations that will hasten the compilation (mostly by reducing
+the output) are applied. In general these are extremely fast and simple
+optimizations that reduce emitted code size. The goal here is not to make the
+resulting program fast but to make the compilation fast. If not specified,
+this is the default level of optimization.
+
+=item B<-O2> (basic optimization)
+
+This level of optimization specifies a balance between generating good code
+that will execute reasonably quickly and not spending too much time optimizing
+the code to get there. For example, this level of optimization may include
+things like global common sub-expression elimination, aggressive dead code
+elimination, and scalar replication.
+
+=item B<-O3> (aggressive optimization)
+
+This level of optimization aggressively optimizes each set of files compiled
+together. However, no link-time inter-procedural optimization is performed.
+This level implies all the optimizations of the B<-O1> and B<-O2> optimization
+levels, and should also provide loop optimizations and compile time
+inter-procedural optimizations. Essentially, this level tries to do as much
+as it can with the input it is given but doesn't do any link time IPO.
+
+=item B<-O4> (link time optimization)
+
+In addition to the previous three levels of optimization, this level of
+optimization aggressively optimizes each program at link time. It employs
+basic analysis and basic link-time inter-procedural optimizations,
+considering the program as a whole.
+
+=item B<-O5> (aggressive link time optimization)
+
+This is the same as B<-O4> except it employs aggressive analyses and
+aggressive inter-procedural optimization.
+
+=item B<-O6> (profile guided optimization: not implemented)
+
+This is the same as B<-O5> except that it employs profile-guided
+re-optimization of the program after it has executed. Note that this implies
+a single level of re-optimization based on run time profile analysis. Once
+the re-optimization has completed, the profiling instrumentation is
+removed and final optimizations are employed.
+
+=item B<-O7> (lifelong optimization: not implemented)
+
+This is the same as B<-O5> and similar to B<-O6> except that re-optimization
+is performed through the life of the program. That is, each run will update
+the profile by which future re-optimizations are directed.
+
+=back
+
+=head2 Input Options
+
+=over
+
+=item B<-l> I<LIBRARY>
+
+This option instructs B<llvmc> to locate a library named I<LIBRARY> and search
+it for unresolved symbols when linking the program.
+
+=item B<-L> F<path>
+
+This option instructs B<llvmc> to add F<path> to the list of places in which
+the linker will search for libraries.
+
+=item B<-x> I<LANGUAGE>
+
+This option instructs B<llvmc> to regard the following input files as
+containing programs in the language I<LANGUAGE>. Normally, input file languages
+are identified by their suffix but this option will override that default
+behavior. The B<-x> option stays in effect until the end of the options or
+a new B<-x> option is encountered.
+
+=back
+
+=head2 Output Options
+
+=over
+
+=item B<-m>I<arch>
+
+This option selects the back end code generator to use. The I<arch> portion
+of the option names the back end to use.
+
+=item B<--native>
+
+Normally, B<llvmc> produces bitcode files at most stages of compilation.
+With this option, B<llvmc> will arrange for native object files to be
+generated with the B<-c> option, native assembly files to be generated
+with the B<-S> option, and native executables to be generated with the
+B<--link> option. In the case of the B<-E> option, the output will not
+differ as there is no I<native> version of pre-processed output.
+
+=item B<-o> F<filename>
+
+Specify the output file name. The contents of the file depend on other
+options.
+
+=back
+
+=head2 Information Options
+
+=over
+
+=item B<-n> or B<--no-op>
+
+This option tells B<llvmc> to do everything but actually execute the
+resulting tools. In combination with the B<-v> option, this causes B<llvmc>
+to merely print out what it would have done.
+
+=item B<-v> or B<--verbose>
+
+This option will cause B<llvmc> to print out (on standard output) each of the
+actions it takes to accomplish the objective. The output will immediately
+precede the invocation of other tools.
+
+=item B<--stats>
+
+Print all statistics gathered during the compilation to the standard error.
+Note that this option is merely passed through to the sub-tools to do with
+as they please.
+
+=item B<--time-passes>
+
+Record the amount of time needed for each optimization pass and print it
+to standard error. Like B<--stats> this option is just passed through to
+the sub-tools to do with as they please.
+
+=item B<--time-programs>
+
+Record the amount of time each program (compilation tool) takes and print
+it to the standard error.
+
+=back
+
+=head2 Language Specific Options
+
+=over
+
+=item B<-T,pre>=I<options>
+
+Pass an arbitrary option to the pre-processor.
+
+=item B<-T,opt>=I<options>
+
+Pass an arbitrary option to the optimizer.
+
+=item B<-T,lnk>=I<options>
+
+Pass an arbitrary option to the linker.
+
+=item B<-T,asm>=I<options>
+
+Pass an arbitrary option to the code generator.
+
+=back
+
+=head2 C/C++ Specific Options
+
+=over
+
+=item B<-I>F<path>
+
+This option is just passed through to a C or C++ front end compiler to tell it
+where include files can be found.
+
+=item B<-D>F<symbol>
+
+This option is just passed through to a C or C++ front end compiler to tell it
+to define a symbol.
+
+=back
+
+=head2 Miscellaneous Options
+
+=over
+
+=item B<--help>
+
+Print a summary of command line options.
+
+=item B<--version>
+
+This option will cause B<llvmc> to print out its version number and terminate.
+
+=back
+
+=head2 Advanced Options
+
+You better know what you're doing if you use these options. Improper use
+of these options can produce drastically wrong results.
+
+=over
+
+=item B<--config-dir> F<dirname>
+
+This option tells B<llvmc> to read configuration data from the I<directory>
+named F<dirname>. Data from such directories will be read in the order
+specified on the command line after all other standard configuration files have
+been read. This allows users or groups of users to conveniently create
+their own configuration directories in addition to the standard ones to which
+they may not have write access.
+
+=back
+
+
+=head2 Unimplemented Options
+
+The options below are not currently implemented in B<llvmc> but will be
+eventually. They are documented here as "future design".
+
+=over
+
+=item B<--show-config> I<[suffixes...]>
+
+When this option is given, the only action taken by B<llvmc> is to show its
+final configuration state in the form of a configuration file. No compilation
+tasks will be conducted when this option is given; processing will stop once
+the configuration has been printed. The optional (comma separated) list of
+suffixes controls what is printed. Without any suffixes, the configuration
+for all languages is printed. With suffixes, only the languages pertaining
+to those file suffixes will be printed. The configuration information is
+printed after all command line options and configuration files have been
+read and processed. This allows the user to verify that the correct
+configuration data has been read by B<llvmc>.
+
+=item B<--config> :I<section>:I<name>=I<value>
+
+This option instructs B<llvmc> to accept I<value> as the value for configuration
+item I<name> in the section named I<section>. This is a quick way to override
+a configuration item on the command line without resorting to changing the
+configuration files.
+
+=item B<--config-only-from> F<dirname>
+
+This option tells B<llvmc> to skip the normal processing of configuration
+files and only configure from the contents of the F<dirname> directory. Multiple
+B<--config-only-from> options may be given in which case the directories are
+read in the order given on the command line.
+
+=item B<--emit-raw-code>
+
+No optimization is done whatsoever. The compilers invoked by B<llvmc> with
+this option given will be instructed to produce raw, unoptimized code. This
+option is useful only to front end language developers and therefore does not
+participate in the list of B<-O> options. This is distinctly different from
+the B<-O0> option (a synonym for B<-O1>) because those optimizations will
+reduce code size to make compilation faster. With B<--emit-raw-code>, only
+the full raw code produced by the compiler will be generated.
+
+=back
+
+
+=head1 EXIT STATUS
+
+If B<llvmc> succeeds, it will exit with 0. Otherwise, if an error
+occurs, it will exit with a non-zero value and no compilation actions
+will be taken. If one of the compilation tools returns a non-zero
+status, pending actions will be discarded and B<llvmc> will return the
+same result code as the failing compilation tool.
+
+=head1 DEFICIENCIES
+
+B<llvmc> is considered an experimental LLVM tool because it has these
+deficiencies:
+
+=over
+
+=item Insufficient support for native linking
+
+Because B<llvm-ld> doesn't handle native linking, neither can B<llvmc>.
+
+=item Poor configuration support
+
+The support for configuring new languages, etc. is weak. There are many
+command line configurations that cannot be achieved with the current
+support. Furthermore the grammar is cumbersome for configuration files.
+Please see L<http://llvm.org/PR686> for further details.
+
+=item Does not handle target specific configurations
+
+This is one of the major deficiencies, also addressed in
+L<http://llvm.org/PR686>.
+
+=back
+
+=head1 SEE ALSO
+
+L<llvm-as|llvm-as>, L<llvm-dis|llvm-dis>, L<llc|llc>, L<llvm-link|llvm-link>
+
+=head1 AUTHORS
+
+Maintained by the LLVM Team (L<http://llvm.org>).
+
+=cut
diff --git a/docs/CommandGuide/llvmgcc.pod b/docs/CommandGuide/llvmgcc.pod
new file mode 100644
index 0000000..0b2b156
--- /dev/null
+++ b/docs/CommandGuide/llvmgcc.pod
@@ -0,0 +1,85 @@
+=pod
+
+=head1 NAME
+
+llvm-gcc - LLVM C front-end
+
+=head1 SYNOPSIS
+
+B<llvm-gcc> [I<options>] I<filename>
+
+=head1 DESCRIPTION
+
+The B<llvm-gcc> command is the LLVM C front end. It is a modified
+version of gcc that compiles C/ObjC programs into native objects, LLVM
+bitcode or LLVM assembly language, depending upon the options.
+
+By default, B<llvm-gcc> compiles to native objects just like GCC does. If the
+B<-emit-llvm> option is given then it will generate LLVM bitcode files instead.
+If B<-S> (assembly) is also given, then it will generate LLVM assembly.
+
+Being derived from the GNU Compiler Collection, B<llvm-gcc> has many
+of gcc's features and accepts most of gcc's options. It handles a
+number of gcc's extensions to the C programming language.
+
+=head1 OPTIONS
+
+=over
+
+=item B<--help>
+
+Print a summary of command line options.
+
+=item B<-S>
+
+Do not generate an LLVM bitcode file. Rather, compile the source
+file into an LLVM assembly language file.
+
+=item B<-c>
+
+Do not generate a linked executable. Rather, compile the source
+file into an LLVM bitcode file. This bitcode file can then be
+linked with other bitcode files later on to generate a full LLVM
+executable.
+
+=item B<-o> I<filename>
+
+Specify the output file to be I<filename>.
+
+=item B<-I> I<directory>
+
+Add a directory to the header file search path. This option can be
+repeated.
+
+=item B<-L> I<directory>
+
+Add I<directory> to the library search path. This option can be
+repeated.
+
+=item B<-l>I<name>
+
+Link in the library libI<name>.[bc | a | so]. This library should
+be a bitcode library.
+
+=item B<-emit-llvm>
+
+Make the output be LLVM bitcode (or assembly) instead of native object (or
+assembly).
+
+=back
+
+=head1 EXIT STATUS
+
+If B<llvm-gcc> succeeds, it will exit with 0. Otherwise, if an error
+occurs, it will exit with a non-zero value.
+
+=head1 SEE ALSO
+
+L<llvm-g++|llvmgxx>
+
+=head1 AUTHORS
+
+Maintained by the LLVM Team (L<http://llvm.org>).
+
+=cut
+
diff --git a/docs/CommandGuide/llvmgxx.pod b/docs/CommandGuide/llvmgxx.pod
new file mode 100644
index 0000000..64b670e
--- /dev/null
+++ b/docs/CommandGuide/llvmgxx.pod
@@ -0,0 +1,85 @@
+=pod
+
+=head1 NAME
+
+llvm-g++ - LLVM C++ front-end
+
+=head1 SYNOPSIS
+
+B<llvm-g++> [I<options>] I<filename>
+
+=head1 DESCRIPTION
+
+The B<llvm-g++> command is the LLVM C++ front end. It is a modified
+version of g++ that compiles C++/ObjC++ programs into native code,
+LLVM bitcode or assembly language, depending upon the options.
+
+By default, B<llvm-g++> compiles to native objects just like GCC does. If the
+B<-emit-llvm> option is given then it will generate LLVM bitcode files instead.
+If B<-S> (assembly) is also given, then it will generate LLVM assembly.
+
+Being derived from the GNU Compiler Collection, B<llvm-g++> has many
+of g++'s features and accepts most of g++'s options. It handles a
+number of g++'s extensions to the C++ programming language.
+
+=head1 OPTIONS
+
+=over
+
+=item B<--help>
+
+Print a summary of command line options.
+
+=item B<-S>
+
+Do not generate an LLVM bitcode file. Rather, compile the source
+file into an LLVM assembly language file.
+
+=item B<-c>
+
+Do not generate a linked executable. Rather, compile the source
+file into an LLVM bitcode file. This bitcode file can then be
+linked with other bitcode files later on to generate a full LLVM
+executable.
+
+=item B<-o> I<filename>
+
+Specify the output file to be I<filename>.
+
+=item B<-I> I<directory>
+
+Add a directory to the header file search path. This option can be
+repeated.
+
+=item B<-L> I<directory>
+
+Add I<directory> to the library search path. This option can be
+repeated.
+
+=item B<-l>I<name>
+
+Link in the library libI<name>.[bc | a | so]. This library should
+be a bitcode library.
+
+=item B<-emit-llvm>
+
+Make the output be LLVM bitcode (or assembly) instead of native object (or
+assembly).
+
+=back
+
+=head1 EXIT STATUS
+
+If B<llvm-g++> succeeds, it will exit with 0. Otherwise, if an error
+occurs, it will exit with a non-zero value.
+
+=head1 SEE ALSO
+
+L<llvm-gcc|llvmgcc>
+
+=head1 AUTHORS
+
+Maintained by the LLVM Team (L<http://llvm.org>).
+
+=cut
+
diff --git a/docs/CommandGuide/manpage.css b/docs/CommandGuide/manpage.css
new file mode 100644
index 0000000..c922564
--- /dev/null
+++ b/docs/CommandGuide/manpage.css
@@ -0,0 +1,256 @@
+/* Based on http://www.perldoc.com/css/perldoc.css */
+
+@import url("../llvm.css");
+
+body { font-family: Arial,Helvetica; }
+
+blockquote { margin: 10pt; }
+
+h1, a { color: #336699; }
+
+
+/*** Top menu style ****/
+.mmenuon {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #ff6600; font-size: 10pt;
+}
+.mmenuoff {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #ffffff; font-size: 10pt;
+}
+.cpyright {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #ffffff; font-size: xx-small;
+}
+.cpyrightText {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #ffffff; font-size: xx-small;
+}
+.sections {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #336699; font-size: 11pt;
+}
+.dsections {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #336699; font-size: 12pt;
+}
+.slink {
+ font-family: Arial,Helvetica; font-weight: normal; text-decoration: none;
+ color: #000000; font-size: 9pt;
+}
+
+.slink2 { font-family: Arial,Helvetica; text-decoration: none; color: #336699; }
+
+.maintitle {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #336699; font-size: 18pt;
+}
+.dblArrow {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #336699; font-size: small;
+}
+.menuSec {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #336699; font-size: small;
+}
+
+.newstext {
+ font-family: Arial,Helvetica; font-size: small;
+}
+
+.linkmenu {
+ font-family: Arial,Helvetica; color: #000000; font-weight: bold;
+ text-decoration: none;
+}
+
+P {
+ font-family: Arial,Helvetica;
+}
+
+PRE {
+ font-size: 10pt;
+}
+.quote {
+ font-family: Times; text-decoration: none;
+ color: #000000; font-size: 9pt; font-style: italic;
+}
+.smstd { font-family: Arial,Helvetica; color: #000000; font-size: x-small; }
+.std { font-family: Arial,Helvetica; color: #000000; }
+.meerkatTitle {
+ font-family: sans-serif; font-size: x-small; color: black; }
+
+.meerkatDescription { font-family: sans-serif; font-size: 10pt; color: black }
+.meerkatCategory {
+ font-family: sans-serif; font-size: 9pt; font-weight: bold; font-style: italic;
+ color: brown; }
+.meerkatChannel {
+ font-family: sans-serif; font-size: 9pt; font-style: italic; color: brown; }
+.meerkatDate { font-family: sans-serif; font-size: xx-small; color: #336699; }
+
+.tocTitle {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #333333; font-size: 10pt;
+}
+
+.toc-item {
+ font-family: Arial,Helvetica; font-weight: bold;
+ color: #336699; font-size: 10pt; text-decoration: underline;
+}
+
+.perlVersion {
+ font-family: Arial,Helvetica; font-weight: bold;
+ color: #336699; font-size: 10pt; text-decoration: none;
+}
+
+.podTitle {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #000000;
+}
+
+.docTitle {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #000000; font-size: 10pt;
+}
+.dotDot {
+ font-family: Arial,Helvetica; font-weight: bold;
+ color: #000000; font-size: 9pt;
+}
+
+.docSec {
+ font-family: Arial,Helvetica; font-weight: normal;
+ color: #333333; font-size: 9pt;
+}
+.docVersion {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #336699; font-size: 10pt;
+}
+
+.docSecs-on {
+ font-family: Arial,Helvetica; font-weight: normal; text-decoration: none;
+ color: #ff0000; font-size: 10pt;
+}
+.docSecs-off {
+ font-family: Arial,Helvetica; font-weight: normal; text-decoration: none;
+ color: #333333; font-size: 10pt;
+}
+
+h2 {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #336699; font-size: medium;
+}
+h1 {
+ font-family: Verdana,Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #336699; font-size: large;
+}
+
+DL {
+ font-family: Arial,Helvetica; font-weight: normal; text-decoration: none;
+ color: #333333; font-size: 10pt;
+}
+
+UL > LI > A {
+ font-family: Arial,Helvetica; font-weight: bold;
+ color: #336699; font-size: 10pt;
+}
+
+.moduleInfo {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #333333; font-size: 11pt;
+}
+
+.moduleInfoSec {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #336699; font-size: 10pt;
+}
+
+.moduleInfoVal {
+ font-family: Arial,Helvetica; font-weight: normal; text-decoration: underline;
+ color: #000000; font-size: 10pt;
+}
+
+.cpanNavTitle {
+ font-family: Arial,Helvetica; font-weight: bold;
+ color: #ffffff; font-size: 10pt;
+}
+.cpanNavLetter {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #333333; font-size: 9pt;
+}
+.cpanCat {
+ font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
+ color: #336699; font-size: 9pt;
+}
+
+.bttndrkblue-bkgd-top {
+ background-color: #225688;
+ background-image: url(/global/mvc_objects/images/bttndrkblue_bgtop.gif);
+}
+.bttndrkblue-bkgd-left {
+ background-color: #225688;
+ background-image: url(/global/mvc_objects/images/bttndrkblue_bgleft.gif);
+}
+.bttndrkblue-bkgd {
+ padding-top: 0px;
+ padding-bottom: 0px;
+ margin-bottom: 0px;
+ margin-top: 0px;
+ background-repeat: no-repeat;
+ background-color: #225688;
+ background-image: url(/global/mvc_objects/images/bttndrkblue_bgmiddle.gif);
+ vertical-align: top;
+}
+.bttndrkblue-bkgd-right {
+ background-color: #225688;
+ background-image: url(/global/mvc_objects/images/bttndrkblue_bgright.gif);
+}
+.bttndrkblue-bkgd-bottom {
+ background-color: #225688;
+ background-image: url(/global/mvc_objects/images/bttndrkblue_bgbottom.gif);
+}
+.bttndrkblue-text a {
+ color: #ffffff;
+ text-decoration: none;
+}
+a.bttndrkblue-text:hover {
+ color: #ffDD3C;
+ text-decoration: none;
+}
+.bg-ltblue {
+ background-color: #f0f5fa;
+}
+
+.border-left-b {
+ background: #f0f5fa url(/i/corner-leftline.gif) repeat-y;
+}
+
+.border-right-b {
+ background: #f0f5fa url(/i/corner-rightline.gif) repeat-y;
+}
+
+.border-top-b {
+ background: #f0f5fa url(/i/corner-topline.gif) repeat-x;
+}
+
+.border-bottom-b {
+ background: #f0f5fa url(/i/corner-botline.gif) repeat-x;
+}
+
+.border-right-w {
+ background: #ffffff url(/i/corner-rightline.gif) repeat-y;
+}
+
+.border-top-w {
+ background: #ffffff url(/i/corner-topline.gif) repeat-x;
+}
+
+.border-bottom-w {
+ background: #ffffff url(/i/corner-botline.gif) repeat-x;
+}
+
+.bg-white {
+ background-color: #ffffff;
+}
+
+.border-left-w {
+ background: #ffffff url(/i/corner-leftline.gif) repeat-y;
+}
diff --git a/docs/CommandGuide/opt.pod b/docs/CommandGuide/opt.pod
new file mode 100644
index 0000000..75b7edd
--- /dev/null
+++ b/docs/CommandGuide/opt.pod
@@ -0,0 +1,138 @@
+=pod
+
+=head1 NAME
+
+opt - LLVM optimizer
+
+=head1 SYNOPSIS
+
+B<opt> [I<options>] [I<filename>]
+
+=head1 DESCRIPTION
+
+The B<opt> command is the modular LLVM optimizer and analyzer. It takes LLVM
+bitcode as input, runs the specified optimizations or analyses on it, and then
+outputs the optimized LLVM bitcode or the analysis results. The function of
+B<opt> depends on whether the B<-analyze> option is given.
+
+When B<-analyze> is specified, B<opt> performs various analyses of LLVM
+bitcode. It will usually print the results on standard output, but in a few
+cases, it will print output to standard error or generate a file with the
+analysis output, which is usually done when the output is meant for another
+program.
+
+While B<-analyze> is I<not> given, B<opt> attempts to produce an optimized
+bitcode file. The optimizations available via B<opt> depend upon what
+libraries were linked into it as well as any additional libraries that have
+been loaded with the B<-load> option. Use the B<-help> option to determine
+what optimizations you can use.
+
+If I<filename> is omitted from the command line or is I<->, B<opt> reads its
+input from standard input. The input must be an LLVM bitcode file.
+
+If an output filename is not specified with the B<-o> option, B<opt>
+writes its output to the standard output.
+
+=head1 OPTIONS
+
+=over
+
+=item B<-f>
+
+Force overwrite. Normally, B<opt> will refuse to overwrite an
+output file that already exists. With this option, B<opt> will
+overwrite the output file and replace it with new bitcode.
+
+=item B<-help>
+
+Print a summary of command line options.
+
+=item B<-o> I<filename>
+
+Specify the output filename.
+
+=item B<-{passname}>
+
+B<opt> provides the ability to run any of LLVM's optimization or analysis passes
+in any order. The B<-help> option lists all the passes available. The order in
+which the options occur on the command line is the order in which they are
+executed (within pass constraints).
+
+=item B<-std-compile-opts>
+
+This is shorthand for a standard list of I<compile time optimization> passes.
+This is typically used to optimize the output from the llvm-gcc front end. It
+might be useful for other front end compilers as well. To discover the full set
+of options available, use the following command:
+
+ llvm-as < /dev/null | opt -std-compile-opts -disable-output -debug-pass=Arguments
+
+=item B<-disable-inlining>
+
+This option is only meaningful when B<-std-compile-opts> is given. It simply
+removes the inlining pass from the standard list.
+
+=item B<-disable-opt>
+
+This option is only meaningful when B<-std-compile-opts> is given. It disables
+most, but not all, of the B<-std-compile-opts>. The ones that remain are
+B<-verify>, B<-lower-setjmp>, and B<-funcresolve>.
+
+=item B<-strip-debug>
+
+This option causes opt to strip debug information from the module before
+applying other optimizations. It is essentially the same as B<-strip> but it
+ensures that stripping of debug information is done first.
+
+=item B<-verify-each>
+
+This option causes opt to add a verify pass after every pass otherwise specified
+on the command line (including B<-verify>). This is useful for cases where it
+is suspected that a pass is creating an invalid module but it is not clear which
+pass is doing it. The combination of B<-std-compile-opts> and B<-verify-each>
+can quickly track down this kind of problem.
+
+=item B<-profile-info-file> I<filename>
+
+Specify the name of the file loaded by the -profile-loader option.
+
+=item B<-stats>
+
+Print statistics.
+
+=item B<-time-passes>
+
+Record the amount of time needed for each pass and print it to standard
+error.
+
+=item B<-debug>
+
+If this is a debug build, this option will enable debug printouts
+from passes which use the I<DEBUG()> macro. See the B<LLVM Programmer's
+Manual>, section I<#DEBUG> for more information.
+
+=item B<-load>=I<plugin>
+
+Load the dynamic object I<plugin>. This object should register new optimization
+or analysis passes. Once loaded, the object will add new command line options to
+enable various optimizations or analyses. To see the new complete list of
+optimizations, use the B<-help> and B<-load> options together. For example:
+
+ opt -load=plugin.so -help
+
+=item B<-p>
+
+Print module after each transformation.
+
+=back
+
+=head1 EXIT STATUS
+
+If B<opt> succeeds, it will exit with 0. Otherwise, if an error
+occurs, it will exit with a non-zero value.
+
+=head1 AUTHORS
+
+Maintained by the LLVM Team (L<http://llvm.org>).
+
+=cut
diff --git a/docs/CommandGuide/stkrc.pod b/docs/CommandGuide/stkrc.pod
new file mode 100644
index 0000000..4d8b4c9
--- /dev/null
+++ b/docs/CommandGuide/stkrc.pod
@@ -0,0 +1,96 @@
+=pod
+
+=head1 NAME
+
+stkrc - Stacker Compiler
+
+=head1 SYNOPSIS
+
+B<stkrc> [I<options>] [I<filename>]
+
+=head1 DESCRIPTION
+
+The B<stkrc> command is the compiler for the Stacker language. Stacker is a
+simple stack based, Forth-like language that was written as a demonstration
+language for LLVM. For details on the language, please see
+L<http://llvm.org/docs/Stacker.html> . The B<stkrc> compiler is fairly
+minimal. It compiles to bitcode only and doesn't perform any optimizations.
+The output of stkrc (a bitcode file) can be piped through other LLVM tools
+for optimization and linking.
+
+If F<filename> is omitted or is C<->, then B<stkrc> reads its input
+from standard input. This is useful for combining the tool into a pipeline.
+
+If an output file is not specified with the B<-o> option, then
+B<stkrc> sends its output to a file or standard output by following
+these rules:
+
+=over
+
+=item *
+
+If the input is standard input, then the output is standard output.
+
+=item *
+
+If the input is a file that ends with C<.st>, then the output file is of
+the same name, except that the suffix is changed to C<.bc>.
+
+=item *
+
+If the input is a file that does not end with the C<.st> suffix, then the
+output file has the same name as the input file, except that the C<.bc>
+suffix is appended.
+
+=back
+
+=head1 OPTIONS
+
+=over
+
+=item B<-o> F<filename>
+
+Specify the output file name. If F<filename> is C<->, then B<stkrc>
+sends its output to standard output.
+
+=item B<-stats>
+
+Print statistics acquired during compilation.
+
+=item B<-time-passes>
+
+Record the amount of time needed for each pass and print it to standard
+error.
+
+=item B<-f>
+
+Force the output to be written. Normally, B<stkrc> won't overwrite an existing
+bitcode file. This option overrides that behavior.
+
+=item B<-s> F<stacksize>
+
+Specify the stack size for the program. The default stack size, 1024, should be
+sufficient for most programs. For very large programs, especially those that
+recurse a lot, you might want to provide a larger value. Each unit of this
+value consumes 8 bytes of memory.
+
+=item B<-help>
+
+Print a summary of command line options.
+
+=back
+
+=head1 EXIT STATUS
+
+If B<stkrc> succeeds, it will exit with 0. Otherwise, if an error
+occurs, it will exit with a non-zero value, usually 1.
+
+=head1 SEE ALSO
+
+L<llvm-as>, L<http://llvm.org/docs/Stacker.html>
+
+=head1 AUTHORS
+
+Maintained by the LLVM Team (L<http://llvm.org>).
+
+=cut
diff --git a/docs/CommandGuide/tblgen.pod b/docs/CommandGuide/tblgen.pod
new file mode 100644
index 0000000..c8244af
--- /dev/null
+++ b/docs/CommandGuide/tblgen.pod
@@ -0,0 +1,115 @@
+
+=pod
+
+=head1 NAME
+
+tblgen - Target Description To C++ Code Generator
+
+=head1 SYNOPSIS
+
+B<tblgen> [I<options>] [I<filename>]
+
+=head1 DESCRIPTION
+
+B<tblgen> translates from target description (.td) files into C++ code that can
+be included in the definition of an LLVM target library. Most users of LLVM will
+not need to use this program. It is only for assisting with writing an LLVM
+target backend.
+
+The input and output of B<tblgen> are beyond the scope of this short
+introduction. Please see the I<CodeGeneration> page in the LLVM documentation.
+
+The F<filename> argument specifies the name of a Target Description (.td) file
+to read as input.
+
+=head1 OPTIONS
+
+=over
+
+=item B<--help>
+
+Print a summary of command line options.
+
+=item B<-o> F<filename>
+
+Specify the output file name. If F<filename> is C<->, then B<tblgen>
+sends its output to standard output.
+
+=item B<-I> F<directory>
+
+Specify where to find other target description files for inclusion. The
+F<directory> value should be a full or partial path to a directory that contains
+target description files.
+
+=item B<-asmwriternum> F<N>
+
+Make -gen-asm-writer emit assembly writer number F<N>.
+
+=item B<-class> F<class Name>
+
+Print the enumeration list for this class.
+
+=item B<-print-records>
+
+Print all records to standard output (default).
+
+=item B<-print-enums>
+
+Print enumeration values for a class.
+
+=item B<-gen-emitter>
+
+Generate machine code emitter.
+
+=item B<-gen-register-enums>
+
+Generate the enumeration values for all registers.
+
+=item B<-gen-register-desc>
+
+Generate a register info description for each register.
+
+=item B<-gen-register-desc-header>
+
+Generate a register info description header for each register.
+
+=item B<-gen-instr-enums>
+
+Generate enumeration values for instructions.
+
+=item B<-gen-instr-desc>
+
+Generate instruction descriptions.
+
+=item B<-gen-asm-writer>
+
+Generate the assembly writer.
+
+=item B<-gen-dag-isel>
+
+Generate a DAG (Directed Acyclic Graph) instruction selector.
+
+=item B<-gen-subtarget>
+
+Generate subtarget enumerations.
+
+=item B<-gen-intrinsic>
+
+Generate intrinsic information.
+
+=item B<-version>
+
+Show the version number of this program.
+
+=back
+
+=head1 EXIT STATUS
+
+If B<tblgen> succeeds, it will exit with 0. Otherwise, if an error
+occurs, it will exit with a non-zero value.
+
+=head1 AUTHORS
+
+Maintained by The LLVM Team (L<http://llvm.org>).
+
+=cut
diff --git a/docs/CommandLine.html b/docs/CommandLine.html
new file mode 100644
index 0000000..bf80ec0
--- /dev/null
+++ b/docs/CommandLine.html
@@ -0,0 +1,1938 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+ <title>CommandLine 2.0 Library Manual</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<div class="doc_title">
+ CommandLine 2.0 Library Manual
+</div>
+
+<ol>
+ <li><a href="#introduction">Introduction</a></li>
+
+ <li><a href="#quickstart">Quick Start Guide</a>
+ <ol>
+ <li><a href="#bool">Boolean Arguments</a></li>
+ <li><a href="#alias">Argument Aliases</a></li>
+ <li><a href="#onealternative">Selecting an alternative from a
+ set of possibilities</a></li>
+ <li><a href="#namedalternatives">Named alternatives</a></li>
+ <li><a href="#list">Parsing a list of options</a></li>
+ <li><a href="#bits">Collecting options as a set of flags</a></li>
+ <li><a href="#description">Adding freeform text to help output</a></li>
+ </ol></li>
+
+ <li><a href="#referenceguide">Reference Guide</a>
+ <ol>
+ <li><a href="#positional">Positional Arguments</a>
+ <ul>
+ <li><a href="#--">Specifying positional options with hyphens</a></li>
+ <li><a href="#getPosition">Determining absolute position with
+ getPosition</a></li>
+ <li><a href="#cl::ConsumeAfter">The <tt>cl::ConsumeAfter</tt>
+ modifier</a></li>
+ </ul></li>
+
+ <li><a href="#storage">Internal vs External Storage</a></li>
+
+ <li><a href="#attributes">Option Attributes</a></li>
+
+ <li><a href="#modifiers">Option Modifiers</a>
+ <ul>
+ <li><a href="#hiding">Hiding an option from <tt>--help</tt>
+ output</a></li>
+ <li><a href="#numoccurrences">Controlling the number of occurrences
+ required and allowed</a></li>
+ <li><a href="#valrequired">Controlling whether or not a value must be
+ specified</a></li>
+ <li><a href="#formatting">Controlling other formatting options</a></li>
+ <li><a href="#misc">Miscellaneous option modifiers</a></li>
+ </ul></li>
+
+ <li><a href="#toplevel">Top-Level Classes and Functions</a>
+ <ul>
+ <li><a href="#cl::ParseCommandLineOptions">The
+ <tt>cl::ParseCommandLineOptions</tt> function</a></li>
+ <li><a href="#cl::ParseEnvironmentOptions">The
+ <tt>cl::ParseEnvironmentOptions</tt> function</a></li>
+ <li><a href="#cl::SetVersionPrinter">The <tt>cl::SetVersionPrinter</tt>
+ function</a></li>
+ <li><a href="#cl::opt">The <tt>cl::opt</tt> class</a></li>
+ <li><a href="#cl::list">The <tt>cl::list</tt> class</a></li>
+ <li><a href="#cl::bits">The <tt>cl::bits</tt> class</a></li>
+ <li><a href="#cl::alias">The <tt>cl::alias</tt> class</a></li>
+ <li><a href="#cl::extrahelp">The <tt>cl::extrahelp</tt> class</a></li>
+ </ul></li>
+
+ <li><a href="#builtinparsers">Builtin parsers</a>
+ <ul>
+ <li><a href="#genericparser">The Generic <tt>parser&lt;t&gt;</tt>
+ parser</a></li>
+ <li><a href="#boolparser">The <tt>parser&lt;bool&gt;</tt>
+ specialization</a></li>
+ <li><a href="#boolOrDefaultparser">The <tt>parser&lt;boolOrDefault&gt;</tt>
+ specialization</a></li>
+ <li><a href="#stringparser">The <tt>parser&lt;string&gt;</tt>
+ specialization</a></li>
+ <li><a href="#intparser">The <tt>parser&lt;int&gt;</tt>
+ specialization</a></li>
+ <li><a href="#doubleparser">The <tt>parser&lt;double&gt;</tt> and
+ <tt>parser&lt;float&gt;</tt> specializations</a></li>
+ </ul></li>
+ </ol></li>
+ <li><a href="#extensionguide">Extension Guide</a>
+ <ol>
+ <li><a href="#customparser">Writing a custom parser</a></li>
+ <li><a href="#explotingexternal">Exploiting external storage</a></li>
+ <li><a href="#dynamicopts">Dynamically adding command line
+ options</a></li>
+ </ol></li>
+</ol>
+
+<div class="doc_author">
+ <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="introduction">Introduction</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>This document describes the CommandLine argument processing library. It will
+show you how to use it, and what it can do. The CommandLine library uses a
+declarative approach to specifying the command line options that your program
+takes. By default, these options declarations implicitly hold the value parsed
+for the option declared (of course this <a href="#storage">can be
+changed</a>).</p>
+
+<p>Although there are a <b>lot</b> of command line argument parsing libraries
+out there in many different languages, none of them fit well with what I needed.
+By looking at the features and problems of other libraries, I designed the
+CommandLine library to have the following features:</p>
+
+<ol>
+<li>Speed: The CommandLine library is very quick and uses little resources. The
+parsing time of the library is directly proportional to the number of arguments
+parsed, not the number of options recognized. Additionally, command line
+argument values are captured transparently into user defined global variables,
+which can be accessed like any other variable (and with the same
+performance).</li>
+
+<li>Type Safe: As a user of CommandLine, you don't have to worry about
+remembering the type of arguments that you want (is it an int? a string? a
+bool? an enum?) and keep casting it around. Not only does this help prevent
+error prone constructs, it also leads to dramatically cleaner source code.</li>
+
+<li>No subclasses required: To use CommandLine, you instantiate variables that
+correspond to the arguments that you would like to capture, you don't subclass a
+parser. This means that you don't have to write <b>any</b> boilerplate
+code.</li>
+
+<li>Globally accessible: Libraries can specify command line arguments that are
+automatically enabled in any tool that links to the library. This is possible
+because the application doesn't have to keep a "list" of arguments to pass to
+the parser. This also makes supporting <a href="#dynamicopts">dynamically
+loaded options</a> trivial.</li>
+
+<li>Cleaner: CommandLine supports enum and other types directly, meaning that
+there is less error and more security built into the library. You don't have to
+worry about whether your integral command line argument accidentally got
+assigned a value that is not valid for your enum type.</li>
+
+<li>Powerful: The CommandLine library supports many different types of
+arguments, from simple <a href="#boolparser">boolean flags</a> to <a
+href="#cl::opt">scalar arguments</a> (<a href="#stringparser">strings</a>, <a
+href="#intparser">integers</a>, <a href="#genericparser">enums</a>, <a
+href="#doubleparser">doubles</a>), to <a href="#cl::list">lists of
+arguments</a>. This is possible because CommandLine is...</li>
+
+<li>Extensible: It is very simple to add a new argument type to CommandLine.
+Simply specify the parser that you want to use with the command line option when
+you declare it. <a href="#customparser">Custom parsers</a> are no problem.</li>
+
+<li>Labor Saving: The CommandLine library cuts down on the amount of grunt work
+that you, the user, have to do. For example, it automatically provides a
+<tt>--help</tt> option that shows the available command line options for your
+tool. Additionally, it does most of the basic correctness checking for
+you.</li>
+
+<li>Capable: The CommandLine library can handle lots of different forms of
+options often found in real programs. For example, <a
+href="#positional">positional</a> arguments, <tt>ls</tt> style <a
+href="#cl::Grouping">grouping</a> options (to allow processing '<tt>ls
+-lad</tt>' naturally), <tt>ld</tt> style <a href="#cl::Prefix">prefix</a>
+options (to parse '<tt>-lmalloc -L/usr/lib</tt>'), and <a
+href="#cl::ConsumeAfter">interpreter style options</a>.</li>
+
+</ol>
+
+<p>This document will hopefully let you jump in and start using CommandLine in
+your utility quickly and painlessly. Additionally it should be a simple
+reference manual to figure out how stuff works. If it is failing in some area
+(or you want an extension to the library), nag the author, <a
+href="mailto:sabre@nondot.org">Chris Lattner</a>.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="quickstart">Quick Start Guide</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>This section of the manual runs through a simple CommandLine'ification of a
+basic compiler tool. This is intended to show you how to jump into using the
+CommandLine library in your own program, and show you some of the cool things it
+can do.</p>
+
+<p>To start out, you need to include the CommandLine header file into your
+program:</p>
+
+<div class="doc_code"><pre>
+ #include "llvm/Support/CommandLine.h"
+</pre></div>
+
+<p>Additionally, you need to add this as the first line of your main
+program:</p>
+
+<div class="doc_code"><pre>
+int main(int argc, char **argv) {
+ <a href="#cl::ParseCommandLineOptions">cl::ParseCommandLineOptions</a>(argc, argv);
+ ...
+}
+</pre></div>
+
+<p>... which actually parses the arguments and fills in the variable
+declarations.</p>
+
+<p>Now that you are ready to support command line arguments, we need to tell the
+system which ones we want, and what type of argument they are. The CommandLine
+library uses a declarative syntax to model command line arguments with the
+global variable declarations that capture the parsed values. This means that
+for every command line option that you would like to support, there should be a
+global variable declaration to capture the result. For example, in a compiler,
+we would like to support the unix standard '<tt>-o &lt;filename&gt;</tt>' option
+to specify where to put the output. With the CommandLine library, this is
+represented like this:</p>
+
+<a name="value_desc_example"></a>
+<div class="doc_code"><pre>
+<a href="#cl::opt">cl::opt</a>&lt;string&gt; OutputFilename("<i>o</i>", <a href="#cl::desc">cl::desc</a>("<i>Specify output filename</i>"), <a href="#cl::value_desc">cl::value_desc</a>("<i>filename</i>"));
+</pre></div>
+
+<p>This declares a global variable "<tt>OutputFilename</tt>" that is used to
+capture the result of the "<tt>o</tt>" argument (first parameter). We specify
+that this is a simple scalar option by using the "<tt><a
+href="#cl::opt">cl::opt</a></tt>" template (as opposed to the <a
+href="#list">"<tt>cl::list</tt>" template</a>), and tell the CommandLine library
+that the data type that we are parsing is a string.</p>
+
+<p>The second and third parameters (which are optional) are used to specify what
+to output for the "<tt>--help</tt>" option. In this case, we get a line that
+looks like this:</p>
+
+<div class="doc_code"><pre>
+USAGE: compiler [options]
+
+OPTIONS:
+ -help - display available options (--help-hidden for more)
+ <b>-o &lt;filename&gt; - Specify output filename</b>
+</pre></div>
+
+<p>Because we specified that the command line option should parse using the
+<tt>string</tt> data type, the variable declared is automatically usable as a
+real string in all contexts that a normal C++ string object may be used. For
+example:</p>
+
+<div class="doc_code"><pre>
+ ...
+ ofstream Output(OutputFilename.c_str());
+ if (Output.good()) ...
+ ...
+</pre></div>
+
+<p>There are many different options that you can use to customize the command
+line option handling library, but the above example shows the general interface
+to these options. The options can be specified in any order, and are specified
+with helper functions like <a href="#cl::desc"><tt>cl::desc(...)</tt></a>, so
+there are no positional dependencies to remember. The available options are
+discussed in detail in the <a href="#referenceguide">Reference Guide</a>.</p>
+
+<p>Continuing the example, we would like to have our compiler take an input
+filename as well as an output filename, but we do not want the input filename to
+be specified with a hyphen (ie, not <tt>-filename.c</tt>). To support this
+style of argument, the CommandLine library allows for <a
+href="#positional">positional</a> arguments to be specified for the program.
+These positional arguments are filled with command line parameters that are not
+in option form. We use this feature like this:</p>
+
+<div class="doc_code"><pre>
+<a href="#cl::opt">cl::opt</a>&lt;string&gt; InputFilename(<a href="#cl::Positional">cl::Positional</a>, <a href="#cl::desc">cl::desc</a>("<i>&lt;input file&gt;</i>"), <a href="#cl::init">cl::init</a>("<i>-</i>"));
+</pre></div>
+
+<p>This declaration indicates that the first positional argument should be
+treated as the input filename. Here we use the <tt><a
+href="#cl::init">cl::init</a></tt> option to specify an initial value for the
+command line option, which is used if the option is not specified (if you do not
+specify a <tt><a href="#cl::init">cl::init</a></tt> modifier for an option, then
+the default constructor for the data type is used to initialize the value).
+Command line options default to being optional, so if we would like to require
+that the user always specify an input filename, we would add the <tt><a
+href="#cl::Required">cl::Required</a></tt> flag, and we could eliminate the
+<tt><a href="#cl::init">cl::init</a></tt> modifier, like this:</p>
+
+<div class="doc_code"><pre>
+<a href="#cl::opt">cl::opt</a>&lt;string&gt; InputFilename(<a href="#cl::Positional">cl::Positional</a>, <a href="#cl::desc">cl::desc</a>("<i>&lt;input file&gt;</i>"), <b><a href="#cl::Required">cl::Required</a></b>);
+</pre></div>
+
+<p>Again, the CommandLine library does not require the options to be specified
+in any particular order, so the above declaration is equivalent to:</p>
+
+<div class="doc_code"><pre>
+<a href="#cl::opt">cl::opt</a>&lt;string&gt; InputFilename(<a href="#cl::Positional">cl::Positional</a>, <a href="#cl::Required">cl::Required</a>, <a href="#cl::desc">cl::desc</a>("<i>&lt;input file&gt;</i>"));
+</pre></div>
+
+<p>By simply adding the <tt><a href="#cl::Required">cl::Required</a></tt> flag,
+the CommandLine library will automatically issue an error if the argument is not
+specified, which shifts all of the command line option verification code out of
+your application into the library. This is just one example of how using flags
+can alter the default behaviour of the library, on a per-option basis. By
+adding one of the declarations above, the <tt>--help</tt> option synopsis is now
+extended to:</p>
+
+<div class="doc_code"><pre>
+USAGE: compiler [options] <b>&lt;input file&gt;</b>
+
+OPTIONS:
+ -help - display available options (--help-hidden for more)
+ -o &lt;filename&gt; - Specify output filename
+</pre></div>
+
+<p>... indicating that an input filename is expected.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="bool">Boolean Arguments</a>
+</div>
+
+<div class="doc_text">
+
+<p>In addition to input and output filenames, we would like the compiler example
+to support three boolean flags: "<tt>-f</tt>" to force overwriting of the output
+file, "<tt>--quiet</tt>" to enable quiet mode, and "<tt>-q</tt>" for backwards
+compatibility with some of our users. We can support these by declaring options
+of boolean type like this:</p>
+
+<div class="doc_code"><pre>
+<a href="#cl::opt">cl::opt</a>&lt;bool&gt; Force ("<i>f</i>", <a href="#cl::desc">cl::desc</a>("<i>Overwrite output files</i>"));
+<a href="#cl::opt">cl::opt</a>&lt;bool&gt; Quiet ("<i>quiet</i>", <a href="#cl::desc">cl::desc</a>("<i>Don't print informational messages</i>"));
+<a href="#cl::opt">cl::opt</a>&lt;bool&gt; Quiet2("<i>q</i>", <a href="#cl::desc">cl::desc</a>("<i>Don't print informational messages</i>"), <a href="#cl::Hidden">cl::Hidden</a>);
+</pre></div>
+
+<p>This does what you would expect: it declares three boolean variables
+("<tt>Force</tt>", "<tt>Quiet</tt>", and "<tt>Quiet2</tt>") to recognize these
+options. Note that the "<tt>-q</tt>" option is specified with the "<a
+href="#cl::Hidden"><tt>cl::Hidden</tt></a>" flag. This modifier prevents it
+from being shown by the standard "<tt>--help</tt>" output (note that it is still
+shown in the "<tt>--help-hidden</tt>" output).</p>
+
+<p>The CommandLine library uses a <a href="#builtinparsers">different parser</a>
+for different data types. For example, in the string case, the argument passed
+to the option is copied literally into the content of the string variable... we
+obviously cannot do that in the boolean case, however, so we must use a smarter
+parser. In the case of the boolean parser, it allows no options (in which case
+it assigns the value of true to the variable), or it allows the values
+"<tt>true</tt>" or "<tt>false</tt>" to be specified, allowing any of the
+following inputs:</p>
+
+<div class="doc_code"><pre>
+ compiler -f # No value, 'Force' == true
+ compiler -f=true # Value specified, 'Force' == true
+ compiler -f=TRUE # Value specified, 'Force' == true
+ compiler -f=FALSE # Value specified, 'Force' == false
+</pre></div>
+
+<p>... you get the idea. The <a href="#boolparser">bool parser</a> just turns
+the string values into boolean values, and rejects things like '<tt>compiler
+-f=foo</tt>'. Similarly, the <a href="#doubleparser">float</a>, <a
+href="#doubleparser">double</a>, and <a href="#intparser">int</a> parsers work
+like you would expect, using the '<tt>strtol</tt>' and '<tt>strtod</tt>' C
+library calls to parse the string value into the specified data type.</p>
+
+<p>With the declarations above, "<tt>compiler --help</tt>" emits this:</p>
+
+<div class="doc_code"><pre>
+USAGE: compiler [options] &lt;input file&gt;
+
+OPTIONS:
+ <b>-f - Overwrite output files</b>
+ -o &lt;filename&gt; - Specify output filename
+ <b>-quiet - Don't print informational messages</b>
+ -help - display available options (--help-hidden for more)
+</pre></div>
+
+<p>and "<tt>compiler --help-hidden</tt>" prints this:</p>
+
+<div class="doc_code"><pre>
+USAGE: compiler [options] &lt;input file&gt;
+
+OPTIONS:
+ -f - Overwrite output files
+ -o &lt;filename&gt; - Specify output filename
+ <b>-q - Don't print informational messages</b>
+ -quiet - Don't print informational messages
+ -help - display available options (--help-hidden for more)
+</pre></div>
+
+<p>This brief example has shown you how to use the '<tt><a
+href="#cl::opt">cl::opt</a></tt>' class to parse simple scalar command line
+arguments. In addition to simple scalar arguments, the CommandLine library also
+provides primitives to support CommandLine option <a href="#alias">aliases</a>,
+and <a href="#list">lists</a> of options.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="alias">Argument Aliases</a>
+</div>
+
+<div class="doc_text">
+
+<p>So far, the example works well, except for the fact that we need to check the
+quiet condition like this now:</p>
+
+<div class="doc_code"><pre>
+...
+ if (!Quiet &amp;&amp; !Quiet2) printInformationalMessage(...);
+...
+</pre></div>
+
+<p>... which is a real pain! Instead of defining two values for the same
+condition, we can use the "<tt><a href="#cl::alias">cl::alias</a></tt>" class to make the "<tt>-q</tt>"
+option an <b>alias</b> for the "<tt>-quiet</tt>" option, instead of providing
+a value itself:</p>
+
+<div class="doc_code"><pre>
+<a href="#cl::opt">cl::opt</a>&lt;bool&gt; Force ("<i>f</i>", <a href="#cl::desc">cl::desc</a>("<i>Overwrite output files</i>"));
+<a href="#cl::opt">cl::opt</a>&lt;bool&gt; Quiet ("<i>quiet</i>", <a href="#cl::desc">cl::desc</a>("<i>Don't print informational messages</i>"));
+<a href="#cl::alias">cl::alias</a> QuietA("<i>q</i>", <a href="#cl::desc">cl::desc</a>("<i>Alias for -quiet</i>"), <a href="#cl::aliasopt">cl::aliasopt</a>(Quiet));
+</pre></div>
+
+<p>The third line (which is the only one we modified from above) defines a
+"<tt>-q</tt>" alias that updates the "<tt>Quiet</tt>" variable (as specified by
+the <tt><a href="#cl::aliasopt">cl::aliasopt</a></tt> modifier) whenever it is
+specified. Because aliases do not hold state, the only thing the program has to
+query is the <tt>Quiet</tt> variable now. Another nice feature of aliases is
+that they automatically hide themselves from the <tt>-help</tt> output
+(although, again, they are still visible in the <tt>--help-hidden</tt>
+output).</p>
+
+<p>Now the application code can simply use:</p>
+
+<div class="doc_code"><pre>
+...
+ if (!Quiet) printInformationalMessage(...);
+...
+</pre></div>
+
+<p>... which is much nicer! The "<tt><a href="#cl::alias">cl::alias</a></tt>"
+can be used to specify an alternative name for any variable type, and has many
+uses.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="onealternative">Selecting an alternative from a set of
+ possibilities</a>
+</div>
+
+<div class="doc_text">
+
+<p>So far, we have seen how the CommandLine library handles builtin types like
+<tt>std::string</tt>, <tt>bool</tt> and <tt>int</tt>, but how does it handle
+things it doesn't know about, like enums or '<tt>int*</tt>'s?</p>
+
+<p>The answer is that it uses a table driven generic parser (unless you specify
+your own parser, as described in the <a href="#extensionguide">Extension
+Guide</a>). This parser maps literal strings to whatever type is required, and
+requires you to tell it what this mapping should be.</p>
+
+<p>Let's say that we would like to add four optimization levels to our
+optimizer, using the standard flags "<tt>-g</tt>", "<tt>-O1</tt>",
+"<tt>-O2</tt>", and "<tt>-O3</tt>". We could easily implement this with boolean
+options like above, but there are several problems with this strategy:</p>
+
+<ol>
+<li>A user could specify more than one of the options at a time, for example,
+"<tt>opt -O3 -O2</tt>". The CommandLine library would not be able to catch this
+erroneous input for us.</li>
+
+<li>We would have to test 4 different variables to see which ones are set.</li>
+
+<li>This doesn't map to the numeric levels that we want... so we cannot easily
+see if some level &gt;= "<tt>-O1</tt>" is enabled.</li>
+
+</ol>
+
+<p>To cope with these problems, we can use an enum value, and have the
+CommandLine library fill it in with the appropriate level directly, which is
+used like this:</p>
+
+<div class="doc_code"><pre>
+enum OptLevel {
+ g, O1, O2, O3
+};
+
+<a href="#cl::opt">cl::opt</a>&lt;OptLevel&gt; OptimizationLevel(<a href="#cl::desc">cl::desc</a>("<i>Choose optimization level:</i>"),
+ <a href="#cl::values">cl::values</a>(
+ clEnumVal(g , "<i>No optimizations, enable debugging</i>"),
+ clEnumVal(O1, "<i>Enable trivial optimizations</i>"),
+ clEnumVal(O2, "<i>Enable default optimizations</i>"),
+ clEnumVal(O3, "<i>Enable expensive optimizations</i>"),
+ clEnumValEnd));
+
+...
+ if (OptimizationLevel &gt;= O2) doPartialRedundancyElimination(...);
+...
+</pre></div>
+
+<p>This declaration defines a variable "<tt>OptimizationLevel</tt>" of the
+"<tt>OptLevel</tt>" enum type. This variable can be assigned any of the values
+that are listed in the declaration (Note that the declaration list must be
+terminated with the "<tt>clEnumValEnd</tt>" argument!). The CommandLine
+library enforces
+that the user can only specify one of the options, and it ensures that only valid
+enum values can be specified. The "<tt>clEnumVal</tt>" macros ensure that the
+command line arguments match the enum values. With this option added, our
+help output now is:</p>
+
+<div class="doc_code"><pre>
+USAGE: compiler [options] &lt;input file&gt;
+
+OPTIONS:
+ <b>Choose optimization level:
+ -g - No optimizations, enable debugging
+ -O1 - Enable trivial optimizations
+ -O2 - Enable default optimizations
+ -O3 - Enable expensive optimizations</b>
+ -f - Overwrite output files
+ -help - display available options (--help-hidden for more)
+ -o &lt;filename&gt; - Specify output filename
+ -quiet - Don't print informational messages
+</pre></div>
+
+<p>In this case, it is sort of awkward that flag names correspond directly to
+enum names, because we probably don't want an enum definition named "<tt>g</tt>"
+in our program. Because of this, we can alternatively write this example like
+this:</p>
+
+<div class="doc_code"><pre>
+enum OptLevel {
+ Debug, O1, O2, O3
+};
+
+<a href="#cl::opt">cl::opt</a>&lt;OptLevel&gt; OptimizationLevel(<a href="#cl::desc">cl::desc</a>("<i>Choose optimization level:</i>"),
+ <a href="#cl::values">cl::values</a>(
+ clEnumValN(Debug, "g", "<i>No optimizations, enable debugging</i>"),
+ clEnumVal(O1 , "<i>Enable trivial optimizations</i>"),
+ clEnumVal(O2 , "<i>Enable default optimizations</i>"),
+ clEnumVal(O3 , "<i>Enable expensive optimizations</i>"),
+ clEnumValEnd));
+
+...
+ if (OptimizationLevel == Debug) outputDebugInfo(...);
+...
+</pre></div>
+
+<p>By using the "<tt>clEnumValN</tt>" macro instead of "<tt>clEnumVal</tt>", we
+can directly specify the name that the flag should get. In general a direct
+mapping is nice, but sometimes you can't or don't want to preserve the mapping,
+which is when you would use it.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="namedalternatives">Named Alternatives</a>
+</div>
+
+<div class="doc_text">
+
+<p>Another useful argument form is a named alternative style. We shall use this
+style in our compiler to specify different debug levels that can be used.
+Instead of each debug level being its own switch, we want to support the
+following options, of which only one can be specified at a time:
+"<tt>--debug_level=none</tt>", "<tt>--debug_level=quick</tt>",
+"<tt>--debug_level=detailed</tt>". To do this, we use the exact same format as
+our optimization level flags, but we also specify an option name. For this
+case, the code looks like this:</p>
+
+<div class="doc_code"><pre>
+enum DebugLev {
+ nodebuginfo, quick, detailed
+};
+
+// Enable Debug Options to be specified on the command line
+<a href="#cl::opt">cl::opt</a>&lt;DebugLev&gt; DebugLevel("<i>debug_level</i>", <a href="#cl::desc">cl::desc</a>("<i>Set the debugging level:</i>"),
+ <a href="#cl::values">cl::values</a>(
+ clEnumValN(nodebuginfo, "none", "<i>disable debug information</i>"),
+ clEnumVal(quick, "<i>enable quick debug information</i>"),
+ clEnumVal(detailed, "<i>enable detailed debug information</i>"),
+ clEnumValEnd));
+</pre></div>
+
+<p>This definition defines an enumerated command line variable of type "<tt>enum
+DebugLev</tt>", which works exactly the same way as before. The difference here
+is just the interface exposed to the user of your program and the help output by
+the "<tt>--help</tt>" option:</p>
+
+<div class="doc_code"><pre>
+USAGE: compiler [options] &lt;input file&gt;
+
+OPTIONS:
+ Choose optimization level:
+ -g - No optimizations, enable debugging
+ -O1 - Enable trivial optimizations
+ -O2 - Enable default optimizations
+ -O3 - Enable expensive optimizations
+ <b>-debug_level - Set the debugging level:
+ =none - disable debug information
+ =quick - enable quick debug information
+ =detailed - enable detailed debug information</b>
+ -f - Overwrite output files
+ -help - display available options (--help-hidden for more)
+ -o &lt;filename&gt; - Specify output filename
+ -quiet - Don't print informational messages
+</pre></div>
+
+<p>Again, the only structural difference between the debug level declaration and
+the optimization level declaration is that the debug level declaration includes
+an option name (<tt>"debug_level"</tt>), which automatically changes how the
+library processes the argument. The CommandLine library supports both forms so
+that you can choose the form most appropriate for your application.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="list">Parsing a list of options</a>
+</div>
+
+<div class="doc_text">
+
+<p>Now that we have the standard run of the mill argument types out of the way,
+let's get a little wild and crazy. Let's say that we want our optimizer to accept
+a <b>list</b> of optimizations to perform, allowing duplicates. For example, we
+might want to run: "<tt>compiler -dce -constprop -inline -dce -strip</tt>". In
+this case, the order of the arguments and the number of appearances is very
+important. This is what the "<tt><a href="#cl::list">cl::list</a></tt>"
+template is for. First, start by defining an enum of the optimizations that you
+would like to perform:</p>
+
+<div class="doc_code"><pre>
+enum Opts {
+ // 'inline' is a C++ keyword, so name it 'inlining'
+ dce, constprop, inlining, strip
+};
+</pre></div>
+
+<p>Then define your "<tt><a href="#cl::list">cl::list</a></tt>" variable:</p>
+
+<div class="doc_code"><pre>
+<a href="#cl::list">cl::list</a>&lt;Opts&gt; OptimizationList(<a href="#cl::desc">cl::desc</a>("<i>Available Optimizations:</i>"),
+ <a href="#cl::values">cl::values</a>(
+ clEnumVal(dce , "<i>Dead Code Elimination</i>"),
+ clEnumVal(constprop , "<i>Constant Propagation</i>"),
+ clEnumValN(inlining, "<i>inline</i>", "<i>Procedure Integration</i>"),
+ clEnumVal(strip , "<i>Strip Symbols</i>"),
+ clEnumValEnd));
+</pre></div>
+
+<p>This defines a variable that is conceptually of the type
+"<tt>std::vector&lt;enum Opts&gt;</tt>". Thus, you can access it with standard
+vector methods:</p>
+
+<div class="doc_code"><pre>
+ for (unsigned i = 0; i != OptimizationList.size(); ++i)
+ switch (OptimizationList[i])
+ ...
+</pre></div>
+
+<p>... to iterate through the list of options specified.</p>
+
+<p>Note that the "<tt><a href="#cl::list">cl::list</a></tt>" template is
+completely general and may be used with any data types or other arguments that
+you can use with the "<tt><a href="#cl::opt">cl::opt</a></tt>" template. One
+especially useful way to use a list is to capture all of the positional
+arguments together if there may be more than one specified. In the case of a
+linker, for example, the linker takes several '<tt>.o</tt>' files, and needs to
+capture them into a list. This is naturally specified as:</p>
+
+<div class="doc_code"><pre>
+...
+<a href="#cl::list">cl::list</a>&lt;std::string&gt; InputFilenames(<a href="#cl::Positional">cl::Positional</a>, <a href="#cl::desc">cl::desc</a>("&lt;Input files&gt;"), <a href="#cl::OneOrMore">cl::OneOrMore</a>);
+...
+</pre></div>
+
+<p>This variable works just like a "<tt>vector&lt;string&gt;</tt>" object. As
+such, accessing the list is simple, just like above. In this example, we used
+the <tt><a href="#cl::OneOrMore">cl::OneOrMore</a></tt> modifier to inform the
+CommandLine library that it is an error if the user does not specify any
+<tt>.o</tt> files on our command line. Again, this just reduces the amount of
+checking we have to do.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="bits">Collecting options as a set of flags</a>
+</div>
+
+<div class="doc_text">
+
+<p>Instead of collecting sets of options in a list, it is also possible to
+gather information for enum values in a <b>bit vector</b>. The representation used by
+the <a href="#bits"><tt>cl::bits</tt></a> class is an <tt>unsigned</tt>
+integer. An enum value is represented by a 0/1 in the enum's ordinal value bit
+position, with 1 indicating that the enum was specified and 0 otherwise. As each
+specified value is parsed, the resulting enum's bit is set in the option's bit
+vector:</p>
+
+<div class="doc_code"><pre>
+ <i>bits</i> |= 1 &lt;&lt; (unsigned)<i>enum</i>;
+</pre></div>
+
+<p>Options that are specified multiple times are redundant. Any instances after
+the first are discarded.</p>
+
+<p>Reworking the above list example, we could replace <a href="#list">
+<tt>cl::list</tt></a> with <a href="#bits"><tt>cl::bits</tt></a>:</p>
+
+<div class="doc_code"><pre>
+<a href="#cl::bits">cl::bits</a>&lt;Opts&gt; OptimizationBits(<a href="#cl::desc">cl::desc</a>("<i>Available Optimizations:</i>"),
+ <a href="#cl::values">cl::values</a>(
+ clEnumVal(dce , "<i>Dead Code Elimination</i>"),
+ clEnumVal(constprop , "<i>Constant Propagation</i>"),
+ clEnumValN(inlining, "<i>inline</i>", "<i>Procedure Integration</i>"),
+ clEnumVal(strip , "<i>Strip Symbols</i>"),
+ clEnumValEnd));
+</pre></div>
+
+<p>To test to see if <tt>constprop</tt> was specified, we can use the
+<tt>cl::bits::isSet</tt> function:</p>
+
+<div class="doc_code"><pre>
+ if (OptimizationBits.isSet(constprop)) {
+ ...
+ }
+</pre></div>
+
+<p>It's also possible to get the raw bit vector using the
+<tt>cl::bits::getBits</tt> function:</p>
+
+<div class="doc_code"><pre>
+ unsigned bits = OptimizationBits.getBits();
+</pre></div>
+
+<p>Finally, if external storage is used, then the location specified must be of
+<b>type</b> <tt>unsigned</tt>. In all other ways a <a
+href="#bits"><tt>cl::bits</tt></a> option is morally equivalent to a <a
+href="#list"> <tt>cl::list</tt></a> option.</p>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="description">Adding freeform text to help output</a>
+</div>
+
+<div class="doc_text">
+
+<p>As our program grows and becomes more mature, we may decide to put summary
+information about what it does into the help output. The help output is styled
+to look similar to a Unix <tt>man</tt> page, providing concise information about
+a program. Unix <tt>man</tt> pages, however, often have a description about what
+the program does. To add this to your CommandLine program, simply pass a third
+argument to the <a
+href="#cl::ParseCommandLineOptions"><tt>cl::ParseCommandLineOptions</tt></a>
+call in main. This additional argument is then printed as the overview
+information for your program, allowing you to include any additional information
+that you want. For example:</p>
+
+<div class="doc_code"><pre>
+int main(int argc, char **argv) {
+ <a href="#cl::ParseCommandLineOptions">cl::ParseCommandLineOptions</a>(argc, argv, " CommandLine compiler example\n\n"
+ " This program blah blah blah...\n");
+ ...
+}
+</pre></div>
+
+<p>would yield the help output:</p>
+
+<div class="doc_code"><pre>
+<b>OVERVIEW: CommandLine compiler example
+
+ This program blah blah blah...</b>
+
+USAGE: compiler [options] &lt;input file&gt;
+
+OPTIONS:
+ ...
+ -help - display available options (--help-hidden for more)
+ -o &lt;filename&gt; - Specify output filename
+</pre></div>
+
+</div>
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="referenceguide">Reference Guide</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>Now that you know the basics of how to use the CommandLine library, this
+section will give you the detailed information you need to tune how command line
+options work, as well as information on more "advanced" command line option
+processing capabilities.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="positional">Positional Arguments</a>
+</div>
+
+<div class="doc_text">
+
+<p>Positional arguments are those arguments that are not named, and are not
+specified with a hyphen. Positional arguments should be used when an option is
+specified by its position alone. For example, the standard Unix <tt>grep</tt>
+tool takes a regular expression argument, and an optional filename to search
+through (which defaults to standard input if a filename is not specified).
+Using the CommandLine library, this would be specified as:</p>
+
+<div class="doc_code"><pre>
+<a href="#cl::opt">cl::opt</a>&lt;string&gt; Regex (<a href="#cl::Positional">cl::Positional</a>, <a href="#cl::desc">cl::desc</a>("<i>&lt;regular expression&gt;</i>"), <a href="#cl::Required">cl::Required</a>);
+<a href="#cl::opt">cl::opt</a>&lt;string&gt; Filename(<a href="#cl::Positional">cl::Positional</a>, <a href="#cl::desc">cl::desc</a>("<i>&lt;input file&gt;</i>"), <a href="#cl::init">cl::init</a>("<i>-</i>"));
+</pre></div>
+
+<p>Given these two option declarations, the <tt>--help</tt> output for our grep
+replacement would look like this:</p>
+
+<div class="doc_code"><pre>
+USAGE: spiffygrep [options] <b>&lt;regular expression&gt; &lt;input file&gt;</b>
+
+OPTIONS:
+ -help - display available options (--help-hidden for more)
+</pre></div>
+
+<p>... and the resultant program could be used just like the standard
+<tt>grep</tt> tool.</p>
+
+<p>Positional arguments are sorted by their order of construction. This means
+that command line options will be ordered according to how they are listed in a
+.cpp file, but will not have an ordering defined if the positional arguments
+are defined in multiple .cpp files. The fix for this problem is simply to
+define all of your positional arguments in one .cpp file.</p>
+
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="--">Specifying positional options with hyphens</a>
+</div>
+
+<div class="doc_text">
+
+<p>Sometimes you may want to specify a value to your positional argument that
+starts with a hyphen (for example, searching for '<tt>-foo</tt>' in a file). At
+first, you will have trouble doing this, because it will try to find an argument
+named '<tt>-foo</tt>', and will fail (and single quotes will not save you).
+Note that the system <tt>grep</tt> has the same problem:</p>
+
+<div class="doc_code"><pre>
+ $ spiffygrep '-foo' test.txt
+ Unknown command line argument '-foo'. Try: 'spiffygrep --help'
+
+ $ grep '-foo' test.txt
+ grep: illegal option -- f
+ grep: illegal option -- o
+ grep: illegal option -- o
+ Usage: grep -hblcnsviw pattern file . . .
+</pre></div>
+
+<p>The solution for this problem is the same for both your tool and the system
+version: use the '<tt>--</tt>' marker. When the user specifies '<tt>--</tt>' on
+the command line, it is telling the program that all options after the
+'<tt>--</tt>' should be treated as positional arguments, not options. Thus, we
+can use it like this:</p>
+
+<div class="doc_code"><pre>
+ $ spiffygrep -- -foo test.txt
+ ...output...
+</pre></div>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="getPosition">Determining absolute position with getPosition()</a>
+</div>
+<div class="doc_text">
+ <p>Sometimes an option can affect or modify the meaning of another option. For
+ example, consider <tt>gcc</tt>'s <tt>-x LANG</tt> option. This tells
+ <tt>gcc</tt> to ignore the suffix of subsequent positional arguments and force
+ the file to be interpreted as if it contained source code in language
+ <tt>LANG</tt>. In order to handle this properly, you need to know the
+ absolute position of each argument, especially those in lists, so their
+ interaction(s) can be applied correctly. This is also useful for options like
+ <tt>-llibname</tt> which is actually a positional argument that starts with
+ a dash.</p>
+ <p>So, generally, the problem is that you have two <tt>cl::list</tt> variables
+ that interact in some way. To ensure the correct interaction, you can use the
+ <tt>cl::list::getPosition(optnum)</tt> method. This method returns the
+ absolute position (as found on the command line) of the <tt>optnum</tt>
+ item in the <tt>cl::list</tt>.</p>
+ <p>The idiom for usage is like this:</p>
+
+ <div class="doc_code"><pre>
+ static cl::list&lt;std::string&gt; Files(cl::Positional, cl::OneOrMore);
+ static cl::list&lt;std::string&gt; Libraries("l", cl::ZeroOrMore);
+
+ int main(int argc, char**argv) {
+ // ...
+ std::vector&lt;std::string&gt;::iterator fileIt = Files.begin();
+ std::vector&lt;std::string&gt;::iterator libIt = Libraries.begin();
+ unsigned libPos = 0, filePos = 0;
+ while ( 1 ) {
+ if ( libIt != Libraries.end() )
+ libPos = Libraries.getPosition( libIt - Libraries.begin() );
+ else
+ libPos = 0;
+ if ( fileIt != Files.end() )
+ filePos = Files.getPosition( fileIt - Files.begin() );
+ else
+ filePos = 0;
+
+ if ( filePos != 0 &amp;&amp; (libPos == 0 || filePos &lt; libPos) ) {
+ // Source File Is next
+ ++fileIt;
+ }
+ else if ( libPos != 0 &amp;&amp; (filePos == 0 || libPos &lt; filePos) ) {
+ // Library is next
+ ++libIt;
+ }
+ else
+ break; // we're done with the list
+ }
+ }</pre></div>
+
+ <p>Note that, for compatibility reasons, the <tt>cl::opt</tt> also supports an
+ <tt>unsigned getPosition()</tt> method that will provide the absolute position
+ of that option. You can apply the same approach as above with a
+ <tt>cl::opt</tt> and a <tt>cl::list</tt> option as you can with two lists.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="cl::ConsumeAfter">The <tt>cl::ConsumeAfter</tt> modifier</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>cl::ConsumeAfter</tt> <a href="#formatting">formatting option</a> is
+used to construct programs that use "interpreter style" option processing. With
+this style of option processing, all arguments specified after the last
+positional argument are treated as special interpreter arguments that are not
+interpreted by the command line argument processor.</p>
+
+<p>As a concrete example, let's say we are developing a replacement for the
+standard Unix Bourne shell (<tt>/bin/sh</tt>). To run <tt>/bin/sh</tt>, first
+you specify options to the shell itself (like <tt>-x</tt> which turns on trace
+output), then you specify the name of the script to run, then you specify
+arguments to the script. These arguments to the script are parsed by the Bourne
+shell command line option processor, but are not interpreted as options to the
+shell itself. Using the CommandLine library, we would specify this as:</p>
+
+<div class="doc_code"><pre>
+<a href="#cl::opt">cl::opt</a>&lt;string&gt; Script(<a href="#cl::Positional">cl::Positional</a>, <a href="#cl::desc">cl::desc</a>("<i>&lt;input script&gt;</i>"), <a href="#cl::init">cl::init</a>("-"));
+<a href="#cl::list">cl::list</a>&lt;string&gt; Argv(<a href="#cl::ConsumeAfter">cl::ConsumeAfter</a>, <a href="#cl::desc">cl::desc</a>("<i>&lt;program arguments&gt;...</i>"));
+<a href="#cl::opt">cl::opt</a>&lt;bool&gt; Trace("<i>x</i>", <a href="#cl::desc">cl::desc</a>("<i>Enable trace output</i>"));
+</pre></div>
+
+<p>which automatically provides the help output:</p>
+
+<div class="doc_code"><pre>
+USAGE: spiffysh [options] <b>&lt;input script&gt; &lt;program arguments&gt;...</b>
+
+OPTIONS:
+ -help - display available options (--help-hidden for more)
+ <b>-x - Enable trace output</b>
+</pre></div>
+
+<p>At runtime, if we run our new shell replacement as `<tt>spiffysh -x test.sh
+-a -x -y bar</tt>', the <tt>Trace</tt> variable will be set to true, the
+<tt>Script</tt> variable will be set to "<tt>test.sh</tt>", and the
+<tt>Argv</tt> list will contain <tt>["-a", "-x", "-y", "bar"]</tt>, because they
+were specified after the last positional argument (which is the script
+name).</p>
+
+<p>There are several limitations to when <tt>cl::ConsumeAfter</tt> options can
+be specified. For example, only one <tt>cl::ConsumeAfter</tt> can be specified
+per program, there must be at least one <a href="#positional">positional
+argument</a> specified, there must not be any <a href="#cl::list">cl::list</a>
+positional arguments, and the <tt>cl::ConsumeAfter</tt> option should be a <a
+href="#cl::list">cl::list</a> option.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="storage">Internal vs External Storage</a>
+</div>
+
+<div class="doc_text">
+
+<p>By default, all command line options automatically hold the value that they
+parse from the command line. This is very convenient in the common case,
+especially when combined with the ability to define command line options in the
+files that use them. This is called the internal storage model.</p>
+
+<p>Sometimes, however, it is nice to separate the command line option processing
+code from the storage of the value parsed. For example, let's say that we have a
+'<tt>-debug</tt>' option that we would like to use to enable debug information
+across the entire body of our program. In this case, the boolean value
+controlling the debug code should be globally accessible (in a header file, for
+example) yet the command line option processing code should not be exposed to
+all of these clients (requiring lots of .cpp files to #include
+<tt>CommandLine.h</tt>).</p>
+
+<p>To do this, set up your .h file with your option, like this for example:</p>
+
+<div class="doc_code">
+<pre>
+<i>// DebugFlag.h - Get access to the '-debug' command line option
+//
+
+// DebugFlag - This boolean is set to true if the '-debug' command line option
+// is specified. This should probably not be referenced directly, instead, use
+// the DEBUG macro below.
+//</i>
+extern bool DebugFlag;
+
+<i>// DEBUG macro - This macro should be used by code to emit debug information.
+// If the '-debug' option is specified on the command line, and if this is a
+// debug build, then the code specified as the option to the macro will be
+// executed. Otherwise it will not be. Example:
+//
+// DOUT &lt;&lt; "Bitset contains: " &lt;&lt; Bitset &lt;&lt; "\n";
+//</i>
+<span class="doc_hilite">#ifdef NDEBUG
+#define DEBUG(X)
+#else
+#define DEBUG(X)</span> do { if (DebugFlag) { X; } } while (0)
+<span class="doc_hilite">#endif</span>
+</pre>
+</div>
+
+<p>This allows clients to blissfully use the <tt>DEBUG()</tt> macro, or the
+<tt>DebugFlag</tt> explicitly if they want to. Now we just need to be able to
+set the <tt>DebugFlag</tt> boolean when the option is set. To do this, we pass
+an additional argument to our command line argument processor, and we specify
+where to fill in with the <a href="#cl::location">cl::location</a>
+attribute:</p>
+
+<div class="doc_code">
+<pre>
+bool DebugFlag; <i>// the actual value</i>
+static <a href="#cl::opt">cl::opt</a>&lt;bool, true&gt; <i>// The parser</i>
+Debug("<i>debug</i>", <a href="#cl::desc">cl::desc</a>("<i>Enable debug output</i>"), <a href="#cl::Hidden">cl::Hidden</a>, <a href="#cl::location">cl::location</a>(DebugFlag));
+</pre>
+</div>
+
+<p>In the above example, we specify "<tt>true</tt>" as the second argument to
+the <tt><a href="#cl::opt">cl::opt</a></tt> template, indicating that the
+template should not maintain a copy of the value itself. In addition to this,
+we specify the <tt><a href="#cl::location">cl::location</a></tt> attribute, so
+that <tt>DebugFlag</tt> is automatically set.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="attributes">Option Attributes</a>
+</div>
+
+<div class="doc_text">
+
+<p>This section describes the basic attributes that you can specify on
+options.</p>
+
+<ul>
+
+<li>The option name attribute (which is required for all options, except <a
+href="#positional">positional options</a>) specifies what the option name is.
+This option is specified in simple double quotes:
+
+<pre>
+<a href="#cl::opt">cl::opt</a>&lt;<b>bool</b>&gt; Quiet("<i>quiet</i>");
+</pre>
+
+</li>
+
+<li><a name="cl::desc">The <b><tt>cl::desc</tt></b></a> attribute specifies a
+description for the option to be shown in the <tt>--help</tt> output for the
+program.</li>
+
+<li><a name="cl::value_desc">The <b><tt>cl::value_desc</tt></b></a> attribute
+specifies a string that can be used to fine tune the <tt>--help</tt> output for
+a command line option. Look <a href="#value_desc_example">here</a> for an
+example.</li>
+
+<li><a name="cl::init">The <b><tt>cl::init</tt></b></a> attribute specifies an
+initial value for a <a href="#cl::opt">scalar</a> option. If this attribute is
+not specified then the command line option value defaults to the value created
+by the default constructor for the type. <b>Warning</b>: If you specify both
+<b><tt>cl::init</tt></b> and <b><tt>cl::location</tt></b> for an option,
+you must specify <b><tt>cl::location</tt></b> first, so that when the
+command-line parser sees <b><tt>cl::init</tt></b>, it knows where to put the
+initial value. (You will get an error at runtime if you don't put them in
+the right order.)</li>
+
+<li><a name="cl::location">The <b><tt>cl::location</tt></b></a> attribute specifies where to
+store the value for a parsed command line option if using external storage. See
+the section on <a href="#storage">Internal vs External Storage</a> for more
+information.</li>
+
+<li><a name="cl::aliasopt">The <b><tt>cl::aliasopt</tt></b></a> attribute
+specifies which option a <tt><a href="#cl::alias">cl::alias</a></tt> option is
+an alias for.</li>
+
+<li><a name="cl::values">The <b><tt>cl::values</tt></b></a> attribute specifies
+the string-to-value mapping to be used by the generic parser. It takes a
+<b>clEnumValEnd terminated</b> list of (option, value, description) triplets
+that
+specify the option name, the value mapped to, and the description shown in the
+<tt>--help</tt> for the tool. Because the generic parser is used most
+frequently with enum values, two macros are often useful:
+
+<ol>
+
+<li><a name="clEnumVal">The <b><tt>clEnumVal</tt></b></a> macro is used as a
+nice simple way to specify a triplet for an enum. This macro automatically
+makes the option name be the same as the enum name. The first option to the
+macro is the enum, the second is the description for the command line
+option.</li>
+
+<li><a name="clEnumValN">The <b><tt>clEnumValN</tt></b></a> macro is used to
+specify macro options where the option name doesn't equal the enum name. For
+this macro, the first argument is the enum value, the second is the flag name,
+and the third is the description.</li>
+
+</ol>
+
+You will get a compile time error if you try to use cl::values with a parser
+that does not support it.</li>
+
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="modifiers">Option Modifiers</a>
+</div>
+
+<div class="doc_text">
+
+<p>Option modifiers are the flags and expressions that you pass into the
+constructors for <tt><a href="#cl::opt">cl::opt</a></tt> and <tt><a
+href="#cl::list">cl::list</a></tt>. These modifiers give you the ability to
+tweak how options are parsed and how <tt>--help</tt> output is generated to fit
+your application well.</p>
+
+<p>These options fall into five main categories:</p>
+
+<ol>
+<li><a href="#hiding">Hiding an option from <tt>--help</tt> output</a></li>
+<li><a href="#numoccurrences">Controlling the number of occurrences
+ required and allowed</a></li>
+<li><a href="#valrequired">Controlling whether or not a value must be
+ specified</a></li>
+<li><a href="#formatting">Controlling other formatting options</a></li>
+<li><a href="#misc">Miscellaneous option modifiers</a></li>
+</ol>
+
+<p>It is not possible to specify two options from the same category (you'll get
+a runtime error) to a single option, except for options in the miscellaneous
+category. The CommandLine library specifies defaults for all of these settings
+that are the most useful in practice and the most common, which means that you
+usually shouldn't have to worry about these.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="hiding">Hiding an option from <tt>--help</tt> output</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>cl::NotHidden</tt>, <tt>cl::Hidden</tt>, and
+<tt>cl::ReallyHidden</tt> modifiers are used to control whether or not an option
+appears in the <tt>--help</tt> and <tt>--help-hidden</tt> output for the
+compiled program:</p>
+
+<ul>
+
+<li><a name="cl::NotHidden">The <b><tt>cl::NotHidden</tt></b></a> modifier
+(which is the default for <tt><a href="#cl::opt">cl::opt</a></tt> and <tt><a
+href="#cl::list">cl::list</a></tt> options), indicates the option is to appear
+in both help listings.</li>
+
+<li><a name="cl::Hidden">The <b><tt>cl::Hidden</tt></b></a> modifier (which is the
+default for <tt><a href="#cl::alias">cl::alias</a></tt> options), indicates that
+the option should not appear in the <tt>--help</tt> output, but should appear in
+the <tt>--help-hidden</tt> output.</li>
+
+<li><a name="cl::ReallyHidden">The <b><tt>cl::ReallyHidden</tt></b></a> modifier,
+indicates that the option should not appear in any help output.</li>
+
+</ul>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="numoccurrences">Controlling the number of occurrences required and
+ allowed</a>
+</div>
+
+<div class="doc_text">
+
+<p>This group of options is used to control how many times an option is allowed
+(or required) to be specified on the command line of your program. Specifying a
+value for this setting allows the CommandLine library to do error checking for
+you.</p>
+
+<p>The allowed values for this option group are:</p>
+
+<ul>
+
+<li><a name="cl::Optional">The <b><tt>cl::Optional</tt></b></a> modifier (which
+is the default for the <tt><a href="#cl::opt">cl::opt</a></tt> and <tt><a
+href="#cl::alias">cl::alias</a></tt> classes) indicates that your program will
+allow either zero or one occurrence of the option to be specified.</li>
+
+<li><a name="cl::ZeroOrMore">The <b><tt>cl::ZeroOrMore</tt></b></a> modifier
+(which is the default for the <tt><a href="#cl::list">cl::list</a></tt> class)
+indicates that your program will allow the option to be specified zero or more
+times.</li>
+
+<li><a name="cl::Required">The <b><tt>cl::Required</tt></b></a> modifier
+indicates that the specified option must be specified exactly one time.</li>
+
+<li><a name="cl::OneOrMore">The <b><tt>cl::OneOrMore</tt></b></a> modifier
+indicates that the option must be specified at least one time.</li>
+
+<li>The <b><tt>cl::ConsumeAfter</tt></b> modifier is described in the <a
+href="#positional">Positional arguments section</a>.</li>
+
+</ul>
+
+<p>If an option is not specified, then the value of the option is equal to the
+value specified by the <tt><a href="#cl::init">cl::init</a></tt> attribute. If
+the <tt><a href="#cl::init">cl::init</a></tt> attribute is not specified, the
+option value is initialized with the default constructor for the data type.</p>
+
+<p>If an option is specified multiple times for an option of the <tt><a
+href="#cl::opt">cl::opt</a></tt> class, only the last value will be
+retained.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="valrequired">Controlling whether or not a value must be specified</a>
+</div>
+
+<div class="doc_text">
+
+<p>This group of options is used to control whether or not the option allows a
+value to be present. In the case of the CommandLine library, a value is either
+specified with an equal sign (e.g. '<tt>-index-depth=17</tt>') or as a trailing
+string (e.g. '<tt>-o a.out</tt>').</p>
+
+<p>The allowed values for this option group are:</p>
+
+<ul>
+
+<li><a name="cl::ValueOptional">The <b><tt>cl::ValueOptional</tt></b></a> modifier
+(which is the default for <tt>bool</tt> typed options) specifies that it is
+acceptable to have a value, or not. A boolean argument can be enabled just by
+appearing on the command line, or it can have an explicit '<tt>-foo=true</tt>'.
+If an option is specified with this mode, it is illegal for the value to be
+provided without the equal sign. Therefore '<tt>-foo true</tt>' is illegal. To
+get this behavior, you must use the <a
+href="#cl::ValueRequired">cl::ValueRequired</a> modifier.</li>
+
+<li><a name="cl::ValueRequired">The <b><tt>cl::ValueRequired</tt></b></a> modifier
+(which is the default for all other types except for <a
+href="#onealternative">unnamed alternatives using the generic parser</a>)
+specifies that a value must be provided. This mode informs the command line
+library that if an option is not provided with an equal sign, then the next
+argument provided must be the value. This allows things like '<tt>-o
+a.out</tt>' to work.</li>
+
+<li><a name="cl::ValueDisallowed">The <b><tt>cl::ValueDisallowed</tt></b></a>
+modifier (which is the default for <a href="#onealternative">unnamed
+alternatives using the generic parser</a>) indicates that it is a runtime error
+for the user to specify a value. This can be provided to disallow users from
+providing options to boolean options (like '<tt>-foo=true</tt>').</li>
+
+</ul>
+
+<p>In general, the default values for this option group work just like you would
+want them to. As mentioned above, you can specify the <a
+href="#cl::ValueDisallowed">cl::ValueDisallowed</a> modifier to a boolean
+argument to restrict your command line parser. These options are mostly useful
+when <a href="#extensionguide">extending the library</a>.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="formatting">Controlling other formatting options</a>
+</div>
+
+<div class="doc_text">
+
+<p>The formatting option group is used to specify that the command line option
+has special abilities and is otherwise different from other command line
+arguments. As usual, you can only specify at most one of these arguments.</p>
+
+<ul>
+
+<li><a name="cl::NormalFormatting">The <b><tt>cl::NormalFormatting</tt></b></a>
+modifier (which is the default for all options) specifies that this option is
+"normal".</li>
+
+<li><a name="cl::Positional">The <b><tt>cl::Positional</tt></b></a> modifier
+specifies that this is a positional argument, that does not have a command line
+option associated with it. See the <a href="#positional">Positional
+Arguments</a> section for more information.</li>
+
+<li>The <b><a href="#cl::ConsumeAfter"><tt>cl::ConsumeAfter</tt></a></b> modifier
+specifies that this option is used to capture "interpreter style" arguments. See <a href="#cl::ConsumeAfter">this section for more information</a>.</li>
+
+<li><a name="cl::Prefix">The <b><tt>cl::Prefix</tt></b></a> modifier specifies
+that this option prefixes its value. With 'Prefix' options, the equal sign does
+not separate the value from the option name specified. Instead, the value is
+everything after the prefix, including any equal sign if present. This is useful
+for processing odd arguments like <tt>-lmalloc</tt> and <tt>-L/usr/lib</tt> in a
+linker tool or <tt>-DNAME=value</tt> in a compiler tool. Here, the
+'<tt>l</tt>', '<tt>D</tt>' and '<tt>L</tt>' options are normal string (or list)
+options, that have the <b><tt><a href="#cl::Prefix">cl::Prefix</a></tt></b>
+modifier added to allow the CommandLine library to recognize them. Note that
+<b><tt><a href="#cl::Prefix">cl::Prefix</a></tt></b> options must not have the
+<b><tt><a href="#cl::ValueDisallowed">cl::ValueDisallowed</a></tt></b> modifier
+specified.</li>
+
+<li><a name="cl::Grouping">The <b><tt>cl::Grouping</tt></b></a> modifier is used
+to implement unix style tools (like <tt>ls</tt>) that have lots of single letter
+arguments, but only require a single dash. For example, the '<tt>ls -labF</tt>'
+command actually enables four different options, all of which are single
+letters. Note that <b><tt><a href="#cl::Grouping">cl::Grouping</a></tt></b>
+options cannot have values.</li>
+
+</ul>
+
+<p>The CommandLine library does not restrict how you use the <b><tt><a
+href="#cl::Prefix">cl::Prefix</a></tt></b> or <b><tt><a
+href="#cl::Grouping">cl::Grouping</a></tt></b> modifiers, but it is possible to
+specify ambiguous argument settings. Thus, it is possible to have multiple
+letter options that are prefix or grouping options, and they will still work as
+designed.</p>
+
+<p>To do this, the CommandLine library uses a greedy algorithm to parse the
+input option into (potentially multiple) prefix and grouping options. The
+strategy basically looks like this:</p>
+
+<div class="doc_code"><tt>parse(string OrigInput) {</tt>
+
+<ol>
+<li><tt>string input = OrigInput;</tt>
+<li><tt>if (isOption(input)) return getOption(input).parse();</tt>&nbsp;&nbsp;&nbsp;&nbsp;<i>// Normal option</i>
+<li><tt>while (!isOption(input) &amp;&amp; !input.empty()) input.pop_back();</tt>&nbsp;&nbsp;&nbsp;&nbsp;<i>// Remove the last letter</i>
+<li><tt>if (input.empty()) return error();</tt>&nbsp;&nbsp;&nbsp;&nbsp;<i>// No matching option</i>
+<li><tt>if (getOption(input).isPrefix())<br>
+&nbsp;&nbsp;return getOption(input).parse(input);</tt>
+<li><tt>while (!input.empty()) {&nbsp;&nbsp;&nbsp;&nbsp;<i>// Must be grouping options</i><br>
+&nbsp;&nbsp;getOption(input).parse();<br>
+&nbsp;&nbsp;OrigInput.erase(OrigInput.begin(), OrigInput.begin()+input.length());<br>
+&nbsp;&nbsp;input = OrigInput;<br>
+&nbsp;&nbsp;while (!isOption(input) &amp;&amp; !input.empty()) input.pop_back();<br>
+}</tt>
+<li><tt>if (!OrigInput.empty()) error();</tt></li>
+</ol>
+
+<p><tt>}</tt></p>
+</div>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="misc">Miscellaneous option modifiers</a>
+</div>
+
+<div class="doc_text">
+
+<p>The miscellaneous option modifiers are the only flags where you can specify
+more than one flag from the set: they are not mutually exclusive. These flags
+specify boolean properties that modify the option.</p>
+
+<ul>
+
+<li><a name="cl::CommaSeparated">The <b><tt>cl::CommaSeparated</tt></b></a> modifier
+indicates that any commas specified for an option's value should be used to
+split the value up into multiple values for the option. For example, these two
+options are equivalent when <tt>cl::CommaSeparated</tt> is specified:
+"<tt>-foo=a -foo=b -foo=c</tt>" and "<tt>-foo=a,b,c</tt>". This option only
+makes sense to be used in a case where the option is allowed to accept one or
+more values (i.e. it is a <a href="#cl::list">cl::list</a> option).</li>
+
+<li><a name="cl::PositionalEatsArgs">The
+<b><tt>cl::PositionalEatsArgs</tt></b></a> modifier (which only applies to
+positional arguments, and only makes sense for lists) indicates that the positional
+argument should consume any strings after it (including strings that start with
+a "-") up until another recognized positional argument. For example, if you
+have two "eating" positional arguments "<tt>pos1</tt>" and "<tt>pos2</tt>", the
+string "<tt>-pos1 -foo -bar baz -pos2 -bork</tt>" would cause the "<tt>-foo -bar
+baz</tt>" strings to be applied to the "<tt>-pos1</tt>" option and the
+"<tt>-bork</tt>" string to be applied to the "<tt>-pos2</tt>" option.</li>
+
+</ul>
+
+<p>So far, these are the only two miscellaneous option modifiers.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="toplevel">Top-Level Classes and Functions</a>
+</div>
+
+<div class="doc_text">
+
+<p>Despite all of the built-in flexibility, the CommandLine option library
+really only consists of one function (<a
+href="#cl::ParseCommandLineOptions"><tt>cl::ParseCommandLineOptions</tt></a>)
+and three main classes: <a href="#cl::opt"><tt>cl::opt</tt></a>, <a
+href="#cl::list"><tt>cl::list</tt></a>, and <a
+href="#cl::alias"><tt>cl::alias</tt></a>. This section describes these three
+classes in detail.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="cl::ParseCommandLineOptions">The <tt>cl::ParseCommandLineOptions</tt>
+ function</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>cl::ParseCommandLineOptions</tt> function is designed to be called
+directly from <tt>main</tt>, and is used to fill in the values of all of the
+command line option variables once <tt>argc</tt> and <tt>argv</tt> are
+available.</p>
+
+<p>The <tt>cl::ParseCommandLineOptions</tt> function requires two parameters
+(<tt>argc</tt> and <tt>argv</tt>), but may also take an optional third parameter
+which holds <a href="#description">additional extra text</a> to emit when the
+<tt>--help</tt> option is invoked.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="cl::ParseEnvironmentOptions">The <tt>cl::ParseEnvironmentOptions</tt>
+ function</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>cl::ParseEnvironmentOptions</tt> function has mostly the same effects
+as <a
+href="#cl::ParseCommandLineOptions"><tt>cl::ParseCommandLineOptions</tt></a>,
+except that it is designed to take values for options from an environment
+variable, for those cases in which reading the command line is not convenient or
+not desired. It fills in the values of all the command line option variables
+just like <a
+href="#cl::ParseCommandLineOptions"><tt>cl::ParseCommandLineOptions</tt></a>
+does.</p>
+
+<p>It takes three parameters: first, the name of the program (since
+<tt>argv</tt> may not be available, it can't just look in <tt>argv[0]</tt>),
+second, the name of the environment variable to examine, and third, the optional
+<a href="#description">additional extra text</a> to emit when the
+<tt>--help</tt> option is invoked.</p>
+
+<p><tt>cl::ParseEnvironmentOptions</tt> will break the environment
+variable's value up into words and then process them using
+<a href="#cl::ParseCommandLineOptions"><tt>cl::ParseCommandLineOptions</tt></a>.
+<b>Note:</b> Currently <tt>cl::ParseEnvironmentOptions</tt> does not support
+quoting, so an environment variable containing <tt>-option "foo bar"</tt> will
+be parsed as three words, <tt>-option</tt>, <tt>"foo</tt>, and <tt>bar"</tt>,
+which is different from what you would get from the shell with the same
+input.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="cl::SetVersionPrinter">The <tt>cl::SetVersionPrinter</tt>
+ function</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>cl::SetVersionPrinter</tt> function is designed to be called
+directly from <tt>main</tt>, and <i>before</i>
+<tt>cl::ParseCommandLineOptions</tt>. Its use is optional. It simply arranges
+for a function to be called in response to the <tt>--version</tt> option instead
+of having the <tt>CommandLine</tt> library print out the usual version string
+for LLVM. This is useful for programs that are not part of LLVM but wish to use
+the <tt>CommandLine</tt> facilities. Such programs should just define a small
+function that takes no arguments and returns <tt>void</tt> and that prints out
+whatever version information is appropriate for the program. Pass the address
+of that function to <tt>cl::SetVersionPrinter</tt> to arrange for it to be
+called when the <tt>--version</tt> option is given by the user.</p>
+
+</div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="cl::opt">The <tt>cl::opt</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>cl::opt</tt> class is the class used to represent scalar command line
+options, and is the one used most of the time. It is a templated class which
+can take up to three arguments (all except for the first have default values
+though):</p>
+
+<div class="doc_code"><pre>
+<b>namespace</b> cl {
+ <b>template</b> &lt;<b>class</b> DataType, <b>bool</b> ExternalStorage = <b>false</b>,
+ <b>class</b> ParserClass = parser&lt;DataType&gt; &gt;
+ <b>class</b> opt;
+}
+</pre></div>
+
+<p>The first template argument specifies what underlying data type the command
+line argument is, and is used to select a default parser implementation. The
+second template argument is used to specify whether the option should contain
+the storage for the option (the default) or whether external storage should be
+used to contain the value parsed for the option (see <a href="#storage">Internal
+vs External Storage</a> for more information).</p>
+
+<p>The third template argument specifies which parser to use. The default value
+selects an instantiation of the <tt>parser</tt> class based on the underlying
+data type of the option. In general, this default works well for most
+applications, so this option is only used when using a <a
+href="#customparser">custom parser</a>.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="cl::list">The <tt>cl::list</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>cl::list</tt> class is the class used to represent a list of command
+line options. It too is a templated class which can take up to three
+arguments:</p>
+
+<div class="doc_code"><pre>
+<b>namespace</b> cl {
+ <b>template</b> &lt;<b>class</b> DataType, <b>class</b> Storage = <b>bool</b>,
+ <b>class</b> ParserClass = parser&lt;DataType&gt; &gt;
+ <b>class</b> list;
+}
+</pre></div>
+
+<p>This class works the exact same as the <a
+href="#cl::opt"><tt>cl::opt</tt></a> class, except that the second argument is
+the <b>type</b> of the external storage, not a boolean value. For this class,
+the marker type '<tt>bool</tt>' is used to indicate that internal storage should
+be used.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="cl::bits">The <tt>cl::bits</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>cl::bits</tt> class is the class used to represent a list of command
+line options in the form of a bit vector. It is also a templated class which
+can take up to three arguments:</p>
+
+<div class="doc_code"><pre>
+<b>namespace</b> cl {
+ <b>template</b> &lt;<b>class</b> DataType, <b>class</b> Storage = <b>bool</b>,
+ <b>class</b> ParserClass = parser&lt;DataType&gt; &gt;
+ <b>class</b> bits;
+}
+</pre></div>
+
+<p>This class works the exact same as the <a
+href="#cl::list"><tt>cl::list</tt></a> class, except that the second argument
+must be of <b>type</b> <tt>unsigned</tt> if external storage is used.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="cl::alias">The <tt>cl::alias</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>cl::alias</tt> class is a nontemplated class that is used to form
+aliases for other arguments.</p>
+
+<div class="doc_code"><pre>
+<b>namespace</b> cl {
+ <b>class</b> alias;
+}
+</pre></div>
+
+<p>The <a href="#cl::aliasopt"><tt>cl::aliasopt</tt></a> attribute should be
+used to specify which option this is an alias for. Alias arguments default to
+being <a href="#cl::Hidden">Hidden</a>, and use the aliased option's parser to do
+the conversion from string to data.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="cl::extrahelp">The <tt>cl::extrahelp</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>cl::extrahelp</tt> class is a nontemplated class that allows extra
+help text to be printed out for the <tt>--help</tt> option.</p>
+
+<div class="doc_code"><pre>
+<b>namespace</b> cl {
+ <b>struct</b> extrahelp;
+}
+</pre></div>
+
+<p>To use the extrahelp, simply construct one with a <tt>const char*</tt>
+parameter to the constructor. The text passed to the constructor will be printed
+at the bottom of the help message, verbatim. Note that multiple
+<tt>cl::extrahelp</tt> <b>can</b> be used, but this practice is discouraged. If
+your tool needs to print additional help information, put all that help into a
+single <tt>cl::extrahelp</tt> instance.</p>
+<p>For example:</p>
+<div class="doc_code"><pre>
+ cl::extrahelp("\nADDITIONAL HELP:\n\n This is the extra help\n");
+</pre></div>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="builtinparsers">Builtin parsers</a>
+</div>
+
+<div class="doc_text">
+
+<p>Parsers control how the string value taken from the command line is
+translated into a typed value, suitable for use in a C++ program. By default,
+the CommandLine library uses an instance of <tt>parser&lt;type&gt;</tt> if the
+command line option specifies that it uses values of type '<tt>type</tt>'.
+Because of this, custom option processing is specified with specializations of
+the '<tt>parser</tt>' class.</p>
+
+<p>The CommandLine library provides the following builtin parser
+specializations, which are sufficient for most applications. It can, however,
+also be extended to work with new data types and new ways of interpreting the
+same data. See <a href="#customparser">Writing a Custom Parser</a> for more
+details on this type of library extension.</p>
+
+<ul>
+
+<li><a name="genericparser">The <b>generic <tt>parser&lt;t&gt;</tt> parser</b></a>
+can be used to map string values to any data type, through the use of the <a
+href="#cl::values">cl::values</a> property, which specifies the mapping
+information. The most common use of this parser is for parsing enum values,
+which allows you to use the CommandLine library for all of the error checking to
+make sure that only valid enum values are specified (as opposed to accepting
+arbitrary strings). Despite this, however, the generic parser class can be used
+for any data type.</li>
+
+<li><a name="boolparser">The <b><tt>parser&lt;bool&gt;</tt> specialization</b></a>
+is used to convert boolean strings to a boolean value. Currently accepted
+strings are "<tt>true</tt>", "<tt>TRUE</tt>", "<tt>True</tt>", "<tt>1</tt>",
+"<tt>false</tt>", "<tt>FALSE</tt>", "<tt>False</tt>", and "<tt>0</tt>".</li>
+
+<li><a name="boolOrDefaultparser">The <b><tt>parser&lt;boolOrDefault&gt;</tt>
+ specialization</b></a> is used for cases where the value is boolean,
+but we also need to know whether the option was specified at all. boolOrDefault
+is an enum with 3 values, BOU_UNSET, BOU_TRUE and BOU_FALSE. This parser accepts
+the same strings as <b><tt>parser&lt;bool&gt;</tt></b>.</li>
+
+<li><a name="stringparser">The <b><tt>parser&lt;string&gt;</tt>
+specialization</b></a> simply stores the parsed string into the string value
+specified. No conversion or modification of the data is performed.</li>
+
+<li><a name="intparser">The <b><tt>parser&lt;int&gt;</tt> specialization</b></a>
+uses the C <tt>strtol</tt> function to parse the string input. As such, it will
+accept a decimal number (with an optional '+' or '-' prefix) which must start
+with a non-zero digit. It accepts octal numbers, which are identified with a
+'<tt>0</tt>' prefix digit, and hexadecimal numbers with a prefix of
+'<tt>0x</tt>' or '<tt>0X</tt>'.</li>
+
+<li><a name="doubleparser">The <b><tt>parser&lt;double&gt;</tt></b></a> and
+<b><tt>parser&lt;float&gt;</tt> specializations</b> use the standard C
+<tt>strtod</tt> function to convert floating point strings into floating point
+values. As such, a broad range of string formats is supported, including
+exponential notation (ex: <tt>1.7e15</tt>), and locales are properly supported.
+</li>
+
+</ul>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="extensionguide">Extension Guide</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>Although the CommandLine library has a lot of functionality built into it
+already (as discussed previously), one of its true strengths lies in its
+extensibility. This section discusses how the CommandLine library works under
+the covers and illustrates how to do some simple, common, extensions.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="customparser">Writing a custom parser</a>
+</div>
+
+<div class="doc_text">
+
+<p>One of the simplest and most common extensions is the use of a custom parser.
+As <a href="#builtinparsers">discussed previously</a>, parsers are the portion
+of the CommandLine library that turns string input from the user into a
+particular parsed data type, validating the input in the process.</p>
+
+<p>There are two ways to use a new parser:</p>
+
+<ol>
+
+<li>
+
+<p>Specialize the <a href="#genericparser"><tt>cl::parser</tt></a> template for
+your custom data type.</p>
+
+<p>This approach has the advantage that users of your custom data type will
+automatically use your custom parser whenever they define an option with a value
+type of your data type. The disadvantage of this approach is that it doesn't
+work if your fundamental data type is something that is already supported.</p>
+
+</li>
+
+<li>
+
+<p>Write an independent class, using it explicitly from options that need
+it.</p>
+
+<p>This approach works well in situations where you would like to parse an
+option using special syntax for a not-very-special data-type. The drawback of
+this approach is that users of your parser have to be aware that they are using
+your parser, instead of the builtin ones.</p>
+
+</li>
+
+</ol>
+
+<p>To guide the discussion, we will discuss a custom parser that accepts file
+sizes, specified with an optional unit after the numeric size. For example, we
+would like to parse "102kb", "41M", "1G" into the appropriate integer value. In
+this case, the underlying data type we want to parse into is
+'<tt>unsigned</tt>'. We choose approach #2 above because we don't want to make
+this the default for all <tt>unsigned</tt> options.</p>
+
+<p>To start out, we declare our new <tt>FileSizeParser</tt> class:</p>
+
+<div class="doc_code"><pre>
+<b>struct</b> FileSizeParser : <b>public</b> cl::basic_parser&lt;<b>unsigned</b>&gt; {
+ <i>// parse - Return true on error.</i>
+ <b>bool</b> parse(cl::Option &amp;O, <b>const char</b> *ArgName, <b>const</b> std::string &amp;ArgValue,
+ <b>unsigned</b> &amp;Val);
+};
+</pre></div>
+
+<p>Our new class inherits from the <tt>cl::basic_parser</tt> template class to
+fill in the default, boiler plate, code for us. We give it the data type that
+we parse into (the last argument to the <tt>parse</tt> method) so that clients of
+our custom parser know what object type to pass in to the parse method (here we
+declare that we parse into '<tt>unsigned</tt>' variables).</p>
+
+<p>For most purposes, the only method that must be implemented in a custom
+parser is the <tt>parse</tt> method. The <tt>parse</tt> method is called
+whenever the option is invoked, passing in the option itself, the option name,
+the string to parse, and a reference to a return value. If the string to parse
+is not well formed, the parser should output an error message and return true.
+Otherwise it should return false and set '<tt>Val</tt>' to the parsed value. In
+our example, we implement <tt>parse</tt> as:</p>
+
+<div class="doc_code"><pre>
+<b>bool</b> FileSizeParser::parse(cl::Option &amp;O, <b>const char</b> *ArgName,
+ <b>const</b> std::string &amp;Arg, <b>unsigned</b> &amp;Val) {
+ <b>const char</b> *ArgStart = Arg.c_str();
+ <b>char</b> *End;
+
+ <i>// Parse integer part, leaving 'End' pointing to the first non-integer char</i>
+ Val = (unsigned)strtol(ArgStart, &amp;End, 0);
+
+ <b>while</b> (1) {
+ <b>switch</b> (*End++) {
+ <b>case</b> 0: <b>return</b> false; <i>// No error</i>
+ <b>case</b> 'i': <i>// Ignore the 'i' in KiB if people use that</i>
+ <b>case</b> 'b': <b>case</b> 'B': <i>// Ignore B suffix</i>
+ <b>break</b>;
+
+ <b>case</b> 'g': <b>case</b> 'G': Val *= 1024*1024*1024; <b>break</b>;
+ <b>case</b> 'm': <b>case</b> 'M': Val *= 1024*1024; <b>break</b>;
+ <b>case</b> 'k': <b>case</b> 'K': Val *= 1024; <b>break</b>;
+
+ default:
+ <i>// Print an error message if unrecognized character!</i>
+ <b>return</b> O.error(": '" + Arg + "' value invalid for file size argument!");
+ }
+ }
+}
+</pre></div>
+
+<p>This function implements a very simple parser for the kinds of strings we are
+interested in. Although it has some holes (it allows "<tt>123KKK</tt>" for
+example), it is good enough for this example. Note that we use the option
+itself to print out the error message (the <tt>error</tt> method always returns
+true) in order to get a nice error message (shown below). Now that we have our
+parser class, we can use it like this:</p>
+
+<div class="doc_code"><pre>
+<b>static</b> <a href="#cl::opt">cl::opt</a>&lt;<b>unsigned</b>, <b>false</b>, FileSizeParser&gt;
+MFS(<i>"max-file-size"</i>, <a href="#cl::desc">cl::desc</a>(<i>"Maximum file size to accept"</i>),
+ <a href="#cl::value_desc">cl::value_desc</a>("<i>size</i>"));
+</pre></div>
+
+<p>Which adds this to the output of our program:</p>
+
+<div class="doc_code"><pre>
+OPTIONS:
+ -help - display available options (--help-hidden for more)
+ ...
+ <b>-max-file-size=&lt;size&gt; - Maximum file size to accept</b>
+</pre></div>
+
+<p>And we can test that our parser works correctly now (the test program just
+prints out the max-file-size argument value):</p>
+
+<div class="doc_code"><pre>
+$ ./test
+MFS: 0
+$ ./test -max-file-size=123MB
+MFS: 128974848
+$ ./test -max-file-size=3G
+MFS: 3221225472
+$ ./test -max-file-size=dog
+-max-file-size option: 'dog' value invalid for file size argument!
+</pre></div>
+
+<p>It looks like it works. The error message that we get is nice and helpful,
+and we seem to accept reasonable file sizes. This wraps up the "custom parser"
+tutorial.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="explotingexternal">Exploiting external storage</a>
+</div>
+
+<div class="doc_text">
+ <p>Several of the LLVM libraries define static <tt>cl::opt</tt> instances that
+ will automatically be included in any program that links with that library.
+ This is a feature. However, sometimes it is necessary to know the value of the
+ command line option outside of the library. In these cases the library does or
+ should provide an external storage location that is accessible to users of the
+ library. Examples of this include the <tt>llvm::DebugFlag</tt> exported by the
+ <tt>lib/Support/Debug.cpp</tt> file and the <tt>llvm::TimePassesIsEnabled</tt>
+ flag exported by the <tt>lib/VMCore/Pass.cpp</tt> file.</p>
+
+<p>TODO: complete this section</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="dynamicopts">Dynamically adding command line options</a>
+</div>
+
+<div class="doc_text">
+
+<p>TODO: fill in this section</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+ <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
+ <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+
+</body>
+</html>
diff --git a/docs/CompilerDriver.html b/docs/CompilerDriver.html
new file mode 100644
index 0000000..253f471
--- /dev/null
+++ b/docs/CompilerDriver.html
@@ -0,0 +1,823 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+ <title>The LLVM Compiler Driver (llvmc)</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+ <meta name="author" content="Reid Spencer">
+ <meta name="description"
+ content="A description of the use and design of the LLVM Compiler Driver.">
+</head>
+<body>
+<div class="doc_title">The LLVM Compiler Driver (llvmc)</div>
+<p class="doc_warning">NOTE: This document is a work in progress!</p>
+<ol>
+ <li><a href="#abstract">Abstract</a></li>
+ <li><a href="#introduction">Introduction</a>
+ <ol>
+ <li><a href="#purpose">Purpose</a></li>
+ <li><a href="#operation">Operation</a></li>
+ <li><a href="#phases">Phases</a></li>
+ <li><a href="#actions">Actions</a></li>
+ </ol>
+ </li>
+ <li><a href="#configuration">Configuration</a>
+ <ol>
+ <li><a href="#overview">Overview</a></li>
+ <li><a href="#filetypes">Configuration Files</a></li>
+ <li><a href="#syntax">Syntax</a></li>
+ <li><a href="#substitutions">Substitutions</a></li>
+ <li><a href="#sample">Sample Config File</a></li>
+ </ol>
+ <li><a href="#glossary">Glossary</a>
+</ol>
+<div class="doc_author">
+<p>Written by <a href="mailto:rspencer@x10sys.com">Reid Spencer</a>
+</p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"> <a name="abstract">Abstract</a></div>
+<!-- *********************************************************************** -->
+<div class="doc_text">
+ <p>This document describes the requirements, design, and configuration of the
+ LLVM compiler driver, <tt>llvmc</tt>. The compiler driver knows about LLVM's
+ tool set and can be configured to know about a variety of compilers for
+ source languages. It uses this knowledge to execute the tools necessary
+ to accomplish general compilation, optimization, and linking tasks. The main
+ purpose of <tt>llvmc</tt> is to provide a simple and consistent interface to
+ all compilation tasks. This reduces the burden on the end user who can just
+ learn to use <tt>llvmc</tt> instead of the entire LLVM tool set and all the
+ source language compilers compatible with LLVM.</p>
+</div>
+<!-- *********************************************************************** -->
+<div class="doc_section"> <a name="introduction">Introduction</a></div>
+<!-- *********************************************************************** -->
+<div class="doc_text">
+ <p>The <tt>llvmc</tt> <a href="#def_tool">tool</a> is a configurable compiler
+ <a href="#def_driver">driver</a>. As such, it isn't a compiler, optimizer,
+ or a linker itself but it drives (invokes) other software that perform those
+ tasks. If you are familiar with the GNU Compiler Collection's <tt>gcc</tt>
+ tool, <tt>llvmc</tt> is very similar.</p>
+ <p>The following introductory sections will help you understand why this tool
+ is necessary and what it does.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="purpose">Purpose</a></div>
+<div class="doc_text">
+ <p><tt>llvmc</tt> was invented to make compilation of user programs with
+ LLVM-based tools easier. To accomplish this, <tt>llvmc</tt> strives to:</p>
+ <ul>
+ <li>Be the single point of access to most of the LLVM tool set.</li>
+ <li>Hide the complexities of the LLVM tools through a single interface.</li>
+ <li>Provide a consistent interface for compiling all languages.</li>
+ </ul>
+ <p>Additionally, <tt>llvmc</tt> makes it easier to write a compiler for use
+ with LLVM, because it:</p>
+ <ul>
+ <li>Makes integration of existing non-LLVM tools simple.</li>
+ <li>Extends the capabilities of minimal compiler tools by optimizing their
+ output.</li>
+ <li>Reduces the number of interfaces a compiler writer must know about
+ before a working compiler can be completed (essentially only the VMCore
+ interfaces need to be understood).</li>
+ <li>Supports source language translator invocation via both dynamically
+ loadable shared objects and invocation of an executable.</li>
+ </ul>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="operation">Operation</a></div>
+<div class="doc_text">
+ <p>At a high level, <tt>llvmc</tt> operation is very simple. The basic action
+ taken by <tt>llvmc</tt> is to simply invoke some tool or set of tools to fill
+ the user's request for compilation. Every execution of <tt>llvmc</tt> takes the
+ following sequence of steps:</p>
+ <dl>
+ <dt><b>Collect Command Line Options</b></dt>
+ <dd>The command line options provide the marching orders to <tt>llvmc</tt>
+ on what actions it should perform. This is the request the user is making
+ of <tt>llvmc</tt> and it is interpreted first. See the <tt>llvmc</tt>
+ <a href="CommandGuide/html/llvmc.html">manual page</a> for details on the
+ options.</dd>
+ <dt><b>Read Configuration Files</b></dt>
+ <dd>Based on the options and the suffixes of the filenames presented, a set
+ of configuration files are read to configure the actions <tt>llvmc</tt> will
+ take. Configuration files are provided by either LLVM or the
+ compiler tools that <tt>llvmc</tt> invokes. These files determine what
+ actions <tt>llvmc</tt> will take in response to the user's request. See
+ the section on <a href="#configuration">configuration</a> for more details.
+ </dd>
+ <dt><b>Determine Phases To Execute</b></dt>
+ <dd>Based on the command line options and configuration files,
+ <tt>llvmc</tt> determines the compilation <a href="#phases">phases</a> that
+ must be executed to fulfill the user's request. This is the primary work of
+ <tt>llvmc</tt>.</dd>
+ <dt><b>Determine Actions To Execute</b></dt>
+ <dd>Each <a href="#phases">phase</a> to be executed can result in the
+ invocation of one or more <a href="#actions">actions</a>. An action is
+ either a whole program or a function in a dynamically linked shared library.
+ In this step, <tt>llvmc</tt> determines the sequence of actions that must be
+ executed. Actions will always be executed in a deterministic order.</dd>
+ <dt><b>Execute Actions</b></dt>
+ <dd>The <a href="#actions">actions</a> necessary to support the user's
+ original request are executed sequentially and deterministically. All
+ actions result in either the invocation of a whole program to perform the
+ action or the loading of a dynamically linkable shared library and invocation
+ of a standard interface function within that library.</dd>
+ <dt><b>Termination</b></dt>
+ <dd>If any action fails (returns a non-zero result code), <tt>llvmc</tt>
+ also fails and returns the result code from the failing action. If
+ everything succeeds, <tt>llvmc</tt> will return a zero result code.</dd>
+ </dl>
+ <p><tt>llvmc</tt>'s operation must be simple, regular and predictable.
+ Developers need to be able to rely on it to take a consistent approach to
+ compilation. For example, the invocation:</p>
+ <code>
+ llvmc -O2 x.c y.c z.c -o xyz</code>
+ <p>must produce <i>exactly</i> the same results as:</p>
+ <pre><tt>
+ llvmc -O2 x.c -o x.o
+ llvmc -O2 y.c -o y.o
+ llvmc -O2 z.c -o z.o
+ llvmc -O2 x.o y.o z.o -o xyz</tt></pre>
+ <p>To accomplish this, <tt>llvmc</tt> uses a very simple goal oriented
+ procedure to do its work. The overall goal is to produce a functioning
+ executable. To accomplish this, <tt>llvmc</tt> always attempts to execute a
+ series of compilation <a href="#def_phase">phases</a> in the same sequence.
+ However, the user's options to <tt>llvmc</tt> can cause the sequence of phases
+ to start in the middle or finish early.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="phases"></a>Phases </div>
+<div class="doc_text">
+ <p><tt>llvmc</tt> breaks every compilation task into the following four
+ distinct phases:</p>
+ <dl><dt><b>Preprocessing</b></dt><dd>Not all languages support preprocessing;
+ but for those that do, this phase can be invoked. This phase is for
+ languages that provide for combining, filtering, or otherwise altering the
+ source language input before the translator parses it. Although C and C++
+ are the most common users of this phase, other languages may provide their
+ own preprocessor (whether it's the C pre-processor or not).</dd>
+ </dl>
+ <dl><dt><b>Translation</b></dt><dd>The translation phase converts the source
+ language input into something that LLVM can interpret and use for
+ downstream phases. The translation is essentially from "non-LLVM form" to
+ "LLVM form".</dd>
+ </dl>
+ <dl><dt><b>Optimization</b></dt><dd>Once an LLVM Module has been obtained from
+ the translation phase, the program enters the optimization phase. This phase
+ attempts to optimize all of the input provided on the command line according
+ to the options provided.</dd>
+ </dl>
+ <dl><dt><b>Linking</b></dt><dd>The inputs are combined to form a complete
+ program.</dd>
+ </dl>
+ <p>The following table shows the inputs, outputs, and command line options
+ applicable to each phase.</p>
+ <table>
+ <tr>
+ <th style="width: 10%">Phase</th>
+ <th style="width: 25%">Inputs</th>
+ <th style="width: 25%">Outputs</th>
+ <th style="width: 40%">Options</th>
+ </tr>
+ <tr><td><b>Preprocessing</b></td>
+ <td class="td_left"><ul><li>Source Language File</li></ul></td>
+ <td class="td_left"><ul><li>Source Language File</li></ul></td>
+ <td class="td_left"><dl>
+ <dt><tt>-E</tt></dt>
+ <dd>Stops the compilation after preprocessing</dd>
+ </dl></td>
+ </tr>
+ <tr>
+ <td><b>Translation</b></td>
+ <td class="td_left"><ul>
+ <li>Source Language File</li>
+ </ul></td>
+ <td class="td_left"><ul>
+ <li>LLVM Assembly</li>
+ <li>LLVM Bitcode</li>
+ <li>LLVM C++ IR</li>
+ </ul></td>
+ <td class="td_left"><dl>
+ <dt><tt>-c</tt></dt>
+ <dd>Stops the compilation after translation so that optimization and
+ linking are not done.</dd>
+ <dt><tt>-S</tt></dt>
+ <dd>Stops the compilation before object code is written so that only
+ assembly code remains.</dd>
+ </dl></td>
+ </tr>
+ <tr>
+ <td><b>Optimization</b></td>
+ <td class="td_left"><ul>
+ <li>LLVM Assembly</li>
+ <li>LLVM Bitcode</li>
+ </ul></td>
+ <td class="td_left"><ul>
+ <li>LLVM Bitcode</li>
+ </ul></td>
+ <td class="td_left"><dl>
+ <dt><tt>-Ox</tt></dt>
+ <dd>This group of options controls the amount of optimization
+ performed.</dd>
+ </dl></td>
+ </tr>
+ <tr>
+ <td><b>Linking</b></td>
+ <td class="td_left"><ul>
+ <li>LLVM Bitcode</li>
+ <li>Native Object Code</li>
+ <li>LLVM Library</li>
+ <li>Native Library</li>
+ </ul></td>
+ <td class="td_left"><ul>
+ <li>LLVM Bitcode Executable</li>
+ <li>Native Executable</li>
+ </ul></td>
+ <td class="td_left"><dl>
+ <dt><tt>-L</tt></dt><dd>Specifies a path for library search.</dd>
+ <dt><tt>-l</tt></dt><dd>Specifies a library to link in.</dd>
+ </dl></td>
+ </tr>
+ </table>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="actions"></a>Actions</div>
+<div class="doc_text">
+ <p>An action, with regard to <tt>llvmc</tt>, is a basic operation that it takes
+ in order to fulfill the user's request. Each phase of compilation will invoke
+ zero or more actions in order to accomplish that phase.</p>
+ <p>Actions come in two forms:</p>
+ <ul>
+ <li>Invokable Executables</li>
+ <li>Functions in a shared library</li>
+ </ul>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="configuration">Configuration</a></div>
+<!-- *********************************************************************** -->
+<div class="doc_text">
+ <p>This section of the document describes the configuration files used by
+ <tt>llvmc</tt>. Configuration information is relatively static for a
+ given release of LLVM and a compiler tool. However, the details may
+ change from release to release of either. Users are encouraged to simply use
+ the various options of the <tt>llvmc</tt> command and ignore the configuration
+ of the tool. These configuration files are for compiler writers and LLVM
+ developers. Those wishing to simply use <tt>llvmc</tt> don't need to understand
+ this section but it may be instructive on how the tool works.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="overview"></a>Overview</div>
+<div class="doc_text">
+<p><tt>llvmc</tt> is highly configurable both on the command line and in
+configuration files. The options it understands are generic, consistent and
+simple by design. Furthermore, the <tt>llvmc</tt> options apply to the
+compilation of any LLVM enabled programming language. To be enabled as a
+supported source language compiler, a compiler writer must provide a
+configuration file that tells <tt>llvmc</tt> how to invoke the compiler
+and what its capabilities are. The purpose of the configuration files then
+is to allow compiler writers to specify to <tt>llvmc</tt> how the compiler
+should be invoked. Users may, but are not advised to, alter the compiler's
+<tt>llvmc</tt> configuration.</p>
+
+<p>Because <tt>llvmc</tt> just invokes other programs, it must deal with the
+available command line options for those programs regardless of whether they
+were written for LLVM or not. Furthermore, not all compiler tools will
+have the same capabilities. Some compiler tools will simply generate LLVM assembly
+code, others will be able to generate fully optimized bitcode. In general,
+<tt>llvmc</tt> doesn't make any assumptions about the capabilities or command
+line options of a sub-tool. It simply uses the details found in the
+configuration files and leaves it to the compiler writer to specify the
+configuration correctly.</p>
+
+<p>This approach means that new compiler tools can be up and working very
+quickly. As a first cut, a tool can simply compile its source to raw
+(unoptimized) bitcode or LLVM assembly and <tt>llvmc</tt> can be configured
+to pick up the slack (translate LLVM assembly to bitcode, optimize the
+bitcode, generate native assembly, link, etc.). In fact, the compiler tools
+need not use any LLVM libraries, and they could be written in any language
+(instead of C++). The configuration data will allow the full range of
+optimization, assembly, and linking capabilities that LLVM provides to be added
+to these kinds of tools. Enabling the rapid development of front-ends is one
+of the primary goals of <tt>llvmc</tt>.</p>
+
+<p>As a compiler tool matures, it may utilize the LLVM libraries and tools
+to more efficiently produce optimized bitcode directly in a single compilation
+and optimization program. In these cases, multiple tools would not be needed
+and the configuration data for the compiler would change.</p>
+
+<p>Configuring <tt>llvmc</tt> to the needs and capabilities of a source language
+compiler is relatively straight-forward. A compiler writer must provide a
+definition of what to do for each of the five compilation phases for each of
+the optimization levels. The specification consists simply of prototypical
+command lines into which <tt>llvmc</tt> can substitute command line
+arguments and file names. Note that any given phase can be completely blank if
+the source language's compiler combines multiple phases into a single program.
+For example, quite often pre-processing, translation, and optimization are
+combined into a single program. The specification for such a compiler would have
+blank entries for pre-processing and translation but a full command line for
+optimization.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="filetypes">Configuration Files</a></div>
+<div class="doc_subsubsection"><a name="filecontents">File Contents</a></div>
+<div class="doc_text">
+ <p>Each configuration file provides the details for a single source language
+ that is to be compiled. This configuration information tells <tt>llvmc</tt>
+ how to invoke the language's pre-processor, translator, optimizer, assembler
+ and linker. Note that a given source language needn't provide all these tools
+ as many of them exist in LLVM currently.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="dirsearch">Directory Search</a></div>
+<div class="doc_text">
+ <p><tt>llvmc</tt> always looks for files of a specific name. It uses the
+ first file with the name it's looking for by searching directories in the
+ following order:</p>
+ <ol>
+ <li>Any directory specified by the <tt>-config-dir</tt> option will be
+ checked first.</li>
+ <li>If the environment variable LLVM_CONFIG_DIR is set, and it contains
+ the name of a valid directory, that directory will be searched next.</li>
+ <li>If the user's home directory (typically <tt>/home/user</tt>) contains
+ a sub-directory named <tt>.llvm</tt> and that directory contains a
+ sub-directory named <tt>etc</tt> then that directory will be tried
+ next.</li>
+ <li>If the LLVM installation directory (typically <tt>/usr/local/llvm</tt>)
+ contains a sub-directory named <tt>etc</tt> then that directory will be
+ tried next.</li>
+ <li>A standard "system" directory will be searched next. This is typically
+ <tt>/etc/llvm</tt> on UNIX&trade; and <tt>C:\WINNT</tt> on Microsoft
+ Windows&trade;.</li>
+ <li>If the configuration file sought still can't be found, <tt>llvmc</tt>
+ will print an error message and exit.</li>
+ </ol>
+ <p>The first file found in this search will be used. Other files with the
+ same name will be ignored even if they exist in one of the subsequent search
+ locations.</p>
+</div>
+
+<div class="doc_subsubsection"><a name="filenames">File Names</a></div>
+<div class="doc_text">
+ <p>In the directories searched, each configuration file is given a specific
+ name to foster faster lookup (so llvmc doesn't have to do directory searches).
+ The name of a given language specific configuration file is simply the same
+ as the suffix used to identify files containing source in that language.
+ For example, a configuration file for C++ source might be named
+ <tt>cpp</tt>, <tt>C</tt>, or <tt>cxx</tt>. For languages that support multiple
+ file suffixes, multiple (probably identical) files (or symbolic links) will
+ need to be provided.</p>
+</div>
+
+<div class="doc_subsubsection"><a name="whatgetsread">What Gets Read</a></div>
+<div class="doc_text">
+ <p>Which configuration files are read depends on the command line options and
+ the suffixes of the file names provided on <tt>llvmc</tt>'s command line. Note
+ that the <tt>-x LANGUAGE</tt> option alters the language that <tt>llvmc</tt>
+ uses for the subsequent files on the command line. Only the configuration
+ files actually needed to complete <tt>llvmc</tt>'s task are read. Other
+ language specific files will be ignored.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="syntax"></a>Syntax</div>
+<div class="doc_text">
+ <p>The syntax of the configuration files is very simple and somewhat
+ compatible with Java's property files. Here are the syntax rules:</p>
+ <ul>
+ <li>The file encoding is ASCII.</li>
+ <li>The file is line oriented. There should be one configuration definition
+ per line. Lines are terminated by the newline (0x0A) and/or carriage return
+ characters (0x0D).</li>
+ <li>A backslash (<tt>\</tt>) before a newline causes the newline to be
+ ignored. This is useful for line continuation of long definitions. A
+ backslash anywhere else is recognized as a backslash.</li>
+ <li>A configuration item consists of a name, an <tt>=</tt> and a value.</li>
+ <li>A name consists of a sequence of identifiers separated by periods.</li>
+ <li>An identifier consists of specific keywords made up of only lower case
+ and upper case letters (e.g. <tt>lang.name</tt>).</li>
+ <li>Values come in four flavors: booleans, integers, commands and
+ strings.</li>
+ <li>Valid "false" boolean values are <tt>false False FALSE no No NO
+ off Off</tt> and <tt>OFF</tt>.</li>
+ <li>Valid "true" boolean values are <tt>true True TRUE yes Yes YES
+ on On</tt> and <tt>ON</tt>.</li>
+ <li>Integers are simply sequences of digits.</li>
+ <li>Commands start with a program name and are followed by a sequence of
+ words that are passed to that program as command line arguments. Program
+ arguments that begin and end with the <tt>%</tt> sign will have their value
+ substituted. Program names beginning with <tt>/</tt> are considered to be
+ absolute. Otherwise the <tt>PATH</tt> will be applied to find the program to
+ execute.</li>
+ <li>Strings are composed of multiple sequences of characters from the
+ character class <tt>[-A-Za-z0-9_:%+/\\|,]</tt> separated by white
+ space.</li>
+ <li>White space on a line is folded. Multiple blanks or tabs will be
+ reduced to a single blank.</li>
+ <li>White space before the configuration item's name is ignored.</li>
+ <li>White space on either side of the <tt>=</tt> is ignored.</li>
+ <li>White space in a string value is used to separate the individual
+ components of the string value but otherwise ignored.</li>
+ <li>Comments are introduced by the <tt>#</tt> character. Everything after a
+ <tt>#</tt> and before the end of line is ignored.</li>
+ </ul>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="items">Configuration Items</a></div>
+<div class="doc_text">
+ <p>The table below provides definitions of the allowed configuration items
+ that may appear in a configuration file. Every item has a default value and
+ does not need to appear in the configuration file. Missing items will have the
+ default value. Each identifier may appear as all lower case, first letter
+ capitalized or all upper case.</p>
+ <table>
+ <tbody>
+ <tr>
+ <th>Name</th>
+ <th>Value Type</th>
+ <th>Description</th>
+ <th>Default</th>
+ </tr>
+ <tr><td colspan="4"><h4>LLVMC ITEMS</h4></td></tr>
+ <tr>
+ <td><b>version</b></td>
+ <td>string</td>
+ <td class="td_left">Provides the version string for the contents of this
+ configuration file. What is accepted as a legal configuration file
+ will change over time and this item tells <tt>llvmc</tt> which version
+ should be expected.</td>
+ <td><i>blank</i></td>
+ </tr>
+ <tr><td colspan="4"><h4>LANG ITEMS</h4></td></tr>
+ <tr>
+ <td><b>lang.name</b></td>
+ <td>string</td>
+ <td class="td_left">Provides the common name for a language definition.
+ For example "C++", "Pascal", "FORTRAN", etc.</td>
+ <td><i>blank</i></td>
+ </tr>
+ <tr>
+ <td><b>lang.opt1</b></td>
+ <td>string</td>
+ <td class="td_left">Specifies the parameters to give the optimizer when
+ <tt>-O1</tt> is specified on the <tt>llvmc</tt> command line.</td>
+ <td><tt>-simplifycfg -instcombine -mem2reg</tt></td>
+ </tr>
+ <tr>
+ <td><b>lang.opt2</b></td>
+ <td>string</td>
+ <td class="td_left">Specifies the parameters to give the optimizer when
+ <tt>-O2</tt> is specified on the <tt>llvmc</tt> command line.</td>
+ <td><i>TBD</i></td>
+ </tr>
+ <tr>
+ <td><b>lang.opt3</b></td>
+ <td>string</td>
+ <td class="td_left">Specifies the parameters to give the optimizer when
+ <tt>-O3</tt> is specified on the <tt>llvmc</tt> command line.</td>
+ <td><i>TBD</i></td>
+ </tr>
+ <tr>
+ <td><b>lang.opt4</b></td>
+ <td>string</td>
+ <td class="td_left">Specifies the parameters to give the optimizer when
+ <tt>-O4</tt> is specified on the <tt>llvmc</tt> command line.</td>
+ <td><i>TBD</i></td>
+ </tr>
+ <tr>
+ <td><b>lang.opt5</b></td>
+ <td>string</td>
+ <td class="td_left">Specifies the parameters to give the optimizer when
+ <tt>-O5</tt> is specified on the <tt>llvmc</tt> command line.</td>
+ <td><i>TBD</i></td>
+ </tr>
+ <tr><td colspan="4"><h4>PREPROCESSOR ITEMS</h4></td></tr>
+ <tr>
+ <td><b>preprocessor.command</b></td>
+ <td>command</td>
+ <td class="td_left">This provides the command prototype that will be used
+ to run the preprocessor. This is generally only used with the
+ <tt>-E</tt> option.</td>
+ <td>&lt;blank&gt;</td>
+ </tr>
+ <tr>
+ <td><b>preprocessor.required</b></td>
+ <td>boolean</td>
+ <td class="td_left">This item specifies whether the pre-processing phase
+ is required by the language. If the value is true, then the
+ <tt>preprocessor.command</tt> value must not be blank. With this option,
+ <tt>llvmc</tt> will always run the preprocessor as it assumes that the
+ translation and optimization phases don't know how to pre-process their
+ input.</td>
+ <td>false</td>
+ </tr>
+ <tr><td colspan="4"><h4>TRANSLATOR ITEMS</h4></td></tr>
+ <tr>
+ <td><b>translator.command</b></td>
+ <td>command</td>
+ <td class="td_left">This provides the command prototype that will be used
+ to run the translator. Valid substitutions are <tt>%in%</tt> for the
+ input file and <tt>%out%</tt> for the output file.</td>
+ <td>&lt;blank&gt;</td>
+ </tr>
+ <tr>
+ <td><b>translator.output</b></td>
+ <td><tt>bitcode</tt> or <tt>assembly</tt></td>
+ <td class="td_left">This item specifies the kind of output the language's
+ translator generates.</td>
+ <td><tt>bitcode</tt></td>
+ </tr>
+ <tr>
+ <td><b>translator.preprocesses</b></td>
+ <td>boolean</td>
+ <td class="td_left">Indicates that the translator also preprocesses. If
+ this is true, then <tt>llvmc</tt> will skip the pre-processing phase
+ whenever the final phase is not pre-processing.</td>
+ <td><tt>false</tt></td>
+ </tr>
+ <tr><td colspan="4"><h4>OPTIMIZER ITEMS</h4></td></tr>
+ <tr>
+ <td><b>optimizer.command</b></td>
+ <td>command</td>
+ <td class="td_left">This provides the command prototype that will be used
+ to run the optimizer. Valid substitutions are <tt>%in%</tt> for the
+ input file and <tt>%out%</tt> for the output file.</td>
+ <td>&lt;blank&gt;</td>
+ </tr>
+ <tr>
+ <td><b>optimizer.output</b></td>
+ <td><tt>bitcode</tt> or <tt>assembly</tt></td>
+ <td class="td_left">This item specifies the kind of output the language's
+ optimizer generates. Valid values are "assembly" and "bitcode".</td>
+ <td><tt>bitcode</tt></td>
+ </tr>
+ <tr>
+ <td><b>optimizer.preprocesses</b></td>
+ <td>boolean</td>
+ <td class="td_left">Indicates that the optimizer also preprocesses. If
+ this is true, then <tt>llvmc</tt> will skip the pre-processing phase
+ whenever the final phase is optimization or later.</td>
+ <td><tt>false</tt></td>
+ </tr>
+ <tr>
+ <td><b>optimizer.translates</b></td>
+ <td>boolean</td>
+ <td class="td_left">Indicates that the optimizer also translates. If
+ this is true, then <tt>llvmc</tt> will skip the translation phase
+ whenever the final phase is optimization or later.</td>
+ <td><tt>false</tt></td>
+ </tr>
+ <tr><td colspan="4"><h4>ASSEMBLER ITEMS</h4></td></tr>
+ <tr>
+ <td><b>assembler.command</b></td>
+ <td>command</td>
+ <td class="td_left">This provides the command prototype that will be used
+ to run the assembler. Valid substitutions are <tt>%in%</tt> for the
+ input file and <tt>%out%</tt> for the output file.</td>
+ <td>&lt;blank&gt;</td>
+ </tr>
+ </tbody>
+ </table>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="substitutions">Substitutions</a></div>
+<div class="doc_text">
+ <p>On any configuration item that ends in <tt>command</tt>, you must
+ specify substitution tokens. Substitution tokens begin and end with a percent
+ sign (<tt>%</tt>) and are replaced by the corresponding text. Any substitution
+ token may be given on any <tt>command</tt> line but some are more useful than
+ others. In particular each command <em>should</em> have both an <tt>%in%</tt>
+ and an <tt>%out%</tt> substitution. The table below provides definitions of
+ each of the allowed substitution tokens.</p>
+ <table>
+ <tbody>
+ <tr>
+ <th>Substitution Token</th>
+ <th>Replacement Description</th>
+ </tr>
+ <tr>
+ <td><tt>%args%</tt></td>
+ <td class="td_left">Replaced with all the tool-specific arguments given
+ to <tt>llvmc</tt> via the <tt>-T</tt> set of options. This just allows
+ you to place these arguments in the correct place on the command line.
+ If the <tt>%args%</tt> option does not appear on your command line,
+ then you are explicitly disallowing the <tt>-T</tt> option for your
+ tool.
+ </td>
+ </tr>
+ <tr>
+ <td><tt>%force%</tt></td>
+ <td class="td_left">Replaced with the <tt>-f</tt> option if it was
+ specified on the <tt>llvmc</tt> command line. This is intended to tell
+ the compiler tool to force the overwrite of output files.
+ </td>
+ </tr>
+ <tr>
+ <td><tt>%in%</tt></td>
+ <td class="td_left">Replaced with the full path of the input file. You
+ needn't worry about the cascading of file names. <tt>llvmc</tt> will
+ create temporary files and ensure that the output of one phase is the
+ input to the next phase.</td>
+ </tr>
+ <tr>
+ <td><tt>%opt%</tt></td>
+ <td class="td_left">Replaced with the optimization options for the
+ tool. If the tool understands the <tt>-O</tt> options then that will
+ be passed. Otherwise, the <tt>lang.optN</tt> series of configuration
+ items will specify which arguments are to be given.</td>
+ </tr>
+ <tr>
+ <td><tt>%out%</tt></td>
+ <td class="td_left">Replaced with the full path of the output file.
+ Note that this is not necessarily the output file specified with the
+ <tt>-o</tt> option on <tt>llvmc</tt>'s command line. It might be a
+ temporary file that will be passed to a subsequent phase's input.
+ </td>
+ </tr>
+ <tr>
+ <td><tt>%stats%</tt></td>
+ <td class="td_left">If your command accepts the <tt>-stats</tt> option,
+ use this substitution token. If the user requested <tt>-stats</tt>
+ from the <tt>llvmc</tt> command line then this token will be replaced
+ with <tt>-stats</tt>, otherwise it will be ignored.
+ </td>
+ </tr>
+ <tr>
+ <td><tt>%target%</tt></td>
+ <td class="td_left">Replaced with the name of the target "machine" for
+ which code should be generated. The value used here is taken from the
+ <tt>llvmc</tt> option <tt>-march</tt>.
+ </td>
+ </tr>
+ <tr>
+ <td><tt>%time%</tt></td>
+ <td class="td_left">If your command accepts the <tt>-time-passes</tt>
+ option, use this substitution token. If the user requested
+ <tt>-time-passes</tt> from the <tt>llvmc</tt> command line then this
+ token will be replaced with <tt>-time-passes</tt>, otherwise it will
+ be ignored.
+ </td>
+ </tr>
+ </tbody>
+ </table>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="sample">Sample Config File</a></div>
+<div class="doc_text">
+ <p>Since an example is always instructive, here's how the Stacker language
+ configuration file looks.</p>
+ <pre><tt>
+# Stacker Configuration File For llvmc
+
+##########################################################
+# Language definitions
+##########################################################
+ lang.name=Stacker
+ lang.opt1=-simplifycfg -instcombine -mem2reg
+ lang.opt2=-simplifycfg -instcombine -mem2reg -load-vn \
+ -gcse -dse -scalarrepl -sccp
+ lang.opt3=-simplifycfg -instcombine -mem2reg -load-vn \
+ -gcse -dse -scalarrepl -sccp -branch-combine -adce \
+ -globaldce -inline -licm
+ lang.opt4=-simplifycfg -instcombine -mem2reg -load-vn \
+ -gcse -dse -scalarrepl -sccp -ipconstprop \
+ -branch-combine -adce -globaldce -inline -licm
+ lang.opt5=-simplifycfg -instcombine -mem2reg -load-vn \
+ -gcse -dse -scalarrepl -sccp -ipconstprop \
+ -branch-combine -adce -globaldce -inline -licm \
+ -block-placement
+
+##########################################################
+# Pre-processor definitions
+##########################################################
+
+ # Stacker doesn't have a preprocessor but the following
+ # allows the -E option to be supported
+ preprocessor.command=cp %in% %out%
+ preprocessor.required=false
+
+##########################################################
+# Translator definitions
+##########################################################
+
+ # To compile stacker source, we just run the stacker
+ # compiler with a default stack size of 2048 entries.
+ translator.command=stkrc -s 2048 %in% -o %out% %time% \
+ %stats% %force% %args%
+
+ # stkrc doesn't preprocess but we set this to true so
+ # that we don't run the cp command by default.
+ translator.preprocesses=true
+
+ # The translator is required to run.
+ translator.required=true
+
+ # stkrc doesn't handle the -On options
+ translator.output=bitcode
+
+##########################################################
+# Optimizer definitions
+##########################################################
+
+ # For optimization, we use the LLVM "opt" program
+ optimizer.command=opt %in% -o %out% %opt% %time% %stats% \
+ %force% %args%
+
+ optimizer.required = true
+
+ # opt doesn't translate
+ optimizer.translates = no
+
+ # opt doesn't preprocess
+ optimizer.preprocesses=no
+
+ # opt produces bitcode
+ optimizer.output = bitcode
+
+##########################################################
+# Assembler definitions
+##########################################################
+ assembler.command=llc %in% -o %out% %target% %time% %stats%
+</tt></pre>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="glossary">Glossary</a></div>
+<!-- *********************************************************************** -->
+<div class="doc_text">
+ <p>This document uses precise terms in reference to the various artifacts and
+ concepts related to compilation. The terms used throughout this document are
+ defined below.</p>
+ <dl>
+ <dt><a name="def_assembly"><b>assembly</b></a></dt>
+ <dd>A compilation <a href="#def_phase">phase</a> in which LLVM bitcode or
+ LLVM assembly code is assembled to a native code format (either target
+ specific assembly language or the platform's native object file format).
+ </dd>
+
+ <dt><a name="def_compiler"><b>compiler</b></a></dt>
+ <dd>Refers to any program that can be invoked by <tt>llvmc</tt> to accomplish
+ the work of one or more compilation <a href="#def_phase">phases</a>.</dd>
+
+ <dt><a name="def_driver"><b>driver</b></a></dt>
+ <dd>Refers to <tt>llvmc</tt> itself.</dd>
+
+ <dt><a name="def_linking"><b>linking</b></a></dt>
+ <dd>A compilation <a href="#def_phase">phase</a> in which LLVM bitcode files
+ and (optionally) native system libraries are combined to form a complete
+ executable program.</dd>
+
+ <dt><a name="def_optimization"><b>optimization</b></a></dt>
+ <dd>A compilation <a href="#def_phase">phase</a> in which LLVM bitcode is
+ optimized.</dd>
+
+ <dt><a name="def_phase"><b>phase</b></a></dt>
+ <dd>Refers to any one of the five compilation phases that
+ <tt>llvmc</tt> supports. The five phases are:
+ <a href="#def_preprocessing">preprocessing</a>,
+ <a href="#def_translation">translation</a>,
+ <a href="#def_optimization">optimization</a>,
+ <a href="#def_assembly">assembly</a>,
+ <a href="#def_linking">linking</a>.</dd>
+
+ <dt><a name="def_sourcelanguage"><b>source language</b></a></dt>
+ <dd>Any common programming language (e.g. C, C++, Java, Stacker, ML,
+ FORTRAN). These languages are distinguished from any of the lower level
+ languages (such as LLVM or native assembly), by the fact that a
+ <a href="#def_translation">translation</a> <a href="#def_phase">phase</a>
+ is required before LLVM can be applied.</dd>
+
+ <dt><a name="def_tool"><b>tool</b></a></dt>
+ <dd>Refers to any program in the LLVM tool set.</dd>
+
+ <dt><a name="def_translation"><b>translation</b></a></dt>
+ <dd>A compilation <a href="#def_phase">phase</a> in which
+ <a href="#def_sourcelanguage">source language</a> code is translated into
+ either LLVM assembly language or LLVM bitcode.</dd>
+ </dl>
+</div>
+<!-- *********************************************************************** -->
+<hr>
+<address> <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a><a
+ href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a><a
+ href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
+<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
+Last modified: $Date$
+</address>
+<!-- vim: sw=2
+-->
+</body>
+</html>
diff --git a/docs/CompilerWriterInfo.html b/docs/CompilerWriterInfo.html
new file mode 100644
index 0000000..16810e5
--- /dev/null
+++ b/docs/CompilerWriterInfo.html
@@ -0,0 +1,261 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+ <title>Architecture/platform information for compiler writers</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+<div class="doc_title">
+ Architecture/platform information for compiler writers
+</div>
+
+<div class="doc_warning">
+ <p>Note: This document is a work-in-progress. Additions and clarifications
+ are welcome.</p>
+</div>
+
+<ol>
+ <li><a href="#hw">Hardware</a>
+ <ol>
+ <li><a href="#alpha">Alpha</a></li>
+ <li><a href="#arm">ARM</a></li>
+ <li><a href="#ia64">Itanium</a></li>
+ <li><a href="#mips">MIPS</a></li>
+ <li><a href="#ppc">PowerPC</a></li>
+ <li><a href="#sparc">SPARC</a></li>
+ <li><a href="#x86">X86</a></li>
+ <li><a href="#other">Other lists</a></li>
+ </ol></li>
+ <li><a href="#abi">Application Binary Interface (ABI)</a>
+ <ol>
+ <li><a href="#linux">Linux</a></li>
+ <li><a href="#osx">OS X</a></li>
+ </ol></li>
+ <li><a href="#misc">Miscellaneous resources</a></li>
+</ol>
+
+<div class="doc_author">
+ <p>Compiled by <a href="http://misha.brukman.net">Misha Brukman</a></p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="hw">Hardware</a></div>
+<!-- *********************************************************************** -->
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="alpha">Alpha</a></div>
+
+<div class="doc_text">
+<ul>
+<li><a
+href="http://ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html">Alpha manuals</a>
+</li>
+</ul>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="arm">ARM</a></div>
+
+<div class="doc_text">
+<ul>
+<li><a href="http://www.arm.com/documentation/">ARM documentation</a>
+(<a href="http://www.arm.com/documentation/ARMProcessor_Cores/">Processor
+Cores</a>)</li>
+<li><a href="http://www.arm.com/products/DevTools/ABI.html">ABI</a></li>
+</ul>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="ia64">Itanium (ia64)</a></div>
+
+<div class="doc_text">
+<ul>
+<li><a
+href="http://developer.intel.com/design/itanium2/documentation.htm">Itanium documentation</a>
+</li>
+</ul>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="mips">MIPS</a></div>
+
+<div class="doc_text">
+<ul>
+<li><a
+href="http://mips.com/content/Documentation/MIPSDocumentation/ProcessorArchitecture/doclibrary">MIPS
+Processor Architecture</a></li>
+</ul>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="ppc">PowerPC</a></div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">IBM - Official manuals and docs</div>
+
+<div class="doc_text">
+
+<ul>
+<li><a
+href="http://www-106.ibm.com/developerworks/eserver/articles/archguide.html">PowerPC
+Architecture Book</a>
+<ul>
+ <li>Book I: <a
+ href="http://www-106.ibm.com/developerworks/eserver/pdfs/archpub1.pdf">PowerPC
+ User Instruction Set Architecture</a></li>
+ <li>Book II: <a
+ href="http://www-106.ibm.com/developerworks/eserver/pdfs/archpub2.pdf">PowerPC
+ Virtual Environment Architecture</a></li>
+ <li>Book III: <a
+ href="http://www-106.ibm.com/developerworks/eserver/pdfs/archpub3.pdf">PowerPC
+ Operating Environment Architecture</a></li>
+</ul></li>
+<li><a
+href="http://www-3.ibm.com/chips/techlib/techlib.nsf/techdocs/852569B20050FF7785256996007558C6">PowerPC
+Compiler Writer's Guide</a></li>
+<li><a
+href="http://www-3.ibm.com/chips/techlib/techlib.nsf/products/PowerPC">PowerPC
+Processor Manuals</a></li>
+<li><a
+href="http://www-106.ibm.com/developerworks/linux/library/l-powarch/">Intro to
+PowerPC architecture</a></li>
+<li><a href="http://publibn.boulder.ibm.com/doc_link/en_US/a_doc_lib/aixassem/alangref/alangreftfrm.htm">IBM AIX/5L for POWER Assembly reference</a></li>
+</ul>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">Other documents, collections, notes</div>
+
+<div class="doc_text">
+
+<ul>
+<li><a href="http://penguinppc.org/dev/#library">PowerPC ABI documents</a></li>
+<li><a href="http://gcc.gnu.org/ml/gcc-patches/2003-09/msg00997.html">PowerPC64
+alignment of long doubles (from GCC)</a></li>
+<li><a href="http://sources.redhat.com/ml/binutils/2002-04/msg00573.html">Long
+branch stubs for powerpc64-linux (from binutils)</a></li>
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="sparc">SPARC</a></div>
+
+<div class="doc_text">
+
+<ul>
+<li><a href="http://www.sparc.org/resource.htm">SPARC resources</a></li>
+<li><a href="http://www.sparc.org/standards.html">SPARC standards</a></li>
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="x86">X86</a></div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">AMD - Official manuals and docs</div>
+
+<div class="doc_text">
+<ul>
+<li><a
+href="http://www.amd.com/us-en/Processors/TechnicalResources/0,,30_182_739,00.html">AMD processor manuals</a></li>
+<li><a href="http://www.x86-64.org/documentation">X86-64 ABI</a></li>
+</ul>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">Intel - Official manuals and docs</div>
+
+<div class="doc_text">
+<ul>
+<li><a
+href="http://developer.intel.com/design/pentium4/manuals/index_new.htm">IA-32
+manuals</a></li>
+<li><a
+href="http://www.intel.com/design/itanium/documentation.htm?iid=ipp_srvr_proc_itanium2+techdocs">Intel
+Itanium documentation</a></li>
+</ul>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">Other x86-specific information</div>
+
+<div class="doc_text">
+<ul>
+<li><a href="http://www.agner.org/assem/calling_conventions.pdf">Calling
+conventions for different C++ compilers and operating systems</a></li>
+</ul>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="other">Other relevant lists</a></div>
+
+<div class="doc_text">
+
+<ul>
+<li><a href="http://gcc.gnu.org/readings.html">GCC reading list</a></li>
+</ul>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="abi">ABI</a></div>
+<!-- *********************************************************************** -->
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="linux">Linux</a></div>
+
+<div class="doc_text">
+<ol>
+<li><a href="http://www.linuxbase.org/spec/ELF/ppc64/">PowerPC 64-bit ELF ABI
+Supplement</a></li>
+</ol>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="osx">OS X</a></div>
+
+<div class="doc_text">
+<ol>
+<li><a
+href="http://developer.apple.com/documentation/Darwin/RuntimeArchitecture-date.html">Mach-O
+Runtime Architecture</a></li>
+<li><a href="http://www.unsanity.org/archives/000044.php">Notes on Mach-O
+ABI</a></li>
+</ol>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="misc">Miscellaneous resources</a></div>
+<!-- *********************************************************************** -->
+
+<ul>
+<li><a
+href="http://www.nondot.org/sabre/os/articles/ExecutableFileFormats/">Executable
+File Format library</a></li>
+<li><a href="http://gcc.gnu.org/projects/prefetch.html">GCC prefetch project</a>
+page has a good survey of the prefetching capabilities of a variety of modern
+processors.</li>
+</ul>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+ <a href="http://misha.brukman.net">Misha Brukman</a><br>
+ <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+
+</body>
+</html>
diff --git a/docs/DeveloperPolicy.html b/docs/DeveloperPolicy.html
new file mode 100644
index 0000000..34e4d9e
--- /dev/null
+++ b/docs/DeveloperPolicy.html
@@ -0,0 +1,504 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>LLVM Developer Policy</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<div class="doc_title">LLVM Developer Policy</div>
+<ol>
+ <li><a href="#introduction">Introduction</a></li>
+ <li><a href="#policies">Developer Policies</a>
+ <ol>
+ <li><a href="#informed">Stay Informed</a></li>
+ <li><a href="#patches">Making a Patch</a></li>
+ <li><a href="#reviews">Code Reviews</a></li>
+ <li><a href="#testcases">Test Cases</a></li>
+ <li><a href="#quality">Quality</a></li>
+ <li><a href="#commitaccess">Obtaining Commit Access</a></li>
+ <li><a href="#newwork">Making a Major Change</a></li>
+ <li><a href="#incremental">Incremental Development</a></li>
+ <li><a href="#attribution">Attribution of Changes</a></li>
+ </ol></li>
+ <li><a href="#clp">Copyright, License, and Patents</a>
+ <ol>
+ <li><a href="#copyright">Copyright</a></li>
+ <li><a href="#license">License</a></li>
+ <li><a href="#patents">Patents</a></li>
+ <li><a href="#devagree">Developer Agreements</a></li>
+ </ol></li>
+</ol>
+<div class="doc_author">Written by the LLVM Oversight Team</div>
+
+<!--=========================================================================-->
+<div class="doc_section"><a name="introduction">Introduction</a></div>
+<!--=========================================================================-->
+<div class="doc_text">
+ <p>This document contains the LLVM Developer Policy which defines the
+ project's policy towards developers and their contributions. The intent of
+ this policy is to eliminate mis-communication, rework, and confusion that
+ might arise from the distributed nature of LLVM's development. By stating
+ the policy in clear terms, we hope each developer can know ahead of time
+ what to expect when making LLVM contributions.</p>
+ <p>This policy is also designed to accomplish the following objectives:</p>
+ <ol>
+ <li>Attract both users and developers to the LLVM project.</li>
+ <li>Make life as simple and easy for contributors as possible.</li>
+ <li>Keep the top of Subversion trees as stable as possible.</li>
+ </ol>
+
+ <p>This policy is aimed at frequent contributors to LLVM. People interested in
+ contributing one-off patches can do so in an informal way by sending them to
+ the <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits">
+ llvm-commits mailing list</a> and engaging another developer to see it through
+ the process.</p>
+
+</div>
+
+<!--=========================================================================-->
+<div class="doc_section"><a name="policies">Developer Policies</a></div>
+<!--=========================================================================-->
+<div class="doc_text">
+ <p>This section contains policies that pertain to frequent LLVM
+ developers. We always welcome <a href="#patches">one-off patches</a> from
+ people who do not routinely contribute to LLVM, but we expect more from
+ frequent contributors to keep the system as efficient as possible for
+ everyone.
+ Frequent LLVM contributors are expected to meet the following requirements in
+ order for LLVM to maintain a high standard of quality.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"> <a name="informed">Stay Informed</a> </div>
+<div class="doc_text">
+ <p>Developers should stay informed by reading at least the
+ <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">llvmdev</a>
+ email list. If you are doing anything more than just casual work on LLVM,
+ it is suggested that you also subscribe to the
+ <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits">llvm-commits</a>
+ list and pay attention to changes being made by others.</p>
+ <p>We recommend that active developers register an email account with
+ <a href="http://llvm.org/bugs/">LLVM Bugzilla</a> and preferably subscribe to
+ the <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmbugs">llvm-bugs</a>
+ email list to keep track of bugs and enhancements occurring in LLVM.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"> <a name="patches">Making a Patch</a></div>
+
+<div class="doc_text">
+
+<p>When making a patch for review, the goal is to make it as easy for the
+ reviewer to read it as possible. As such, we recommend that you:</p>
+ <ol>
+ <li>Make your patch against the Subversion trunk, not a branch, and not an
+ old version of LLVM. This makes it easy to apply the patch.</li>
+
+ <li>Similarly, patches should be submitted soon after they are generated.
+ Old patches may not apply correctly if the underlying code changes between
+ the time the patch was created and the time it is applied.</li>
+
+ <li>Patches should be made with this command:
+ <pre>svn diff -x -u</pre>
+ or with the utility <tt>utils/mkpatch</tt>, which makes it easy to read the
+ diff.</li>
+
+ <li>Patches should not include differences in generated code such as the
+ code generated by <tt>flex</tt>, <tt>bison</tt> or <tt>tblgen</tt>. The
+ <tt>utils/mkpatch</tt> utility takes care of this for you.</li>
+
+ </ol>
+
+ <p>When sending a patch to a mailing list, it is a good idea to send it as an
+ <em>attachment</em> to the message, not embedded into the text of the
+ message. This ensures that your mailer will not mangle the patch when it
+ sends it (e.g. by making whitespace changes or by wrapping lines).</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"> <a name="reviews">Code Reviews</a></div>
+<div class="doc_text">
+ <p>LLVM has a code review policy. Code review is one way to increase the
+ quality of software. We generally follow these policies:</p>
+ <ol>
+ <li>All developers are required to have significant changes reviewed
+ before they are committed to the repository.</li>
+ <li>Code reviews are conducted by email, usually on the llvm-commits
+ list.</li>
+ <li>Code can be reviewed either before it is committed or after. We expect
+ major changes to be reviewed before being committed, but smaller
+ changes (or changes where the developer owns the component) can be
+ reviewed after commit.</li>
+ <li>The developer responsible for a code change is also responsible for
+ making all necessary review-related changes.</li>
+ <li>Code review can be an iterative process, which continues until the patch
+ is ready to be committed.</li>
+ </ol>
+
+ <p>Developers should participate in code reviews as both reviewers and
+ reviewees. If someone is kind enough to review your code, you should
+ return the favor for someone else. Note that anyone is welcome to review
+ and give feedback on a patch, but only people with Subversion write access
+ can approve it.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"> <a name="testcases">Test Cases</a></div>
+<div class="doc_text">
+ <p>Developers are required to create test cases for any bugs fixed and any new
+ features added. Some tips for getting your testcase approved:</p>
+ <ol>
+ <li>All feature and regression test cases are added to the
+ <tt>llvm/test</tt> directory. The appropriate sub-directory should be
+ selected (see the <a href="TestingGuide.html">Testing Guide</a> for
+ details).</li>
+ <li>Test cases should be written in
+ <a href="LangRef.html">LLVM assembly language</a> unless the
+ feature or regression being tested requires another language (e.g. the
+ bug being fixed or feature being implemented is in the llvm-gcc C++
+ front-end, in which case it must be written in C++).</li>
+ <li>Test cases, especially for regressions, should be reduced as much as
+ possible, by <a href="Bugpoint.html">bugpoint</a> or
+ manually. It is unacceptable
+ to place an entire failing program into <tt>llvm/test</tt> as this creates
+ a <i>time-to-test</i> burden on all developers. Please keep them short.</li>
+ </ol>
+
+ <p>Note that llvm/test is designed for regression and small feature tests
+ only. More extensive test cases (e.g., entire applications, benchmarks,
+ etc) should be added to the <tt>llvm-test</tt> test suite. The llvm-test
+ suite is for coverage (correctness, performance, etc) testing, not feature
+ or regression testing.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"> <a name="quality">Quality</a></div>
+<div class="doc_text">
+ <p>The minimum quality standards that any change must satisfy before being
+ committed to the main development branch are:</p>
+ <ol>
+ <li>Code must adhere to the
+ <a href="CodingStandards.html">LLVM Coding Standards</a>.</li>
+ <li>Code must compile cleanly (no errors, no warnings) on at least one
+ platform.</li>
+ <li>Bug fixes and new features should <a href="#testcases">include a
+ testcase</a> so we know if the fix/feature ever regresses in the
+ future.</li>
+ <li>Code must pass the dejagnu (<tt>llvm/test</tt>) test suite.</li>
+ <li>The code must not cause regressions on a reasonable subset of llvm-test,
+ where "reasonable" depends on the contributor's judgement and the scope
+ of the change (more invasive changes require more testing). A reasonable
+ subset is "<tt>llvm-test/MultiSource/Benchmarks</tt>".</li>
+ </ol>
+ <p>Additionally, the committer is responsible for addressing any problems
+ found in the future that the change is responsible for. For example:</p>
+ <ul>
+ <li>The code should compile cleanly on all supported platforms.</li>
+ <li>The changes should not cause any correctness regressions in the
+ <tt>llvm-test</tt> suite and must not cause any major performance
+ regressions.</li>
+ <li>The change set should not cause performance or correctness regressions
+ for the LLVM tools.</li>
+ <li>The changes should not cause performance or correctness regressions in
+ code compiled by LLVM on all applicable targets.</li>
+ <li>You are expected to address any <a href="http://llvm.org/bugs/">bugzilla
+ bugs</a> that result from your change.</li>
+ </ul>
+
+ <p>We prefer for this to be handled before submission but understand that it
+ isn't possible to test all of this for every submission. Our nightly
+ testing
+ infrastructure normally finds these problems. A good rule of thumb is to
+ check the nightly testers for regressions the day after your change.</p>
+
+ <p>Commits that violate these quality standards (e.g. are very broken) may
+ be reverted. This is necessary when the change blocks other developers from
+ making progress. The developer is welcome to re-commit the change after
+ the problem has been fixed.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection">
+ <a name="commitaccess">Obtaining Commit Access</a></div>
+<div class="doc_text">
+
+<p>
+We grant commit access to contributors with a track record of submitting high
+quality patches. If you would like commit access, please send an email to the
+<a href="mailto:llvm-oversight@cs.uiuc.edu">LLVM oversight group</a>.</p>
+
+<p>If you have recently been granted commit access, these policies apply:</p>
+<ol>
+ <li>You are granted <i>commit-after-approval</i> to all parts of LLVM.
+ To get approval, submit a <a href="#patches">patch</a> to
+ <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits">
+ llvm-commits</a>. When approved you may commit it yourself.</li>
+ <li>You are allowed to commit patches without approval which you think are
+ obvious. This is clearly a subjective decision &mdash; we simply expect you
+ to use good judgement. Examples include: fixing build breakage, reverting
+ obviously broken patches, documentation/comment changes, any other minor
+ changes.</li>
+ <li>You are allowed to commit patches without approval to those portions
+ of LLVM that you have contributed or maintain (i.e., have been assigned
+ responsibility for), with the proviso that such commits must not break the
+ build. This is a "trust but verify" policy and commits of this nature are
+ reviewed after they are committed.</li>
+ <li>Multiple violations of these policies or a single egregious violation
+ may cause commit access to be revoked.</li>
+</ol>
+
+<p>In any case, your changes are still subject to <a href="#reviews">code
+review</a> (either before or after they are committed, depending on the nature
+of the change). You are encouraged to review other people's patches as well,
+but you aren't required to.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"> <a name="newwork">Making a Major Change</a></div>
+<div class="doc_text">
+ <p>When a developer begins a major new project with the aim of contributing
+ it back to LLVM, s/he should inform the community with an email to
+ the <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">llvmdev</a>
+ email list, to the extent possible. The reason for this is to:</p>
+ <ol>
+ <li>keep the community informed about future changes to LLVM, </li>
+ <li>avoid duplication of effort by preventing multiple parties working on
+ the same thing and not knowing about it, and</li>
+ <li>ensure that any technical issues around the proposed work are
+ discussed and resolved before any significant work is done.</li>
+ </ol>
+
+ <p>The design of LLVM is carefully controlled to ensure that all the pieces
+ fit together well and are as consistent as possible. If you plan to make a
+ major change to the way LLVM works or want to add a major new extension, it
+ is a good idea to get consensus with the development
+ community before you start working on it.</p>
+
+ <p>Once the design of the new feature is finalized, the work itself should be
+ done as a series of <a href="#incremental">incremental changes</a>, not as
+ a long-term development branch.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"> <a name="incremental">Incremental Development</a>
+</div>
+<div class="doc_text">
+ <p>In the LLVM project, we do all significant changes as a series of
+ incremental patches. We have a strong dislike for huge changes or
+ long-term development branches. Long-term development branches have a
+ number of drawbacks:</p>
+
+ <ol>
+ <li>Branches must have mainline merged into them periodically. If the branch
+ development and mainline development occur in the same pieces of code,
+ resolving merge conflicts can take a lot of time.</li>
+ <li>Other people in the community tend to ignore work on branches.</li>
+ <li>Huge changes (produced when a branch is merged back onto mainline) are
+ extremely difficult to <a href="#reviews">code review</a>.</li>
+ <li>Branches are not routinely tested by our nightly tester
+ infrastructure.</li>
+ <li>Changes developed as monolithic large changes often don't work until the
+ entire set of changes is done. Breaking it down into a set of smaller
+ changes increases the odds that any of the work will be committed to the
+ main repository.</li>
+ </ol>
+
+ <p>
+ To address these problems, LLVM uses an incremental development style and we
+ require contributors to follow this practice when making a large/invasive
+ change. Some tips:</p>
+
+ <ul>
+ <li>Large/invasive changes usually have a number of secondary changes that
+ are required before the big change can be made (e.g. API cleanup, etc).
+ These sorts of changes can often be done before the major change is done,
+ independently of that work.</li>
+ <li>The remaining inter-related work should be decomposed into unrelated
+ sets of changes if possible. Once this is done, define the first increment
+ and get consensus on what the end goal of the change is.</li>
+
+ <li>Each change in the set can be stand alone (e.g. to fix a bug), or part
+ of a planned series of changes that works towards the development goal.</li>
+
+ <li>Each change should be kept as small as possible. This simplifies your
+ work (into a logical progression), simplifies code review and reduces the
+ chance that you will get negative feedback on the change. Small increments
+ also facilitate the maintenance of a high quality code base.</li>
+
+ <li>Often, an independent precursor to a big change is to add a new API and
+ slowly migrate clients to use the new API. Each change to use the new
+ API is often "obvious" and can be committed without review. Once the
+ new API is in place and used, it is much easier to replace the
+ underlying implementation of the API. This implementation change is
+ logically separate from the API change.</li>
+ </ul>
+
+ <p>If you are interested in making a large change, and this scares you, please
+ make sure to first <a href="#newwork">discuss the change/gather
+ consensus</a> and then ask about the best way to go about making
+ the change.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="attribution">Attribution of
+Changes</a></div>
+<div class="doc_text">
+ <p>We believe in correct attribution of contributions to
+ their contributors. However, we do not want the source code to be littered
+ with random attributions (this is noisy/distracting and revision control
+ keeps a perfect history of this anyway). As such, we follow these rules:</p>
+ <ol>
+ <li>Developers who originate new files in LLVM should place their name at
+ the top of the file per the
+ <a href="CodingStandards.html#scf_commenting">Coding Standards</a>.</li>
+ <li>There should be only one name at the top of the file and it should be
+ the person who created the file.</li>
+ <li>Placing your name in the file does not imply <a
+ href="#clp">copyright</a>: it is only used to attribute the file to
+ its original author.</li>
+ <li>Developers should be aware that after some time has passed, the name at
+ the top of a file may become meaningless as maintenance/ownership of files
+ changes. Despite this, once set, the attribution of a file never changes.
+ Revision control keeps an accurate history of contributions.</li>
+ <li>Developers should maintain their entry in the
+ <a href="http://llvm.org/svn/llvm-project/llvm/trunk/CREDITS.TXT">CREDITS.TXT</a>
+ file to summarize their contributions.</li>
+ <li>Commit comments should contain correct attribution of the person who
+ submitted the patch if that person is not the committer (i.e. when a
+ developer with commit privileges commits a patch for someone else).</li>
+ </ol>
+</div>
+
+
+
+<!--=========================================================================-->
+<div class="doc_section">
+ <a name="clp">Copyright, License, and Patents</a>
+</div>
+<!--=========================================================================-->
+
+<div class="doc_text">
+ <p>This section addresses the issues of copyright, license and patents for
+ the LLVM project.
+ Currently, the University of Illinois is the LLVM copyright holder and the
+ terms of its license to LLVM users and developers are those of the
+ <a href="http://www.opensource.org/licenses/UoI-NCSA.php">University of
+ Illinois/NCSA Open Source License</a>.</p>
+
+<div class="doc_notes">
+ <p><b>NOTE: This section deals with legal matters but does not provide
+ legal advice. We are not lawyers; please seek legal counsel from an
+ attorney.</b></p>
+</div>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="copyright">Copyright</a></div>
+<div class="doc_text">
+ <p>For consistency and ease of management, the project requires the
+ copyright for all LLVM software to be held by a single copyright holder:
+ the University of Illinois (UIUC).</p>
+
+ <p>
+ Although UIUC may eventually reassign the copyright of the software to another
+ entity (e.g. a dedicated non-profit "LLVM Organization", or something),
+ the intent for the project is to always have a single entity hold the
+ copyrights to LLVM at any given time.</p>
+
+ <p>We believe that having a single copyright
+ holder is in the best interests of all developers and users as it greatly
+ reduces the managerial burden for any kind of administrative or technical
+ decisions about LLVM. The goal of the LLVM project is to always keep the code
+ open and <a href="#license">licensed under a very liberal license</a>.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="license">License</a></div>
+<div class="doc_text">
+ <p>We intend to keep LLVM perpetually open source
+ and to use a liberal open source license. The current license is the
+ <a href="http://www.opensource.org/licenses/UoI-NCSA.php">
+ University of Illinois/NCSA Open Source License</a>, which boils
+ down to this:</p>
+ <ul>
+ <li>You can freely distribute LLVM.</li>
+ <li>You must retain the copyright notice if you redistribute LLVM.</li>
+ <li>Binaries derived from LLVM must reproduce the copyright notice.</li>
+ <li>You can't use our names to promote your LLVM derived products.</li>
+ <li>There's no warranty on LLVM at all.</li>
+ </ul>
+
+ <p>We believe this fosters the widest adoption of LLVM because it <b>allows
+ commercial products to be derived from LLVM</b> with few restrictions and
+ without a requirement for making any derived works also open source (i.e.
+ LLVM's license is not a "copyleft" license like the GPL). We suggest that you
+ read the <a href="http://www.opensource.org/licenses/UoI-NCSA.php">License</a>
+ if further clarification is needed.</p>
+
+ <p>Note that the LLVM Project does distribute llvm-gcc, <b>which is GPL.</b>
+ This means that anything "linked" into llvm-gcc must itself be compatible
+ with the GPL, and must be releasable under the terms of the GPL. This implies
+ that <b>any code linked into llvm-gcc and distributed to others may be subject
+ to the viral aspects of the GPL</b> (for example, a proprietary code generator
+ linked into llvm-gcc must be made available under the GPL). This is not a
+ problem for code already distributed under a more liberal license (like the
+ UIUC license), and does not affect code generated by llvm-gcc. It may be a
+ problem if you intend to base commercial development on llvm-gcc without
+ redistributing your source code.</p>
+
+ <p>We have no plans to change the license of LLVM. If you have questions
+ or comments about the license, please contact the <a
+ href="mailto:llvm-oversight@cs.uiuc.edu">LLVM Oversight Group</a>.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="patents">Patents</a></div>
+<div class="doc_text">
+
+<p>To the best of our knowledge, LLVM does not infringe on any patents (we have
+ actually removed code from LLVM in the past that was found to infringe).
+ Having code in LLVM that infringes on patents would violate an important
+ goal of the project by making it hard or impossible to reuse the code for
+ arbitrary purposes (including commercial use).</p>
+
+<p>When contributing code, we expect contributors to notify us of any potential
+ for patent-related trouble with their changes. If you own the rights to a
+ patent and would like to contribute code to LLVM that relies on it, we
+ require that you sign an agreement that allows any other user of LLVM to
+ freely use your patent. Please contact the <a
+ href="mailto:llvm-oversight@cs.uiuc.edu">oversight group</a> for more
+ details.</p>
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="devagree">Developer Agreements</a></div>
+<div class="doc_text">
+ <p>With regard to the LLVM copyright and licensing, developers agree to
+ assign their copyrights to UIUC for any contribution made so that
+ the entire software base can be managed by a single copyright holder. This
+ implies that any contributions can be licensed under the license that the
+ project uses.</p>
+</div>
+
+<!-- *********************************************************************** -->
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+ Written by the
+ <a href="mailto:llvm-oversight@cs.uiuc.edu">LLVM Oversight Group</a><br>
+ <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+</body>
+</html>
diff --git a/docs/ExceptionHandling.html b/docs/ExceptionHandling.html
new file mode 100644
index 0000000..d49a285
--- /dev/null
+++ b/docs/ExceptionHandling.html
@@ -0,0 +1,439 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>Exception Handling in LLVM</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<div class="doc_title">Exception Handling in LLVM</div>
+
+<table class="layout" style="width:100%">
+ <tr class="layout">
+ <td class="left">
+<ul>
+ <li><a href="#introduction">Introduction</a>
+ <ol>
+ <li><a href="#itanium">Itanium ABI Zero-cost Exception Handling</a></li>
+ <li><a href="#overview">Overview</a></li>
+ </ol></li>
+ <li><a href="#codegen">LLVM Code Generation</a>
+ <ol>
+ <li><a href="#throw">Throw</a></li>
+ <li><a href="#try_catch">Try/Catch</a></li>
+ <li><a href="#finallys">Finallys</a></li>
+ <li><a href="#throw_filters">Throw Filters</a></li>
+ </ol></li>
+ <li><a href="#format_common_intrinsics">Exception Handling Intrinsics</a>
+ <ol>
+ <li><a href="#llvm_eh_exception"><tt>llvm.eh.exception</tt></a></li>
+ <li><a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a></li>
+ <li><a href="#llvm_eh_typeid_for"><tt>llvm.eh.typeid.for</tt></a></li>
+ </ol></li>
+ <li><a href="#asm">Asm Table Formats</a>
+ <ol>
+ <li><a href="#unwind_tables">Exception Handling Frame</a></li>
+ <li><a href="#exception_tables">Exception Tables</a></li>
+ </ol></li>
+ <li><a href="#todo">ToDo</a></li>
+</ul>
+</td>
+</tr></table>
+
+<div class="doc_author">
+ <p>Written by <a href="mailto:jlaskey@mac.com">Jim Laskey</a></p>
+</div>
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="introduction">Introduction</a></div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>This document is the central repository for all information pertaining to
+exception handling in LLVM. It describes the format that LLVM exception
+handling information takes, which is useful for those interested in creating
+front-ends or dealing directly with the information. Further, this document
+provides specific examples of what exception handling information is used for
+C/C++.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="itanium">Itanium ABI Zero-cost Exception Handling</a>
+</div>
+
+<div class="doc_text">
+
+<p>Exception handling for most programming languages is designed to recover from
+conditions that rarely occur during general use of an application. To that end,
+exception handling should not interfere with the main flow of an
+application's algorithm by performing checkpointing tasks such as saving
+the current pc or register state.</p>
+
+<p>The Itanium ABI Exception Handling Specification defines a methodology for
+providing outlying data in the form of exception tables without inlining
+speculative exception handling code in the flow of an application's main
+algorithm. Thus, the specification is said to add "zero-cost" to the normal
+execution of an application.</p>
+
+<p>A more complete description of the Itanium ABI exception handling runtime
+support can be found at <a
+href="http://www.codesourcery.com/cxx-abi/abi-eh.html">Itanium C++ ABI:
+Exception Handling.</a> A description of the exception frame format can be
+found at <a
+href="http://refspecs.freestandards.org/LSB_3.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html">Exception
+Frames</a>, with details of the Dwarf
+specification at <a href="http://www.eagercon.com/dwarf/dwarf3std.htm">Dwarf 3
+Standard.</a> A description for the C++ exception table formats can be found at
+<a href="http://www.codesourcery.com/cxx-abi/exceptions.pdf">Exception Handling
+Tables.</a></p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="overview">Overview</a>
+</div>
+
+<div class="doc_text">
+
+<p>When an exception is thrown in llvm code, the runtime makes a best effort to
+find a handler suited to process the circumstance.</p>
+
+<p>The runtime first attempts to find an <i>exception frame</i> corresponding to
+the function where the exception was thrown. If the programming language (ex.
+C++) supports exception handling, the exception frame contains a reference to an
+exception table describing how to process the exception. If the language (ex.
+C) does not support exception handling or if the exception needs to be forwarded
+to a prior activation, the exception frame contains information about how to
+unwind the current activation and restore the state of the prior activation.
+This process is repeated until the exception is handled. If the exception is
+not handled and no activations remain, then the application is terminated with
+an appropriate error message.</p>
+
+<p>Since different programming languages have different behaviors when handling
+exceptions, the exception handling ABI provides a mechanism for supplying
+<i>personalities.</i> An exception handling personality is defined by way of a
+<i>personality function</i> (ex. for C++ <tt>__gxx_personality_v0</tt>) which
+receives the context of the exception, an <i>exception structure</i> containing
+the exception object type and value, and a reference to the exception table for
+the current function. The personality function for the current compile unit is
+specified in a <i>common exception frame</i>.</p>
+
+<p>The organization of an exception table is language dependent. For C++, an
+exception table is organized as a series of code ranges defining what to do if
+an exception occurs in that range. Typically, the information associated with a
+range defines which types of exception objects (using C++ <i>type info</i>)
+are handled in that range, and an associated action that should take place.
+Actions typically pass control to a <i>landing pad</i>.</p>
+
+<p>A landing pad corresponds to the code found in the catch portion of a
+try/catch sequence. When execution resumes at a landing pad, it receives the
+exception structure and a selector corresponding to the <i>type</i> of exception
+thrown. The selector is then used to determine which catch should actually
+process the exception.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_section">
+ <a name="codegen">LLVM Code Generation</a>
+</div>
+
+<div class="doc_text">
+
+<p>At the time of this writing, only C++ exception handling support is available
+in LLVM. So the remainder of this document will be somewhat C++-centric.</p>
+
+<p>From the C++ developer's perspective, exceptions are defined in terms of the
+<tt>throw</tt> and <tt>try/catch</tt> statements. In this section we will
+describe the implementation of llvm exception handling in terms of C++
+examples.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="throw">Throw</a>
+</div>
+
+<div class="doc_text">
+
+<p>Languages that support exception handling typically provide a <tt>throw</tt>
+operation to initiate the exception process. Internally, a throw operation
+breaks down into two steps. First, a request is made to allocate exception
+space for an exception structure. This structure needs to survive beyond the
+current activation. This structure will contain the type and value of the
+object being thrown. Second, a call is made to the runtime to raise the
+exception, passing the exception structure as an argument.</p>
+
+<p>In C++, the allocation of the exception structure is done by the
+<tt>__cxa_allocate_exception</tt> runtime function. The exception raising is
+handled by <tt>__cxa_throw</tt>. The type of the exception is represented using
+a C++ RTTI type info structure.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="try_catch">Try/Catch</a>
+</div>
+
+<div class="doc_text">
+
+<p>A call within the scope of a try statement can potentially raise an exception.
+In those circumstances, the LLVM C++ front-end replaces the call with an
+<tt>invoke</tt> instruction. Unlike a call, the invoke has two potential
+continuation points; where to continue when the call succeeds as per normal, and
+where to continue if the call raises an exception, either by a throw or the
+unwinding of a throw.</p>
+
+<p>The place where an invoke continues after an exception is called a
+<i>landing pad</i>. LLVM landing pads are conceptually
+alternative function entry points where an exception structure reference and a type
+info index are passed in as arguments. The landing pad saves the exception
+structure reference and then proceeds to select the catch block that corresponds
+to the type info of the exception object.</p>
+
+<p>Two llvm intrinsic functions are used to convey information about the landing
+pad to the back end.</p>
+
+<p><a href="#llvm_eh_exception"><tt>llvm.eh.exception</tt></a> takes no
+arguments and returns the exception structure reference. The backend replaces
+this intrinsic with the code that accesses the first argument of a call. The
+LLVM C++ front end generates code to save this value in an alloca location for
+further use in the landing pad and catch code.</p>
+
+<p><a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> takes a minimum of
+three arguments. The first argument is the reference to the exception
+structure. The second argument is a reference to the personality function to be
+used for this try catch sequence. Each of the remaining arguments is either a
+reference to the type info for a catch statement, or a non-negative integer
+followed by that many type info references, representing a
+<a href="#throw_filters">filter</a>.
+The exception is tested against the arguments sequentially from first to last.
+The <i>catch all</i> (...) is represented with a <tt>null i8*</tt>. The result
+of the <a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> is a positive
+number if the exception matched a type info, a negative number if it matched a
+filter, and zero if it didn't match anything. If a type info matched then the
+returned value is the index of the type info in the exception table.
+The LLVM C++ front end generates code to save this value in an alloca location
+for further use in the landing pad and catch code.</p>
+
+<p>Once the landing pad has the type info selector, the code branches to the
+code for the first catch. The catch then checks the value of the type info
+selector against the index of type info for that catch. Since the type info
+index is not known until all the type info have been gathered in the backend,
+the catch code will call the <a
+href="#llvm_eh_typeid_for"><tt>llvm.eh.typeid.for</tt></a> intrinsic to
+determine the index for a given type info. If the catch fails to match the
+selector then control is passed on to the next catch. Note: Since the landing
+pad will not be used if there is no match in the list of type info on the call
+to <a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a>, then neither the
+last catch nor <i>catch all</i> need to perform the check against the
+selector.</p>
+
+<p>Finally, the entry and exit of catch code is bracketed with calls to
+<tt>__cxa_begin_catch</tt> and <tt>__cxa_end_catch</tt>.
+<tt>__cxa_begin_catch</tt> takes an exception structure reference as an argument
+and returns the value of the exception object. <tt>__cxa_end_catch</tt>
+takes an exception structure reference as an argument. This function clears the
+exception from the exception space. Note: a rethrow from within the catch may
+replace this call with a <tt>__cxa_rethrow</tt>.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="finallys">Finallys</a>
+</div>
+
+<div class="doc_text">
+
+<p>To handle destructors and cleanups in try code, control may not run directly
+from a landing pad to the first catch. Control may actually flow from the
+landing pad to clean up code and then to the first catch. Since the required
+clean up for each invoke in a try may be different (ex., intervening
+constructor), there may be several landing pads for a given try.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="throw_filters">Throw Filters</a>
+</div>
+
+<div class="doc_text">
+
+<p>C++ allows the specification of which exception types can be thrown from
+a function. To represent this a top level landing pad may exist to filter out
+invalid types. To express this in LLVM code the landing pad will call <a
+href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a>. The arguments are the
+number of different type infos the function may throw, followed by the type
+infos themselves.
+<a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> will return a negative
+value if the exception does not match any of the type infos. If no match is
+found then a call to <tt>__cxa_call_unexpected</tt> should be made, otherwise
+<tt>_Unwind_Resume</tt>. Each of these functions requires a reference to the
+exception structure.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_section">
+ <a name="format_common_intrinsics">Exception Handling Intrinsics</a>
+</div>
+
+<div class="doc_text">
+
+<p>LLVM uses several intrinsic functions (name prefixed with "llvm.eh") to
+provide exception handling information at various points in generated code.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="llvm_eh_exception">llvm.eh.exception</a>
+</div>
+
+<div class="doc_text">
+<pre>
+ i8* %<a href="#llvm_eh_exception">llvm.eh.exception</a>( )
+</pre>
+
+<p>This intrinsic indicates that the exception structure is available at this
+point in the code. The backend will replace this intrinsic with code to fetch
+the first argument of a call. The effect is that the intrinsic result is the
+exception structure reference.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="llvm_eh_selector">llvm.eh.selector</a>
+</div>
+
+<div class="doc_text">
+<pre>
+ i32 %<a href="#llvm_eh_selector">llvm.eh.selector</a>(i8*, i8*, i8*, ...)
+</pre>
+
+<p>This intrinsic indicates that the exception selector is available at this
+point in the code. The backend will replace this intrinsic with code to fetch
+the second argument of a call. The effect is that the intrinsic result is the
+exception selector.</p>
+
+<p><a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> takes a minimum of
+three arguments. The first argument is the reference to the exception
+structure. The second argument is a reference to the personality function to be
+used for this try catch sequence. Each of the remaining arguments is either a
+reference to the type info for a catch statement, or a non-negative integer
+followed by that many type info references, representing a
+<a href="#throw_filters">filter</a>.
+The exception is tested against the arguments sequentially from first to last.
+The <i>catch all</i> (...) is represented with a <tt>null i8*</tt>. The result
+of the <a href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a> is a positive
+number if the exception matched a type info, a negative number if it matched a
+filter, and zero if it didn't match anything. If a type info matched then the
+returned value is the index of the type info in the exception table.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="llvm_eh_typeid_for">llvm.eh.typeid.for</a>
+</div>
+
+<div class="doc_text">
+<pre>
+ i32 %<a href="#llvm_eh_typeid_for">llvm.eh.typeid.for</a>(i8*)
+</pre>
+
+<p>This intrinsic returns the type info index in the exception table of the
+current function. This value can be used to compare against the result of <a
+href="#llvm_eh_selector"><tt>llvm.eh.selector</tt></a>. The single argument is
+a reference to a type info.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_section">
+ <a name="asm">Asm Table Formats</a>
+</div>
+
+<div class="doc_text">
+
+<p>There are two tables that are used by the exception handling runtime to
+determine which actions should take place when an exception is thrown.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="unwind_tables">Exception Handling Frame</a>
+</div>
+
+<div class="doc_text">
+
+<p>An exception handling frame <tt>eh_frame</tt> is very similar to the unwind
+frame used by dwarf debug info. The frame contains all the information
+necessary to tear down the current frame and restore the state of the prior
+frame. There is an exception handling frame for each function in a compile
+unit, plus a common exception handling frame that defines information common to
+all functions in the unit.</p>
+
+<p>Todo - Table details here.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="exception_tables">Exception Tables</a>
+</div>
+
+<div class="doc_text">
+
+<p>An exception table contains information about what actions to take when an
+exception is thrown in a particular part of a function's code. There is
+one exception table per function, except that leaf routines and functions that
+only call non-throwing functions do not need an exception table.</p>
+
+<p>Todo - Table details here.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_section">
+ <a name="todo">ToDo</a>
+</div>
+
+<div class="doc_text">
+
+<ol>
+
+<li><p>Testing/Testing/Testing.</p></li>
+
+</ol>
+
+</div>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+ <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
+ <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+
+</body>
+</html>
diff --git a/docs/ExtendingLLVM.html b/docs/ExtendingLLVM.html
new file mode 100644
index 0000000..7505c0e
--- /dev/null
+++ b/docs/ExtendingLLVM.html
@@ -0,0 +1,392 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>Extending LLVM: Adding instructions, intrinsics, types, etc.</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+
+<body>
+
+<div class="doc_title">
+ Extending LLVM: Adding instructions, intrinsics, types, etc.
+</div>
+
+<ol>
+ <li><a href="#introduction">Introduction and Warning</a></li>
+ <li><a href="#intrinsic">Adding a new intrinsic function</a></li>
+ <li><a href="#instruction">Adding a new instruction</a></li>
+ <li><a href="#sdnode">Adding a new SelectionDAG node</a></li>
+ <li><a href="#type">Adding a new type</a>
+ <ol>
+ <li><a href="#fund_type">Adding a new fundamental type</a></li>
+ <li><a href="#derived_type">Adding a new derived type</a></li>
+ </ol></li>
+</ol>
+
+<div class="doc_author">
+ <p>Written by <a href="http://misha.brukman.net">Misha Brukman</a>,
+ Brad Jones, Nate Begeman,
+ and <a href="http://nondot.org/sabre">Chris Lattner</a></p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="introduction">Introduction and Warning</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>During the course of using LLVM, you may wish to customize it for your
+research project or for experimentation. At this point, you may realize that
+you need to add something to LLVM, whether it be a new fundamental type, a new
+intrinsic function, or a whole new instruction.</p>
+
+<p>When you come to this realization, stop and think. Do you really need to
+extend LLVM? Is it a new fundamental capability that LLVM does not support in
+its current incarnation, or can it be synthesized from already-existing LLVM
+elements? If you are not sure, ask on the <a
+href="http://mail.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVM-dev</a> list. The
+reason is that extending LLVM will get involved as you need to update all the
+different passes that you intend to use with your extension, and there are
+<em>many</em> LLVM analyses and transformations, so it may be quite a bit of
+work.</p>
+
+<p>Adding an <a href="#intrinsic">intrinsic function</a> is far easier than
+adding an instruction, and is transparent to optimization passes. If your added
+functionality can be expressed as a
+function call, an intrinsic function is the method of choice for LLVM
+extension.</p>
+
+<p>Before you invest a significant amount of effort into a non-trivial
+extension, <span class="doc_warning">ask on the list</span> if what you are
+looking to do can be done with already-existing infrastructure, or if maybe
+someone else is already working on it. You will save yourself a lot of time and
+effort by doing so.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="intrinsic">Adding a new intrinsic function</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>Adding a new intrinsic function to LLVM is much easier than adding a new
+instruction. Almost all extensions to LLVM should start as an intrinsic
+function and then be turned into an instruction if warranted.</p>
+
+<ol>
+<li><tt>llvm/docs/LangRef.html</tt>:
+ Document the intrinsic. Decide whether it is code generator specific and
+ what the restrictions are. Talk to other people about it so that you are
+ sure it's a good idea.</li>
+
+<li><tt>llvm/include/llvm/Intrinsics*.td</tt>:
+ Add an entry for your intrinsic. Describe its memory access characteristics
+ for optimization (this controls whether it will be DCE'd, CSE'd, etc). Note
+ that any intrinsic using the <tt>llvm_int_ty</tt> type for an argument will
+ be deemed by <tt>tblgen</tt> as overloaded and the corresponding suffix
+ will be required on the intrinsic's name.</li>
+
+<li><tt>llvm/lib/Analysis/ConstantFolding.cpp</tt>: If it is possible to
+ constant fold your intrinsic, add support for it in the
+ <tt>canConstantFoldCallTo</tt> and <tt>ConstantFoldCall</tt> functions.</li>
+
+<li><tt>llvm/test/Regression/*</tt>: Add test cases for your intrinsic to the
+ test suite</li>
+</ol>
+
+<p>Once the intrinsic has been added to the system, you must add code generator
+support for it. Generally you must do the following steps:</p>
+
+<dl>
+<dt>Add support to the C backend in <tt>lib/Target/CBackend/</tt></dt>
+
+<dd>Depending on the intrinsic, there are a few ways to implement this. For
+most intrinsics, it makes sense to add code to lower your intrinsic in
+<tt>LowerIntrinsicCall</tt> in <tt>lib/CodeGen/IntrinsicLowering.cpp</tt>.
+Second, if it makes sense to lower the intrinsic to an expanded sequence of C
+code in all cases, just emit the expansion in <tt>visitCallInst</tt> in
+<tt>Writer.cpp</tt>. If the intrinsic has some way to express it with GCC
+(or any other compiler) extensions, it can be conditionally supported based on
+the compiler compiling the CBE output (see <tt>llvm.prefetch</tt> for an
+example).
+Third, if the intrinsic really has no way to be lowered, just have the code
+generator emit code that prints an error message and calls abort if executed.
+</dd>
+
+<dt>Add support to the .td file for the target(s) of your choice in
+ <tt>lib/Target/*/*.td</tt>.</dt>
+
+<dd>This is usually a matter of adding a pattern to the .td file that matches
+ the intrinsic, though it may obviously require adding the instructions you
+ want to generate as well. There are lots of examples in the PowerPC and X86
+ backends to follow.</dd>
+</dl>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="sdnode">Adding a new SelectionDAG node</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>As with intrinsics, adding a new SelectionDAG node to LLVM is much easier
+than adding a new instruction. New nodes are often added to help represent
+instructions common to many targets. These nodes often map to an LLVM
+instruction (add, sub) or intrinsic (byteswap, population count). In other
+cases, new nodes have been added to allow many targets to perform a common task
+(converting between floating point and integer representation) or capture more
+complicated behavior in a single node (rotate).</p>
+
+<ol>
+<li><tt>include/llvm/CodeGen/SelectionDAGNodes.h</tt>:
+ Add an enum value for the new SelectionDAG node.</li>
+<li><tt>lib/CodeGen/SelectionDAG/SelectionDAG.cpp</tt>:
+ Add code to print the node to <tt>getOperationName</tt>. If your new node
+ can be evaluated at compile time when given constant arguments (such as an
+ add of a constant with another constant), find the <tt>getNode</tt> method
+ that takes the appropriate number of arguments, and add a case for your node
+ to the switch statement that performs constant folding for nodes that take
+ the same number of arguments as your new node.</li>
+<li><tt>lib/CodeGen/SelectionDAG/LegalizeDAG.cpp</tt>:
+ Add code to <a href="CodeGenerator.html#selectiondag_legalize">legalize,
+ promote, and expand</a> the node as necessary. At a minimum, you will need
+ to add a case statement for your node in <tt>LegalizeOp</tt> which calls
+ LegalizeOp on the node's operands, and returns a new node if any of the
+ operands changed as a result of being legalized. It is likely that not all
+ targets supported by the SelectionDAG framework will natively support the
+ new node. In this case, you must also add code in your node's case
+ statement in <tt>LegalizeOp</tt> to Expand your node into simpler, legal
+ operations. The case for <tt>ISD::UREM</tt> for expanding a remainder into
+ a divide, multiply, and a subtract is a good example.</li>
+<li><tt>lib/CodeGen/SelectionDAG/LegalizeDAG.cpp</tt>:
+ If targets may support the new node being added only at certain sizes, you
+ will also need to add code to your node's case statement in
+ <tt>LegalizeOp</tt> to Promote your node's operands to a larger size, and
+ perform the correct operation. You will also need to add code to
+ <tt>PromoteOp</tt> to do this as well. For a good example, see
+ <tt>ISD::BSWAP</tt>,
+ which promotes its operand to a wider size, performs the byteswap, and then
+ shifts the correct bytes right to emulate the narrower byteswap in the
+ wider type.</li>
+<li><tt>lib/CodeGen/SelectionDAG/LegalizeDAG.cpp</tt>:
+ Add a case for your node in <tt>ExpandOp</tt> to teach the legalizer how to
+ perform the action represented by the new node on a value that has been
+ split into high and low halves. This case will be used to support your
+ node with a 64 bit operand on a 32 bit target.</li>
+<li><tt>lib/CodeGen/SelectionDAG/DAGCombiner.cpp</tt>:
+ If your node can be combined with itself, or other existing nodes in a
+ peephole-like fashion, add a visit function for it, and call that function
+ from <tt>visit</tt>. There are several good examples for simple combines you
+ can do; <tt>visitFABS</tt> and <tt>visitSRL</tt> are good starting places.
+ </li>
+<li><tt>lib/Target/PowerPC/PPCISelLowering.cpp</tt>:
+ Each target has an implementation of the <tt>TargetLowering</tt> class,
+ usually in its own file (although some targets include it in the same
+ file as the DAGToDAGISel). The default behavior for a target is to
+ assume that your new node is legal for all types that are legal for
+ that target. If this target does not natively support your node, then
+ tell the target to either Promote it (if it is supported at a larger
+ type) or Expand it. This will cause the code you wrote in
+ <tt>LegalizeOp</tt> above to decompose your new node into other legal
+ nodes for this target.</li>
+<li><tt>lib/Target/TargetSelectionDAG.td</tt>:
+ Most current targets supported by LLVM generate code using the DAGToDAG
+ method, where SelectionDAG nodes are pattern matched to target-specific
+ nodes, which represent individual instructions. In order for the targets
+ to match an instruction to your new node, you must add a def for that node
+ to the list in this file, with the appropriate type constraints. Look at
+ <tt>add</tt>, <tt>bswap</tt>, and <tt>fadd</tt> for examples.</li>
+<li><tt>lib/Target/PowerPC/PPCInstrInfo.td</tt>:
+ Each target has a tablegen file that describes the target's instruction
+ set. For targets that use the DAGToDAG instruction selection framework,
+ add a pattern for your new node that uses one or more target nodes.
+ Documentation for this is a bit sparse right now, but there are several
+ decent examples. See the patterns for <tt>rotl</tt> in
+ <tt>PPCInstrInfo.td</tt>.</li>
+<li>TODO: document complex patterns.</li>
+<li><tt>llvm/test/Regression/CodeGen/*</tt>: Add test cases for your new node
+ to the test suite. <tt>llvm/test/Regression/CodeGen/X86/bswap.ll</tt> is
+ a good example.</li>
+</ol>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="instruction">Adding a new instruction</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p><span class="doc_warning">WARNING: adding instructions changes the bitcode
+format, and it will take some effort to maintain compatibility with
+the previous version.</span> Only add an instruction if it is absolutely
+necessary.</p>
+
+<ol>
+
+<li><tt>llvm/include/llvm/Instruction.def</tt>:
+ add a number for your instruction and an enum name</li>
+
+<li><tt>llvm/include/llvm/Instructions.h</tt>:
+ add a definition for the class that will represent your instruction</li>
+
+<li><tt>llvm/include/llvm/Support/InstVisitor.h</tt>:
+ add a prototype for a visitor to your new instruction type</li>
+
+<li><tt>llvm/lib/AsmParser/Lexer.l</tt>:
+ add a new token to parse your instruction from assembly text file</li>
+
+<li><tt>llvm/lib/AsmParser/llvmAsmParser.y</tt>:
+ add the grammar on how your instruction can be read and what it will
+ construct as a result</li>
+
+<li><tt>llvm/lib/Bitcode/Reader/Reader.cpp</tt>:
+ add a case for your instruction and how it will be parsed from bitcode</li>
+
+<li><tt>llvm/lib/VMCore/Instruction.cpp</tt>:
+ add a case for how your instruction will be printed out to assembly</li>
+
+<li><tt>llvm/lib/VMCore/Instructions.cpp</tt>:
+ implement the class you defined in
+ <tt>llvm/include/llvm/Instructions.h</tt></li>
+
+<li>Test your instruction</li>
+
+<li><tt>llvm/lib/Target/*</tt>:
+ Add support for your instruction to code generators, or add a lowering
+ pass.</li>
+
+<li><tt>llvm/test/Regression/*</tt>: add your test cases to the test suite.</li>
+
+</ol>
+
+<p>Also, you need to implement (or modify) any analyses or passes that you want
+to understand this new instruction.</p>
+
+</div>
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="type">Adding a new type</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p><span class="doc_warning">WARNING: adding new types changes the bitcode
+format, and will break compatibility with currently-existing LLVM
+installations.</span> Only add new types if it is absolutely necessary.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="fund_type">Adding a fundamental type</a>
+</div>
+
+<div class="doc_text">
+
+<ol>
+
+<li><tt>llvm/include/llvm/Type.h</tt>:
+ add enum for the new type; add static <tt>Type*</tt> for this type</li>
+
+<li><tt>llvm/lib/VMCore/Type.cpp</tt>:
+ add mapping from <tt>TypeID</tt> =&gt; <tt>Type*</tt>;
+ initialize the static <tt>Type*</tt></li>
+
+<li><tt>llvm/lib/AsmParser/Lexer.l</tt>:
+ add ability to parse in the type from text assembly</li>
+
+<li><tt>llvm/lib/AsmParser/llvmAsmParser.y</tt>:
+ add a token for that type</li>
+
+</ol>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="derived_type">Adding a derived type</a>
+</div>
+
+<div class="doc_text">
+
+<ol>
+<li><tt>llvm/include/llvm/Type.h</tt>:
+ add enum for the new type; add a forward declaration of the type
+ also</li>
+
+<li><tt>llvm/include/llvm/DerivedTypes.h</tt>:
+ add new class to represent new class in the hierarchy; add forward
+ declaration to the TypeMap value type</li>
+
+<li><tt>llvm/lib/VMCore/Type.cpp</tt>:
+ add support for derived type to:
+<div class="doc_code">
+<pre>
+std::string getTypeDescription(const Type &amp;Ty,
+ std::vector&lt;const Type*&gt; &amp;TypeStack)
+bool TypesEqual(const Type *Ty, const Type *Ty2,
+ std::map&lt;const Type*, const Type*&gt; &amp; EqTypes)
+</pre>
+</div>
+ add necessary member functions for type, and factory methods</li>
+
+<li><tt>llvm/lib/AsmParser/Lexer.l</tt>:
+ add ability to parse in the type from text assembly</li>
+
+<li><tt>llvm/lib/Bitcode/Writer/Writer.cpp</tt>:
+ modify <tt>void BitcodeWriter::outputType(const Type *T)</tt> to serialize
+ your type</li>
+
+<li><tt>llvm/lib/Bitcode/Reader/Reader.cpp</tt>:
+ modify <tt>const Type *BitcodeReader::ParseType()</tt> to read your data
+ type</li>
+
+<li><tt>llvm/lib/VMCore/AsmWriter.cpp</tt>:
+ modify
+<div class="doc_code">
+<pre>
+void calcTypeName(const Type *Ty,
+ std::vector&lt;const Type*&gt; &amp;TypeStack,
+ std::map&lt;const Type*,std::string&gt; &amp;TypeNames,
+ std::string &amp; Result)
+</pre>
+</div>
+ to output the new derived type
+</li>
+
+
+</ol>
+
+</div>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+ <a href="http://llvm.org">The LLVM Compiler Infrastructure</a>
+ <br>
+ Last modified: $Date$
+</address>
+
+</body>
+</html>
diff --git a/docs/FAQ.html b/docs/FAQ.html
new file mode 100644
index 0000000..96360d0
--- /dev/null
+++ b/docs/FAQ.html
@@ -0,0 +1,713 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>LLVM: Frequently Asked Questions</title>
+ <style type="text/css">
+ @import url("llvm.css");
+ .question { font-weight: bold }
+ .answer { margin-left: 2em }
+ </style>
+</head>
+<body>
+
+<div class="doc_title">
+ LLVM: Frequently Asked Questions
+</div>
+
+<ol>
+ <li><a href="#license">License</a>
+ <ol>
+ <li>Why are the LLVM source code and the front-end distributed under different
+ licenses?</li>
+ <li>Does the University of Illinois Open Source License really qualify as an
+ "open source" license?</li>
+ <li>Can I modify LLVM source code and redistribute the modified source?</li>
+ <li>Can I modify LLVM source code and redistribute binaries or other tools
+ based on it, without redistributing the source?</li>
+ </ol></li>
+
+ <li><a href="#source">Source code</a>
+ <ol>
+ <li>In what language is LLVM written?</li>
+ <li>How portable is the LLVM source code?</li>
+ </ol></li>
+
+ <li><a href="#build">Build Problems</a>
+ <ol>
+ <li>When I run configure, it finds the wrong C compiler.</li>
+ <li>The <tt>configure</tt> script finds the right C compiler, but it uses the
+ LLVM linker from a previous build. What do I do?</li>
+ <li>When creating a dynamic library, I get a strange GLIBC error.</li>
+ <li>I've updated my source tree from Subversion, and now my build is trying
+ to use a file/directory that doesn't exist.</li>
+ <li>I've modified a Makefile in my source tree, but my build tree keeps using
+ the old version. What do I do?</li>
+ <li>I've upgraded to a new version of LLVM, and I get strange build
+ errors.</li>
+ <li>I've built LLVM and am testing it, but the tests freeze.</li>
+ <li>Why do test results differ when I perform different types of builds?</li>
+ <li>Compiling LLVM with GCC 3.3.2 fails, what should I do?</li>
+ <li>When I use the test suite, all of the C Backend tests fail. What is
+ wrong?</li>
+ <li>After Subversion update, rebuilding gives the error "No rule to make
+ target".</li>
+ <li><a href="#llvmc">The <tt>llvmc</tt> program gives me errors/doesn't
+ work.</a></li>
+ </ol></li>
+
+ <li><a href="#felangs">Source Languages</a>
+ <ol>
+ <li><a href="#langs">What source languages are supported?</a></li>
+ <li><a href="#langhlsupp">What support is there for higher level source
+ language constructs for building a compiler?</a></li>
+ <li><a href="GetElementPtr.html">I don't understand the GetElementPtr
+ instruction. Help!</a></li>
+ </ol></li>
+
+ <li><a href="#cfe">Using the GCC Front End</a>
+ <ol>
+ <li>
+ When I compile software that uses a configure script, the configure script
+ thinks my system has all of the header files and libraries it is testing
+ for. How do I get configure to work correctly?
+ </li>
+
+ <li>
+ When I compile code using the LLVM GCC front end, it complains that it
+ cannot find libcrtend.a.
+ </li>
+
+ <li>
+ How can I disable all optimizations when compiling code using the LLVM GCC front end?
+ </li>
+
+ <li><a href="#translatec++">Can I use LLVM to convert C++ code to C code?</a></li>
+
+ </ol>
+ </li>
+
+ <li><a href="#cfe_code">Questions about code generated by the GCC front-end</a>
+ <ol>
+ <li><a href="#__main">What is this <tt>__main()</tt> call that gets inserted into
+ <tt>main()</tt>?</a></li>
+ <li><a href="#iosinit">What is this <tt>llvm.global_ctors</tt> and
+ <tt>_GLOBAL__I__tmp_webcompile...</tt> stuff that happens when I
+ #include &lt;iostream&gt;?</a></li>
+ <li><a href="#codedce">Where did all of my code go??</a></li>
+ <li><a href="#undef">What is this "<tt>undef</tt>" thing that shows up in my code?</a></li>
+ </ol>
+ </li>
+</ol>
+
+<div class="doc_author">
+ <p>Written by <a href="http://llvm.org">The LLVM Team</a></p>
+</div>
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="license">License</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="question">
+<p>Why are the LLVM source code and the front-end distributed under different
+licenses?</p>
+</div>
+
+<div class="answer">
+<p>The C/C++ front-ends are based on GCC and must be distributed under the GPL.
+Our aim is to distribute LLVM source code under a <em>much less restrictive</em>
+license, in particular one that does not compel users who distribute tools based
+on modifying the source to redistribute the modified source code as well.</p>
+</div>
+
+<div class="question">
+<p>Does the University of Illinois Open Source License really qualify as an
+"open source" license?</p>
+</div>
+
+<div class="answer">
+<p>Yes, the license is <a
+href="http://www.opensource.org/licenses/UoI-NCSA.php">certified</a> by the Open
+Source Initiative (OSI).</p>
+</div>
+
+<div class="question">
+<p>Can I modify LLVM source code and redistribute the modified source?</p>
+</div>
+
+<div class="answer">
+<p>Yes. The modified source distribution must retain the copyright notice and
+follow the three bulleted conditions listed in the <a
+href="http://llvm.org/releases/1.3/LICENSE.TXT">LLVM license</a>.</p>
+</div>
+
+<div class="question">
+<p>Can I modify LLVM source code and redistribute binaries or other tools based
+on it, without redistributing the source?</p>
+</div>
+
+<div class="answer">
+<p>Yes, this is why we distribute LLVM under a less restrictive license than
+GPL, as explained in the first question above.</p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="source">Source Code</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="question">
+<p>In what language is LLVM written?</p>
+</div>
+
+<div class="answer">
+<p>All of the LLVM tools and libraries are written in C++ with extensive use of
+the STL.</p>
+</div>
+
+<div class="question">
+<p>How portable is the LLVM source code?</p>
+</div>
+
+<div class="answer">
+<p>The LLVM source code should be portable to most modern UNIX-like operating
+systems. Most of the code is written in standard C++ with operating system
+services abstracted to a support library. The tools required to build and test
+LLVM have been ported to a plethora of platforms.</p>
+
+<p>Some porting problems may exist in the following areas:</p>
+
+<ul>
+
+ <li>The GCC front end code is not as portable as the LLVM suite, so it may not
+ compile as well on unsupported platforms.</li>
+
+ <li>The LLVM build system relies heavily on UNIX shell tools, like the Bourne
+ Shell and sed. Porting to systems without these tools (MacOS 9, Plan 9) will
+ require more effort.</li>
+
+</ul>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="build">Build Problems</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="question">
+<p>When I run configure, it finds the wrong C compiler.</p>
+</div>
+
+<div class="answer">
+
+<p>The <tt>configure</tt> script attempts to locate first <tt>gcc</tt> and then
+<tt>cc</tt>, unless it finds compiler paths set in <tt>CC</tt> and <tt>CXX</tt>
+for the C and C++ compiler, respectively.</p>
+
+<p>If <tt>configure</tt> finds the wrong compiler, either adjust your
+<tt>PATH</tt> environment variable or set <tt>CC</tt> and <tt>CXX</tt>
+explicitly.</p>
+
+</div>
+
+<div class="question">
+<p>The <tt>configure</tt> script finds the right C compiler, but it uses the
+LLVM linker from a previous build. What do I do?</p>
+</div>
+
+<div class="answer">
+<p>The <tt>configure</tt> script uses the <tt>PATH</tt> to find executables, so
+if it's grabbing the wrong linker/assembler/etc, there are two ways to fix
+it:</p>
+
+<ol>
+
+ <li><p>Adjust your <tt>PATH</tt> environment variable so that the correct
+ program appears first in the <tt>PATH</tt>. This may work, but may not be
+ convenient when you want them <i>first</i> in your path for other
+ work.</p></li>
+
+ <li><p>Run <tt>configure</tt> with an alternative <tt>PATH</tt> that is
+ correct. In a Bourne-compatible shell, the syntax would be:</p>
+
+ <p><tt>PATH=[the path without the bad program] ./configure ...</tt></p>
+
+ <p>This is still somewhat inconvenient, but it allows <tt>configure</tt>
+ to do its work without having to adjust your <tt>PATH</tt>
+ permanently.</p></li>
+
+</ol>
+
+</div>
+
+<div class="question">
+<p>When creating a dynamic library, I get a strange GLIBC error.</p>
+</div>
+
+<div class="answer">
+<p>Under some operating systems (e.g., Linux), libtool does not work correctly if
+GCC was compiled with the --disable-shared option. To work around this, install
+your own version of GCC that has shared libraries enabled by default.</p>
+</div>
+
+<div class="question">
+ <p>I've updated my source tree from Subversion, and now my build is trying to
+ use a file/directory that doesn't exist.</p>
+</div>
+
+<div class="answer">
+<p>You need to re-run configure in your object directory. When new Makefiles
+are added to the source tree, they have to be copied over to the object tree in
+order to be used by the build.</p>
+</div>
+
+<div class="question">
+<p>I've modified a Makefile in my source tree, but my build tree keeps using the
+old version. What do I do?</p>
+</div>
+
+<div class="answer">
+
+<p>If the Makefile already exists in your object tree, you
+can just run the following command in the top level directory of your object
+tree:</p>
+
+<p><tt>./config.status &lt;relative path to Makefile&gt;</tt></p>
+
+<p>If the Makefile is new, you will have to modify the configure script to copy
+it over.</p>
+
+</div>
+
+<div class="question">
+<p>I've upgraded to a new version of LLVM, and I get strange build errors.</p>
+</div>
+
+<div class="answer">
+
+<p>Sometimes, changes to the LLVM source code alter how the build system works.
+Changes in libtool, autoconf, or header file dependencies are especially prone
+to this sort of problem.</p>
+
+<p>The best thing to try is to remove the old files and re-build. In most
+cases, this takes care of the problem. To do this, just type <tt>make
+clean</tt> and then <tt>make</tt> in the directory that fails to build.</p>
+
+</div>
+
+<div class="question">
+<p>I've built LLVM and am testing it, but the tests freeze.</p>
+</div>
+
+<div class="answer">
+
+<p>This is most likely occurring because you built a profile or release
+(optimized) build of LLVM and have not specified the same information on the
+<tt>gmake</tt> command line.</p>
+
+<p>For example, if you built LLVM with the command:</p>
+
+<div class="doc_code">
+<pre>
+% gmake ENABLE_PROFILING=1
+</pre>
+</div>
+
+<p>...then you must run the tests with the following commands:</p>
+
+<div class="doc_code">
+<pre>
+% cd llvm/test
+% gmake ENABLE_PROFILING=1
+</pre>
+</div>
+
+</div>
+
+<div class="question">
+<p>Why do test results differ when I perform different types of builds?</p>
+</div>
+
+<div class="answer">
+
+<p>The LLVM test suite is dependent upon several features of the LLVM tools and
+libraries.</p>
+
+<p>First, the debugging assertions in code are not enabled in optimized or
+profiling builds. Hence, tests that used to fail may pass.</p>
+
+<p>Second, some tests may rely upon debugging options or behavior that is only
+available in the debug build. These tests will fail in an optimized or profile
+build.</p>
+
+</div>
+
+<div class="question">
+<p>Compiling LLVM with GCC 3.3.2 fails, what should I do?</p>
+</div>
+
+<div class="answer">
+<p>This is <a href="http://gcc.gnu.org/PR?13392">a bug in GCC</a>, and
+ affects projects other than LLVM. Try upgrading or downgrading your GCC.</p>
+</div>
+
+<div class="question">
+ <p>After Subversion update, rebuilding gives the error
+ "No rule to make target".</p>
+</div>
+
+<div class="answer">
+<p>If the error is of the form:</p>
+
+<div class="doc_code">
+<pre>
+gmake[2]: *** No rule to make target `/path/to/somefile', needed by
+`/path/to/another/file.d'.
+Stop.
+</pre>
+</div>
+
+<p>This may occur anytime files are moved within the Subversion repository or
+removed entirely. In this case, the best solution is to erase all
+<tt>.d</tt> files, which list dependencies for source files, and rebuild:</p>
+
+<div class="doc_code">
+<pre>
+% cd $LLVM_OBJ_DIR
+% rm -f `find . -name \*\.d`
+% gmake
+</pre>
+</div>
+
+<p>In other cases, it may be necessary to run <tt>make clean</tt> before
+rebuilding.</p>
+</div>
+
+<div class="question">
+ <p><a name="llvmc">The <tt>llvmc</tt> program gives me errors/doesn't
+ work.</a></p>
+</div>
+
+<div class="answer">
+ <p><tt>llvmc</tt> is experimental and isn't really supported. We suggest
+ using <tt>llvm-gcc</tt> instead.</p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="felangs">Source Languages</a></div>
+
+<div class="question"><p>
+ <a name="langs">What source languages are supported?</a></p>
+</div>
+<div class="answer">
+ <p>LLVM currently has full support for C and C++ source languages. These are
+ available through a special version of GCC that LLVM calls the
+ <a href="#cfe">C Front End</a>.</p>
+ <p>There is an incomplete version of a Java front end available in the
+ <tt>java</tt> module. There is no documentation on this yet so
+ you'll need to download the code, compile it, and try it.</p>
+ <p>In the <tt>stacker</tt> module is a compiler and runtime
+ library for the Stacker language, a "toy" language loosely based on Forth.</p>
+ <p>The PyPy developers are working on integrating LLVM into the PyPy backend
+ so that the PyPy language can be translated to LLVM.</p>
+</div>
+<div class="question"><p>
+ <a name="langhlsupp">What support is there for higher level source language
+ constructs for building a compiler?</a></p>
+</div>
+<div class="answer">
+ <p>Currently, there isn't much. LLVM supports an intermediate representation
+ which is useful for code representation but will not support the high level
+ (abstract syntax tree) representation needed by most compilers. There are no
+ facilities for lexical or semantic analysis. There is, however, a <i>mostly
+ implemented</i> configuration-driven
+ <a href="CompilerDriver.html">compiler driver</a> which simplifies the task
+ of running optimizations, linking, and executable generation.</p>
+</div>
+
+<div class="question"><p>
+ <a name="gep">I don't understand the GetElementPtr
+ instruction. Help!</a></p>
+</div>
+<div class="answer">
+ <p>See <a href="GetElementPtr.html">The Often Misunderstood GEP
+ Instruction</a>.</p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="cfe">Using the GCC Front End</a>
+</div>
+
+<div class="question">
+<p>
+When I compile software that uses a configure script, the configure script
+thinks my system has all of the header files and libraries it is testing for.
+How do I get configure to work correctly?
+</p>
+</div>
+
+<div class="answer">
+<p>
+The configure script is getting things wrong because the LLVM linker allows
+symbols to be undefined at link time (so that they can be resolved during JIT
+or translation to the C back end). That is why configure thinks your system
+"has everything."
+</p>
+<p>
+To work around this, perform the following steps:
+</p>
+<ol>
+ <li>Make sure the CC and CXX environment variables contain the full path to
+ the LLVM GCC front end.</li>
+
+ <li>Make sure that the regular C compiler is first in your PATH. </li>
+
+ <li>Add the string "-Wl,-native" to your CFLAGS environment variable.</li>
+</ol>
+
+<p>
+This will allow the <tt>llvm-ld</tt> linker to create a native code executable
+instead of a shell script that runs the JIT. Creating native code requires
+standard linkage, which in turn will allow the configure script to find out if
+code is not linking on your system because the feature isn't available on your
+system.</p>
+</div>
+
+<div class="question">
+<p>
+When I compile code using the LLVM GCC front end, it complains that it cannot
+find libcrtend.a.
+</p>
+</div>
+
+<div class="answer">
+<p>
+The only way this can happen is if you haven't installed the runtime library. To
+correct this, do:</p>
+
+<div class="doc_code">
+<pre>
+% cd llvm/runtime
+% make clean ; make install-bytecode
+</pre>
+</div>
+</div>
+
+<div class="question">
+<p>
+How can I disable all optimizations when compiling code using the LLVM GCC front end?
+</p>
+</div>
+
+<div class="answer">
+<p>
+Passing "-Wa,-disable-opt -Wl,-disable-opt" will disable <em>all</em> cleanup and
+optimizations done at the LLVM level, leaving you with the truly horrible
+code that you desire.
+</p>
+</div>
+
+
+<div class="question">
+<p>
+<a name="translatec++">Can I use LLVM to convert C++ code to C code?</a>
+</p>
+</div>
+
+<div class="answer">
+<p>Yes, you can use LLVM to convert code from any language LLVM supports to C.
+Note that the generated C code will be very low level (all loops are lowered
+to gotos, etc) and not very pretty (comments are stripped, original source
+formatting is totally lost, variables are renamed, expressions are regrouped),
+so this may not be what you're looking for. However, this is a good way to add
+C++ support for a processor that does not otherwise have a C++ compiler.
+</p>
+
+<p>Use commands like this:</p>
+
+<ol>
+<li><p>Compile your program as normal with llvm-g++:</p>
+
+<div class="doc_code">
+<pre>
+% llvm-g++ x.cpp -o program
+</pre>
+</div>
+
+<p>or:</p>
+
+<div class="doc_code">
+<pre>
+% llvm-g++ a.cpp -c
+% llvm-g++ b.cpp -c
+% llvm-g++ a.o b.o -o program
+</pre>
+</div>
+
+<p>With llvm-gcc3, this will generate program and program.bc. The .bc file is
+the LLVM version of the program all linked together.</p>
+</li>
+<li><p>Convert the LLVM code to C code, using the LLC tool with the C
+backend:</p>
+
+<div class="doc_code">
+<pre>
+% llc -march=c program.bc -o program.c
+</pre>
+</div>
+</li>
+<li><p>Finally, compile the C file:</p>
+
+<div class="doc_code">
+<pre>
+% cc program.c
+</pre>
+</div>
+</li>
+</ol>
+
+<p>Note that, by default, the C backend does not support exception handling.
+If you want/need it for a certain program, you can enable it by passing
+"-enable-correct-eh-support" to the llc program. The resultant code will
+use setjmp/longjmp to implement exception support that is correct but
+relatively slow.
+</p>
+
+<p>Also note: this specific sequence of commands won't work if you use a
+function defined in the C++ runtime library (or any other C++ library). To
+access an external C++ library, you must manually
+compile libstdc++ to LLVM bitcode, statically link it into your program, then
+use the commands above to convert the whole result into C code. Alternatively,
+you can compile the libraries and your application into two different chunks
+of C code and link them.</p>
+
+</div>
+
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="cfe_code">Questions about code generated by the GCC front-end</a>
+</div>
+
+<div class="question"><p>
+<a name="__main"></a>
+What is this <tt>__main()</tt> call that gets inserted into <tt>main()</tt>?
+</p></div>
+
+<div class="answer">
+<p>
+The <tt>__main</tt> call is inserted by the C/C++ compiler in order to guarantee
+that static constructors and destructors are called when the program starts up
+and shuts down. In C, you can create static constructors and destructors by
+using GCC extensions, and in C++ you can do so by creating a global variable
+whose class has a ctor or dtor.
+</p>
+
+<p>
+The actual implementation of <tt>__main</tt> lives in the
+<tt>llvm/runtime/GCCLibraries/crtend/</tt> directory in the source-base, and is
+linked in automatically when you link the program.
+</p>
+</div>
+
+<!--=========================================================================-->
+
+<div class="question">
+<a name="iosinit"></a>
+<p> What is this <tt>llvm.global_ctors</tt> and
+<tt>_GLOBAL__I__tmp_webcompile...</tt> stuff that happens when I #include
+&lt;iostream&gt;?</p>
+</div>
+
+<div class="answer">
+
+<p>If you #include the &lt;iostream&gt; header into a C++ translation unit, the
+file will probably use the <tt>std::cin</tt>/<tt>std::cout</tt>/... global
+objects. However, C++ does not guarantee an order of initialization between
+static objects in different translation units, so if a static ctor/dtor in your
+.cpp file used <tt>std::cout</tt>, for example, the object would not necessarily
+be automatically initialized before your use.</p>
+
+<p>To make <tt>std::cout</tt> and friends work correctly in these scenarios, the
+STL that we use declares a static object that gets created in every translation
+unit that includes <tt>&lt;iostream&gt;</tt>. This object has a static
+constructor and destructor that initializes and destroys the global iostream
+objects before they could possibly be used in the file. The code that you see
+in the .ll file corresponds to the constructor and destructor registration code.
+</p>
+
+<p>If you would like to make it easier to <b>understand</b> the LLVM code
+generated by the compiler in the demo page, consider using <tt>printf()</tt>
+instead of <tt>iostream</tt>s to print values.</p>
+
+</div>
+
+<!--=========================================================================-->
+
+<div class="question"><p>
+<a name="codedce"></a>
+Where did all of my code go??
+</p></div>
+
+<div class="answer">
+<p>
+If you are using the LLVM demo page, you may often wonder what happened to all
+of the code that you typed in. Remember that the demo script is running the
+code through the LLVM optimizers, so if your code doesn't actually do anything
+useful, it might all be deleted.
+</p>
+
+<p>
+To prevent this, make sure that the code is actually needed. For example, if
+you are computing some expression, return the value from the function instead of
+leaving it in a local variable. If you really want to constrain the optimizer,
+you can read from and assign to <tt>volatile</tt> global variables.
+</p>
+</div>
+
+<!--=========================================================================-->
+
+<div class="question"><p>
+<a name="undef"></a>
+What is this "<tt>undef</tt>" thing that shows up in my code?
+</p></div>
+
+<div class="answer">
+<p>
+<a href="LangRef.html#undef"><tt>undef</tt></a> is the LLVM way of representing
+a value that is not defined. You can get these if you do not initialize a
+variable before you use it. For example, the C function:</p>
+
+<div class="doc_code">
+<pre>
+int X() { int i; return i; }
+</pre>
+</div>
+
+<p>is compiled to "<tt>ret i32 undef</tt>" because "<tt>i</tt>" never has
+a value specified for it.</p>
+</div>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+ <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+
+</body>
+</html>
diff --git a/docs/GarbageCollection.html b/docs/GarbageCollection.html
new file mode 100644
index 0000000..0accd0c
--- /dev/null
+++ b/docs/GarbageCollection.html
@@ -0,0 +1,534 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>Accurate Garbage Collection with LLVM</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<div class="doc_title">
+ Accurate Garbage Collection with LLVM
+</div>
+
+<ol>
+ <li><a href="#introduction">Introduction</a>
+ <ul>
+ <li><a href="#feature">GC features provided and algorithms supported</a></li>
+ </ul>
+ </li>
+
+ <li><a href="#interfaces">Interfaces for user programs</a>
+ <ul>
+ <li><a href="#roots">Identifying GC roots on the stack: <tt>llvm.gcroot</tt></a></li>
+ <li><a href="#allocate">Allocating memory from the GC</a></li>
+ <li><a href="#barriers">Reading and writing references to the heap</a></li>
+ <li><a href="#explicit">Explicit invocation of the garbage collector</a></li>
+ </ul>
+ </li>
+
+ <li><a href="#gcimpl">Implementing a garbage collector</a>
+ <ul>
+ <li><a href="#llvm_gc_readwrite">Implementing <tt>llvm_gc_read</tt> and <tt>llvm_gc_write</tt></a></li>
+ <li><a href="#callbacks">Callback functions used to implement the garbage collector</a></li>
+ </ul>
+ </li>
+ <li><a href="#gcimpls">GC implementations available</a>
+ <ul>
+ <li><a href="#semispace">SemiSpace - A simple copying garbage collector</a></li>
+ </ul>
+ </li>
+
+<!--
+ <li><a href="#codegen">Implementing GC support in a code generator</a></li>
+-->
+</ol>
+
+<div class="doc_author">
+ <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="introduction">Introduction</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>Garbage collection is a widely used technique that frees the programmer from
+having to know the lifetimes of heap objects, making software easier to produce
+and maintain. Many programming languages rely on garbage collection for
+automatic memory management. There are two primary forms of garbage collection:
+conservative and accurate.</p>
+
+<p>Conservative garbage collection often does not require any special support
+from either the language or the compiler: it can handle non-type-safe
+programming languages (such as C/C++) and does not require any special
+information from the compiler. The
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/">Boehm collector</a> is
+an example of a state-of-the-art conservative collector.</p>
+
+<p>Accurate garbage collection requires the ability to identify all pointers in
+the program at run-time (which requires that the source-language be type-safe in
+most cases). Identifying pointers at run-time requires compiler support to
+locate all places that hold live pointer variables at run-time, including the
+<a href="#roots">processor stack and registers</a>.</p>
+
+<p>
+Conservative garbage collection is attractive because it does not require any
+special compiler support, but it does have problems. In particular, because the
+conservative garbage collector cannot <i>know</i> that a particular word in the
+machine is a pointer, it cannot move live objects in the heap (preventing the
+use of compacting and generational GC algorithms) and it can occasionally suffer
+from memory leaks due to integer values that happen to point to objects in the
+program. In addition, some aggressive compiler transformations can break
+conservative garbage collectors (though these seem rare in practice).
+</p>
+
+<p>
+Accurate garbage collectors do not suffer from any of these problems, but they
+can suffer from degraded scalar optimization of the program. In particular,
+because the runtime must be able to identify and update all pointers active in
+the program, some optimizations are less effective. In practice, however, the
+locality and performance benefits of using aggressive garbage collection
+techniques dominate any low-level losses.
+</p>
+
+<p>
+This document describes the mechanisms and interfaces provided by LLVM to
+support accurate garbage collection.
+</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="feature">GC features provided and algorithms supported</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+LLVM provides support for a broad class of garbage collection algorithms,
+including compacting semi-space collectors, mark-sweep collectors, generational
+collectors, and even reference counting implementations. It includes support
+for <a href="#barriers">read and write barriers</a>, and associating <a
+href="#roots">meta-data with stack objects</a> (used for tagless garbage
+collection). All LLVM code generators support garbage collection, including the
+C backend.
+</p>
+
+<p>
+We hope that the primitive support built into LLVM is sufficient to support a
+broad class of garbage collected languages, including Scheme, ML, scripting
+languages, Java, C#, etc. That said, the implemented garbage collectors may
+need to be extended to support language-specific features such as finalization,
+weak references, or other features. As these needs are identified and
+implemented, they should be added to this specification.
+</p>
+
+<p>
+LLVM does not currently support garbage collection of multi-threaded programs or
+GC-safe points other than function calls, but these will be added in the future
+as there is interest.
+</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="interfaces">Interfaces for user programs</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>This section describes the interfaces provided by LLVM and by the garbage
+collector run-time that should be used by user programs. As such, this is the
+interface that front-end authors should generate code for.
+</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="roots">Identifying GC roots on the stack: <tt>llvm.gcroot</tt></a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><tt>
+ void %llvm.gcroot(&lt;ty&gt;** %ptrloc, &lt;ty2&gt;* %metadata)
+</tt></div>
+
+<p>
+The <tt>llvm.gcroot</tt> intrinsic is used to inform LLVM of a pointer variable
+on the stack. The first argument contains the address of the variable on the
+stack, and the second contains a pointer to metadata that should be associated
+with the pointer (which <b>must</b> be a constant or global value address). At
+runtime, the <tt>llvm.gcroot</tt> intrinsic stores a null pointer into the
+specified location to initialize the pointer.</p>
+
+<p>
+Consider the following fragment of Java code:
+</p>
+
+<pre>
+ {
+ Object X; // A null-initialized reference to an object
+ ...
+ }
+</pre>
+
+<p>
+This block (which may be located in the middle of a function or in a loop nest),
+could be compiled to this LLVM code:
+</p>
+
+<pre>
+Entry:
+ ;; In the entry block for the function, allocate the
+ ;; stack space for X, which is an LLVM pointer.
+ %X = alloca %Object*
+ ...
+
+ ;; "CodeBlock" is the block corresponding to the start
+ ;; of the scope above.
+CodeBlock:
+ ;; Initialize the object, telling LLVM that it is now live.
+ ;; Java has type-tags on objects, so it doesn't need any
+ ;; metadata.
+ call void %llvm.gcroot(%Object** %X, sbyte* null)
+ ...
+
+ ;; As the pointer goes out of scope, store a null value into
+ ;; it, to indicate that the value is no longer live.
+ store %Object* null, %Object** %X
+ ...
+</pre>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="allocate">Allocating memory from the GC</a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><tt>
+ sbyte *%llvm_gc_allocate(unsigned %Size)
+</tt></div>
+
+<p>The <tt>llvm_gc_allocate</tt> function is a global function defined by the
+garbage collector implementation to allocate memory. It returns a
+zeroed-out block of memory of the appropriate size.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="barriers">Reading and writing references to the heap</a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><tt>
+ sbyte *%llvm.gcread(sbyte *, sbyte **)<br>
+ void %llvm.gcwrite(sbyte*, sbyte*, sbyte**)
+</tt></div>
+
+<p>Several of the more interesting garbage collectors (e.g., generational
+collectors) need to be informed when the mutator (the program that needs garbage
+collection) reads or writes object references into the heap. In the case of a
+generational collector, it needs to keep track of which "old" generation objects
+have references stored into them. The amount of code that needs to be
+executed is usually quite small (and not on the critical path of any
+computation), so the overall performance impact of the inserted code is
+tolerable.</p>
+
+<p>To support garbage collectors that use read or write barriers, LLVM provides
+the <tt>llvm.gcread</tt> and <tt>llvm.gcwrite</tt> intrinsics. The first
+intrinsic has exactly the same semantics as a non-volatile LLVM load and the
+second has the same semantics as a non-volatile LLVM store, with the
+addition that they also take a pointer to the start of the memory
+object as an argument. At code generation
+time, these intrinsics are replaced with calls into the garbage collector
+(<tt><a href="#llvm_gc_readwrite">llvm_gc_read</a></tt> and <tt><a
+href="#llvm_gc_readwrite">llvm_gc_write</a></tt> respectively), which are then
+inlined into the code.
+</p>
+
+<p>
+If you are writing a front-end for a garbage collected language, every load or
+store of a reference from or to the heap should use these intrinsics instead of
+normal LLVM loads/stores.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="initialize">Garbage collector startup and initialization</a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><tt>
+ void %llvm_gc_initialize(unsigned %InitialHeapSize)
+</tt></div>
+
+<p>
+The <tt>llvm_gc_initialize</tt> function should be called once before any other
+garbage collection functions are called. This gives the garbage collector the
+chance to initialize itself and allocate the heap spaces. The initial heap size
+to allocate should be specified as an argument.
+</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="explicit">Explicit invocation of the garbage collector</a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><tt>
+ void %llvm_gc_collect()
+</tt></div>
+
+<p>
+The <tt>llvm_gc_collect</tt> function is exported by the garbage collector
+implementations to provide a full collection, even when the heap is not
+exhausted. This can be used by end-user code as a hint, and may be ignored by
+the garbage collector.
+</p>
+
+</div>
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="gcimpl">Implementing a garbage collector</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>
+Implementing a garbage collector for LLVM is fairly straight-forward. The LLVM
+garbage collectors are provided in a form that makes them easy to link into the
+language-specific runtime that a language front-end would use. They require
+functionality from the language-specific runtime to get information about <a
+href="#gcdescriptors">where pointers are located in heap objects</a>.
+</p>
+
+<p>The
+implementation must include the <a
+href="#allocate"><tt>llvm_gc_allocate</tt></a> and <a
+href="#explicit"><tt>llvm_gc_collect</tt></a> functions, and it must implement
+the <a href="#llvm_gc_readwrite">read/write barrier</a> functions as well. To
+do this, it will probably have to <a href="#traceroots">trace through the roots
+from the stack</a> and understand the <a href="#gcdescriptors">GC descriptors
+for heap objects</a>. Luckily, there are some <a href="#gcimpls">example
+implementations</a> available.
+</p>
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="llvm_gc_readwrite">Implementing <tt>llvm_gc_read</tt> and <tt>llvm_gc_write</tt></a>
+</div>
+
+<div class="doc_text">
+ <div class="doc_code"><tt>
+ void *llvm_gc_read(void*, void **)<br>
+ void llvm_gc_write(void*, void *, void**)
+ </tt></div>
+
+<p>
+These functions <i>must</i> be implemented in every garbage collector, even if
+they do not need read/write barriers. In this case, just load or store the
+pointer, then return.
+</p>
+
+<p>
+If an actual read or write barrier is needed, it should be straight-forward to
+implement it.
+</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="callbacks">Callback functions used to implement the garbage collector</a>
+</div>
+
+<div class="doc_text">
+<p>
+Garbage collector implementations make use of call-back functions that are
+implemented by other parts of the LLVM system.
+</p>
+</div>
+
+<!--_________________________________________________________________________-->
+<div class="doc_subsubsection">
+ <a name="traceroots">Tracing GC pointers from the program stack</a>
+</div>
+
+<div class="doc_text">
+ <div class="doc_code"><tt>
+ void llvm_cg_walk_gcroots(void (*FP)(void **Root, void *Meta));
+ </tt></div>
+
+<p>
+The <tt>llvm_cg_walk_gcroots</tt> function is a function provided by the code
+generator that iterates through all of the GC roots on the stack, calling the
+specified function pointer with each record. For each GC root, the address of
+the pointer and the meta-data (from the <a
+href="#roots"><tt>llvm.gcroot</tt></a> intrinsic) are provided.
+</p>
+</div>
+
+<!--_________________________________________________________________________-->
+<div class="doc_subsubsection">
+ <a name="staticroots">Tracing GC pointers from static roots</a>
+</div>
+
+<div class="doc_text">
+TODO
+</div>
+
+
+<!--_________________________________________________________________________-->
+<div class="doc_subsubsection">
+ <a name="gcdescriptors">Tracing GC pointers from heap objects</a>
+</div>
+
+<div class="doc_text">
+<p>
+The three most common ways to keep track of where pointers live in heap objects
+are (listed in order of space overhead required):</p>
+
+<ol>
+<li>In languages with polymorphic objects, pointers from an object header are
+usually used to identify the GC pointers in the heap object. This is common for
+object-oriented languages like Self, Smalltalk, Java, or C#.</li>
+
+<li>If heap objects are not polymorphic, often the "shape" of the heap can be
+determined from the roots of the heap or from some other meta-data [<a
+href="#appel89">Appel89</a>, <a href="#goldberg91">Goldberg91</a>, <a
+href="#tolmach94">Tolmach94</a>]. In this case, the garbage collector can
+propagate the information around from meta data stored with the roots. This
+often eliminates the need to have a header on objects in the heap. This is
+common in the ML family.</li>
+
+<li>If all heap objects have pointers in the same locations, or pointers can be
+distinguished just by looking at them (e.g., the low order bit is clear), no
+book-keeping is needed at all. This is common for Lisp-like languages.</li>
+</ol>
+
+<p>The LLVM garbage collectors are capable of supporting all of these styles of
+language, including ones that mix various implementations. To do this, LLVM
+allows the source-language to associate meta-data with the <a
+href="#roots">stack roots</a>, and the heap tracing routines can propagate the
+information. In addition, LLVM allows the front-end to extract GC information
+in any form from a specific object pointer (this supports situations #1 and
+#3).
+</p>
+
+<p><b>Making this efficient</b></p>
+
+
+
+</div>
+
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="gcimpls">GC implementations available</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>
+To make this more concrete, the currently implemented LLVM garbage collectors
+all live in the <tt>llvm/runtime/GC/*</tt> directories in the LLVM source-base.
+If you are interested in implementing an algorithm, there are many interesting
+possibilities (mark/sweep, a generational collector, a reference counting
+collector, etc.), or you could choose to improve one of the existing algorithms.
+</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="semispace">SemiSpace - A simple copying garbage collector</a>
+</div>
+
+<div class="doc_text">
+<p>
+SemiSpace is a very simple copying collector. When it starts up, it allocates
+two blocks of memory for the heap. It uses a simple bump-pointer allocator to
+allocate memory from the first block until it runs out of space. When it runs
+out of space, it traces through all of the roots of the program, copying blocks
+to the other half of the memory space.
+</p>
+
+</div>
+
+<!--_________________________________________________________________________-->
+<div class="doc_subsubsection">
+ Possible Improvements
+</div>
+
+<div class="doc_text">
+
+<p>
+If a collection cycle happens and the heap is not compacted very much (say less
+than 25% of the allocated memory was freed), the memory regions should be
+doubled in size.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="references">References</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p><a name="appel89">[Appel89]</a> Runtime Tags Aren't Necessary. Andrew
+W. Appel. Lisp and Symbolic Computation 19(7):703-705, July 1989.</p>
+
+<p><a name="goldberg91">[Goldberg91]</a> Tag-free garbage collection for
+strongly typed programming languages. Benjamin Goldberg. ACM SIGPLAN
+PLDI'91.</p>
+
+<p><a name="tolmach94">[Tolmach94]</a> Tag-free garbage collection using
+explicit type parameters. Andrew Tolmach. Proceedings of the 1994 ACM
+conference on LISP and functional programming.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+ <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
+ <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+
+</body>
+</html>
diff --git a/docs/GetElementPtr.html b/docs/GetElementPtr.html
new file mode 100644
index 0000000..710273f
--- /dev/null
+++ b/docs/GetElementPtr.html
@@ -0,0 +1,311 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+ <title>The Often Misunderstood GEP Instruction</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+ <style type="text/css">
+ TABLE { text-align: left; border: 1px solid black; border-collapse: collapse; margin: 0 0 0 0; }
+ </style>
+</head>
+<body>
+
+<div class="doc_title">
+ The Often Misunderstood GEP Instruction
+</div>
+
+<ol>
+ <li><a href="#intro">Introduction</a></li>
+ <li><a href="#questions">The Questions</a>
+ <ol>
+ <li><a href="#extra_index">Why is the extra 0 index required?</a></li>
+ <li><a href="#deref">What is dereferenced by GEP?</a></li>
+ <li><a href="#firstptr">What is the first index of the GEP
+ instruction?</a></li>
+ <li><a href="#lead0">Why don't GEP x,0,0,1 and GEP x,1 alias? </a></li>
+ <li><a href="#trail0">Why do GEP x,1,0,0 and GEP x,1 alias? </a></li>
+ </ol></li>
+ <li><a href="#summary">Summary</a></li>
+</ol>
+
+<div class="doc_author">
+ <p>Written by: <a href="mailto:rspencer@reidspencer.com">Reid Spencer</a>.</p>
+</div>
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="intro"><b>Introduction</b></a></div>
+<!-- *********************************************************************** -->
+<div class="doc_text">
+ <p>This document seeks to dispel the mystery and confusion surrounding LLVM's
+ GetElementPtr (GEP) instruction. Questions about the wily GEP instruction are
+ probably the most frequently occurring questions once a developer gets down to
+ coding with LLVM. Here we lay out the sources of confusion and show that the
+ GEP instruction is really quite simple.
+ </p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="questions"><b>The Questions</b></a></div>
+<!-- *********************************************************************** -->
+<div class="doc_text">
+ <p>When people are first confronted with the GEP instruction, they tend to
+ relate it to known concepts from other programming paradigms, most notably C
+ array indexing and field selection. However, GEP is a little different and
+ this leads to the following questions, all of which are answered in the
+ following sections.</p>
+ <ol>
+ <li><a href="#firstptr">What is the first index of the GEP instruction?</a>
+ </li>
+ <li><a href="#extra_index">Why is the extra 0 index required?</a></li>
+ <li><a href="#deref">What is dereferenced by GEP?</a></li>
+ <li><a href="#lead0">Why don't GEP x,0,0,1 and GEP x,1 alias? </a></li>
+ <li><a href="#trail0">Why do GEP x,1,0,0 and GEP x,1 alias? </a></li>
+ </ol>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_subsection">
+ <a name="firstptr"><b>What is the first index of the GEP instruction?</b></a>
+</div>
+<div class="doc_text">
+ <p>Quick answer: The index stepping through the first operand.</p>
+ <p>The confusion with the first index usually arises from thinking about
+ the GetElementPtr instruction as if it was a C index operator. They aren't the
+ same. For example, when we write, in "C":</p>
+ <pre>
+ AType* Foo;
+ ...
+ X = &amp;Foo-&gt;F;</pre>
+ <p>it is natural to think that there is only one index, the selection of the
+ field <tt>F</tt>. However, in this example, <tt>Foo</tt> is a pointer. That
+ pointer must be indexed explicitly in LLVM. C, on the other hand, indexes
+ through it transparently. To arrive at the same address location as the C
+ code, you would provide the GEP instruction with two index operands. The
+ first operand indexes through the pointer; the second operand indexes the
+ field <tt>F</tt> of the structure, just as if you wrote:</p>
+ <pre>
+ X = &amp;Foo[0].F;</pre>
+ <p>Sometimes this question gets rephrased as:</p>
+ <blockquote><p><i>Why is it okay to index through the first pointer, but
+ subsequent pointers won't be dereferenced?</i></p></blockquote>
+ <p>The answer is simply because memory does not have to be accessed to
+ perform the computation. The first operand to the GEP instruction must be a
+ value of a pointer type. The value of the pointer is provided directly to
+ the GEP instruction as an operand without any need for accessing memory. It
+ must, therefore, be indexed and requires an index operand. Consider this
+ example:</p>
+ <pre>
+ struct munger_struct {
+ int f1;
+ int f2;
+ };
+ void munge(struct munger_struct *P)
+ {
+ P[0].f1 = P[1].f1 + P[2].f2;
+ }
+ ...
+ struct munger_struct Array[3];
+ ...
+ munge(Array);</pre>
+ <p>In this "C" example, the front end compiler (llvm-gcc) will generate three
+ GEP instructions for the three indices through "P" in the assignment
+ statement. The function argument <tt>P</tt> will be the first operand of each
+ of these GEP instructions. The second operand indexes through that pointer.
+ The third operand will be the field offset into the
+ <tt>struct munger_struct</tt> type, for either the <tt>f1</tt> or
+ <tt>f2</tt> field. So, in LLVM assembly the <tt>munge</tt> function looks
+ like:</p>
+ <pre>
+ void %munge(%struct.munger_struct* %P) {
+ entry:
+ %tmp = getelementptr %struct.munger_struct* %P, i32 1, i32 0
+ %tmp = load i32* %tmp
+ %tmp6 = getelementptr %struct.munger_struct* %P, i32 2, i32 1
+ %tmp7 = load i32* %tmp6
+ %tmp8 = add i32 %tmp7, %tmp
+ %tmp9 = getelementptr %struct.munger_struct* %P, i32 0, i32 0
+ store i32 %tmp8, i32* %tmp9
+ ret void
+ }</pre>
+ <p>In each case the first operand is the pointer through which the GEP
+ instruction starts. The same is true whether the first operand is an
+ argument, allocated memory, or a global variable. </p>
+ <p>To make this clear, let's consider a more obtuse example:</p>
+ <pre>
+ %MyVar = uninitialized global i32
+ ...
+ %idx1 = getelementptr i32* %MyVar, i64 0
+ %idx2 = getelementptr i32* %MyVar, i64 1
+ %idx3 = getelementptr i32* %MyVar, i64 2</pre>
+ <p>These GEP instructions are simply making address computations from the
+ base address of <tt>MyVar</tt>. They compute, as follows (using C syntax):
+ </p>
+ <ul>
+ <li> idx1 = (char*) &amp;MyVar + 0</li>
+ <li> idx2 = (char*) &amp;MyVar + 4</li>
+ <li> idx3 = (char*) &amp;MyVar + 8</li>
+ </ul>
+ <p>Since the type <tt>i32</tt> is known to be four bytes long, the indices
+ 0, 1 and 2 translate into memory offsets of 0, 4, and 8, respectively. No
+ memory is accessed to make these computations because the address of
+ <tt>%MyVar</tt> is passed directly to the GEP instructions.</p>
+ <p>The obtuse part of this example is in the cases of <tt>%idx2</tt> and
+ <tt>%idx3</tt>. They result in the computation of addresses that point to
+ memory past the end of the <tt>%MyVar</tt> global, which is only one
+ <tt>i32</tt> long, not three <tt>i32</tt>s long. While this is legal in LLVM,
+ it is inadvisable because any load or store with the pointer that results
+ from these GEP instructions would produce undefined results.</p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_subsection">
+ <a name="extra_index"><b>Why is the extra 0 index required?</b></a>
+</div>
+<!-- *********************************************************************** -->
+<div class="doc_text">
+ <p>Quick answer: there are no superfluous indices.</p>
+ <p>This question arises most often when the GEP instruction is applied to a
+ global variable which is always a pointer type. For example, consider
+ this:</p><pre>
+ %MyStruct = uninitialized global { float*, i32 }
+ ...
+ %idx = getelementptr { float*, i32 }* %MyStruct, i64 0, i32 1</pre>
+ <p>The GEP above yields an <tt>i32*</tt> by indexing the <tt>i32</tt> typed
+ field of the structure <tt>%MyStruct</tt>. When people first look at it, they
+ wonder why the <tt>i64 0</tt> index is needed. However, a closer inspection
+ of how globals and GEPs work reveals the need. Becoming aware of the following
+ facts will dispel the confusion:</p>
+ <ol>
+ <li>The type of <tt>%MyStruct</tt> is <i>not</i> <tt>{ float*, i32 }</tt>
+ but rather <tt>{ float*, i32 }*</tt>. That is, <tt>%MyStruct</tt> is a
+ pointer to a structure containing a pointer to a <tt>float</tt> and an
+ <tt>i32</tt>.</li>
+ <li>Point #1 is evidenced by noticing the type of the first operand of
+ the GEP instruction (<tt>%MyStruct</tt>) which is
+ <tt>{ float*, i32 }*</tt>.</li>
+ <li>The first index, <tt>i64 0</tt> is required to step over the global
+ variable <tt>%MyStruct</tt>. Since the first argument to the GEP
+ instruction must always be a value of pointer type, the first index
+ steps through that pointer. A value of 0 means 0 elements offset from that
+ pointer.</li>
+ <li>The second index, <tt>i32 1</tt> selects the second field of the
+ structure (the <tt>i32</tt>). </li>
+ </ol>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_subsection">
+ <a name="deref"><b>What is dereferenced by GEP?</b></a>
+</div>
+<div class="doc_text">
+ <p>Quick answer: nothing.</p>
+ <p>The GetElementPtr instruction dereferences nothing. That is, it doesn't
+ access memory in any way. That's what the Load and Store instructions are for.
+ GEP is only involved in the computation of addresses. For example, consider
+ this:</p>
+ <pre>
+ %MyVar = uninitialized global { [40 x i32 ]* }
+ ...
+ %idx = getelementptr { [40 x i32]* }* %MyVar, i64 0, i32 0, i64 0, i64 17</pre>
+ <p>In this example, we have a global variable, <tt>%MyVar</tt> that is a
+ pointer to a structure containing a pointer to an array of 40 ints. The
+ GEP instruction seems to be accessing the 18th integer of the structure's
+ array of ints. However, this is actually an illegal GEP instruction. It
+ won't compile. The reason is that the pointer in the structure <i>must</i>
+ be dereferenced in order to index into the array of 40 ints. Since the
+ GEP instruction never accesses memory, it is illegal.</p>
+ <p>In order to access the 18th integer in the array, you would need to do the
+ following:</p>
+ <pre>
+ %idx = getelementptr { [40 x i32]* }* %MyVar, i64 0, i32 0
+ %arr = load [40 x i32]** %idx
+ %idx = getelementptr [40 x i32]* %arr, i64 0, i64 17</pre>
+ <p>In this case, we have to load the pointer in the structure with a load
+ instruction before we can index into the array. If the example was changed
+ to:</p>
+ <pre>
+ %MyVar = uninitialized global { [40 x i32 ] }
+ ...
+ %idx = getelementptr { [40 x i32] }* %MyVar, i64 0, i32 0, i64 17</pre>
+ <p>then everything works fine. In this case, the structure does not contain a
+ pointer and the GEP instruction can index through the global variable,
+ into the first field of the structure and access the 18th <tt>i32</tt> in the
+ array there.</p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_subsection">
+ <a name="lead0"><b>Why don't GEP x,0,0,1 and GEP x,1 alias?</b></a>
+</div>
+<div class="doc_text">
+ <p>Quick Answer: They compute different address locations.</p>
+ <p>If you look at the first indices in these GEP
+ instructions you find that they are different (0 and 1), therefore the address
+ computation diverges with that index. Consider this example:</p>
+ <pre>
+ %MyVar = global { [10 x i32 ] }
+ %idx1 = getelementptr { [10 x i32 ] }* %MyVar, i64 0, i32 0, i64 1
+ %idx2 = getelementptr { [10 x i32 ] }* %MyVar, i64 1</pre>
+ <p>In this example, <tt>idx1</tt> computes the address of the second integer
+ in the array that is in the structure in %MyVar, that is <tt>MyVar+4</tt>. The
+ type of <tt>idx1</tt> is <tt>i32*</tt>. However, <tt>idx2</tt> computes the
+ address of <i>the next</i> structure after <tt>%MyVar</tt>. The type of
+ <tt>idx2</tt> is <tt>{ [10 x i32] }*</tt> and its value is equivalent
+ to <tt>MyVar + 40</tt> because it indexes past the ten 4-byte integers
+ in <tt>MyVar</tt>. Obviously, in such a situation, the pointers don't
+ alias.</p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_subsection">
+ <a name="trail0"><b>Why do GEP x,1,0,0 and GEP x,1 alias?</b></a>
+</div>
+<div class="doc_text">
+ <p>Quick Answer: They compute the same address location.</p>
+ <p>These two GEP instructions will compute the same address because indexing
+ through the 0th element does not change the address. However, it does change
+ the type. Consider this example:</p>
+ <pre>
+ %MyVar = global { [10 x i32 ] }
+ %idx1 = getelementptr { [10 x i32 ] }* %MyVar, i64 1, i32 0, i64 0
+ %idx2 = getelementptr { [10 x i32 ] }* %MyVar, i64 1</pre>
+ <p>In this example, the value of <tt>%idx1</tt> is <tt>%MyVar+40</tt> and
+ its type is <tt>i32*</tt>. The value of <tt>%idx2</tt> is also
+ <tt>MyVar+40</tt> but its type is <tt>{ [10 x i32] }*</tt>.</p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="summary"><b>Summary</b></a></div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+ <p>In summary, here are some things to always remember about the GetElementPtr
+ instruction:</p>
+ <ol>
+ <li>The GEP instruction never accesses memory; it only provides pointer
+ computations.</li>
+ <li>The first operand to the GEP instruction is always a pointer and it must
+ be indexed.</li>
+ <li>There are no superfluous indices for the GEP instruction.</li>
+ <li>Trailing zero indices are superfluous for pointer aliasing, but not for
+ the types of the pointers.</li>
+ <li>Leading zero indices are not superfluous for pointer aliasing nor the
+ types of the pointers.</li>
+ </ol>
+</div>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+ <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+</body>
+</html>
diff --git a/docs/GettingStarted.html b/docs/GettingStarted.html
new file mode 100644
index 0000000..079c819
--- /dev/null
+++ b/docs/GettingStarted.html
@@ -0,0 +1,1640 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+ <title>Getting Started with the LLVM System</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<div class="doc_title">
+ Getting Started with the LLVM System
+</div>
+
+<ul>
+ <li><a href="#overview">Overview</a>
+ <li><a href="#quickstart">Getting Started Quickly (A Summary)</a>
+ <li><a href="#requirements">Requirements</a>
+ <ol>
+ <li><a href="#hardware">Hardware</a>
+ <li><a href="#software">Software</a>
+ <li><a href="#brokengcc">Broken versions of GCC and other tools</a>
+ </ol></li>
+
+ <li><a href="#starting">Getting Started with LLVM</a>
+ <ol>
+ <li><a href="#terminology">Terminology and Notation</a>
+ <li><a href="#environment">Setting Up Your Environment</a>
+ <li><a href="#unpack">Unpacking the LLVM Archives</a>
+ <li><a href="#checkout">Checkout LLVM from Subversion</a>
+ <li><a href="#installcf">Install the GCC Front End</a>
+ <li><a href="#config">Local LLVM Configuration</a>
+ <li><a href="#compile">Compiling the LLVM Suite Source Code</a>
+ <li><a href="#cross-compile">Cross-Compiling LLVM</a>
+ <li><a href="#objfiles">The Location of LLVM Object Files</a>
+ <li><a href="#optionalconfig">Optional Configuration Items</a>
+ </ol></li>
+
+ <li><a href="#layout">Program layout</a>
+ <ol>
+ <li><a href="#examples"><tt>llvm/examples</tt></a>
+ <li><a href="#include"><tt>llvm/include</tt></a>
+ <li><a href="#lib"><tt>llvm/lib</tt></a>
+ <li><a href="#projects"><tt>llvm/projects</tt></a>
+ <li><a href="#runtime"><tt>llvm/runtime</tt></a>
+ <li><a href="#test"><tt>llvm/test</tt></a>
+ <li><a href="#llvmtest"><tt>llvm-test</tt></a>
+ <li><a href="#tools"><tt>llvm/tools</tt></a>
+ <li><a href="#utils"><tt>llvm/utils</tt></a>
+ <li><a href="#win32"><tt>llvm/win32</tt></a>
+ </ol></li>
+
+ <li><a href="#tutorial">An Example Using the LLVM Tool Chain</a>
+ <ol>
+ <li><a href="#tutorial4">Example with llvm-gcc4</a></li>
+ </ol>
+ <li><a href="#problems">Common Problems</a>
+ <li><a href="#links">Links</a>
+</ul>
+
+<div class="doc_author">
+ <p>Written by:
+ <a href="mailto:criswell@uiuc.edu">John Criswell</a>,
+ <a href="mailto:sabre@nondot.org">Chris Lattner</a>,
+ <a href="http://misha.brukman.net">Misha Brukman</a>,
+ <a href="http://www.cs.uiuc.edu/~vadve">Vikram Adve</a>, and
+ <a href="mailto:gshi1@uiuc.edu">Guochun Shi</a>.
+ </p>
+</div>
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="overview"><b>Overview</b></a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>Welcome to LLVM! In order to get started, you first need to know some
+basic information.</p>
+
+<p>First, LLVM comes in two pieces. The first piece is the LLVM suite. This
+contains all of the tools, libraries, and header files needed to use the low
+level virtual machine. It contains an assembler, disassembler, bitcode
+analyzer and bitcode optimizer. It also contains a test suite that can be
+used to test the LLVM tools and the GCC front end.</p>
+
+<p>The second piece is the GCC front end. This component provides a version of
+GCC that compiles C and C++ code into LLVM bitcode. Currently, the GCC front
+end uses the GCC parser to convert code to LLVM. Once
+compiled into LLVM bitcode, a program can be manipulated with the LLVM tools
+from the LLVM suite.</p>
+
+<p>
+There is a third, optional piece called llvm-test. It is a suite of programs
+with a testing harness that can be used to further test LLVM's functionality
+and performance.
+</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="quickstart"><b>Getting Started Quickly (A Summary)</b></a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>Here's the short story for getting up and running quickly with LLVM:</p>
+
+<ol>
+ <li>Read the documentation.</li>
+ <li>Read the documentation.</li>
+ <li>Remember that you were warned twice about reading the documentation.</li>
+ <li>Install the GCC front end if you intend to compile C or C++:
+ <ol>
+ <li><tt>cd <i>where-you-want-the-C-front-end-to-live</i></tt></li>
+ <li><tt>gunzip --stdout llvm-gcc.<i>platform</i>.tar.gz | tar -xvf -</tt>
+ </li>
+ <li><tt>cd llvm-gcc3.4/<i>platform</i> (llvm-gcc3.4 only)<br>
+ ./fixheaders</tt></li>
+ <li>Add llvm-gcc's "bin" directory to your PATH variable.</li>
+ </ol></li>
+
+ <li>Get the LLVM Source Code
+ <ul>
+ <li>With the distributed files (or use <a href="#checkout">SVN</a>):
+ <ol>
+ <li><tt>cd <i>where-you-want-llvm-to-live</i></tt>
+ <li><tt>gunzip --stdout llvm-<i>version</i>.tar.gz | tar -xvf -</tt>
+ </ol></li>
+
+ </ul></li>
+
+ <li><b>[Optional]</b> Get the Test Suite Source Code
+ <ul>
+ <li>With the distributed files (or use <a href="#checkout">SVN</a>):
+ <ol>
+ <li><tt>cd <i>where-you-want-llvm-to-live</i></tt>
+ <li><tt>cd llvm/projects</tt>
+ <li><tt>gunzip --stdout llvm-test-<i>version</i>.tar.gz | tar -xvf -</tt>
+ </ol></li>
+
+ </ul></li>
+
+
+ <li>Configure the LLVM Build Environment
+ <ol>
+ <li><tt>cd <i>where-you-want-to-build-llvm</i></tt></li>
+ <li><tt><i>/path/to/llvm/</i>configure [options]</tt><br>
+ Some common options:
+
+ <ul>
+ <li><tt>--prefix=<i>directory</i></tt>
+ <p>Specify for <i>directory</i> the full pathname of where you
+ want the LLVM tools and libraries to be installed (default
+ <tt>/usr/local</tt>).</p></li>
+ <li><tt>--with-llvmgccdir=<i>directory</i></tt>
+ <p>Optionally, specify for <i>directory</i> the full pathname of the
+ C/C++ front end installation to use with this LLVM configuration. If
+ not specified, the PATH will be searched.</p></li>
+ <li><tt>--enable-spec2000=<i>directory</i></tt>
+ <p>Enable the SPEC2000 benchmarks for testing. The SPEC2000
+ benchmarks should be available in
+ <tt><i>directory</i></tt>.</p></li>
+ </ul>
+ </ol></li>
+
+ <li>Build the LLVM Suite:
+ <ol>
+ <li><tt>gmake -k |&amp; tee gnumake.out
+ &nbsp;&nbsp;&nbsp;# this is csh or tcsh syntax</tt></li>
+ <li>If you get an "internal compiler error (ICE)" or test failures, see
+ <a href="#brokengcc">below</a>.</li>
+ </ol>
+
+</ol>
+
+<p>Consult the <a href="#starting">Getting Started with LLVM</a> section for
+detailed information on configuring and compiling LLVM. See <a
+href="#environment">Setting Up Your Environment</a> for tips that simplify
+working with the GCC front end and LLVM tools. Go to <a href="#layout">Program
+Layout</a> to learn about the layout of the source code tree.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="requirements"><b>Requirements</b></a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>Before you begin to use the LLVM system, review the requirements given below.
+This may save you some trouble by knowing ahead of time what hardware and
+software you will need.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="hardware"><b>Hardware</b></a>
+</div>
+
+<div class="doc_text">
+
+<p>LLVM is known to work on the following platforms:</p>
+
+<table cellpadding="3" summary="Known LLVM platforms">
+<tr>
+ <th>OS</th>
+ <th>Arch</th>
+ <th>Compilers</th>
+</tr>
+<tr>
+ <td>Linux</td>
+ <td>x86<sup><a href="#pf_1">1</a></sup></td>
+ <td>GCC</td>
+</tr>
+<tr>
+ <td>Solaris</td>
+ <td>V9 (Ultrasparc)</td>
+ <td>GCC</td>
+</tr>
+<tr>
+ <td>FreeBSD</td>
+ <td>x86<sup><a href="#pf_1">1</a></sup></td>
+ <td>GCC</td>
+</tr>
+<tr>
+ <td>MacOS X<sup><a href="#pf_2">2</a></sup></td>
+ <td>PowerPC</td>
+ <td>GCC</td>
+</tr>
+<tr>
+ <td>MacOS X<sup><a href="#pf_2">2</a></sup></td>
+ <td>x86</td>
+ <td>GCC</td>
+
+</tr>
+<tr>
+ <td>Cygwin/Win32</td>
+ <td>x86<sup><a href="#pf_1">1</a>,<a href="#pf_8">8</a></sup></td>
+ <td>GCC 3.4.X, binutils 2.15</td>
+</tr>
+<tr>
+ <td>MinGW/Win32</td>
+ <td>x86<sup><a href="#pf_1">1</a>,<a href="#pf_6">6</a>,<a href="#pf_8">8</a></sup></td>
+ <td>GCC 3.4.X, binutils 2.15</td>
+</tr>
+<tr>
+ <td>Linux</td>
+ <td>amd64<sup><a href="#pf_3">3</a></sup></td>
+ <td>GCC</td>
+</tr>
+</table>
+
+<p>LLVM has partial support for the following platforms:</p>
+
+<table summary="LLVM partial platform support">
+<tr>
+ <th>OS</th>
+ <th>Arch</th>
+ <th>Compilers</th>
+</tr>
+<tr>
+ <td>Windows</td>
+ <td>x86<sup><a href="#pf_1">1</a></sup></td>
+ <td>Visual Studio .NET<sup><a href="#pf_4">4</a>,<a href="#pf_5">5</a></sup></td>
+</tr>
+<tr>
+ <td>AIX<sup><a href="#pf_3">3</a>,<a href="#pf_4">4</a></sup></td>
+ <td>PowerPC</td>
+ <td>GCC</td>
+</tr>
+<tr>
+ <td>Linux<sup><a href="#pf_3">3</a>,<a href="#pf_5">5</a></sup></td>
+ <td>PowerPC</td>
+ <td>GCC</td>
+</tr>
+
+<tr>
+ <td>Linux<sup><a href="#pf_7">7</a></sup></td>
+ <td>Alpha</td>
+ <td>GCC</td>
+</tr>
+<tr>
+ <td>Linux<sup><a href="#pf_7">7</a></sup></td>
+ <td>Itanium (IA-64)</td>
+ <td>GCC</td>
+</tr>
+<tr>
+ <td>HP-UX<sup><a href="#pf_7">7</a></sup></td>
+ <td>Itanium (IA-64)</td>
+ <td>HP aCC</td>
+</tr>
+</table>
+
+<p><b>Notes:</b></p>
+
+<div class="doc_notes">
+<ol>
+<li><a name="pf_1">Code generation supported for Pentium processors and
+up</a></li>
+<li><a name="pf_2">Code generation supported for 32-bit ABI only</a></li>
+<li><a name="pf_3">No native code generation</a></li>
+<li><a name="pf_4">Build is not complete: one or more tools don't link</a></li>
+<li><a name="pf_5">The GCC-based C/C++ frontend does not build</a></li>
+<li><a name="pf_6">The port is done using the MSYS shell.</a>
+<a href="http://www.mingw.org/MinGWiki/">Download</a> and install
+bison (excl. M4.exe) and flex in that order. Build binutils-2.15 from source,
+if necessary. Bison &amp; flex can be also grabbed from GNUWin32 sf.net
+project.</li>
+<li><a name="pf_7">Native code generation exists but is not complete.</a></li>
+<li><a name="pf_8">Binutils</a> up to post-2.17 has a bug in bfd/cofflink.c
+ preventing LLVM from building correctly. Several workarounds have been
+ introduced into LLVM build system, but the bug can occur anytime in the
+ future. We highly recommend that you rebuild your current binutils with the
+ patch from <a href="http://sourceware.org/bugzilla/show_bug.cgi?id=2659">
+ Binutils bugzilla</a>, if it wasn't already applied.</li>
+</ol>
+</div>
+
+<p>Note that you will need about 1-3 GB of space for a full LLVM build in Debug
+mode, depending on the system (it is so large because of all the debugging
+information and the fact that the libraries are statically linked into multiple
+tools). If you do not need many of the tools and you are space-conscious,
+you can disable them individually in <tt>llvm/tools/Makefile</tt>. The Release
+build requires considerably less space.</p>
+
+<p>The LLVM suite <i>may</i> compile on other platforms, but it is not
+guaranteed to do so. If compilation is successful, the LLVM utilities should be
+able to assemble, disassemble, analyze, and optimize LLVM bitcode. Code
+generation should work as well, although the generated native code may not work
+on your platform.</p>
+
+<p>The GCC front end is not very portable at the moment. If you want to get it
+to work on another platform, you can download a copy of the source and <a
+href="CFEBuildInstrs.html">try to compile it</a> on your platform.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="software"><b>Software</b></a></div>
+<div class="doc_text">
+ <p>Compiling LLVM requires that you have several software packages
+ installed. The table below lists those required packages. The Package column
+ is the usual name for the software package that LLVM depends on. The Version
+ column provides "known to work" versions of the package. The Notes column
+ describes how LLVM uses the package and provides other details.</p>
+ <table summary="Packages required to compile LLVM">
+ <tr><th>Package</th><th>Version</th><th>Notes</th></tr>
+
+ <tr>
+ <td><a href="http://savannah.gnu.org/projects/make">GNU Make</a></td>
+ <td>3.79, 3.79.1</td>
+ <td>Makefile/build processor</td>
+ </tr>
+
+ <tr>
+ <td><a href="http://gcc.gnu.org">GCC</a></td>
+ <td>3.4.2</td>
+ <td>C/C++ compiler<sup><a href="#sf1">1</a></sup></td>
+ </tr>
+
+ <tr>
+ <td><a href="http://www.gnu.org/software/texinfo">TeXinfo</a></td>
+ <td>4.5</td>
+ <td>For building the CFE</td>
+ </tr>
+
+ <tr>
+ <td><a href="http://www.gnu.org/software/flex">Flex</a></td>
+ <td>2.5.4</td>
+ <td>LEX compiler</td>
+ </tr>
+
+ <tr>
+ <td><a href="http://www.gnu.org/software/bison/bison.html">Bison</a></td>
+ <td>1.28, 1.35, 1.75, 1.875d, 2.0, or 2.1<br>(not 1.85 or 1.875)</td>
+ <td>YACC compiler</td>
+ </tr>
+
+ <tr>
+ <td><a href="http://subversion.tigris.org/project_packages.html">SVN</a></td>
+ <td>&ge;1.3</td>
+ <td>Subversion access to LLVM<sup><a href="#sf2">2</a></sup></td>
+ </tr>
+
+ <tr>
+ <td><a href="http://savannah.gnu.org/projects/dejagnu">DejaGnu</a></td>
+ <td>1.4.2</td>
+ <td>Automated test suite<sup><a href="#sf3">3</a></sup></td>
+ </tr>
+
+ <tr>
+ <td><a href="http://www.tcl.tk/software/tcltk/">tcl</a></td>
+ <td>8.3, 8.4</td>
+ <td>Automated test suite<sup><a href="#sf3">3</a></sup></td>
+ </tr>
+
+ <tr>
+ <td><a href="http://expect.nist.gov/">expect</a></td>
+ <td>5.38.0</td>
+ <td>Automated test suite<sup><a href="#sf3">3</a></sup></td>
+ </tr>
+
+ <tr>
+ <td><a href="http://www.perl.com/download.csp">perl</a></td>
+ <td>&ge;5.6.0</td>
+ <td>Nightly tester, utilities</td>
+ </tr>
+
+ <tr>
+ <td><a href="http://savannah.gnu.org/projects/m4">GNU M4</a></td>
+ <td>1.4</td>
+ <td>Macro processor for configuration<sup><a href="#sf4">4</a></sup></td>
+ </tr>
+
+ <tr>
+ <td><a href="http://www.gnu.org/software/autoconf">GNU Autoconf</a></td>
+ <td>2.59</td>
+ <td>Configuration script builder<sup><a href="#sf4">4</a></sup></td>
+ </tr>
+
+ <tr>
+ <td><a href="http://www.gnu.org/software/automake">GNU Automake</a></td>
+ <td>1.9.2</td>
+ <td>aclocal macro generator<sup><a href="#sf4">4</a></sup></td>
+ </tr>
+
+ <tr>
+ <td><a href="http://savannah.gnu.org/projects/libtool">libtool</a></td>
+ <td>1.5.10</td>
+ <td>Shared library manager<sup><a href="#sf4">4</a></sup></td>
+ </tr>
+
+ </table>
+
+ <p><b>Notes:</b></p>
+ <div class="doc_notes">
+ <ol>
+ <li><a name="sf1">Only the C and C++ languages are needed so there's no
+ need to build the other languages for LLVM's purposes.</a> See
+ <a href="#brokengcc">below</a> for specific version info.</li>
+ <li><a name="sf2">You only need Subversion if you intend to build from the
+ latest LLVM sources. If you're working from a release distribution, you
+ don't need Subversion.</a></li>
+ <li><a name="sf3">Only needed if you want to run the automated test
+ suite in the <tt>llvm/test</tt> directory.</a></li>
+ <li><a name="sf4">If you want to make changes to the configure scripts,
+ you will need GNU autoconf (2.59), and consequently, GNU M4 (version 1.4
+ or higher). You will also need automake (1.9.2). We only use aclocal
+ from that package.</a></li>
+ </ol>
+ </div>
+
+ <p>Additionally, your compilation host is expected to have the usual
+ plethora of Unix utilities. Specifically:</p>
+ <ul>
+ <li><b>ar</b> - archive library builder</li>
+ <li><b>bzip2*</b> - bzip2 command for distribution generation</li>
+ <li><b>bunzip2*</b> - bunzip2 command for distribution checking</li>
+ <li><b>chmod</b> - change permissions on a file</li>
+ <li><b>cat</b> - output concatenation utility</li>
+ <li><b>cp</b> - copy files</li>
+ <li><b>date</b> - print the current date/time </li>
+ <li><b>echo</b> - print to standard output</li>
+ <li><b>egrep</b> - extended regular expression search utility</li>
+ <li><b>etags</b> - C/C++ tag file creator for vim/emacs</li>
+ <li><b>find</b> - find files/dirs in a file system</li>
+ <li><b>grep</b> - regular expression search utility</li>
+ <li><b>gzip*</b> - gzip command for distribution generation</li>
+ <li><b>gunzip*</b> - gunzip command for distribution checking</li>
+ <li><b>install</b> - install directories/files </li>
+ <li><b>mkdir</b> - create a directory</li>
+ <li><b>mv</b> - move (rename) files</li>
+ <li><b>ranlib</b> - symbol table builder for archive libraries</li>
+ <li><b>rm</b> - remove (delete) files and directories</li>
+ <li><b>sed</b> - stream editor for transforming output</li>
+ <li><b>sh</b> - Bourne shell for make build scripts</li>
+ <li><b>tar</b> - tape archive for distribution generation</li>
+ <li><b>test</b> - test things in file system</li>
+ <li><b>unzip*</b> - unzip command for distribution checking</li>
+ <li><b>zip*</b> - zip command for distribution generation</li>
+ </ul>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="brokengcc">Broken versions of GCC and other tools</a>
+</div>
+
+<div class="doc_text">
+
+<p>LLVM is very demanding of the host C++ compiler, and as such tends to expose
+bugs in the compiler. In particular, several versions of GCC crash when trying
+to compile LLVM. We routinely use GCC 3.3.3, 3.4.0, and Apple 4.0.1
+successfully (however, see important notes below). Other versions
+of GCC will probably work as well. GCC versions listed
+here are known to not work. If you are using one of these versions, please try
+to upgrade your GCC to something more recent. If you run into a problem with a
+version of GCC not listed here, please <a href="mailto:llvmdev@cs.uiuc.edu">let
+us know</a>. Please use the "<tt>gcc -v</tt>" command to find out which version
+of GCC you are using.
+</p>
+
+<p><b>GCC versions prior to 3.0</b>: GCC 2.96.x and before had several
+problems in the STL that effectively prevent it from compiling LLVM.
+</p>
+
+<p><b>GCC 3.2.2</b>: This version of GCC fails to compile LLVM.</p>
+
+<p><b>GCC 3.3.2</b>: This version of GCC suffered from a <a
+href="http://gcc.gnu.org/PR13392">serious bug</a> which causes it to crash in
+the "<tt>convert_from_eh_region_ranges_1</tt>" GCC function.</p>
+
+<p><b>Cygwin GCC 3.3.3</b>: The version of GCC 3.3.3 commonly shipped with
+ Cygwin does not work. Please <a href="CFEBuildInstrs.html#cygwin">upgrade
+ to a newer version</a> if possible.</p>
+<p><b>SuSE GCC 3.3.3</b>: The version of GCC 3.3.3 shipped with SuSE 9.1 (and
+ possibly others) does not compile LLVM correctly (it appears that exception
+ handling is broken in some cases). Please download the FSF 3.3.3 or upgrade
+ to a newer version of GCC.</p>
+<p><b>GCC 3.4.0 on linux/x86 (32-bit)</b>: GCC miscompiles portions of the
+ code generator, causing an infinite loop in the llvm-gcc build when built
+ with optimizations enabled (i.e. a release build).</p>
+<p><b>GCC 3.4.2 on linux/x86 (32-bit)</b>: GCC miscompiles portions of the
+ code generator at -O3, as with 3.4.0. However gcc 3.4.2 (unlike 3.4.0)
+ correctly compiles LLVM at -O2. A work around is to build release LLVM
+ builds with "make ENABLE_OPTIMIZED=1 OPTIMIZE_OPTION=-O2 ..."</p>
+<p><b>GCC 3.4.x on X86-64/amd64</b>: GCC <a href="http://llvm.org/PR1056">
+ miscompiles portions of LLVM</a>.</p>
+<p><b>IA-64 GCC 4.0.0</b>: The IA-64 version of GCC 4.0.0 is known to
+ miscompile LLVM.</p>
+<p><b>Apple Xcode 2.3</b>: GCC crashes when compiling LLVM at -O3 (which is the
+ default with ENABLE_OPTIMIZED=1). To work around this, build with
+ "ENABLE_OPTIMIZED=1 OPTIMIZE_OPTION=-O2".</p>
+<p><b>GCC 4.1.1</b>: GCC fails to build LLVM with template concept check errors
+ compiling some files. At the time of this writing, GCC mainline (4.2)
+ did not share the problem.</p>
+<p><b>GCC 4.1.1 on X86-64/amd64</b>: GCC <a href="http://llvm.org/PR1063">
+ miscompiles portions of LLVM</a> when compiling llvm itself into 64-bit
+ code. LLVM will appear to mostly work but will be buggy, e.g. failing
+ portions of its testsuite.</p>
+<p><b>GCC 4.1.2 on OpenSUSE</b>: Seg faults during libstdc++ build and on x86_64
+platforms compiling md5.c gets a mangled constant.</p>
+<p><b>GNU ld 2.16.X</b>. Some 2.16.X versions of the ld linker will produce very
+long warning messages complaining that some ".gnu.linkonce.t.*" symbol was
+defined in a discarded section. You can safely ignore these messages as they are
+erroneous and the linkage is correct. These messages disappear using ld
+2.17.</p>
+
+<p><b>GNU binutils 2.17</b>: Binutils 2.17 contains <a
+href="http://sourceware.org/bugzilla/show_bug.cgi?id=3111">a bug</a> which
+causes huge link times (minutes instead of seconds) when building LLVM. We
+recommend upgrading to a newer version (2.17.50.0.4 or later).</p>
+
+</div>
+
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="starting"><b>Getting Started with LLVM</b></a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>The remainder of this guide is meant to get you up and running with
+LLVM and to give you some basic information about the LLVM environment.</p>
+
+<p>The later sections of this guide describe the <a
+href="#layout">general layout</a> of the LLVM source tree, a <a
+href="#tutorial">simple example</a> using the LLVM tool chain, and <a
+href="#links">links</a> to find more information about LLVM or to get
+help via e-mail.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="terminology">Terminology and Notation</a>
+</div>
+
+<div class="doc_text">
+
+<p>Throughout this manual, the following names are used to denote paths
+specific to the local system and working environment. <i>These are not
+environment variables you need to set but just strings used in the rest
+of this document below</i>. In any of the examples below, simply replace
+each of these names with the appropriate pathname on your local system.
+All these paths are absolute:</p>
+
+<dl>
+ <dt>SRC_ROOT
+ <dd>
+ This is the top level directory of the LLVM source tree.
+ <br><br>
+
+ <dt>OBJ_ROOT
+ <dd>
+ This is the top level directory of the LLVM object tree (i.e. the
+ tree where object files and compiled programs will be placed. It
+ can be the same as SRC_ROOT).
+ <br><br>
+
+ <dt>LLVMGCCDIR
+ <dd>
+ This is where the LLVM GCC Front End is installed.
+ <p>
+ For the pre-built GCC front end binaries, the LLVMGCCDIR is
+ <tt>llvm-gcc/<i>platform</i>/llvm-gcc</tt>.
+</dl>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="environment">Setting Up Your Environment</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+In order to compile and use LLVM, you may need to set some environment
+variables.
+
+<dl>
+ <dt><tt>LLVM_LIB_SEARCH_PATH</tt>=<tt>/path/to/your/bitcode/libs</tt></dt>
+ <dd>[Optional] This environment variable helps LLVM linking tools find the
+ locations of your bitcode libraries. It is provided only as a
+ convenience since you can specify the paths using the -L options of the
+ tools and the C/C++ front-end will automatically use the bitcode files
+ installed in its
+ <tt>lib</tt> directory.</dd>
+</dl>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="unpack">Unpacking the LLVM Archives</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+If you have the LLVM distribution, you will need to unpack it before you
+can begin to compile it. LLVM is distributed as a set of two files: the LLVM
+suite and the LLVM GCC front end compiled for your platform. There is an
+additional test suite that is optional. Each file is a TAR archive that is
+compressed with the gzip program.
+</p>
+
+<p>The files are as follows, with <em>x.y</em> marking the version number:
+<dl>
+ <dt><tt>llvm-x.y.tar.gz</tt></dt>
+ <dd>Source release for the LLVM libraries and tools.<br/></dd>
+
+ <dt><tt>llvm-test-x.y.tar.gz</tt></dt>
+ <dd>Source release for the LLVM test suite.</dd>
+
+ <dt><tt>llvm-gcc4-x.y.source.tar.gz</tt></dt>
+ <dd>Source release of the llvm-gcc4 front end. See README.LLVM in the root
+ directory for build instructions.<br/></dd>
+
+ <dt><tt>llvm-gcc4-x.y-platform.tar.gz</tt></dt>
+ <dd>Binary release of the llvm-gcc4 front end for a specific platform.<br/></dd>
+
+</dl>
+
+<p>It is also possible to download the sources of the llvm-gcc4 front end from a
+read-only subversion mirror at
+svn://anonsvn.opensource.apple.com/svn/llvm/trunk. </p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="checkout">Checkout LLVM from Subversion</a>
+</div>
+
+<div class="doc_text">
+
+<p>If you have access to our Subversion repository, you can get a fresh copy of
+the entire source code. All you need to do is check it out from Subversion as
+follows:</p>
+
+<ul>
+ <li><tt>cd <i>where-you-want-llvm-to-live</i></tt></li>
+ <li>Read-Only: <tt>svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm</tt></li>
+ <li>Read-Write: <tt>svn co https://user@llvm.org/svn/llvm-project/llvm/trunk
+ llvm</tt></li>
+</ul>
+
+
+<p>This will create an '<tt>llvm</tt>' directory in the current
+directory and fully populate it with the LLVM source code, Makefiles,
+test directories, and local copies of documentation files.</p>
+
+<p>If you want to get a specific release (as opposed to the most recent
+revision), you can check it out from the '<tt>tags</tt>' directory (instead of
+'<tt>trunk</tt>'). The following releases are located in the following
+ subdirectories of the '<tt>tags</tt>' directory:</p>
+
+<ul>
+<li>Release 2.0: <b>RELEASE_20</b></li>
+<li>Release 1.9: <b>RELEASE_19</b></li>
+<li>Release 1.8: <b>RELEASE_18</b></li>
+<li>Release 1.7: <b>RELEASE_17</b></li>
+<li>Release 1.6: <b>RELEASE_16</b></li>
+<li>Release 1.5: <b>RELEASE_15</b></li>
+<li>Release 1.4: <b>RELEASE_14</b></li>
+<li>Release 1.3: <b>RELEASE_13</b></li>
+<li>Release 1.2: <b>RELEASE_12</b></li>
+<li>Release 1.1: <b>RELEASE_11</b></li>
+<li>Release 1.0: <b>RELEASE_1</b></li>
+</ul>
+
+<p>If you would like to get the LLVM test suite (a separate package as of 1.4),
+you can get it from the Subversion repository:</p>
+
+<div class="doc_code">
+<pre>
+% cd llvm/projects
+% svn co http://llvm.org/svn/llvm-project/test-suite/trunk llvm-test
+</pre>
+</div>
+
+<p>By placing it in the <tt>llvm/projects</tt>, it will be automatically
+configured by the LLVM configure script as well as automatically updated when
+you run <tt>svn update</tt>.</p>
+
+<p>If you would like to get the GCC front end source code, you can also get it
+and build it yourself. Please follow <a href="CFEBuildInstrs.html">these
+instructions</a> to successfully get and build the LLVM GCC front-end.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="installcf">Install the GCC Front End</a>
+</div>
+
+<div class="doc_text">
+
+<p>Before configuring and compiling the LLVM suite, you can optionally extract the
+LLVM GCC front end from the binary distribution. It is used for running the
+llvm-test testsuite and for compiling C/C++ programs. Note that you can optionally
+<a href="CFEBuildInstrs.html">build llvm-gcc yourself</a> after building the
+main LLVM repository.</p>
+
+<p>To install the GCC front end, do the following:</p>
+
+<ol>
+ <li><tt>cd <i>where-you-want-the-front-end-to-live</i></tt></li>
+ <li><tt>gunzip --stdout llvmgcc-<i>version</i>.<i>platform</i>.tar.gz | tar -xvf
+ -</tt></li>
+</ol>
+
+<p>Once the binary is uncompressed, you should add a symlink for llvm-gcc and
+llvm-g++ to some directory in your path. When you configure LLVM, it will
+automatically detect llvm-gcc's presence (if it is in your path) enabling its
+use in llvm-test. Note that you can always build or install llvm-gcc at any
+point after building the main LLVM repository: just reconfigure llvm and
+llvm-test will pick it up.
+</p>
+
+<p>The binary versions of the GCC front end may not suit all of your needs. For
+example, the binary distribution may include an old version of a system header
+file, not "fix" a header file that needs to be fixed for GCC, or it may be
+linked with libraries not available on your system.</p>
+
+<p>In cases like these, you may want to try <a
+href="CFEBuildInstrs.html">building the GCC front end from source.</a> This is
+much easier now than it was in the past.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="config">Local LLVM Configuration</a>
+</div>
+
+<div class="doc_text">
+
+ <p>Once checked out from the Subversion repository, the LLVM suite source
+ code must be
+configured via the <tt>configure</tt> script. This script sets variables in the
+various <tt>*.in</tt> files, most notably <tt>llvm/Makefile.config</tt> and
+<tt>llvm/include/Config/config.h</tt>. It also populates <i>OBJ_ROOT</i> with
+the Makefiles needed to begin building LLVM.</p>
+
+<p>The following environment variables are used by the <tt>configure</tt>
+script to configure the build system:</p>
+
+<table summary="LLVM configure script environment variables">
+ <tr><th>Variable</th><th>Purpose</th></tr>
+ <tr>
+ <td>CC</td>
+ <td>Tells <tt>configure</tt> which C compiler to use. By default,
+ <tt>configure</tt> will look for the first GCC C compiler in
+ <tt>PATH</tt>. Use this variable to override
+ <tt>configure</tt>'s default behavior.</td>
+ </tr>
+ <tr>
+ <td>CXX</td>
+ <td>Tells <tt>configure</tt> which C++ compiler to use. By default,
+ <tt>configure</tt> will look for the first GCC C++ compiler in
+ <tt>PATH</tt>. Use this variable to override
+ <tt>configure</tt>'s default behavior.</td>
+ </tr>
+</table>
+
+<p>The following options can be used to set or enable LLVM specific options:</p>
+
+<dl>
+ <dt><i>--with-llvmgccdir</i></dt>
+ <dd>Path to the LLVM C/C++ FrontEnd to be used with this LLVM configuration.
+ The value of this option should specify the full pathname of the C/C++ Front
+ End to be used. If this option is not provided, the PATH will be searched for
+ a program named <i>llvm-gcc</i> and the C/C++ FrontEnd install directory will
+ be inferred from the path found. If the option is not given, and no llvm-gcc
+ can be found in the path then a warning will be produced by
+ <tt>configure</tt> indicating this situation. LLVM may still be built with
+ the <tt>tools-only</tt> target but attempting to build the runtime libraries
+ will fail as these libraries require llvm-gcc and llvm-g++. See
+ <a href="#installcf">Install the GCC Front End</a> for details on installing
+ the C/C++ Front End. See
+ <a href="CFEBuildInstrs.html">Bootstrapping the LLVM C/C++ Front-End</a>
+ for details on building the C/C++ Front End.</dd>
+ <dt><i>--with-tclinclude</i></dt>
+ <dd>Path to the tcl include directory under which <tt>tclsh</tt> can be
+ found. Use this if you have multiple tcl installations on your machine and you
+ want to use a specific one (8.x) for LLVM. LLVM only uses tcl for running the
+ dejagnu based test suite in <tt>llvm/test</tt>. If you don't specify this
+ option, the LLVM configure script will search for the tcl 8.4 and 8.3
+ releases.
+ <br><br>
+ </dd>
+ <dt><i>--enable-optimized</i></dt>
+ <dd>
+ Enables optimized compilation by default (debugging symbols are removed
+ and GCC optimization flags are enabled). The default is to use an
+ unoptimized build (also known as a debug build).
+ <br><br>
+ </dd>
+ <dt><i>--enable-debug-runtime</i></dt>
+ <dd>
+ Enables debug symbols in the runtime libraries. The default is to strip
+ debug symbols from the runtime libraries.
+ </dd>
+ <dt><i>--enable-jit</i></dt>
+ <dd>
+ Compile the Just In Time (JIT) compiler functionality. This is not
+ available
+ on all platforms. The default is dependent on platform, so it is best
+ to explicitly enable it if you want it.
+ <br><br>
+ </dd>
+ <dt><i>--enable-targets=</i><tt>target-option</tt></dt>
+ <dd>Controls which targets will be built and linked into llc. The default
+ value for <tt>target-option</tt> is "all" which builds and links all
+ available targets. The value "host-only" can be specified to build only a
+ native compiler (no cross-compiler targets available). The "native" target is
+ selected as the target of the build host. You can also specify a comma
+ separated list of target names that you want available in llc. The target
+ names use all lower case. The current set of targets is: <br/>
+ <tt>alpha, ia64, powerpc, skeleton, sparc, x86</tt>.
+ <br><br></dd>
+ <dt><i>--enable-doxygen</i></dt>
+ <dd>Look for the doxygen program and enable construction of doxygen based
+ documentation from the source code. This is disabled by default because
+ generating the documentation can take a long time and produces 100s of
+ megabytes of output.</dd>
+ <dt><i>--with-udis86</i></dt>
+ <dd>LLVM can use an external disassembler library for various purposes (now it's
+ used only for examining code produced by JIT). This option will enable usage
+ of <a href="http://udis86.sourceforge.net/">udis86</a> x86 (both 32 and 64
+ bits) disassembler library.</dd>
+</dl>
+
+<p>To configure LLVM, follow these steps:</p>
+
+<ol>
+ <li><p>Change directory into the object root directory:</p>
+
+ <div class="doc_code"><pre>% cd <i>OBJ_ROOT</i></pre></div></li>
+
+ <li><p>Run the <tt>configure</tt> script located in the LLVM source
+ tree:</p>
+
+ <div class="doc_code">
+ <pre>% <i>SRC_ROOT</i>/configure --prefix=/install/path [other options]</pre>
+ </div></li>
+</ol>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="compile">Compiling the LLVM Suite Source Code</a>
+</div>
+
+<div class="doc_text">
+
+<p>Once you have configured LLVM, you can build it. There are three types of
+builds:</p>
+
+<dl>
+ <dt>Debug Builds
+ <dd>
+ These builds are the default when one types <tt>gmake</tt> (unless the
+ <tt>--enable-optimized</tt> option was used during configuration). The
+ build system will compile the tools and libraries with debugging
+ information.
+ <br><br>
+
+ <dt>Release (Optimized) Builds
+ <dd>
+ These builds are enabled with the <tt>--enable-optimized</tt> option to
+ <tt>configure</tt> or by specifying <tt>ENABLE_OPTIMIZED=1</tt> on the
+ <tt>gmake</tt> command line. For these builds, the build system will
+ compile the tools and libraries with GCC optimizations enabled and strip
+ debugging information from the libraries and executables it generates.
+ <br><br>
+
+ <dt>Profile Builds
+ <dd>
+ These builds are for use with profiling. They compile profiling
+ information into the code for use with programs like <tt>gprof</tt>.
+ Profile builds must be started by specifying <tt>ENABLE_PROFILING=1</tt>
+ on the <tt>gmake</tt> command line.
+</dl>
+
+<p>Once you have LLVM configured, you can build it by entering the
+<i>OBJ_ROOT</i> directory and issuing the following command:</p>
+
+<div class="doc_code"><pre>% gmake</pre></div>
+
+<p>If the build fails, please <a href="#brokengcc">check here</a> to see if you
+are using a version of GCC that is known not to compile LLVM.</p>
+
+<p>
+If you have multiple processors in your machine, you may wish to use some of
+the parallel build options provided by GNU Make. For example, you could use the
+command:</p>
+
+<div class="doc_code"><pre>% gmake -j2</pre></div>
+
+<p>There are several special targets which are useful when working with the LLVM
+source code:</p>
+
+<dl>
+ <dt><tt>gmake clean</tt>
+ <dd>
+ Removes all files generated by the build. This includes object files,
+ generated C/C++ files, libraries, and executables.
+ <br><br>
+
+ <dt><tt>gmake dist-clean</tt>
+ <dd>
+ Removes everything that <tt>gmake clean</tt> does, but also removes files
+ generated by <tt>configure</tt>. It attempts to return the source tree to the
+ original state in which it was shipped.
+ <br><br>
+
+ <dt><tt>gmake install</tt>
+ <dd>
+ Installs LLVM header files, libraries, tools, and documentation in a
+ hierarchy
+ under $PREFIX, specified with <tt>./configure --prefix=[dir]</tt>, which
+ defaults to <tt>/usr/local</tt>.
+ <br><br>
+
+ <dt><tt>gmake -C runtime install-bytecode</tt>
+ <dd>
+ Assuming you built LLVM into $OBJDIR, when this command is run, it will
+ install bitcode libraries into the GCC front end's bitcode library
+ directory. If you need to update your bitcode libraries,
+ this is the target to use once you've built them.
+ <br><br>
+</dl>
+
+<p>Please see the <a href="MakefileGuide.html">Makefile Guide</a> for further
+details on these <tt>make</tt> targets and descriptions of other targets
+available.</p>
+
+<p>It is also possible to override default values from <tt>configure</tt> by
+declaring variables on the command line. The following are some examples:</p>
+
+<dl>
+ <dt><tt>gmake ENABLE_OPTIMIZED=1</tt>
+ <dd>
+ Perform a Release (Optimized) build.
+ <br><br>
+
+ <dt><tt>gmake ENABLE_OPTIMIZED=1 DISABLE_ASSERTIONS=1</tt>
+ <dd>
+ Perform a Release (Optimized) build without assertions enabled.
+ <br><br>
+
+ <dt><tt>gmake ENABLE_PROFILING=1</tt>
+ <dd>
+ Perform a Profiling build.
+ <br><br>
+
+ <dt><tt>gmake VERBOSE=1</tt>
+ <dd>
+ Print what <tt>gmake</tt> is doing on standard output.
+ <br><br>
+
+ <dt><tt>gmake TOOL_VERBOSE=1</tt></dt>
+ <dd>Ask each tool invoked by the makefiles to print out what it is doing on
+ the standard output. This also implies <tt>VERBOSE=1</tt>.
+ <br><br></dd>
+</dl>
+
+<p>Every directory in the LLVM object tree includes a <tt>Makefile</tt> to build
+it and any subdirectories that it contains. Entering any directory inside the
+LLVM object tree and typing <tt>gmake</tt> should rebuild anything in or below
+that directory that is out of date.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="cross-compile">Cross-Compiling LLVM</a>
+</div>
+
+<div class="doc_text">
+ <p>It is possible to cross-compile LLVM. That is, you can create LLVM
+ executables and libraries for a platform different than the one on which you
+ are compiling. To do this, a few additional steps are
+ required. <sup><a href="#ccn_1">1</a></sup> To cross-compile LLVM, use
+ these instructions:</p>
+ <ol>
+ <li>Configure and build LLVM as a native compiler. You will need
+ just <tt>TableGen</tt> from that build.
+ <ul>
+ <li>If you have <tt>$LLVM_OBJ_ROOT=$LLVM_SRC_ROOT</tt> just execute
+ <tt>make -C utils/TableGen</tt> after configuring.</li>
+ <li>Otherwise you will need to monitor the build process and terminate
+ it just after <tt>TableGen</tt> has been built.</li>
+ </ul>
+ </li>
+ <li>Copy the TableGen binary to somewhere safe (out of your build tree).
+ </li>
+ <li>Configure LLVM to build with a cross-compiler. To do this, supply the
+ configure script with <tt>--build</tt> and <tt>--host</tt> options that
+ are different. The values of these options must be legal target triples
+ that your GCC compiler supports.</li>
+ <li>Put the saved <tt>TableGen</tt> executable into the
+ <tt>$LLVM_OBJ_ROOT/{BUILD_TYPE}/bin</tt> directory (e.g. into
+ <tt>.../Release/bin</tt> for a Release build).</li>
+ <li>Build LLVM as usual.</li>
+ </ol>
+ <p>The result of such a build will produce executables that are not executable
+ on your build host (--build option) but can be executed on your compile host
+ (--host option).</p>
+ <p><b>Notes:</b></p>
+ <div class="doc_notes">
+ <ol>
+ <li><a name="ccn_1">Cross-compiling</a> was tested only with Linux as
+ build platform and Windows as host using mingw32 cross-compiler. Other
+ combinations have not been tested.</li>
+ </ol>
+ </div>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="objfiles">The Location of LLVM Object Files</a>
+</div>
+
+<div class="doc_text">
+
+<p>The LLVM build system is capable of sharing a single LLVM source tree among
+several LLVM builds. Hence, it is possible to build LLVM for several different
+platforms or configurations using the same source tree.</p>
+
+<p>This is accomplished in the typical autoconf manner:</p>
+
+<ul>
+ <li><p>Change directory to where the LLVM object files should live:</p>
+
+ <div class="doc_code"><pre>% cd <i>OBJ_ROOT</i></pre></div></li>
+
+ <li><p>Run the <tt>configure</tt> script found in the LLVM source
+ directory:</p>
+
+ <div class="doc_code"><pre>% <i>SRC_ROOT</i>/configure</pre></div></li>
+</ul>
+
+<p>The LLVM build will place files underneath <i>OBJ_ROOT</i> in directories
+named after the build type:</p>
+
+<dl>
+ <dt>Debug Builds
+ <dd>
+ <dl>
+ <dt>Tools
+ <dd><tt><i>OBJ_ROOT</i>/Debug/bin</tt>
+ <dt>Libraries
+ <dd><tt><i>OBJ_ROOT</i>/Debug/lib</tt>
+ </dl>
+ <br><br>
+
+ <dt>Release Builds
+ <dd>
+ <dl>
+ <dt>Tools
+ <dd><tt><i>OBJ_ROOT</i>/Release/bin</tt>
+ <dt>Libraries
+ <dd><tt><i>OBJ_ROOT</i>/Release/lib</tt>
+ </dl>
+ <br><br>
+
+ <dt>Profile Builds
+ <dd>
+ <dl>
+ <dt>Tools
+ <dd><tt><i>OBJ_ROOT</i>/Profile/bin</tt>
+ <dt>Libraries
+ <dd><tt><i>OBJ_ROOT</i>/Profile/lib</tt>
+ </dl>
+</dl>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="optionalconfig">Optional Configuration Items</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+If you're running on a Linux system that supports the "<a
+href="http://www.tat.physik.uni-tuebingen.de/~rguenth/linux/binfmt_misc.html">binfmt_misc</a>"
+module, and you have root access on the system, you can set your system up to
+execute LLVM bitcode files directly. To do this, use commands like this (the
+first command may not be required if you are already using the module):</p>
+
+<div class="doc_code">
+<pre>
+$ mount -t binfmt_misc none /proc/sys/fs/binfmt_misc
+$ echo ':llvm:M::llvm::/path/to/lli:' &gt; /proc/sys/fs/binfmt_misc/register
+$ chmod u+x hello.bc (if needed)
+$ ./hello.bc
+</pre>
+</div>
+
+<p>
+This allows you to execute LLVM bitcode files directly. Thanks to Jack
+Cummings for pointing this out!
+</p>
+
+</div>
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="layout"><b>Program Layout</b></a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>One useful source of information about the LLVM source base is the LLVM <a
+href="http://www.doxygen.org">doxygen</a> documentation available at <tt><a
+href="http://llvm.org/doxygen/">http://llvm.org/doxygen/</a></tt>.
+The following is a brief introduction to code layout:</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="examples"><tt>llvm/examples</tt></a></div>
+<div class="doc_text">
+ <p>This directory contains some simple examples of how to use the LLVM IR and
+ JIT.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="include"><tt>llvm/include</tt></a></div>
+<div class="doc_text">
+
+<p>This directory contains public header files exported from the LLVM
+library. The three main subdirectories of this directory are:</p>
+
+<dl>
+ <dt><tt><b>llvm/include/llvm</b></tt></dt>
+ <dd>This directory contains all of the LLVM specific header files. This
+ directory also has subdirectories for different portions of LLVM:
+ <tt>Analysis</tt>, <tt>CodeGen</tt>, <tt>Target</tt>, <tt>Transforms</tt>,
+ etc...</dd>
+
+ <dt><tt><b>llvm/include/llvm/Support</b></tt></dt>
+ <dd>This directory contains generic support libraries that are provided with
+ LLVM but not necessarily specific to LLVM. For example, some C++ STL utilities
+ and a Command Line option processing library store their header files here.
+ </dd>
+
+ <dt><tt><b>llvm/include/llvm/Config</b></tt></dt>
+ <dd>This directory contains header files configured by the <tt>configure</tt>
+ script. They wrap "standard" UNIX and C header files. Source code can
+ include these header files which automatically take care of the conditional
+ #includes that the <tt>configure</tt> script generates.</dd>
+</dl>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="lib"><tt>llvm/lib</tt></a></div>
+<div class="doc_text">
+
+<p>This directory contains most of the source files of the LLVM system. In LLVM,
+almost all code exists in libraries, making it very easy to share code among the
+different <a href="#tools">tools</a>.</p>
+
+<dl>
+ <dt><tt><b>llvm/lib/VMCore/</b></tt></dt>
+ <dd> This directory holds the core LLVM source files that implement core
+ classes like Instruction and BasicBlock.</dd>
+
+ <dt><tt><b>llvm/lib/AsmParser/</b></tt></dt>
+ <dd>This directory holds the source code for the LLVM assembly language parser
+ library.</dd>
+
+ <dt><tt><b>llvm/lib/BitCode/</b></tt></dt>
+ <dd>This directory holds code for reading and writing LLVM bitcode.</dd>
+
+ <dt><tt><b>llvm/lib/Analysis/</b></tt><dd>This directory contains a variety of
+ different program analyses, such as Dominator Information, Call Graphs,
+ Induction Variables, Interval Identification, Natural Loop Identification,
+ etc.</dd>
+
+ <dt><tt><b>llvm/lib/Transforms/</b></tt></dt>
+ <dd> This directory contains the source code for the LLVM to LLVM program
+ transformations, such as Aggressive Dead Code Elimination, Sparse Conditional
+ Constant Propagation, Inlining, Loop Invariant Code Motion, Dead Global
+ Elimination, and many others.</dd>
+
+ <dt><tt><b>llvm/lib/Target/</b></tt></dt>
+ <dd> This directory contains files that describe various target architectures
+ for code generation. For example, the <tt>llvm/lib/Target/X86</tt>
+ directory holds the X86 machine description while
+ <tt>llvm/lib/Target/CBackend</tt> implements the LLVM-to-C converter.</dd>
+
+ <dt><tt><b>llvm/lib/CodeGen/</b></tt></dt>
+ <dd> This directory contains the major parts of the code generator: Instruction
+ Selector, Instruction Scheduling, and Register Allocation.</dd>
+
+ <dt><tt><b>llvm/lib/Debugger/</b></tt></dt>
+ <dd> This directory contains the source level debugger library that makes
+ it possible to instrument LLVM programs so that a debugger could identify
+ source code locations at which the program is executing.</dd>
+
+ <dt><tt><b>llvm/lib/ExecutionEngine/</b></tt></dt>
+ <dd> This directory contains libraries for executing LLVM bitcode directly
+ at runtime in both interpreted and JIT compiled fashions.</dd>
+
+ <dt><tt><b>llvm/lib/Support/</b></tt></dt>
+ <dd> This directory contains the source code that corresponds to the header
+ files located in <tt>llvm/include/llvm/Support/</tt>.</dd>
+
+ <dt><tt><b>llvm/lib/System/</b></tt></dt>
+ <dd>This directory contains the operating system abstraction layer that
+ shields LLVM from platform-specific coding.</dd>
+</dl>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="projects"><tt>llvm/projects</tt></a></div>
+<div class="doc_text">
+ <p>This directory contains projects that are not strictly part of LLVM but are
+ shipped with LLVM. This is also the directory where you should create your own
+ LLVM-based projects. See <tt>llvm/projects/sample</tt> for an example of how
+ to set up your own project. See <tt>llvm/projects/Stacker</tt> for a fully
+ functional example of a compiler front end.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="runtime"><tt>llvm/runtime</tt></a></div>
+<div class="doc_text">
+
+<p>This directory contains libraries which are compiled into LLVM bitcode and
+used when linking programs with the GCC front end. Most of these libraries are
+skeleton versions of real libraries; for example, libc is a stripped down
+version of glibc.</p>
+
+<p>Unlike the rest of the LLVM suite, this directory needs the LLVM GCC front
+end to compile.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="test"><tt>llvm/test</tt></a></div>
+<div class="doc_text">
+ <p>This directory contains feature and regression tests and other basic sanity
+ checks on the LLVM infrastructure. These are intended to run quickly and cover
+ a lot of territory without being exhaustive.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="llvmtest"><tt>test-suite</tt></a></div>
+<div class="doc_text">
+ <p>This is not a directory in the normal llvm module; it is a separate
+ Subversion
+ module that must be checked out (usually to <tt>projects/test-suite</tt>).
+ This
+ module contains a comprehensive correctness, performance, and benchmarking
+ test
+ suite for LLVM. It is a separate Subversion module because not every LLVM
+ user is
+ interested in downloading or building such a comprehensive test suite. For
+ further details on this test suite, please see the
+ <a href="TestingGuide.html">Testing Guide</a> document.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="tools"><tt>llvm/tools</tt></a></div>
+<div class="doc_text">
+
+<p>The <b>tools</b> directory contains the executables built out of the
+libraries above, which form the main part of the user interface. You can
+always get help for a tool by typing <tt>tool_name --help</tt>. The
+following is a brief introduction to the most important tools. More detailed
+information is in the <a href="CommandGuide/index.html">Command Guide</a>.</p>
+
+<dl>
+
+ <dt><tt><b>bugpoint</b></tt></dt>
+ <dd><tt>bugpoint</tt> is used to debug
+ optimization passes or code generation backends by narrowing down the
+ given test case to the minimum number of passes and/or instructions that
+ still cause a problem, whether it is a crash or miscompilation. See <a
+ href="HowToSubmitABug.html">HowToSubmitABug.html</a> for more information
+ on using <tt>bugpoint</tt>.</dd>
+
+ <dt><tt><b>llvmc</b></tt></dt>
+ <dd>The LLVM Compiler Driver. This program can
+ be configured to utilize both LLVM and non-LLVM compilation tools to enable
+ pre-processing, translation, optimization, assembly, and linking of programs
+ all from one command line. <tt>llvmc</tt> also takes care of processing the
+ dependent libraries found in bitcode. This reduces the need to get the
+ traditional <tt>-l&lt;name&gt;</tt> options right on the command line. Please
+ note that this tool, while functional, is still experimental and not feature
+ complete.</dd>
+
+ <dt><tt><b>llvm-ar</b></tt></dt>
+ <dd>The archiver produces an archive containing
+ the given LLVM bitcode files, optionally with an index for faster
+ lookup.</dd>
+
+ <dt><tt><b>llvm-as</b></tt></dt>
+ <dd>The assembler transforms the human readable LLVM assembly to LLVM
+ bitcode.</dd>
+
+ <dt><tt><b>llvm-dis</b></tt></dt>
+ <dd>The disassembler transforms the LLVM bitcode to human readable
+ LLVM assembly.</dd>
+
+ <dt><tt><b>llvm-ld</b></tt></dt>
+ <dd><tt>llvm-ld</tt> is a general purpose and extensible linker for LLVM.
+ This is the linker invoked by <tt>llvmc</tt>. It performs standard link time
+ optimizations and allows optimization modules to be loaded and run so that
+ language specific optimizations can be applied at link time.</dd>
+
+ <dt><tt><b>llvm-link</b></tt></dt>
+ <dd><tt>llvm-link</tt>, not surprisingly, links multiple LLVM modules into
+ a single program.</dd>
+
+ <dt><tt><b>lli</b></tt></dt>
+ <dd><tt>lli</tt> is the LLVM interpreter, which
+ can directly execute LLVM bitcode (although very slowly...). In addition
+ to a simple interpreter, <tt>lli</tt> also has a tracing mode (entered by
+ specifying <tt>-trace</tt> on the command line). Finally, for
+ architectures that support it (currently x86, Sparc, and PowerPC), by default,
+ <tt>lli</tt> will function as a Just-In-Time compiler (if the
+ functionality was compiled in), and will execute the code <i>much</i>
+ faster than the interpreter.</dd>
+
+ <dt><tt><b>llc</b></tt></dt>
+ <dd> <tt>llc</tt> is the LLVM backend compiler, which
+ translates LLVM bitcode to a native code assembly file or to C code (with
+ the -march=c option).</dd>
+
+ <dt><tt><b>llvm-gcc</b></tt></dt>
+ <dd><tt>llvm-gcc</tt> is a GCC-based C frontend that has been retargeted to
+ use LLVM as its backend instead of GCC's RTL backend. It can also emit LLVM
+ bitcode or assembly (with the <tt>-emit-llvm</tt> option) instead of the
+ usual machine code output. It works just like any other GCC compiler,
+ taking the typical <tt>-c, -S, -E, -o</tt> options.
+ Additionally, the source code for <tt>llvm-gcc</tt> is available as a
+ separate Subversion module.</dd>
+
+ <dt><tt><b>opt</b></tt></dt>
+ <dd><tt>opt</tt> reads LLVM bitcode, applies a series of LLVM to LLVM
+ transformations (which are specified on the command line), and then outputs
+ the resultant bitcode. The '<tt>opt --help</tt>' command is a good way to
+ get a list of the program transformations available in LLVM.<br/>
+ <dd><tt>opt</tt> can also be used to run a specific analysis on an input
+ LLVM bitcode file and print out the results. It is primarily useful for
+ debugging analyses, or familiarizing yourself with what an analysis does.</dd>
+</dl>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="utils"><tt>llvm/utils</tt></a></div>
+<div class="doc_text">
+
+<p>This directory contains utilities for working with LLVM source code, and some
+of the utilities are actually required as part of the build process because they
+are code generators for parts of LLVM infrastructure.</p>
+
+<dl>
+ <dt><tt><b>codegen-diff</b></tt> <dd><tt>codegen-diff</tt> is a script
+ that finds differences between code that LLC generates and code that LLI
+ generates. This is a useful tool if you are debugging one of them,
+ assuming that the other generates correct output. For the full user
+ manual, run <tt>`perldoc codegen-diff'</tt>.<br><br>
+
+ <dt><tt><b>emacs/</b></tt> <dd>The <tt>emacs</tt> directory contains
+ syntax-highlighting files which will work with Emacs and XEmacs editors,
+ providing syntax highlighting support for LLVM assembly files and TableGen
+ description files. For information on how to use the syntax files, consult
+ the <tt>README</tt> file in that directory.<br><br>
+
+ <dt><tt><b>getsrcs.sh</b></tt> <dd>The <tt>getsrcs.sh</tt> script finds
+ and outputs all non-generated source files, which is useful if one wishes
+ to do a lot of development across directories and does not want to
+ individually find each file. One way to use it is to run, for example:
+ <tt>xemacs `utils/getsrcs.sh`</tt> from the top of your LLVM source
+ tree.<br><br>
+
+ <dt><tt><b>llvmgrep</b></tt></dt>
+ <dd>This little tool performs an "egrep -H -n" on each source file in LLVM and
+ passes to it a regular expression provided on <tt>llvmgrep</tt>'s command
+ line. This is a very efficient way of searching the source base for a
+ particular regular expression.</dd>
+
+ <dt><tt><b>makellvm</b></tt> <dd>The <tt>makellvm</tt> script compiles all
+ files in the current directory and then compiles and links the tool that
+ is the first argument. For example, assuming you are in the directory
+ <tt>llvm/lib/Target/Sparc</tt>, if <tt>makellvm</tt> is in your path,
+ simply running <tt>makellvm llc</tt> will make a build of the current
+ directory, switch to directory <tt>llvm/tools/llc</tt> and build it,
+ causing a re-linking of LLC.<br><br>
+
+ <dt><tt><b>NewNightlyTest.pl</b></tt> and
+ <tt><b>NightlyTestTemplate.html</b></tt> <dd>These files are used in a
+ cron script to generate nightly status reports of the functionality of
+ tools, and the results can be seen by following the appropriate link on
+ the <a href="http://llvm.org/">LLVM homepage</a>.<br><br>
+
+ <dt><tt><b>TableGen/</b></tt> <dd>The <tt>TableGen</tt> directory contains
+ the tool used to generate register descriptions, instruction set
+ descriptions, and even assemblers from common TableGen description
+ files.<br><br>
+
+ <dt><tt><b>vim/</b></tt> <dd>The <tt>vim</tt> directory contains
+ syntax-highlighting files which will work with the VIM editor, providing
+ syntax highlighting support for LLVM assembly files and TableGen
+ description files. For information on how to use the syntax files, consult
+ the <tt>README</tt> file in that directory.<br><br>
+
+</dl>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="win32"><tt>llvm/win32</tt></a></div>
+<div class="doc_text">
+ <p>This directory contains build scripts and project files for use with
+ Visual C++. This allows developers on Windows to build LLVM without the need
+ for Cygwin. The contents of this directory should be considered experimental
+ at this time.
+ </p>
+</div>
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="tutorial">An Example Using the LLVM Tool Chain</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+<p>This section gives an example of using LLVM. llvm-gcc3 is now obsolete,
+so we only include instructions for llvm-gcc4.
+</p>
+
+<p><b>Note:</b> The <i>gcc4</i> frontend's invocation is <b><i>considerably different</i></b>
+from the previous <i>gcc3</i> frontend. In particular, the <i>gcc4</i> frontend <b><i>does not</i></b>
+create bitcode by default: <i>gcc4</i> produces native code. As the example below illustrates,
+the '-emit-llvm' flag is needed to produce LLVM bitcode output. For <i>makefiles</i> and
+<i>configure</i> scripts, the CFLAGS variable needs '-emit-llvm' to produce bitcode
+output.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="tutorial4">Example with llvm-gcc4</a></div>
+
+<div class="doc_text">
+
+<ol>
+ <li><p>First, create a simple C file, name it 'hello.c':</p>
+
+<div class="doc_code">
+<pre>
+#include &lt;stdio.h&gt;
+
+int main() {
+ printf("hello world\n");
+ return 0;
+}
+</pre></div></li>
+
+ <li><p>Next, compile the C file into a native executable:</p>
+
+ <div class="doc_code"><pre>% llvm-gcc hello.c -o hello</pre></div>
+
+ <p>Note that llvm-gcc works just like GCC by default. The standard -S and
+ -c arguments work as usual (producing a native .s or .o file,
+ respectively).</p></li>
+
+ <li><p>Next, compile the C file into an LLVM bitcode file:</p>
+
+ <div class="doc_code">
+ <pre>% llvm-gcc -O3 -emit-llvm hello.c -c -o hello.bc</pre></div>
+
+ <p>The -emit-llvm option can be used with the -S or -c options to emit an
+ LLVM ".ll" or ".bc" file (respectively) for the code. This allows you
+ to use the <a href="CommandGuide/index.html">standard LLVM tools</a> on
+ the bitcode file.</p>
+
+ <p>Unlike llvm-gcc3, llvm-gcc4 correctly responds to -O[0123] arguments.
+ </p></li>
+
+ <li><p>Run the program in both forms. To run the program, use:</p>
+
+ <div class="doc_code"><pre>% ./hello</pre></div>
+
+ <p>and</p>
+
+ <div class="doc_code"><pre>% lli hello.bc</pre></div>
+
+ <p>The second example shows how to invoke the LLVM JIT, <a
+ href="CommandGuide/html/lli.html">lli</a>.</p></li>
+
+ <li><p>Use the <tt>llvm-dis</tt> utility to take a look at the LLVM assembly
+ code:</p>
+
+<div class="doc_code">
+<pre>llvm-dis &lt; hello.bc | less</pre>
+</div></li>
+
+ <li><p>Compile the program to native assembly using the LLC code
+ generator:</p>
+
+ <div class="doc_code"><pre>% llc hello.bc -o hello.s</pre></div></li>
+
+ <li><p>Assemble the native assembly language file into a program:</p>
+
+<div class="doc_code">
+<pre>
+<b>Solaris:</b> % /opt/SUNWspro/bin/cc -xarch=v9 hello.s -o hello.native
+
+<b>Others:</b> % gcc hello.s -o hello.native
+</pre>
+</div></li>
+
+ <li><p>Execute the native code program:</p>
+
+ <div class="doc_code"><pre>% ./hello.native</pre></div>
+
+ <p>Note that using llvm-gcc to compile directly to native code (i.e. when
+ the -emit-llvm option is not present) does steps 6/7/8 for you.</p>
+ </li>
+
+</ol>
+
+</div>
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="problems">Common Problems</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>If you are having problems building or using LLVM, or if you have any other
+general questions about LLVM, please consult the <a href="FAQ.html">Frequently
+Asked Questions</a> page.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="links">Links</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>This document is just an <b>introduction</b> to how to use LLVM to do
+some simple things... there are many more interesting and complicated things
+that you can do that aren't documented here (but we'll gladly accept a patch
+if you want to write something up!). For more information about LLVM, check
+out:</p>
+
+<ul>
+ <li><a href="http://llvm.org/">LLVM homepage</a></li>
+ <li><a href="http://llvm.org/doxygen/">LLVM doxygen tree</a></li>
+ <li><a href="http://llvm.org/docs/Projects.html">Starting a Project
+ that Uses LLVM</a></li>
+</ul>
+
+</div>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!" /></a>
+
+ <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
+ <a href="http://llvm.x10sys.com/rspencer/">Reid Spencer</a><br>
+ <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+</body>
+</html>
diff --git a/docs/GettingStartedVS.html b/docs/GettingStartedVS.html
new file mode 100644
index 0000000..0580378
--- /dev/null
+++ b/docs/GettingStartedVS.html
@@ -0,0 +1,354 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+ <title>Getting Started with the LLVM System using Microsoft Visual Studio</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<div class="doc_title">
+ Getting Started with the LLVM System using Microsoft Visual Studio
+</div>
+
+<ul>
+ <li><a href="#overview">Overview</a>
+ <li><a href="#quickstart">Getting Started Quickly (A Summary)</a>
+ <li><a href="#requirements">Requirements</a>
+ <ol>
+ <li><a href="#hardware">Hardware</a>
+ <li><a href="#software">Software</a>
+ </ol></li>
+
+ <li><a href="#starting">Getting Started with LLVM</a>
+ <ol>
+ <li><a href="#terminology">Terminology and Notation</a>
+ <li><a href="#objfiles">The Location of LLVM Object Files</a>
+ </ol></li>
+
+ <li><a href="#tutorial">An Example Using the LLVM Tool Chain</a>
+ <li><a href="#problems">Common Problems</a>
+ <li><a href="#links">Links</a>
+</ul>
+
+<div class="doc_author">
+ <p>Written by:
+ <a href="mailto:jeffc@jolt-lang.org">Jeff Cohen</a>
+ </p>
+</div>
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="overview"><b>Overview</b></a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+ <p>The Visual Studio port at this time is experimental. It is suitable for
+ use only if you are writing your own compiler front end or otherwise have a
+ need to dynamically generate machine code. The JIT and interpreter are
+ functional, but it is currently not possible to generate assembly code which
+ is then assembled into an executable. You can indirectly create executables
+ by using the C back end.</p>
+
+ <p>To emphasize, there is no C/C++ front end currently available.
+ <tt>llvm-gcc</tt> is based on GCC, which cannot be bootstrapped using VC++.
+ Eventually there should be a <tt>llvm-gcc</tt> based on Cygwin or MinGW that
+ is usable. There is also the option of generating bitcode files on Unix and
+ copying them over to Windows. But be aware the odds of linking C++ code
+ compiled with <tt>llvm-gcc</tt> with code compiled with VC++ are essentially
+ zero.</p>
+
+ <p>The LLVM test suite cannot be run on the Visual Studio port at this
+ time.</p>
+
+ <p>Most of the tools build and work. <tt>llvm-db</tt> does not build at this
+ time. <tt>bugpoint</tt> does build, but does not work.</p>
+
+ <p>Additional information about the LLVM directory structure and tool chain
+ can be found on the main <a href="GettingStarted.html">Getting Started</a>
+ page.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="quickstart"><b>Getting Started Quickly (A Summary)</b></a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>Here's the short story for getting up and running quickly with LLVM:</p>
+
+<ol>
+ <li>Read the documentation.</li>
+ <li>Read the documentation.</li>
+ <li>Remember that you were warned twice about reading the documentation.</li>
+
+ <li>Get the Source Code
+ <ul>
+ <li>With the distributed files:
+ <ol>
+ <li><tt>cd <i>where-you-want-llvm-to-live</i></tt>
+ <li><tt>gunzip --stdout llvm-<i>version</i>.tar.gz | tar -xvf -</tt>
+ <i>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;or use WinZip</i>
+ <li><tt>cd llvm</tt></li>
+ </ol></li>
+
+ <li>With anonymous Subversion access:
+ <ol>
+ <li><tt>cd <i>where-you-want-llvm-to-live</i></tt></li>
+ <li><tt>svn co http://llvm.org/svn/llvm-project/llvm-top/trunk llvm-top
+ </tt></li>
+ <li><tt>make checkout MODULE=llvm</tt>
+ <li><tt>cd llvm</tt></li>
+ </ol></li>
+ </ul></li>
+
+ <li>Start Visual Studio
+ <ol>
+ <li>Simply double click on the solution file <tt>llvm/win32/llvm.sln</tt>.
+ </li>
+ </ol></li>
+
+ <li>Build the LLVM Suite:
+ <ol>
+ <li>Simply build the solution.</li>
+ <li>The Fibonacci project is a sample program that uses the JIT. Modify
+ the project's debugging properties to provide a numeric command line
+ argument. The program will print the corresponding Fibonacci value.</li>
+ </ol></li>
+
+</ol>
+
+<p>It is strongly encouraged that you get the latest version from Subversion as
+changes are continually making the VS support better.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="requirements"><b>Requirements</b></a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+ <p>Before you begin to use the LLVM system, review the requirements given
+ below. This may save you some trouble by knowing ahead of time what hardware
+ and software you will need.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="hardware"><b>Hardware</b></a>
+</div>
+
+<div class="doc_text">
+
+ <p>Any system that can adequately run Visual Studio .NET 2003 is fine. The
+ LLVM source tree and object files, libraries and executables will consume
+ approximately 3GB.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="software"><b>Software</b></a></div>
+<div class="doc_text">
+
+ <p>You will need Visual Studio .NET 2003. Earlier versions cannot open the
+ solution/project files. The VS 2005 beta can, but will migrate these files
+ to its own format in the process. While it should work with the VS 2005
+ beta, there are no guarantees and there is no support for it at this time.
+ It has been reported that VC++ Express also works.</p>
+
+ <p>If you plan to modify any .y or .l files, you will need to have bison
+ and/or flex installed where Visual Studio can find them. Otherwise, you do
+ not need them and the pre-generated files that come with the source tree
+ will be used.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="starting"><b>Getting Started with LLVM</b></a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>The remainder of this guide is meant to get you up and running with
+LLVM using Visual Studio and to give you some basic information about the LLVM
+environment.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="terminology">Terminology and Notation</a>
+</div>
+
+<div class="doc_text">
+
+<p>Throughout this manual, the following names are used to denote paths
+specific to the local system and working environment. <i>These are not
+environment variables you need to set but just strings used in the rest
+of this document below</i>. In any of the examples below, simply replace
+each of these names with the appropriate pathname on your local system.
+All these paths are absolute:</p>
+
+<dl>
+ <dt>SRC_ROOT
+ <dd>
+ This is the top level directory of the LLVM source tree.
+ <p>
+
+ <dt>OBJ_ROOT
+ <dd>
+ This is the top level directory of the LLVM object tree (i.e. the
+ tree where object files and compiled programs will be placed). It
+ is fixed at SRC_ROOT/win32.
+ <p>
+</dl>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="objfiles">The Location of LLVM Object Files</a>
+</div>
+
+<div class="doc_text">
+
+ <p>The object files are placed under <tt>OBJ_ROOT/Debug</tt> for debug builds
+ and <tt>OBJ_ROOT/Release</tt> for release (optimized) builds. These include
+ both executables and libraries that your application can link against.</p>
+
+ <p>The files that <tt>configure</tt> would create when building on Unix are
+ created by the <tt>Configure</tt> project and placed in
+ <tt>OBJ_ROOT/llvm</tt>. Your application must have OBJ_ROOT in its include
+ search path just before <tt>SRC_ROOT/include</tt>.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="tutorial">An Example Using the LLVM Tool Chain</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<ol>
+ <li>First, create a simple C file, name it 'hello.c':
+ <pre>
+ #include &lt;stdio.h&gt;
+ int main() {
+ printf("hello world\n");
+ return 0;
+ }
+ </pre></li>
+
+ <li><p>Next, compile the C file into a LLVM bitcode file:</p>
+ <p><tt>% llvm-gcc -c hello.c -emit-llvm -o hello.bc</tt></p>
+
+ <p>This will create the result file <tt>hello.bc</tt> which is the LLVM
+ bitcode that corresponds to the compiled program and the library
+ facilities that it required. You can execute this file directly using
+ the <tt>lli</tt> tool, compile it to native assembly with <tt>llc</tt>,
+ optimize or analyze it further with the <tt>opt</tt> tool, etc.</p>
+
+ <p><b>Note: while you cannot do this step on Windows, you can do it on a
+ Unix system and transfer <tt>hello.bc</tt> to Windows. Important:
+ transfer as a binary file!</b></p></li>
+
+ <li><p>Run the program using the just-in-time compiler:</p>
+
+ <p><tt>% lli hello.bc</tt></p>
+
+ <p>Note: this will only work for trivial C programs. Non-trivial programs
+ (and any C++ program) will have dependencies on the GCC runtime that
+ won't be satisfied by the Microsoft runtime libraries.</p></li>
+
+ <li><p>Use the <tt>llvm-dis</tt> utility to take a look at the LLVM assembly
+ code:</p>
+
+ <p><tt>% llvm-dis &lt; hello.bc | more</tt></p></li>
+
+ <li><p>Compile the program to C using the LLC code generator:</p>
+
+ <p><tt>% llc -march=c hello.bc</tt></p></li>
+
+ <li><p>Compile to binary using Microsoft C:</p>
+
+ <p><tt>% cl hello.cbe.c</tt></p>
+
+ <p>Note: this will only work for trivial C programs. Non-trivial programs
+ (and any C++ program) will have dependencies on the GCC runtime that
+ won't be satisfied by the Microsoft runtime libraries.</p></li>
+
+ <li><p>Execute the native code program:</p>
+
+ <p><tt>% hello.cbe.exe</tt></p></li>
+
+</ol>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="problems">Common Problems</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>If you are having problems building or using LLVM, or if you have any other
+general questions about LLVM, please consult the <a href="FAQ.html">Frequently
+Asked Questions</a> page.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="links">Links</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>This document is just an <b>introduction</b> to how to use LLVM to do
+some simple things... there are many more interesting and complicated things
+that you can do that aren't documented here (but we'll gladly accept a patch
+if you want to write something up!). For more information about LLVM, check
+out:</p>
+
+<ul>
+ <li><a href="http://llvm.org/">LLVM homepage</a></li>
+ <li><a href="http://llvm.org/doxygen/">LLVM doxygen tree</a></li>
+ <li><a href="http://llvm.org/docs/Projects.html">Starting a Project
+ that Uses LLVM</a></li>
+</ul>
+
+</div>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!" /></a>
+
+ <a href="mailto:jeffc@jolt-lang.org">Jeff Cohen</a><br>
+ <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+</body>
+</html>
diff --git a/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeas.txt b/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeas.txt
new file mode 100644
index 0000000..f086181
--- /dev/null
+++ b/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeas.txt
@@ -0,0 +1,74 @@
+Date: Sat, 18 Nov 2000 09:19:35 -0600 (CST)
+From: Vikram Adve <vadve@cs.uiuc.edu>
+To: Chris Lattner <lattner@cs.uiuc.edu>
+Subject: a few thoughts
+
+I've been mulling over the virtual machine problem and I had some
+thoughts about some things for us to think about and discuss:
+
+1. We need to be clear on our goals for the VM. Do we want to emphasize
+ portability and safety like the Java VM? Or shall we focus on the
+ architecture interface first (i.e., consider the code generation and
+ processor issues), since the architecture interface question is also
+ important for portable Java-type VMs?
+
+ This is important because the audiences for these two goals are very
+ different. Architects and many compiler people care much more about
+ the second question. The Java compiler and OS community care much more
+ about the first one.
+
+ Also, while the architecture interface question is important for
+ Java-type VMs, the design constraints are very different.
+
+
+2. Design issues to consider (an initial list that we should continue
+ to modify). Note that I'm not trying to suggest actual solutions here,
+ but just various directions we can pursue:
+
+ a. A single-assignment VM, which we've both already been thinking about.
+
+ b. A strongly-typed VM. One question is do we need the types to be
+ explicitly declared or should they be inferred by the dynamic compiler?
+
+ c. How do we get more high-level information into the VM while keeping
+ to a low-level VM design?
+
+ o Explicit array references as operands? An alternative is
+ to have just an array type, and let the index computations be
+ separate 3-operand instructions.
+
+ o Explicit instructions to handle aliasing, e.g.s:
+ -- an instruction to say "I speculate that these two values are not
+ aliased, but check at runtime", like speculative execution in
+ EPIC?
+ -- or an instruction to check whether two values are aliased and
+ execute different code depending on the answer, somewhat like
+ predicated code in EPIC
+
+ o (This one is a difficult but powerful idea.)
+ A "thread-id" field on every instruction that allows the static
+ compiler to generate a set of parallel threads, and then have
+ the runtime compiler and hardware do what they please with it.
+ This has very powerful uses, but thread-id on every instruction
+ is expensive in terms of instruction size and code size.
+ We would need to compactly encode it somehow.
+
+ Also, this will require some reading on at least two other
+ projects:
+ -- Multiscalar architecture from Wisconsin
+ -- Simultaneous multithreading architecture from Washington
+
+ o Or forget all this and stick to a traditional instruction set?
+
+
+BTW, on an unrelated note, after the meeting yesterday, I did remember
+that you had suggested doing instruction scheduling on SSA form instead
+of a dependence DAG earlier in the semester. When we talked about
+it yesterday, I didn't remember where the idea had come from but I
+remembered later. Just giving credit where its due...
+
+Perhaps you can save the above as a file under RCS so you and I can
+continue to expand on this.
+
+--Vikram
+
diff --git a/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeasResp.txt b/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeasResp.txt
new file mode 100644
index 0000000..1c725f5
--- /dev/null
+++ b/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeasResp.txt
@@ -0,0 +1,199 @@
+Date: Sun, 19 Nov 2000 16:23:57 -0600 (CST)
+From: Chris Lattner <sabre@nondot.org>
+To: Vikram Adve <vadve@cs.uiuc.edu>
+Subject: Re: a few thoughts
+
+Okay... here are a few of my thoughts on this (it's good to know that we
+think so alike!):
+
+> 1. We need to be clear on our goals for the VM. Do we want to emphasize
+> portability and safety like the Java VM? Or shall we focus on the
+> architecture interface first (i.e., consider the code generation and
+> processor issues), since the architecture interface question is also
+> important for portable Java-type VMs?
+
+I foresee the architecture looking kinda like this: (which is completely
+subject to change)
+
+1. The VM code is NOT guaranteed safe in a java sense. Doing so makes it
+ basically impossible to support C like languages. Besides that,
+ certifying a register based language as safe at run time would be a
+ pretty expensive operation to have to do. Additionally, we would like
+ to be able to statically eliminate many bounds checks in Java
+ programs... for example.
+
+ 2. Instead, we can do the following (eventually):
+ * Java bytecode is used as our "safe" representation (to avoid
+ reinventing something that we don't add much value to). When the
+ user chooses to execute Java bytecodes directly (ie, not
+ precompiled) the runtime compiler can do some very simple
+ transformations (JIT style) to convert it into valid input for our
+ VM. Performance is not wonderful, but it works right.
+ * The file is scheduled to be compiled (rigorously) at a later
+ time. This could be done by some background process or by a second
+ processor in the system during idle time or something...
+ * To keep things "safe" ie to enforce a sandbox on Java/foreign code,
+ we could sign the generated VM code with a host specific private
+ key. Then before the code is executed/loaded, we can check to see if
+ the trusted compiler generated the code. This would be much quicker
+ than having to validate consistency (especially if bounds checks have
+ been removed, for example)
+
+> This is important because the audiences for these two goals are very
+> different. Architects and many compiler people care much more about
+> the second question. The Java compiler and OS community care much more
+> about the first one.
+
+3. By focusing on a more low level virtual machine, we have much more room
+ for value add. The nice safe "sandbox" VM can be provided as a layer
+ on top of it. It also lets us focus on the more interesting compilers
+ related projects.
+
+> 2. Design issues to consider (an initial list that we should continue
+> to modify). Note that I'm not trying to suggest actual solutions here,
+> but just various directions we can pursue:
+
+Understood. :)
+
+> a. A single-assignment VM, which we've both already been thinking
+> about.
+
+Yup, I think that this makes a lot of sense. I am still intrigued,
+however, by the prospect of a minimally allocated VM representation... I
+think that it could have definite advantages for certain applications
+(think very small machines, like PDAs). I don't, however, think that our
+initial implementations should focus on this. :)
+
+Here are some other auxiliary goals that I think we should consider:
+
+1. Primary goal: Support a high performance dynamic compilation
+ system. This means that we have an "ideal" division of labor between
+ the runtime and static compilers. Of course, the other goals of the
+ system somewhat reduce the importance of this point (f.e. portability
+ reduces performance, but hopefully not much)
+2. Portability to different processors. Since we are most familiar with
+ x86 and solaris, I think that these two are excellent candidates when
+ we get that far...
+3. Support for all languages & styles of programming (general purpose
+ VM). This is the point that disallows java style bytecodes, where all
+ array refs are checked for bounds, etc...
+4. Support linking between different language families. For example, call
+ C functions directly from Java without using the nasty/slow/gross JNI
+ layer. This involves several subpoints:
+ A. Support for languages that require garbage collectors and integration
+ with languages that don't. As a base point, we could insist on
+ always using a conservative GC, but implement free as a noop, f.e.
+
+> b. A strongly-typed VM. One question is do we need the types to be
+> explicitly declared or should they be inferred by the dynamic
+> compiler?
+
+ B. This is kind of similar to another idea that I have: make OOP
+ constructs (virtual function tables, class hierarchies, etc) explicit
+ in the VM representation. I believe that the number of additional
+ constructs would be fairly low, but would give us lots of important
+ information... something else that would/could be important is to
+ have exceptions as first class types so that they would be handled in
+ a uniform way for the entire VM... so that C functions can call Java
+ functions for example...
+
+> c. How do we get more high-level information into the VM while keeping
+> to a low-level VM design?
+> o Explicit array references as operands? An alternative is
+> to have just an array type, and let the index computations be
+> separate 3-operand instructions.
+
+ C. In the model I was thinking of (subject to change of course), we
+ would just have an array type (distinct from the pointer
+ types). This would allow us to have arbitrarily complex index
+ expressions, while still distinguishing "load" from "Array load",
+ for example. Perhaps also, switch jump tables would be first class
+ types as well? This would allow better reasoning about the program.
+
+5. Support dynamic loading of code from various sources. Already
+ mentioned above was the example of loading java bytecodes, but we want
+ to support dynamic loading of VM code as well. This makes the job of
+ the runtime compiler much more interesting: it can do interprocedural
+ optimizations that the static compiler can't do, because it doesn't
+ have all of the required information (for example, inlining from
+ shared libraries, etc...)
+
+6. Define a set of generally useful annotations to add to the VM
+ representation. For example, a function can be analysed to see if it
+ has any sideeffects when run... also, the MOD/REF sets could be
+ calculated, etc... we would have to determine what is reasonable. This
+ would generally be used to make IP optimizations cheaper for the
+ runtime compiler...
+
+> o Explicit instructions to handle aliasing, e.g.s:
+> -- an instruction to say "I speculate that these two values are not
+> aliased, but check at runtime", like speculative execution in
+> EPIC?
+> -- or an instruction to check whether two values are aliased and
+> execute different code depending on the answer, somewhat like
+> predicated code in EPIC
+
+These are also very good points... if this can be determined at compile
+time. I think that an epic style of representation (not the instruction
+packing, just the information presented) could be a very interesting model
+to use... more later...
+
+> o (This one is a difficult but powerful idea.)
+> A "thread-id" field on every instruction that allows the static
+> compiler to generate a set of parallel threads, and then have
+> the runtime compiler and hardware do what they please with it.
+> This has very powerful uses, but thread-id on every instruction
+> is expensive in terms of instruction size and code size.
+> We would need to compactly encode it somehow.
+
+Yes yes yes! :) I think it would be *VERY* useful to include this kind
+of information (which EPIC architectures *implicitly* encode). The trend
+that we are seeing supports this greatly:
+
+1. Commodity processors are getting massive SIMD support:
+ * Intel/Amd MMX/MMX2
+ * AMD's 3Dnow!
+ * Intel's SSE/SSE2
+ * Sun's VIS
+2. SMP is becoming much more common, especially in the server space.
+3. Multiple processors on a die are right around the corner.
+
+If nothing else, not designing this in would severely limit our future
+expansion of the project...
+
+> Also, this will require some reading on at least two other
+> projects:
+> -- Multiscalar architecture from Wisconsin
+> -- Simultaneous multithreading architecture from Washington
+>
+> o Or forget all this and stick to a traditional instruction set?
+
+Heh... :) Well, from a pure research point of view, it is almost more
+attractive to go with the most extreme/different ISA possible. On one axis
+you get safety and conservatism, and on the other you get degree of
+influence that the results have. Of course the problem with pure research
+is that often times there is no concrete product of the research... :)
+
+> BTW, on an unrelated note, after the meeting yesterday, I did remember
+> that you had suggested doing instruction scheduling on SSA form instead
+> of a dependence DAG earlier in the semester. When we talked about
+> it yesterday, I didn't remember where the idea had come from but I
+> remembered later. Just giving credit where its due...
+
+:) Thanks.
+
+> Perhaps you can save the above as a file under RCS so you and I can
+> continue to expand on this.
+
+I think it makes sense to do so when we get our ideas more formalized and
+bounce it back and forth a couple of times... then I'll do a more formal
+writeup of our goals and ideas. Obviously our first implementation will
+not want to do all of the stuff that I pointed out above... but we will
+want to design the project so that we do not artificially limit ourselves
+at sometime in the future...
+
+Anyways, let me know what you think about these ideas... and if they sound
+reasonable...
+
+-Chris
+
diff --git a/docs/HistoricalNotes/2000-12-06-EncodingIdea.txt b/docs/HistoricalNotes/2000-12-06-EncodingIdea.txt
new file mode 100644
index 0000000..8c45292
--- /dev/null
+++ b/docs/HistoricalNotes/2000-12-06-EncodingIdea.txt
@@ -0,0 +1,30 @@
+From: Chris Lattner [mailto:sabre@nondot.org]
+Sent: Wednesday, December 06, 2000 6:41 PM
+To: Vikram S. Adve
+Subject: Additional idea with respect to encoding
+
+Here's another idea with respect to keeping the common case instruction
+size down (less than 32 bits ideally):
+
+Instead of encoding an instruction to operate on two register numbers,
+have it operate on two negative offsets based on the current register
+number. Therefore, instead of using:
+
+r57 = add r55, r56 (r57 is the implicit dest register, of course)
+
+We could use:
+
+r57 = add -2, -1
+
+My guess is that most SSA references are to recent values (especially if
+they correspond to expressions like (x+y*z+p*q/ ...), so the negative
+numbers would tend to stay small, even at the end of the procedure (where
+the implicit register destination number could be quite large). Of course
+the negative sign is redundant, so you would be storing small integers
+almost all of the time, and 5-6 bits worth of register number would be
+plenty for most cases...
+
+What do you think?
+
+-Chris
+
diff --git a/docs/HistoricalNotes/2000-12-06-MeetingSummary.txt b/docs/HistoricalNotes/2000-12-06-MeetingSummary.txt
new file mode 100644
index 0000000..b66e185
--- /dev/null
+++ b/docs/HistoricalNotes/2000-12-06-MeetingSummary.txt
@@ -0,0 +1,83 @@
+SUMMARY
+-------
+
+We met to discuss the LLVM instruction format and bytecode representation:
+
+ISSUES RESOLVED
+---------------
+
+1. We decided that we shall use a flat namespace to represent our
+ variables in SSA form, as opposed to having a two dimensional namespace
+ of the original variable and the SSA instance subscript.
+
+ARGUMENT AGAINST:
+ * A two dimensional namespace would be valuable when doing alias
+ analysis because the extra information can help limit the scope of
+ analysis.
+
+ARGUMENT FOR:
+ * Including this information would require that all users of the LLVM
+ bytecode would have to parse and handle it. This would slow down the
+ common case and inflate the instruction representation with another
+ infinite variable space.
+
+REASONING:
+ * It was decided that because original variable sources could be
+ reconstructed from SSA form in linear time, that it would be an
+ unjustified expense for the common case to include the extra
+ information for one optimization. Alias analysis itself is typically
+ greater than linear in asymptotic complexity, so this extra analysis
+ would not affect the runtime of the optimization in a significant
+ way. Additionally, this would be an unlikely optimization to do at
+ runtime.
+
+
+IDEAS TO CONSIDER
+-----------------
+
+1. Including dominator information in the LLVM bytecode
+ representation. This is one example of an analysis result that may be
+ packaged with the bytecodes themselves. As a conceptual implementation
+ idea, we could include an immediate dominator number for each basic block
+ in the LLVM bytecode program. Basic blocks could be numbered according
+ to the order of occurrence in the bytecode representation.
+
+2. Including loop header and body information. This would facilitate
+ detection of intervals and natural loops.
+
+UNRESOLVED ISSUES
+-----------------
+
+1. Will oSUIF provide enough of an infrastructure to support the research
+ that we will be doing? We know that it has less than stellar
+ performance, but hope that this will be of little importance for our
+ static compiler. This could affect us if we decided to do some IP
+ research. Also we do not yet understand the level of exception support
+ currently implemented.
+
+2. Should we consider the requirements of a direct hardware implementation
+ of the LLVM when we design it? If so, several design issues should
+ have their priorities shifted. The other option is to focus on a
+ software layer interpreting the LLVM in all cases.
+
+3. Should we use some form of packetized format to improve forward
+ compatibility? For example, we could design the system to encode a
+ packet type and length field before analysis information, to allow a
+ runtime to skip information that it didn't understand in a bytecode
+ stream. The obvious benefit would be for compatibility, the drawback
+ is that it would tend to splinter that 'standard' LLVM definition.
+
+4. Should we use fixed length instructions or variable length
+ instructions? Fetching variable length instructions is expensive (for
+ either hardware or software based LLVM runtimes), but we have several
+ 'infinite' spaces that instructions operate in (SSA register numbers,
+ type spaces, or packet length [if packets were implemented]). Several
+ options were mentioned including:
+ A. Using 16 or 32 bit numbers, which would be 'big enough'
+ B. A scheme similar to how UTF-8 works, to encode infinite numbers
+ while keeping small numbers small.
+ C. Use something similar to Huffman encoding, so that the most common
+ numbers are the smallest.
+
+-Chris
+
diff --git a/docs/HistoricalNotes/2001-01-31-UniversalIRIdea.txt b/docs/HistoricalNotes/2001-01-31-UniversalIRIdea.txt
new file mode 100644
index 0000000..111706a
--- /dev/null
+++ b/docs/HistoricalNotes/2001-01-31-UniversalIRIdea.txt
@@ -0,0 +1,39 @@
+Date: Wed, 31 Jan 2001 12:04:33 -0600
+From: Vikram S. Adve <vadve@cs.uiuc.edu>
+To: Chris Lattner <lattner@cs.uiuc.edu>
+Subject: another thought
+
+I have a budding idea about making LLVM a little more ambitious: a
+customizable runtime system that can be used to implement language-specific
+virtual machines for many different languages. E.g., a C vm, a C++ vm, a
+Java vm, a Lisp vm, ..
+
+The idea would be that LLVM would provide a standard set of runtime features
+(some low-level like standard assembly instructions with code generation and
+static and runtime optimization; some higher-level like type-safety and
+perhaps a garbage collection library). Each language vm would select the
+runtime features needed for that language, extending or customizing them as
+needed. Most of the machine-dependent code-generation and optimization
+features as well as low-level machine-independent optimizations (like PRE)
+could be provided by LLVM and should be sufficient for any language,
+simplifying the language compiler. (This would also help interoperability
+between languages.) Also, some or most of the higher-level
+machine-independent features like type-safety and access safety should be
+reusable by different languages, with minor extensions. The language
+compiler could then focus on language-specific analyses and optimizations.
+
+The risk is that this sounds like a universal IR -- something that the
+compiler community has tried and failed to develop for decades, and is
+universally skeptical about. No matter what we say, we won't be able to
+convince anyone that we have a universal IR that will work. We need to
+think about whether LLVM is different or if it has something novel that might
+convince people. E.g., the idea of providing a package of separable
+features that different languages select from. Also, using SSA with or
+without type-safety as the intermediate representation.
+
+One interesting starting point would be to discuss how a JVM would be
+implemented on top of LLVM a bit more. That might give us clues on how to
+structure LLVM to support one or more language VMs.
+
+--Vikram
+
diff --git a/docs/HistoricalNotes/2001-02-06-TypeNotationDebate.txt b/docs/HistoricalNotes/2001-02-06-TypeNotationDebate.txt
new file mode 100644
index 0000000..c09cf1f
--- /dev/null
+++ b/docs/HistoricalNotes/2001-02-06-TypeNotationDebate.txt
@@ -0,0 +1,67 @@
+Date: Tue, 6 Feb 2001 20:27:37 -0600 (CST)
+From: Chris Lattner <sabre@nondot.org>
+To: Vikram S. Adve <vadve@cs.uiuc.edu>
+Subject: Type notation debate...
+
+This is the way that I am currently planning on implementing types:
+
+Primitive Types:
+type ::= void|bool|sbyte|ubyte|short|ushort|int|uint|long|ulong
+
+Method:
+typelist ::= typelisth | /*empty*/
+typelisth ::= type | typelisth ',' type
+type ::= type (typelist)
+
+Arrays (without and with size):
+type ::= '[' type ']' | '[' INT ',' type ']'
+
+Pointer:
+type ::= type '*'
+
+Structure:
+type ::= '{' typelist '}'
+
+Packed:
+type ::= '<' INT ',' type '>'
+
+Simple examples:
+
+[[ %4, int ]] - array of (array of 4 (int))
+[ { int, int } ] - Array of structure
+[ < %4, int > ] - Array of 128 bit SIMD packets
+int (int, [[int, %4]]) - Method taking a 2d array and int, returning int
+
+
+Okay before you comment, please look at:
+
+http://www.research.att.com/~bs/devXinterview.html
+
+Search for "In another interview, you defined the C declarator syntax as
+an experiment that failed. However, this syntactic construct has been
+around for 27 years and perhaps more; why do you consider it problematic
+(except for its cumbersome syntax)?" and read that response for me. :)
+
+Now with this syntax, his example would be represented as:
+
+[ %10, bool (int, int) * ] *
+
+vs
+
+bool (*(*)[10])(int, int)
+
+in C.
+
+Basically, my argument for this type construction system is that it is
+VERY simple to use and understand (although it IS different than C, it is
+very simple and straightforward, which C is NOT). In fact, I would assert
+that most programmers TODAY do not understand pointers to member
+functions, and have to look up an example when they have to write them.
+
+In my opinion, it is critically important to have clear and concise type
+specifications, because types are going to be all over the programs.
+
+Let me know your thoughts on this. :)
+
+-Chris
+
diff --git a/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp1.txt b/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp1.txt
new file mode 100644
index 0000000..8bfefbf
--- /dev/null
+++ b/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp1.txt
@@ -0,0 +1,75 @@
+Date: Thu, 8 Feb 2001 08:42:04 -0600
+From: Vikram S. Adve <vadve@cs.uiuc.edu>
+To: Chris Lattner <sabre@nondot.org>
+Subject: RE: Type notation debate...
+
+Chris,
+
+> Okay before you comment, please look at:
+>
+> http://www.research.att.com/~bs/devXinterview.html
+
+I read this argument. Even before that, I was already in agreement with you
+and him that the C declarator syntax is difficult and confusing.
+
+But in fact, if you read the entire answer carefully, he came to the same
+conclusion I do: that you have to go with familiar syntax over logical
+syntax because familiarity is such a strong force:
+
+ "However, familiarity is a strong force. To compare, in English, we
+live
+more or less happily with the absurd rules for "to be" (am, are, is, been,
+was, were, ...) and all attempts to simplify are treated with contempt or
+(preferably) humor. It be a curious world and it always beed."
+
+> Basically, my argument for this type construction system is that it is
+> VERY simple to use and understand (although it IS different than C, it is
+> very simple and straightforward, which C is NOT). In fact, I would assert
+> that most programmers TODAY do not understand pointers to member
+> functions, and have to look up an example when they have to write them.
+
+Again, I don't disagree with this at all. But to some extent this
+particular problem is inherently difficult. Your syntax for the above
+example may be easier for you to read because this is the way you have been
+thinking about it. Honestly, I don't find it much easier than the C syntax.
+In either case, I would have to look up an example to write pointers to
+member functions.
+
+But pointers to member functions are nowhere near as common as arrays. And
+the old array syntax:
+ type [ int, int, ...]
+is just much more familiar and clear to people than anything new you
+introduce, no matter how logical it is. Introducing a new syntax that may
+make function pointers easier but makes arrays much more difficult seems
+very risky to me.
+
+> In my opinion, it is critically important to have clear and concise type
+> specifications, because types are going to be all over the programs.
+
+I absolutely agree. But the question is, what is more clear and concise?
+The syntax programmers are used to out of years of experience or a new
+syntax that they have never seen that has a more logical structure. I think
+the answer is the former. Sometimes, you have to give up a better idea
+because you can't overcome sociological barriers to it. Qwerty keyboards
+and Windows are two classic examples of bad technology that are difficult to
+root out.
+
+P.S. Also, while I agree that most of your syntax is more logical, there is
+one part that isn't:
+
+Arrays (without and with size):
+type ::= '[' type ']' | '[' INT ',' type ']'.
+
+The arrays with size lists the dimensions and the type in a single list.
+That is just too confusing:
+ [10, 40, int]
+This seems to be a 3-D array where the third dimension is something strange.
+It is too confusing to have a list of 3 things, some of which are dimensions
+and one is a type. Either of the following would be better:
+
+ array [10, 40] of int
+or
+ int [10, 40]
+
+--Vikram
+
diff --git a/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp2.txt b/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp2.txt
new file mode 100644
index 0000000..6e97841
--- /dev/null
+++ b/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp2.txt
@@ -0,0 +1,53 @@
+Date: Thu, 8 Feb 2001 14:31:05 -0600 (CST)
+From: Chris Lattner <sabre@nondot.org>
+To: Vikram S. Adve <vadve@cs.uiuc.edu>
+Subject: RE: Type notation debate...
+
+> Arrays (without and with size):
+> type ::= '[' type ']' | '[' INT ',' type ']'.
+>
+> The arrays with size lists the dimensions and the type in a single list.
+> That is just too confusing:
+
+> [10, 40, int]
+> This seems to be a 3-D array where the third dimension is something strange.
+> It is too confusing to have a list of 3 things, some of which are dimensions
+> and one is a type.
+
+The above grammar indicates that there is only one integer parameter, ie
+the upper bound. The lower bound is always implied to be zero, for
+several reasons:
+
+* As a low level VM, we want to expose addressing computations
+ explicitly. Since the lower bound must always be known in a high level
+ language statically, the language front end can do the translation
+ automatically.
+* This fits more closely with what Java needs, ie what we need in the
+ short term. Java arrays are always zero based.
+
+If a two element list is too confusing, I would recommend an alternate
+syntax of:
+
+type ::= '[' type ']' | '[' INT 'x' type ']'.
+
+For example:
+ [12 x int]
+ [12x int]
+ [ 12 x [ 4x int ]]
+
+Which is syntactically nicer, and more explicit.
+
+> Either of the following would be better:
+> array [10, 40] of int
+
+I considered this approach for arrays in general (ie array of int/ array
+of 12 int), but found that it made declarations WAY too long. Remember
+that because of the nature of llvm, you get a lot of types strewn all over
+the program, and using the 'typedef' like facility is not a wonderful
+option, because then types aren't explicit anymore.
+
+I find this email interesting, because you contradict the previous email
+you sent, where you recommend that we stick to C syntax....
+
+-Chris
+
diff --git a/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp4.txt b/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp4.txt
new file mode 100644
index 0000000..7b90327
--- /dev/null
+++ b/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp4.txt
@@ -0,0 +1,89 @@
+> But in fact, if you read the entire answer carefully, he came to the same
+> conclusion I do: that you have to go with familiar syntax over logical
+> syntax because familiarity is such a strong force:
+> "However, familiarity is a strong force. To compare, in English, we
+> live
+> more or less happily with the absurd rules for "to be" (am, are, is, been,
+> was, were, ...) and all attempts to simplify are treated with contempt or
+> (preferably) humor. It be a curious world and it always beed."
+
+Although you have to remember that his situation was considerably
+different than ours. He was in a position where he was designing a high
+level language that had to be COMPATIBLE with C. Our language is such
+that a new person would have to learn the new, different, syntax
+anyways. Making them learn about the type system does not seem like much
+of a stretch from learning the opcodes and how SSA form works, and how
+everything ties together...
+
+> > Basically, my argument for this type construction system is that it is
+> > VERY simple to use and understand (although it IS different than C, it is
+> > very simple and straightforward, which C is NOT). In fact, I would assert
+> > that most programmers TODAY do not understand pointers to member
+> > functions, and have to look up an example when they have to write them.
+
+> Again, I don't disagree with this at all. But to some extent this
+> particular problem is inherently difficult. Your syntax for the above
+> example may be easier for you to read because this is the way you have been
+> thinking about it. Honestly, I don't find it much easier than the C syntax.
+> In either case, I would have to look up an example to write pointers to
+> member functions.
+
+I would argue that because the lexical structure of the language is self
+consistent, any person who spent a significant amount of time programming
+in LLVM directly would understand how to do it without looking it up in a
+manual. The reason this does not work for C is because you rarely have to
+declare these pointers, and the syntax is inconsistent with the method
+declaration and calling syntax.
+
+> But pointers to member functions are nowhere near as common as arrays.
+
+Very true. If you're implementing an object oriented language, however,
+remember that you have to do all the pointer to member function stuff
+yourself.... so every time you invoke a virtual method one is involved
+(instead of having C++ hide it for you behind "syntactic sugar").
+
+> And the old array syntax:
+> type [ int, int, ...]
+> is just much more familiar and clear to people than anything new you
+> introduce, no matter how logical it is.
+
+Erm... excuse me but how is this the "old array syntax"? If you are
+arguing for consistency with C, you should be asking for 'type int []',
+which is significantly different than the above (besides, the above
+introduces a new operator and duplicates information
+needlessly). Basically what I am suggesting is exactly the above without
+the fluff. So instead of:
+
+ type [ int, int, ...]
+
+you use:
+
+ type [ int ]
+
+> Introducing a new syntax that may
+> make function pointers easier but makes arrays much more difficult seems
+> very risky to me.
+
+This is not about function pointers. This is about consistency in the
+type system, and consistency with the rest of the language. The point
+above does not make arrays any more difficult to use, and makes the
+structure of types much more obvious than the "c way".
+
+> > In my opinion, it is critically important to have clear and concise type
+> > specifications, because types are going to be all over the programs.
+>
+> I absolutely agree. But the question is, what is more clear and concise?
+> The syntax programmers are used to out of years of experience or a new
+> syntax that they have never seen that has a more logical structure. I think
+> the answer is the former. Sometimes, you have to give up a better idea
+> because you can't overcome sociological barriers to it. Qwerty keyboards
+> and Windows are two classic examples of bad technology that are difficult to
+> root out.
+
+Very true, but you seem to be advocating a completely different Type
+system than C has, in addition to it not offering the advantages of clear
+structure that the system I recommended does... so you seem to not have a
+problem with changing this, just with what I change it to. :)
+
+-Chris
+
diff --git a/docs/HistoricalNotes/2001-02-09-AdveComments.txt b/docs/HistoricalNotes/2001-02-09-AdveComments.txt
new file mode 100644
index 0000000..5503233
--- /dev/null
+++ b/docs/HistoricalNotes/2001-02-09-AdveComments.txt
@@ -0,0 +1,120 @@
+Ok, here are my comments and suggestions about the LLVM instruction set.
+We should discuss some now, but can discuss many of them later, when we
+revisit synchronization, type inference, and other issues.
+(We have discussed some of the comments already.)
+
+
+o We should consider eliminating the type annotation in cases where it is
+ essentially obvious from the instruction type, e.g., in br, it is obvious
+ that the first arg. should be a bool and the other args should be labels:
+
+ br bool <cond>, label <iftrue>, label <iffalse>
+
+ I think your point was that making all types explicit improves clarity
+ and readability. I agree to some extent, but it also comes at the cost
+ of verbosity. And when the types are obvious from people's experience
+ (e.g., in the br instruction), it doesn't seem to help as much.
+
+
+o On reflection, I really like your idea of having the two different switch
+ types (even though they encode implementation techniques rather than
+ semantics). It should simplify building the CFG and my guess is it could
+ enable some significant optimizations, though we should think about which.
+
+
+o In the lookup-indirect form of the switch, is there a reason not to make
+ the val-type uint? Most HLL switch statements (including Java and C++)
+ require that anyway. And it would also make the val-type uniform
+ in the two forms of the switch.
+
+ I did see the switch-on-bool examples and, while cute, we can just use
+ the branch instructions in that particular case.
+
+
+o I agree with your comment that we don't need 'neg'.
+
+
+o There's a trade-off with the cast instruction:
+ + it avoids having to define all the upcasts and downcasts that are
+ valid for the operands of each instruction (you probably have thought
+ of other benefits also)
+ - it could make the bytecode significantly larger because there could
+ be a lot of cast operations
+
+
+o Making the second arg. to 'shl' a ubyte seems good enough to me.
+ 255 positions seems adequate for several generations of machines
+ and is more compact than uint.
+
+
+o I still have some major concerns about including malloc and free in the
+ language (either as builtin functions or instructions). LLVM must be
+ able to represent code from many different languages. Languages such as
+ C, C++, Java and Fortran 90 would not be able to use our malloc anyway
+ because each of them will want to provide a library implementation of it.
+
+ This gets even worse when code from different languages is linked
+ into a single executable (which is fairly common in large apps).
+ Having a single malloc would just not suffice, and instead would simply
+ complicate the picture further because it adds an extra variant in
+ addition to the one each language provides.
+
+ Instead, providing a default library version of malloc and free
+ (and perhaps a malloc_gc with garbage collection instead of free)
+ would make a good implementation available to anyone who wants it.
+
+ I don't recall all your arguments in favor so let's discuss this again,
+ and soon.
+
+
+o 'alloca' on the other hand sounds like a good idea, and the
+ implementation seems fairly language-independent so it doesn't have the
+ problems with malloc listed above.
+
+
+o About indirect call:
+ Your option #2 sounded good to me. I'm not sure I understand your
+ concern about an explicit 'icall' instruction?
+
+
+o A pair of important synchronization instr'ns to think about:
+ load-linked
+ store-conditional
+
+
+o Other classes of instructions that are valuable for pipeline performance:
+ conditional-move
+ predicated instructions
+
+
+o I believe tail calls are relatively easy to identify; do you know why
+ .NET has a tailcall instruction?
+
+
+o I agree that we need a static data space. Otherwise, emulating global
+ data gets unnecessarily complex.
+
+
+o About explicit parallelism:
+
+ We once talked about adding a symbolic thread-id field to each
+ instruction. (It could be optional so single-threaded codes are
+ not penalized.) This could map well to multi-threaded architectures
+ while providing easy ILP for single-threaded ones. But it is probably
+ too radical an idea to include in a base version of LLVM. Instead, it
+ could be a great topic for a separate study.
+
+ What is the semantics of the IA64 stop bit?
+
+
+
+
+o And finally, another thought about the syntax for arrays :-)
+
+ Although this syntax:
+ array <dimension-list> of <type>
+ is verbose, it will be used only in the human-readable assembly code so
+ size should not matter. I think we should consider it because I find it
+ to be the clearest syntax. It could even make arrays of function
+ pointers somewhat readable.
+
diff --git a/docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt b/docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt
new file mode 100644
index 0000000..5c87330
--- /dev/null
+++ b/docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt
@@ -0,0 +1,245 @@
+From: Chris Lattner <sabre@nondot.org>
+To: "Vikram S. Adve" <vadve@cs.uiuc.edu>
+Subject: Re: LLVM Feedback
+
+I've included your feedback in the /home/vadve/lattner/llvm/docs directory
+so that it will live in CVS eventually with the rest of LLVM. I've
+significantly updated the documentation to reflect the changes you
+suggested, as specified below:
+
+> We should consider eliminating the type annotation in cases where it is
+> essentially obvious from the instruction type:
+> br bool <cond>, label <iftrue>, label <iffalse>
+> I think your point was that making all types explicit improves clarity
+> and readability. I agree to some extent, but it also comes at the
+> cost of verbosity. And when the types are obvious from people's
+> experience (e.g., in the br instruction), it doesn't seem to help as
+> much.
+
+Very true. We should discuss this more, but my reasoning is more of a
+consistency argument. There are VERY few instructions that can have all
+of the types eliminated, and doing so when available unnecessarily makes
+the language more difficult to handle. Especially when you see 'int
+%this' and 'bool %that' all over the place, I think it would be
+disorienting to see:
+
+ br %predicate, %iftrue, %iffalse
+
+for branches. Even just typing that once gives me the creeps. ;) Like I
+said, we should probably discuss this further in person...
+
+> On reflection, I really like your idea of having the two different
+> switch types (even though they encode implementation techniques rather
+> than semantics). It should simplify building the CFG and my guess is it
+> could enable some significant optimizations, though we should think
+> about which.
+
+Great. I added a note to the switch section commenting on how the VM
+should just use the instruction type as a hint, and that the
+implementation may choose alternate representations (such as predicated
+branches).
+
+> In the lookup-indirect form of the switch, is there a reason not to
+> make the val-type uint?
+
+No. This was something I was debating for a while, and didn't really feel
+strongly about either way. It is common to switch on other types in HLL's
+(for example signed int's are particularly common), but in this case, all
+that will be added is an additional 'cast' instruction. I removed that
+from the spec.
+
+> I agree with your comment that we don't need 'neg'
+
+Removed.
+
+> There's a trade-off with the cast instruction:
+> + it avoids having to define all the upcasts and downcasts that are
+> valid for the operands of each instruction (you probably have
+> thought of other benefits also)
+> - it could make the bytecode significantly larger because there could
+> be a lot of cast operations
+
+ + You NEED casts to represent things like:
+ void foo(float);
+ ...
+ int x;
+ ...
+ foo(x);
+ in a language like C. Even in a Java like language, you need upcasts
+ and some way to implement dynamic downcasts.
+ + Not all forms of instructions take every type (for example you can't
+ shift by a floating point number of bits), thus SOME programs will need
+ implicit casts.
+
+To be efficient and to avoid your '-' point above, we just have to be
+careful to specify that the instructions shall operate on all common
+types, therefore casting should be relatively uncommon. For example all
+of the arithmetic operations work on almost all data types.
+
+> Making the second arg. to 'shl' a ubyte seems good enough to me.
+> 255 positions seems adequate for several generations of machines
+
+Okay, that comment is removed.
+
+> and is more compact than uint.
+
+No, it isn't. Remember that the bytecode encoding saves value slots into
+the bytecode instructions themselves, not constant values. This is
+another case where we may introduce more cast instructions (but we will
+also reduce the number of opcode variants that must be supported by a
+virtual machine). Because most shifts are by constant values, I don't
+think that we'll have to cast many shifts. :)
+
+> I still have some major concerns about including malloc and free in the
+> language (either as builtin functions or instructions).
+
+Agreed. How about this proposal:
+
+malloc/free are either built in functions or actual opcodes. They provide
+all of the type safety that the document would indicate, blah blah
+blah. :)
+
+Now, because of all of the excellent points that you raised, an
+implementation may want to override the default malloc/free behavior of
+the program. To do this, they simply implement a "malloc" and
+"free" function. The virtual machine will then be defined to use the user
+defined malloc/free function (which return/take void*'s, not type'd
+pointers like the builtin function would) if one is available, otherwise
+fall back on a system malloc/free.
+
+Does this sound like a good compromise? It would give us all of the
+typesafety/elegance in the language while still allowing the user to do
+all the cool stuff they want to...
+
+> 'alloca' on the other hand sounds like a good idea, and the
+> implementation seems fairly language-independent so it doesn't have the
+> problems with malloc listed above.
+
+Okay, once we get the above stuff figured out, I'll put it all in the
+spec.
+
+> About indirect call:
+> Your option #2 sounded good to me. I'm not sure I understand your
+> concern about an explicit 'icall' instruction?
+
+I worry too much. :) The other alternative has been removed. 'icall' is
+now up in the instruction list next to 'call'.
+
+> I believe tail calls are relatively easy to identify; do you know why
+> .NET has a tailcall instruction?
+
+Although I am just guessing, I believe it probably has to do with the fact
+that they want languages like Haskell and lisp to be efficiently runnable
+on their VM. Of course this means that the VM MUST implement tail calls
+'correctly', or else life will suck. :) I would put this into a future
+feature bin, because it could be pretty handy...
+
+> A pair of important synchronization instr'ns to think about:
+> load-linked
+> store-conditional
+
+What is 'load-linked'? I think that (at least for now) I should add these
+to the 'possible extensions' section, because they are not immediately
+needed...
+
+> Other classes of instructions that are valuable for pipeline
+> performance:
+> conditional-move
+> predicated instructions
+
+Conditional move is effectively a special case of a predicated
+instruction... and I think that all predicated instructions can possibly
+be implemented later in LLVM. It would significantly change things, and
+it doesn't seem to be very necessary right now. It would seem to
+complicate flow control analysis a LOT in the virtual machine. I would
+tend to prefer that a predicated architecture like IA64 convert from a
+"basic block" representation to a predicated rep as part of its dynamic
+compilation phase. Also, if a basic block contains ONLY a move, then
+that can be trivially translated into a conditional move...
+
+> I agree that we need a static data space. Otherwise, emulating global
+> data gets unnecessarily complex.
+
+Definitely. Also a later item though. :)
+
+> We once talked about adding a symbolic thread-id field to each
+> ..
+> Instead, it could be a great topic for a separate study.
+
+Agreed. :)
+
+> What is the semantics of the IA64 stop bit?
+
+Basically, the IA64 writes instructions like this:
+mov ...
+add ...
+sub ...
+op xxx
+op xxx
+;;
+mov ...
+add ...
+sub ...
+op xxx
+op xxx
+;;
+
+Where the ;; delimits a group of instructions with no dependencies between
+them, which can all be executed concurrently (to the limits of the
+available functional units). The ;; gets translated into a bit set in one
+of the opcodes.
+
+The advantage of this representation is that you don't have to do some
+kind of 'thread id scheduling' pass by having to specify ahead of time how
+many threads to use, and the representation doesn't have a per instruction
+overhead...
+
+> And finally, another thought about the syntax for arrays :-)
+> Although this syntax:
+> array <dimension-list> of <type>
+> is verbose, it will be used only in the human-readable assembly code so
+> size should not matter. I think we should consider it because I find it
+> to be the clearest syntax. It could even make arrays of function
+> pointers somewhat readable.
+
+My only comment will be to give you an example of why this is a bad
+idea. :)
+
+Here is an example of using the switch statement (with my recommended
+syntax):
+
+switch uint %val, label %otherwise,
+ [%3 x {uint, label}] [ { uint %57, label %l1 },
+ { uint %20, label %l2 },
+ { uint %14, label %l3 } ]
+
+Here it is with the syntax you are proposing:
+
+switch uint %val, label %otherwise,
+ array %3 of {uint, label}
+ array of {uint, label}
+ { uint %57, label %l1 },
+ { uint %20, label %l2 },
+ { uint %14, label %l3 }
+
+Which is ambiguous and very verbose. It would be possible to specify
+constants with [] brackets as in my syntax, which would look like this:
+
+switch uint %val, label %otherwise,
+ array %3 of {uint, label} [ { uint %57, label %l1 },
+ { uint %20, label %l2 },
+ { uint %14, label %l3 } ]
+
+But then the syntax is inconsistent between type definition and constant
+definition (why do []'s enclose the constants but not the types??).
+
+Anyways, I'm sure that there is much debate still to be had over
+this... :)
+
+-Chris
+
+http://www.nondot.org/~sabre/os/
+http://www.nondot.org/MagicStats/
+http://korbit.sourceforge.net/
+
+
diff --git a/docs/HistoricalNotes/2001-02-13-Reference-Memory.txt b/docs/HistoricalNotes/2001-02-13-Reference-Memory.txt
new file mode 100644
index 0000000..2c7534d
--- /dev/null
+++ b/docs/HistoricalNotes/2001-02-13-Reference-Memory.txt
@@ -0,0 +1,39 @@
+Date: Tue, 13 Feb 2001 13:29:52 -0600 (CST)
+From: Chris Lattner <sabre@nondot.org>
+To: Vikram S. Adve <vadve@cs.uiuc.edu>
+Subject: LLVM Concerns...
+
+
+I've updated the documentation to include load store and allocation
+instructions (please take a look and let me know if I'm on the right
+track):
+
+file:/home/vadve/lattner/llvm/docs/LangRef.html#memoryops
+
+I have a couple of concerns I would like to bring up:
+
+1. Reference types
+ Right now, I've spec'd out the language to have a pointer type, which
+ works fine for lots of stuff... except that Java really has
+ references: constrained pointers that cannot be manipulated: added and
+ subtracted, moved, etc... Do we want to have a type like this? It
+ could be very nice for analysis (pointer always points to the start of
+ an object, etc...) and more closely matches Java semantics. The
+ pointer type would be kept for C++ like semantics. Through analysis,
+ C++ pointers could be promoted to references in the LLVM
+ representation.
+
+2. Our "implicit" memory references in assembly language:
+ After thinking about it, this model has two problems:
+ A. If you do pointer analysis and realize that two stores are
+ independent and can share the same memory source object, there is
+ no way to represent this in either the bytecode or assembly.
+ B. When parsing assembly/bytecode, we effectively have to do a full
+ SSA generation/PHI node insertion pass to build the dependencies
+ when we don't want the "pinned" representation. This is not
+ cool.
+ I'm tempted to make memory references explicit in both the assembly and
+ bytecode to get around this... what do you think?
+
+-Chris
+
diff --git a/docs/HistoricalNotes/2001-02-13-Reference-MemoryResponse.txt b/docs/HistoricalNotes/2001-02-13-Reference-MemoryResponse.txt
new file mode 100644
index 0000000..5053433
--- /dev/null
+++ b/docs/HistoricalNotes/2001-02-13-Reference-MemoryResponse.txt
@@ -0,0 +1,47 @@
+Date: Tue, 13 Feb 2001 18:25:42 -0600
+From: Vikram S. Adve <vadve@cs.uiuc.edu>
+To: Chris Lattner <sabre@nondot.org>
+Subject: RE: LLVM Concerns...
+
+> 1. Reference types
+> Right now, I've spec'd out the language to have a pointer type, which
+> works fine for lots of stuff... except that Java really has
+> references: constrained pointers that cannot be manipulated: added and
+> subtracted, moved, etc... Do we want to have a type like this? It
+> could be very nice for analysis (pointer always points to the start of
+> an object, etc...) and more closely matches Java semantics. The
+> pointer type would be kept for C++ like semantics. Through analysis,
+> C++ pointers could be promoted to references in the LLVM
+> representation.
+
+
+You're right, having references would be useful. Even for C++ the *static*
+compiler could generate references instead of pointers with fairly
+straightforward analysis. Let's include a reference type for now. But I'm
+also really concerned that LLVM is becoming big and complex and (perhaps)
+too high-level. After we get some initial performance results, we may have
+a clearer idea of what our goals should be and we should revisit this
+question then.
+
+> 2. Our "implicit" memory references in assembly language:
+> After thinking about it, this model has two problems:
+> A. If you do pointer analysis and realize that two stores are
+> independent and can share the same memory source object,
+
+not sure what you meant by "share the same memory source object"
+
+> there is
+> no way to represent this in either the bytecode or assembly.
+> B. When parsing assembly/bytecode, we effectively have to do a full
+> SSA generation/PHI node insertion pass to build the dependencies
+> when we don't want the "pinned" representation. This is not
+> cool.
+
+I understand the concern. But again, let's focus on the performance first
+and then look at the language design issues. E.g., it would be good to know
+how big the bytecode files are before expanding them further. I am pretty
+keen to explore the implications of LLVM for mobile devices. Both bytecode
+size and power consumption are important to consider there.
+
+--Vikram
+
diff --git a/docs/HistoricalNotes/2001-04-16-DynamicCompilation.txt b/docs/HistoricalNotes/2001-04-16-DynamicCompilation.txt
new file mode 100644
index 0000000..5f7843a
--- /dev/null
+++ b/docs/HistoricalNotes/2001-04-16-DynamicCompilation.txt
@@ -0,0 +1,49 @@
+By Chris:
+
+LLVM has been designed with two primary goals in mind. First we strive to
+enable the best possible division of labor between static and dynamic
+compilers, and second, we need a flexible and powerful interface
+between these two complementary stages of compilation. We feel that
+providing a solution to these two goals will yield an excellent solution
+to the performance problem faced by modern architectures and programming
+languages.
+
+A key insight into current compiler and runtime systems is that a
+compiler may fall anywhere in a "continuum of compilation" to do its
+job. On one side, scripting languages statically compile nothing and
+dynamically compile (or equivalently, interpret) everything. On the far
+other side, traditional static compilers process everything statically and
+nothing dynamically. These approaches have typically been seen as a
+tradeoff between performance and portability. On a deeper level, however,
+there are two reasons that optimal system performance may be obtained by a
+system somewhere in between these two extremes: Dynamic application
+behavior and social constraints.
+
+From a technical perspective, pure static compilation cannot ever give
+optimal performance in all cases, because applications have varying dynamic
+behavior that the static compiler cannot take into consideration. Even
+compilers that support profile guided optimization generate poor code in
+the real world, because using such optimization tunes that application
+to one particular usage pattern, whereas real programs (as opposed to
+benchmarks) often have several different usage patterns.
+
+On a social level, static compilation is a very shortsighted solution to
+the performance problem. Instruction set architectures (ISAs) continuously
+evolve, and each implementation of an ISA (a processor) must choose a set
+of tradeoffs that make sense in the market context that it is designed for.
+With every new processor introduced, the vendor faces two fundamental
+problems: First, there is a lag time between when a processor is introduced
+to when compilers generate quality code for the architecture. Secondly,
+even when compilers catch up to the new architecture there is often a large
+body of legacy code that was compiled for previous generations and will
+not or can not be upgraded. Thus a large percentage of code running on a
+processor may be compiled quite sub-optimally for the current
+characteristics of the dynamic execution environment.
+
+For these reasons, LLVM has been designed from the beginning as a long-term
+solution to these problems. Its design allows the large body of platform
+independent, static, program optimizations currently in compilers to be
+reused unchanged in their current form. It also provides important static
+type information to enable powerful dynamic and link time optimizations
+to be performed quickly and efficiently. This combination enables an
+increase in effective system performance for real world environments.
diff --git a/docs/HistoricalNotes/2001-05-18-ExceptionHandling.txt b/docs/HistoricalNotes/2001-05-18-ExceptionHandling.txt
new file mode 100644
index 0000000..b546301
--- /dev/null
+++ b/docs/HistoricalNotes/2001-05-18-ExceptionHandling.txt
@@ -0,0 +1,202 @@
+Meeting notes: Implementation idea: Exception Handling in C++/Java
+
+The 5/18/01 meeting discussed ideas for implementing exceptions in LLVM.
+We decided that the best solution requires a set of library calls provided by
+the VM, as well as an extension to the LLVM function invocation syntax.
+
+The LLVM function invocation instruction previously looked like this (ignoring
+types):
+
+ call func(arg1, arg2, arg3)
+
+The extension discussed today adds an optional "with" clause that
+associates a label with the call site. The new syntax looks like this:
+
+ call func(arg1, arg2, arg3) with funcCleanup
+
+This funcCleanup label always stays tightly associated with the call site (being
+encoded directly into the call opcode itself), and should be used whenever
+there is cleanup work that needs to be done for the current function if
+an exception is thrown by func (or if we are in a try block).
+
+To support this, the VM/Runtime provide the following simple library
+functions (all syntax in this document is very abstract):
+
+typedef struct { something } %frame;
+ The VM must export a "frame type", that is an opaque structure used to
+ implement different types of stack walking that may be used by various
+ language runtime libraries. We imagine that it would be typical to
+ represent a frame with a PC and frame pointer pair, although that is not
+ required.
+
+%frame getStackCurrentFrame();
+ Get a frame object for the current function. Note that if the current
+ function was inlined into its caller, the "current" frame will belong to
+ the "caller".
+
+bool isFirstFrame(%frame f);
+ Returns true if the specified frame is the top level (first activated) frame
+ for this thread. For the main thread, this corresponds to the main()
+ function, for a spawned thread, it corresponds to the thread function.
+
+%frame getNextFrame(%frame f);
+ Return the previous frame on the stack. This function is undefined if f
+ satisfies the predicate isFirstFrame(f).
+
+Label *getFrameLabel(%frame f);
+ If a label was associated with f (as discussed below), this function returns
+ it. Otherwise, it returns a null pointer.
+
+doNonLocalBranch(Label *L);
+ At this point, it is not clear whether this should be a function or
+ intrinsic. It should probably be an intrinsic in LLVM, but we'll deal with
+ this issue later.
+
+
+Here is a motivating example that illustrates how these facilities could be
+used to implement the C++ exception model:
+
+void TestFunction(...) {
+ A a; B b;
+ foo(); // Any function call may throw
+ bar();
+ C c;
+
+ try {
+ D d;
+ baz();
+ } catch (int) {
+ ...int Stuff...
+ // execution continues after the try block: the exception is consumed
+ } catch (double) {
+ ...double stuff...
+ throw; // Exception is propagated
+ }
+}
+
+This function would compile to approximately the following code (heavy
+pseudo code follows):
+
+Func:
+ %a = alloca A
+ A::A(%a) // These ctors & dtors could throw, but we ignore this
+ %b = alloca B // minor detail for this example
+ B::B(%b)
+
+ call foo() with fooCleanup // An exception in foo is propagated to fooCleanup
+ call bar() with barCleanup // An exception in bar is propagated to barCleanup
+
+ %c = alloca C
+ C::C(c)
+ %d = alloca D
+ D::D(d)
+ call baz() with bazCleanup // An exception in baz is propagated to bazCleanup
+ d->~D();
+EndTry: // This label corresponds to the end of the try block
+ c->~C() // These could also throw, these are also ignored
+ b->~B()
+ a->~A()
+ return
+
+Note that this is a very straight forward and literal translation: exactly
+what we want for zero cost (when unused) exception handling. Especially on
+platforms with many registers (ie, the IA64) setjmp/longjmp style exception
+handling is *very* impractical. Also, the "with" clauses describe the
+control flow paths explicitly so that analysis is not adversely affected.
+
+The foo/barCleanup labels are implemented as:
+
+TryCleanup: // Executed if an exception escapes the try block
+ c->~C()
+barCleanup: // Executed if an exception escapes from bar()
+ // fall through
+fooCleanup: // Executed if an exception escapes from foo()
+ b->~B()
+ a->~A()
+ Exception *E = getThreadLocalException()
+ call throw(E) // Implemented by the C++ runtime, described below
+
+Which does the work one would expect. getThreadLocalException is a function
+implemented by the C++ support library. It returns the current exception
+object for the current thread. Note that we do not attempt to recycle the
+shutdown code from before, because performance of the mainline code is
+critically important. Also, obviously fooCleanup and barCleanup may be
+merged and one of them eliminated. This just shows how the code generator
+would most likely emit code.
+
+The bazCleanup label is more interesting. Because the exception may be caught
+by the try block, we must dispatch to its handler... but it does not exist
+on the call stack (it does not have a VM Call->Label mapping installed), so
+we must dispatch statically with a goto. The bazCleanup thus appears as:
+
+bazCleanup:
+ d->~D(); // destruct D as it goes out of scope when entering catch clauses
+ goto TryHandler
+
+In general, TryHandler is not the same as bazCleanup, because multiple
+function calls could be made from the try block. In this case, trivial
+optimization could merge the two basic blocks. TryHandler is the code
+that actually determines the type of exception, based on the Exception object
+itself. For this discussion, assume that the exception object contains *at
+least*:
+
+1. A pointer to the RTTI info for the contained object
+2. A pointer to the dtor for the contained object
+3. The contained object itself
+
+Note that it is necessary to maintain #1 & #2 in the exception object itself
+because objects without virtual function tables may be thrown (as in this
+example). Assuming this, TryHandler would look something like this:
+
+TryHandler:
+ Exception *E = getThreadLocalException();
+ switch (E->RTTIType) {
+ case IntRTTIInfo:
+ ...int Stuff... // The action to perform from the catch block
+ break;
+ case DoubleRTTIInfo:
+ ...double Stuff... // The action to perform from the catch block
+ goto TryCleanup // This catch block rethrows the exception
+ break; // Redundant, eliminated by the optimizer
+ default:
+ goto TryCleanup // Exception not caught, rethrow
+ }
+
+ // Exception was consumed
+ if (E->dtor)
+ E->dtor(E->object) // Invoke the dtor on the object if it exists
+ goto EndTry // Continue mainline code...
+
+And that is all there is to it.
+
+The throw(E) function would then be implemented like this (which may be
+inlined into the caller through standard optimization):
+
+function throw(Exception *E) {
+ // Get the start of the stack trace...
+ %frame %f = call getStackCurrentFrame()
+
+ // Get the label information that corresponds to it
+ label * %L = call getFrameLabel(%f)
+ while (%L == 0 && !isFirstFrame(%f)) {
+ // Loop until a cleanup handler is found
+ %f = call getNextFrame(%f)
+ %L = call getFrameLabel(%f)
+ }
+
+ if (%L != 0) {
+ call setThreadLocalException(E) // Allow handlers access to this...
+ call doNonLocalBranch(%L)
+ }
+ // No handler found!
+ call BlowUp() // Ends up calling the terminate() method in use
+}
+
+That's a brief rundown of how C++ exception handling could be implemented in
+llvm. Java would be very similar, except it only uses destructors to unlock
+synchronized blocks, not to destroy data. Also, it uses two stack walks: a
+nondestructive walk that builds a stack trace, then a destructive walk that
+unwinds the stack as shown here.
+
+It would be trivial to get exception interoperability between C++ and Java.
+
diff --git a/docs/HistoricalNotes/2001-05-19-ExceptionResponse.txt b/docs/HistoricalNotes/2001-05-19-ExceptionResponse.txt
new file mode 100644
index 0000000..3375365
--- /dev/null
+++ b/docs/HistoricalNotes/2001-05-19-ExceptionResponse.txt
@@ -0,0 +1,45 @@
+Date: Sat, 19 May 2001 19:09:13 -0500 (CDT)
+From: Chris Lattner <sabre@nondot.org>
+To: Vikram S. Adve <vadve@cs.uiuc.edu>
+Subject: RE: Meeting writeup
+
+> I read it through and it looks great!
+
+Thanks!
+
+> The finally clause in Java may need more thought. The code for this clause
+> is like a subroutine because it needs to be entered from many points (end of
+> try block and beginning of each catch block), and then needs to *return to
+> the place from where the code was entered*. That's why JVM has the
+> jsr/jsr_w instruction.
+
+Hrm... I guess that is an implementation decision. It can either be
+modelled as a subroutine (as java bytecodes do), which is really
+gross... or it can be modelled as code duplication (emitted once inline,
+then once in the exception path). Because this could, at worst,
+slightly less than double the amount of code in a function (it is
+bounded) I don't think this is a big deal. One of the really nice things
+about the LLVM representation is that it still allows for runtime code
+generation for exception paths (exception paths are not compiled until
+needed). Obviously a static compiler couldn't do this though. :)
+
+In this case, only one copy of the code would be compiled... until the
+other one is needed on demand. Also this strategy fits with the "zero
+cost" exception model... the standard case is not burdened with extra
+branches or "call"s.
+
+> I suppose you could save the return address in a particular register
+> (specific to this finally block), jump to the finally block, and then at the
+> end of the finally block, jump back indirectly through this register. It
+> will complicate building the CFG but I suppose that can be handled. It is
+> also unsafe in terms of checking where control returns (which is I suppose
+> why the JVM doesn't use this).
+
+I think that a code duplication method would be cleaner, and would avoid
+the caveats that you mention. Also, it does not slow down the normal case
+with an indirect branch...
+
+Like everything, we can probably defer a final decision until later. :)
+
+-Chris
+
diff --git a/docs/HistoricalNotes/2001-06-01-GCCOptimizations.txt b/docs/HistoricalNotes/2001-06-01-GCCOptimizations.txt
new file mode 100644
index 0000000..97af16a
--- /dev/null
+++ b/docs/HistoricalNotes/2001-06-01-GCCOptimizations.txt
@@ -0,0 +1,63 @@
+Date: Fri, 1 Jun 2001 16:38:17 -0500 (CDT)
+From: Chris Lattner <sabre@nondot.org>
+To: Vikram S. Adve <vadve@cs.uiuc.edu>
+Subject: Interesting: GCC passes
+
+
+Take a look at this document (which describes the order of optimizations
+that GCC performs):
+
+http://gcc.gnu.org/onlinedocs/gcc_17.html
+
+The rundown is that after RTL generation, the following happens:
+
+1 . [t] jump optimization (jumps to jumps, etc)
+2 . [t] Delete unreachable code
+3 . Compute live ranges for CSE
+4 . [t] Jump threading (jumps to jumps with identical or inverse conditions)
+5 . [t] CSE
+6 . *** Conversion to SSA
+7 . [t] SSA Based DCE
+8 . *** Conversion to LLVM
+9 . UnSSA
+10. GCSE
+11. LICM
+12. Strength Reduction
+13. Loop unrolling
+14. [t] CSE
+15. [t] DCE
+16. Instruction combination, register movement, scheduling... etc.
+
+I've marked optimizations with a [t] to indicate things that I believe to
+be relatively trivial to implement in LLVM itself. The time consuming
+things to reimplement would be SSA based PRE, Strength reduction & loop
+unrolling... these would be the major things we would miss out on if we
+did LLVM creation from tree code [inlining and other high level
+optimizations are done on the tree representation].
+
+Given the lack of "strong" optimizations that would take a long time to
+reimplement, I am leaning a bit more towards creating LLVM from the tree
+code. Especially given that SGI has GPL'd their compiler, including many
+SSA based optimizations that could be adapted (besides the fact that their
+code looks MUCH nicer than GCC :)
+
+Even if we choose to do LLVM code emission from RTL, we will almost
+certainly want to move LLVM emission from step 8 down until at least CSE
+has been rerun... which causes me to wonder if the SSA generation code
+will still work (due to global variable dependencies and stuff). I assume
+that it can be made to work, but might be a little more involved than we
+would like.
+
+I'm continuing to look at the Tree -> RTL code. It is pretty gross
+because they do some of the translation a statement at a time, and some
+of it a function at a time... I'm not quite clear why and how the
+distinction is drawn, but it does not appear that there is a wonderful
+place to attach extra info.
+
+Anyways, I'm proceeding with the RTL -> LLVM conversion phase for now. We
+can talk about this more on Monday.
+
+Wouldn't it be nice if there were an obvious decision to be made? :)
+
+-Chris
+
diff --git a/docs/HistoricalNotes/2001-06-01-GCCOptimizations2.txt b/docs/HistoricalNotes/2001-06-01-GCCOptimizations2.txt
new file mode 100644
index 0000000..6c9e097
--- /dev/null
+++ b/docs/HistoricalNotes/2001-06-01-GCCOptimizations2.txt
@@ -0,0 +1,71 @@
+Date: Fri, 1 Jun 2001 17:08:44 -0500 (CDT)
+From: Chris Lattner <sabre@nondot.org>
+To: Vikram S. Adve <vadve@cs.uiuc.edu>
+Subject: RE: Interesting: GCC passes
+
+> That is very interesting. I agree that some of these could be done on LLVM
+> at link-time, but it is the extra time required that concerns me. Link-time
+> optimization is severely time-constrained.
+
+If we were to reimplement any of these optimizations, I assume that we
+could do them a translation unit at a time, just as GCC does now. This
+would lead to a pipeline like this:
+
+Static optimizations, xlation unit at a time:
+.c --GCC--> .llvm --llvmopt--> .llvm
+
+Link time optimizations:
+.llvm --llvm-ld--> .llvm --llvm-link-opt--> .llvm
+
+Of course, many optimizations could be shared between llvmopt and
+llvm-link-opt, but they wouldn't need to be shared... Thus compile time
+could be faster, because we are using a "smarter" IR (SSA based).
+
+> BTW, about SGI, "borrowing" SSA-based optimizations from one compiler and
+> putting it into another is not necessarily easier than re-doing it.
+> Optimization code is usually heavily tied in to the specific IR they use.
+
+Understood. The only reason that I brought this up is because SGI's IR is
+more similar to LLVM than it is different in many respects (SSA based,
+relatively low level, etc), and could be easily adapted. Also their
+optimizations are written in C++ and are actually somewhat
+structured... of course it would be no walk in the park, but it would be
+much less time consuming to adapt, say, SSA-PRE than to rewrite it.
+
+> But your larger point is valid that adding SSA based optimizations is
+> feasible and should be fun. (Again, link time cost is the issue.)
+
+Assuming linktime cost wasn't an issue, the question is:
+Does using GCC's backend buy us anything?
+
+> It also occurs to me that GCC is probably doing quite a bit of back-end
+> optimization (step 16 in your list). Do you have a breakdown of that?
+
+Not really. The irritating part of GCC is that it mixes it all up and
+doesn't have a clean separation of concerns. A lot of the "back end
+optimization" happens right along with other data optimizations (ie, CSE
+of machine specific things).
+
+As far as REAL back end optimizations go, it looks something like this:
+
+1. Instruction combination: try to make CISCy instructions, if available
+2. Register movement: try to get registers in the right places for the
+architecture to avoid register to register moves. For example, try to get
+the first argument of a function to naturally land in %o0 for sparc.
+3. Instruction scheduling: 'nuff said :)
+4. Register class preferencing: ??
+5. Local register allocation
+6. global register allocation
+7. Spilling
+8. Local regalloc
+9. Jump optimization
+10. Delay slot scheduling
+11. Branch shorting for CISC machines
+12. Instruction selection & peephole optimization
+13. Debug info output
+
+But none of this would be usable for LLVM anyways, unless we were using
+GCC as a static compiler.
+
+-Chris
+
diff --git a/docs/HistoricalNotes/2001-06-20-.NET-Differences.txt b/docs/HistoricalNotes/2001-06-20-.NET-Differences.txt
new file mode 100644
index 0000000..1bc2eae
--- /dev/null
+++ b/docs/HistoricalNotes/2001-06-20-.NET-Differences.txt
@@ -0,0 +1,30 @@
+Date: Wed, 20 Jun 2001 12:32:22 -0500
+From: Vikram Adve <vadve@cs.uiuc.edu>
+To: Chris Lattner <lattner@cs.uiuc.edu>
+Subject: .NET vs. our VM
+
+One significant difference between .NET CLR and our VM is that the CLR
+includes full information about classes and inheritance. In fact, I just
+sat through the paper on adding templates to .NET CLR, and the speaker
+indicated that the goal seems to be to do simple static compilation (very
+little lowering or optimization). Also, the templates implementation in CLR
+"relies on dynamic class loading and JIT compilation".
+
+This is an important difference because I think there are some significant
+advantages to have a much lower level VM layer, and do significant static
+analysis and optimization.
+
+I also talked to the lead guy for KAI's C++ compiler (Arch Robison) and he
+said that SGI and other commercial compilers have included options to export
+their *IR* next to the object code (i.e., .il files) and use them for
+link-time code generation. In fact, he said that the .o file was nearly
+empty and was entirely generated from the .il at link-time. But he agreed
+that this limited the link-time interprocedural optimization to modules
+compiled by the same compiler, whereas our approach allows us to link and
+optimize modules from multiple different compilers. (Also, of course, they
+don't do anything for runtime optimization).
+
+All issues to bring up in Related Work.
+
+--Vikram
+
diff --git a/docs/HistoricalNotes/2001-07-06-LoweringIRForCodeGen.txt b/docs/HistoricalNotes/2001-07-06-LoweringIRForCodeGen.txt
new file mode 100644
index 0000000..3e10416
--- /dev/null
+++ b/docs/HistoricalNotes/2001-07-06-LoweringIRForCodeGen.txt
@@ -0,0 +1,31 @@
+Date: Fri, 6 Jul 2001 16:56:56 -0500
+From: Vikram S. Adve <vadve@cs.uiuc.edu>
+To: Chris Lattner <lattner@cs.uiuc.edu>
+Subject: lowering the IR
+
+BTW, I do think that we should consider lowering the IR as you said. I
+didn't get time to raise it today, but it comes up with the SPARC
+move-conditional instruction. I don't think we want to put that in the core
+VM -- it is a little too specialized. But without a corresponding
+conditional move instruction in the VM, it is pretty difficult to maintain a
+close mapping between VM and machine code. Other architectures may have
+other such instructions.
+
+What I was going to suggest was that for a particular processor, we define
+additional VM instructions that match some of the unusual opcodes on the
+processor but have VM semantics otherwise, i.e., all operands are in SSA
+form and typed. This means that we can re-generate core VM code from the
+more specialized code any time we want (so that portability is not lost).
+
+Typically, a static compiler like gcc would generate just the core VM, which
+is relatively portable. Anyone (an offline tool, the linker, etc., or even
+the static compiler itself if it chooses) can transform that into more
+specialized target-specific VM code for a particular architecture. If the
+linker does it, it can do it after all machine-independent optimizations.
+This would be the most convenient, but not necessary.
+
+The main benefit of lowering will be that we will be able to retain a close
+mapping between VM and machine code.
+
+--Vikram
+
diff --git a/docs/HistoricalNotes/2001-07-08-InstructionSelection.txt b/docs/HistoricalNotes/2001-07-08-InstructionSelection.txt
new file mode 100644
index 0000000..8cc75b8
--- /dev/null
+++ b/docs/HistoricalNotes/2001-07-08-InstructionSelection.txt
@@ -0,0 +1,51 @@
+Date: Sun, 8 Jul 2001 09:37:22 -0500
+From: Vikram S. Adve <vadve@cs.uiuc.edu>
+To: Ruchira Sasanka <sasanka@students.uiuc.edu>
+Cc: Chris Lattner <lattner@cs.uiuc.edu>
+Subject: machine instruction operands
+
+Ruchira,
+
+When generating machine instructions, I have to make several choices about
+operands. For cases where a register is required, there are 3 cases:
+
+1. The register is for a Value* that is already in the VM code.
+
+2. The register is for a value that is not in the VM code, usually because 2
+machine instructions get generated for a single VM instruction (and the
+register holds the result of the first m/c instruction and is used by the
+second m/c instruction).
+
+3. The register is a pre-determined machine register.
+
+E.g, for this VM instruction:
+ ptr = alloca type, numElements
+I have to generate 2 machine instructions:
+ reg = mul constant, numElements
+ ptr = add %sp, reg
+
+Each machine instruction is of class MachineInstr.
+It has a vector of operands. All register operands have type MO_REGISTER.
+The 3 types of register operands are marked using this enum:
+
+ enum VirtualRegisterType {
+ MO_VMVirtualReg, // virtual register for *value
+ MO_MInstrVirtualReg, // virtual register for result of *minstr
+ MO_MachineReg // pre-assigned machine register `regNum'
+ } vregType;
+
+Here's how this affects register allocation:
+
+1. MO_VMVirtualReg is the standard case: you just do the register
+allocation.
+
+2. MO_MInstrVirtualReg is the case where there is a hidden register being
+used. You should decide how you want to handle it, e.g., do you want to
+create a Value object during the preprocessing phase to make the value
+explicit (like for address register for the RETURN instruction).
+
+3. For case MO_MachineReg, you don't need to do anything, at least for
+SPARC. The only machine regs I am using so far are %g0 and %sp.
+
+--Vikram
+
diff --git a/docs/HistoricalNotes/2001-07-08-InstructionSelection2.txt b/docs/HistoricalNotes/2001-07-08-InstructionSelection2.txt
new file mode 100644
index 0000000..1ae006d
--- /dev/null
+++ b/docs/HistoricalNotes/2001-07-08-InstructionSelection2.txt
@@ -0,0 +1,25 @@
+Date: Sun, 8 Jul 2001 10:02:20 -0500
+From: Vikram S. Adve <vadve@cs.uiuc.edu>
+To: vadve@cs.uiuc.edu, Ruchira Sasanka <sasanka@students.uiuc.edu>
+Cc: Chris Lattner <lattner@cs.uiuc.edu>
+Subject: RE: machine instruction operands
+
+I got interrupted and forgot to explain the example. In that case:
+
+ reg will be the 3rd operand of MUL and it will be of type
+MO_MInstrVirtualReg. The field MachineInstr* minstr will point to the
+instruction that computes reg.
+
+ numElements will be an immediate constant, not a register.
+
+ %sp will be operand 1 of ADD and it will be of type MO_MachineReg. The
+field regNum identifies the register.
+
+ numElements will be operand 2 of ADD and it will be of type
+MO_VMVirtualReg. The field Value* value identifies the value.
+
+ ptr will be operand 3 of ADD and will also be %sp, i.e., of
+ type MO_MachineReg. regNum identifies the register.
+
+--Vikram
+
diff --git a/docs/HistoricalNotes/2001-09-18-OptimizeExceptions.txt b/docs/HistoricalNotes/2001-09-18-OptimizeExceptions.txt
new file mode 100644
index 0000000..9379081
--- /dev/null
+++ b/docs/HistoricalNotes/2001-09-18-OptimizeExceptions.txt
@@ -0,0 +1,56 @@
+Date: Tue, 18 Sep 2001 00:38:37 -0500 (CDT)
+From: Chris Lattner <sabre@nondot.org>
+To: Vikram S. Adve <vadve@cs.uiuc.edu>
+Subject: Idea for a simple, useful link time optimization
+
+
+In C++ programs, exceptions suck, and here's why:
+
+1. In virtually all function calls, you must assume that the function
+ throws an exception, unless it is defined as 'nothrow'. This means
+ that every function call has to have code to invoke dtors on objects
+ locally if one is thrown by the function. Most functions don't throw
+ exceptions, so this code is dead [with all the bad effects of dead
+ code, including icache pollution].
+2. Declaring a function nothrow causes catch blocks to be added to every
+ call that is not provably nothrow. This makes them very slow.
+3. Extra extraneous exception edges reduce the opportunity for code
+ motion.
+4. EH is typically implemented with large lookup tables. Ours is going to
+ be much smaller (than the "standard" way of doing it) to start with,
+ but eliminating it entirely would be nice. :)
+5. It is physically impossible to correctly put (accurate, correct)
+ exception specifications on generic, templated code. But it is trivial
+ to analyze instantiations of said code.
+6. Most large C++ programs throw few exceptions. Most well designed
+ programs only throw exceptions in specific planned portions of the
+ code.
+
+Given our _planned_ model of handling exceptions, all of this would be
+pretty trivial to eliminate through some pretty simplistic interprocedural
+analysis. The DCE factor alone could probably be pretty significant. The
+extra code motion opportunities could also be exploited though...
+
+Additionally, this optimization can be implemented in a straight forward
+conservative manner, allowing libraries to be optimized or individual
+files even (if there are leaf functions visible in the translation unit
+that are called).
+
+I think it's a reasonable optimization that hasn't really been addressed
+(because assembly is way too low level for this), and could have decent
+payoffs... without being an overly complex optimization.
+
+After I wrote all of that, I found this page that is talking about
+basically the same thing I just wrote, except that it is translation unit
+at a time, tree based approach:
+http://www.ocston.org/~jls/ehopt.html
+
+but is very useful from "expected gain" and references perspective. Note
+that their compiler is apparently unable to inline functions that use
+exceptions, so their numbers are pretty worthless... also our results
+would (hopefully) be better because it's interprocedural...
+
+What do you think?
+
+-Chris
+
diff --git a/docs/HistoricalNotes/2002-05-12-InstListChange.txt b/docs/HistoricalNotes/2002-05-12-InstListChange.txt
new file mode 100644
index 0000000..004edb0
--- /dev/null
+++ b/docs/HistoricalNotes/2002-05-12-InstListChange.txt
@@ -0,0 +1,55 @@
+Date: Sun, 12 May 2002 17:12:53 -0500 (CDT)
+From: Chris Lattner <sabre@nondot.org>
+To: "Vikram S. Adve" <vadve@cs.uiuc.edu>
+Subject: LLVM change
+
+There is a fairly fundamental change that I would like to make to the LLVM
+infrastructure, but I'd like to know if you see any drawbacks that I
+don't...
+
+Basically right now at the basic block level, each basic block contains an
+instruction list (returned by getInstList()) that is a ValueHolder of
+instructions. To iterate over instructions, we must actually iterate over
+the instlist, and access the instructions through the instlist.
+
+To add or remove an instruction from a basic block, we need to get an
+iterator to an instruction, which, given just an Instruction*, requires a
+linear search of the basic block the instruction is contained in... just
+to insert an instruction before another instruction, or to delete an
+instruction! This complicates algorithms that should be very simple (like
+simple constant propagation), because they aren't actually sparse anymore,
+they have to traverse basic blocks to remove constant propagated
+instructions.
+
+Additionally, adding or removing instructions to a basic block
+_invalidates all iterators_ pointing into that block, which is really
+irritating.
+
+To fix these problems (and others), I would like to make the ordering of
+the instructions be represented with a doubly linked list in the
+instructions themselves, instead of an external data structure. This is
+how many other representations do it, and frankly I can't remember why I
+originally implemented it the way I did.
+
+Long term, all of the code that depends on the nasty features in the
+instruction list (which can be found by grep'ing for getInstList()) will
+be changed to do nice local transformations. In the short term, I'll
+change the representation, but preserve the interface (including
+getInstList()) so that all of the code doesn't have to change.
+
+Iteration over the instructions in a basic block remains the simple:
+for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) ...
+
+But we will also support:
+for (Instruction *I = BB->front(); I; I = I->getNext()) ...
+
+After converting instructions over, I'll convert basic blocks and
+functions to have a similar interface.
+
+The only negative aspect of this change that I see is that it increases
+the amount of memory consumed by one pointer per instruction. Given the
+benefits, I think this is a very reasonable tradeoff.
+
+What do you think?
+
+-Chris
diff --git a/docs/HistoricalNotes/2002-06-25-MegaPatchInfo.txt b/docs/HistoricalNotes/2002-06-25-MegaPatchInfo.txt
new file mode 100644
index 0000000..2ca4611
--- /dev/null
+++ b/docs/HistoricalNotes/2002-06-25-MegaPatchInfo.txt
@@ -0,0 +1,72 @@
+Changes:
+* Change the casting code to be const correct. Now, doing this is invalid:
+ const Value *V = ...;
+ Instruction *I = dyn_cast<Instruction>(V);
+ instead, the second line should be:
+ const Instruction *I = dyn_cast<Instruction>(V);
+
+* Change the casting code to allow casting a reference value thus:
+ const Value &V = ...;
+ Instruction &I = cast<Instruction>(V);
+
+ dyn_cast does not work with references, because it must return a null pointer
+ on failure.
+
+* Fundamentally change how instructions and other values are represented.
+ Before, every llvm container was an instance of the ValueHolder template,
+ instantiated for each container type. This ValueHolder was effectively a
+ wrapper around a vector of pointers to the sub-objects.
+
+ Now, instead of having a vector of pointers to objects, the objects are
+ maintained in a doubly linked list of values (ie each Instruction now has
+ Next & Previous fields). The containers are now instances of ilist (intrusive
+ linked list class), which use the next and previous fields to chain them
+ together. The advantage of this implementation is that iterators can be
+ formed directly from pointers to the LLVM value, and invalidation is much
+ easier to handle.
+
+* As part of the above change, dereferencing an iterator (for example:
+ BasicBlock::iterator) now produces a reference to the underlying type (same
+ example: Instruction&) instead of a pointer to the underlying object. This
+ makes it much easier to write nested loops that iterate over things, changing
+ this:
+
+ for (Function::iterator BI = Func->begin(); BI != Func->end(); ++BI)
+ for (BasicBlock::iterator II = (*BI)->begin(); II != (*BI)->end(); ++II)
+ (*II)->dump();
+
+ into:
+
+ for (Function::iterator BI = Func->begin(); BI != Func->end(); ++BI)
+ for (BasicBlock::iterator II = BI->begin(); II != BI->end(); ++II)
+ II->dump();
+
+ which is much more natural and what users expect.
+
+* Simplification of #include's: Before, it was necessary for a .cpp file to
+ include every .h file that it used. Now things are batched a little bit more
+ to make it easier to use. Specifically, the include graph now includes these
+ edges:
+ Module.h -> Function.h, GlobalVariable.h
+ Function.h -> BasicBlock.h, Argument.h
+ BasicBlock.h -> Instruction.h
+
+ Which means that #including Function.h is usually sufficient for getting the
+ lower level #includes.
+
+* Printing out a Value* has now changed: Printing a Value* will soon print out
+ the address of the value instead of the contents of the Value. To print out
+ the contents, you must convert it to a reference with (for example)
+ 'cout << *I' instead of 'cout << I;'. This conversion is not yet complete,
+ but will be eventually. In the mean time, both forms print out the contents.
+
+* References are used much more throughout the code base. In general, if a
+ pointer is known to never be null, it is passed in as a reference instead of a
+ pointer. For example, the instruction visitor class uses references instead
+ of pointers, and Pass subclasses now all receive references to Values
+ instead of pointers, because they may never be null.
+
+* The Function class now has helper functions for accessing the Arguments list.
+ Instead of having to go through getArgumentList for simple things like
+ iterating over the arguments, now the a*() methods can be used to access them.
+
diff --git a/docs/HistoricalNotes/2003-01-23-CygwinNotes.txt b/docs/HistoricalNotes/2003-01-23-CygwinNotes.txt
new file mode 100644
index 0000000..fbe811d
--- /dev/null
+++ b/docs/HistoricalNotes/2003-01-23-CygwinNotes.txt
@@ -0,0 +1,28 @@
+Date: Mon, 20 Jan 2003 00:00:28 -0600
+From: Brian R. Gaeke <gaeke@uiuc.edu>
+Subject: windows vs. llvm
+
+If you're interested, here are some of the major problems compiling LLVM
+under Cygwin and/or Mingw.
+
+1. Cygwin doesn't have <inttypes.h> or <stdint.h>, so all the INT*_MAX
+ symbols and standard int*_t types are off in limbo somewhere. Mingw has
+ <stdint.h>, but Cygwin doesn't like it.
+
+2. Mingw doesn't have <dlfcn.h> (because Windows doesn't have it.)
+
+3. SA_SIGINFO and friends are not around; only signal() seems to work.
+
+4. Relink, aka ld -r, doesn't work (probably an ld bug); you need
+ DONT_BUILD_RELINKED. This breaks all the tools makefiles; you just need to
+ change them to have .a's.
+
+5. There isn't a <values.h>.
+
+6. There isn't a mallinfo() (or, at least, it's documented, but it doesn't seem
+ to link).
+
+7. The version of Bison that cygwin (and newer Linux versions) comes with
+ does not like = signs in rules. Burg's gram.yc source file uses them. I think
+ you can just take them out.
+
diff --git a/docs/HistoricalNotes/2003-06-25-Reoptimizer1.txt b/docs/HistoricalNotes/2003-06-25-Reoptimizer1.txt
new file mode 100644
index 0000000..a745784
--- /dev/null
+++ b/docs/HistoricalNotes/2003-06-25-Reoptimizer1.txt
@@ -0,0 +1,137 @@
+Wed Jun 25 15:13:51 CDT 2003
+
+First-level instrumentation
+---------------------------
+
+We use opt to do Bytecode-to-bytecode instrumentation. Look at
+back-edges and insert llvm_first_trigger() function call which takes
+no arguments and no return value. This instrumentation is designed to
+be easy to remove, for instance by writing a NOP over the function
+call instruction.
+
+Keep count of every call to llvm_first_trigger(), and maintain
+counters in a map indexed by return address. If the trigger count
+exceeds a threshold, we identify a hot loop and perform second-level
+instrumentation on the hot loop region (the instructions between the
+target of the back-edge and the branch that causes the back-edge). We
+do not move code across basic-block boundaries.
+
+
+Second-level instrumentation
+----------------------------
+
+We remove the first-level instrumentation by overwriting the CALL to
+llvm_first_trigger() with a NOP.
+
+The reoptimizer maintains a map between machine-code basic blocks and
+LLVM BasicBlock*s. We only keep track of paths that start at the
+first machine-code basic block of the hot loop region.
+
+How do we keep track of which edges to instrument, and which edges are
+exits from the hot region? 3 step process.
+
+1) Do a DFS from the first machine-code basic block of the hot loop
+region and mark reachable edges.
+
+2) Do a DFS from the last machine-code basic block of the hot loop
+region IGNORING back edges, and mark the edges which are reachable in
+1) and also in 2) (i.e., must be reachable from both the start BB and
+the end BB of the hot region).
+
+3) Mark BBs which end in edges that exit the hot region; we need to
+instrument these differently.
+
+Assume that there is 1 free register. On SPARC we use %g1, which LLC
+has agreed not to use. Shift a 1 into it at the beginning. At every
+edge which corresponds to a conditional branch, we shift 0 for not
+taken and 1 for taken into a register. This uniquely numbers the paths
+through the hot region. Silently fail if we need more than 64 bits.
+
+At the end BB we call countPath and increment the counter based on %g1
+and the return address of the countPath call. We keep track of the
+number of iterations and the number of paths. We only run this
+version 30 or 40 times.
+
+Find the BBs that total 90% or more of execution, and aggregate them
+together to form our trace. But we do not allow more than 5 paths; if
+we have more than 5 we take the ones that are executed the most. We
+verify our assumption that we picked a hot back-edge in first-level
+instrumentation, by making sure that the number of times we took an
+exit edge from the hot trace is less than 10% of the number of
+iterations.
+
+LLC has been taught to recognize llvm_first_trigger() calls and NOT
+generate saves and restores of caller-saved registers around these
+calls.
+
+
+Phase behavior
+--------------
+
+We turn off llvm_first_trigger() calls with NOPs, but this would hide
+phase behavior from us (when some funcs/traces stop being hot and
+others become hot.)
+
+We have a SIGALRM timer that counts time for us. Every time we get a
+SIGALRM we look at our priority queue of locations where we have
+removed llvm_first_trigger() calls. Each location is inserted along
+with a time when we will next turn instrumentation back on for that
+call site. If the time has arrived for a particular call site, we pop
+that off the prio. queue and turn instrumentation back on for that
+call site.
+
+
+Generating traces
+-----------------
+
+When we finally generate an optimized trace we first copy the code
+into the trace cache. This leaves us with 3 copies of the code: the
+original code, the instrumented code, and the optimized trace. The
+optimized trace does not have instrumentation. The original code and
+the instrumented code are modified to have a branch to the trace
+cache, where the optimized traces are kept.
+
+We copy the code from the original to the instrumentation version
+by tracing the LLVM-to-Machine code basic block map and then copying
+each machine code basic block we think is in the hot region into the
+trace cache. Then we instrument that code. The process is similar for
+generating the final optimized trace; we copy the same basic blocks
+because we might need to put in fixup code for exit BBs.
+
+LLVM basic blocks are not typically used in the Reoptimizer except
+for the mapping information.
+
+We are restricted to using single instructions to branch between the
+original code, trace, and instrumented code. So we have to keep the
+code copies in memory near the original code (they can't be far enough
+away that a single pc-relative branch would not work.) Malloc() or
+data region space is too far away. This impacts the design of the
+trace cache.
+
+We use a dummy function that is full of a bunch of for loops which we
+overwrite with trace-cache code. The trace manager keeps track of
+whether or not we have enough space in the trace cache, etc.
+
+The trace insertion routine takes an original start address, a vector
+of machine instructions representing the trace, index of branches and
+their corresponding absolute targets, and index of calls and their
+corresponding absolute targets.
+
+The trace insertion routine is responsible for inserting branches from
+the beginning of the original code to the beginning of the optimized
+trace. This is because at some point the trace cache may run out of
+space and it may have to evict a trace, at which point the branch to
+the trace would also have to be removed. It uses a round-robin
+replacement policy; we have found that this is almost as good as LRU
+and better than random (especially because of problems fitting the new
+trace in.)
+
+We cannot deal with discontiguous trace cache areas. The trace cache
+is supposed to be cache-line-aligned, but it is not page-aligned.
+
+We generate instrumentation traces and optimized traces into separate
+trace caches. We keep the instrumented code around because you don't
+want to delete a trace when you still might have to return to it
+(i.e., return from a llvm_first_trigger() or countPath() call.)
+
+
diff --git a/docs/HistoricalNotes/2003-06-26-Reoptimizer2.txt b/docs/HistoricalNotes/2003-06-26-Reoptimizer2.txt
new file mode 100644
index 0000000..ec4b93f
--- /dev/null
+++ b/docs/HistoricalNotes/2003-06-26-Reoptimizer2.txt
@@ -0,0 +1,110 @@
+Thu Jun 26 14:43:04 CDT 2003
+
+Information about BinInterface
+------------------------------
+
+Take in a set of instructions with some particular register
+allocation. It allows you to add, modify, or delete some instructions,
+in SSA form (kind of like LLVM's MachineInstrs.) Then re-allocate
+registers. It assumes that the transformations you are doing are safe.
+It does not update the mapping information or the LLVM representation
+for the modified trace (so it would not, for instance, support
+multiple optimization passes; passes have to be aware of and update
+manually the mapping information.)
+
+The way you use it is you take the original code and provide it to
+BinInterface; then you do optimizations to it, then you put it in the
+trace cache.
+
+The BinInterface tries to find live-outs for traces so that it can do
+register allocation on just the trace, and stitch the trace back into
+the original code. It has to preserve the live-ins and live-outs when
+it does its register allocation. (On exits from the trace we have
+epilogues that copy live-outs back into the right registers, but
+live-ins have to be in the right registers.)
+
+
+Limitations of BinInterface
+---------------------------
+
+It does copy insertions for PHIs, which it infers from the machine
+code. The mapping info inserted by LLC is not sufficient to determine
+the PHIs.
+
+It does not handle integer or floating-point condition codes and it
+does not handle floating-point register allocation.
+
+It is not aggressively able to use lots of registers.
+
+There is a problem with alloca: we cannot find our spill space for
+spilling registers, normally allocated on the stack, if the trace
+follows an alloca(). What might be an acceptable solution would be to
+disable trace generation on functions that have variable-sized
+alloca()s. Variable-sized allocas in the trace would also probably
+screw things up.
+
+Because of the FP and alloca limitations, the BinInterface is
+completely disabled right now.
+
+
+Demo
+----
+
+This is a demo of the Ball & Larus version that does NOT use 2-level
+profiling.
+
+1. Compile program with llvm-gcc.
+2. Run opt -lowerswitch -paths -emitfuncs on the bytecode.
+ -lowerswitch change switch statements to branches
+ -paths Ball & Larus path-profiling algorithm
+ -emitfuncs emit the table of functions
+3. Run llc to generate SPARC assembly code for the result of step 2.
+4. Use g++ to link the (instrumented) assembly code.
+
+We use a script to do all this:
+------------------------------------------------------------------------------
+#!/bin/sh
+llvm-gcc $1.c -o $1
+opt -lowerswitch -paths -emitfuncs $1.bc > $1.run.bc
+llc -f $1.run.bc
+LIBS=$HOME/llvm_sparc/lib/Debug
+GXX=/usr/dcs/software/evaluation/bin/g++
+$GXX -g -L $LIBS $1.run.s -o $1.run.llc \
+$LIBS/tracecache.o \
+$LIBS/mapinfo.o \
+$LIBS/trigger.o \
+$LIBS/profpaths.o \
+$LIBS/bininterface.o \
+$LIBS/support.o \
+$LIBS/vmcore.o \
+$LIBS/transformutils.o \
+$LIBS/bcreader.o \
+-lscalaropts -lscalaropts -lanalysis \
+-lmalloc -lcpc -lm -ldl
+------------------------------------------------------------------------------
+
+5. Run the resulting binary. You will see output from BinInterface
+(described below) intermixed with the output from the program.
+
+
+Output from BinInterface
+------------------------
+
+BinInterface's debugging code prints out the following stuff in order:
+
+1. Initial code provided to BinInterface with original register
+allocation.
+
+2. Section 0 is the trace prolog, consisting mainly of live-ins and
+register saves which will be restored in epilogs.
+
+3. Section 1 is the trace itself, in SSA form used by BinInterface,
+along with the PHIs that are inserted.
+PHIs are followed by the copies that implement them.
+Each branch (i.e., out of the trace) is annotated with the
+section number that represents the epilog it branches to.
+
+4. All the other sections starting with Section 2 are trace epilogs.
+Every branch from the trace has to go to some epilog.
+
+5. After the last section is the register allocation output.
diff --git a/docs/HowToReleaseLLVM.html b/docs/HowToReleaseLLVM.html
new file mode 100644
index 0000000..b8461f7
--- /dev/null
+++ b/docs/HowToReleaseLLVM.html
@@ -0,0 +1,526 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>How To Release LLVM To The Public</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<div class="doc_title">How To Release LLVM To The Public</div>
+<p class="doc_warning">NOTE: THIS DOCUMENT IS A WORK IN PROGRESS!</p>
+<ol>
+ <li><a href="#introduction">Introduction</a></li>
+ <li><a href="#process">Release Process</a></li>
+ <li><a href="#dist_targets">Distribution Targets</a></li>
+</ol>
+<div class="doc_author">
+ <p>Written by <a href="mailto:rspencer@x10sys.com">Reid Spencer</a>,
+ <a href="mailto:criswell@cs.uiuc.edu">John Criswell</a></p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="introduction">Introduction</a></div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+<p>This document collects information about successfully releasing LLVM to the
+public. It is the release manager's guide to ensuring that a high quality build
+of LLVM is released. Mostly, it's just a bunch of reminders of things to do at
+release time so we don't inadvertently ship something that is utility
+deficient.</p>
+
+<p>
+There are three main tasks for building a release of LLVM:
+</p>
+<ol>
+ <li>Create the LLVM source distribution.</li>
+ <li>Create the LLVM GCC source distribution.</li>
+ <li>Create a set of LLVM GCC binary distributions for each supported
+ platform. These binary distributions must include compiled versions
+ of the libraries found in <tt>llvm/runtime</tt> from the LLVM
+ source distribution created in Step 1.</li>
+</ol>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="process">Release Process</a></div>
+<!-- *********************************************************************** -->
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="overview">Process Overview</a></div>
+<div class="doc_text">
+ <ol>
+ <li><a href="#updocs">Update Documentation</a></li>
+ <li><a href="#merge">Merge Branches</a></li>
+ <li><a href="#deps">Make LibDeps.txt</a></li>
+ <li><a href="#settle">Settle LLVM HEAD</a></li>
+ <li><a href="#tag">Tag LLVM and Create the Release Branch</a></li>
+ <li><a href="#verchanges">Update LLVM Version </a></li>
+ <li><a href="#build">Build LLVM</a></li>
+ <li><a href="#check">Run 'make check'</a></li>
+ <li><a href="#test">Run LLVM Test Suite</a></li>
+ <li><a href="#dist">Build the LLVM Source Distributions</a></li>
+ <li><a href="#rpm">Build RPM Packages (optional)</a></li>
+ <li><a href="#llvmgccbin">Build the LLVM GCC Binary Distribution</a></li>
+ <li><a href="#webupdates">Update the LLVM Website</a></li>
+ </ol>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="updocs">Update Documentation</a></div>
+<div class="doc_text">
+ <p>
+ Review the documentation and ensure that it is up to date. The Release Notes
+ must be updated to reflect bug fixes, new known issues, and changes in the
+ list of supported platforms. The Getting Started Guide should be updated to
+ reflect the new release version number tag available from CVS and changes in
+ basic system requirements.
+ </p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="merge">Merge Branches</a></div>
+<div class="doc_text">
+<p>
+Merge any work done on branches intended for release into mainline. Finish and
+commit all new features or bug fixes that are scheduled to go into the release.
+Work that is not to be incorporated into the release should not be merged from
+branches or committed from developers' working directories.
+</p>
+
+<p>
+From this point until the release branch is created, developers should
+<em>not</em>
+commit changes to the llvm and llvm-gcc CVS repositories unless it is a bug
+fix <em>for the release</em>.
+</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="deps">Make LibDeps.txt</a></div>
+<div class="doc_text">
+ <p>Rebuild the <tt>LibDeps.txt</tt> target in <tt>utils/llvm-config</tt>. This
+ makes sure that the <tt>llvm-config</tt> utility remains relevant for the
+ release, reflecting any changes in the library dependencies.</p>
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="settle">Settle CVS HEAD</a></div>
+<div class="doc_text">
+ <p>
+ Use the nightly test reports and 'make check' (deja-gnu based tests) to
+ ensure that recent changes and merged branches have not destabilized LLVM.
+ Platforms which are used less often should be given special attention as they
+ are the most likely to break from commits from the previous step.
+ </p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="tag">CVS Tag And Branch</a></div>
+<div class="doc_text">
+ <p>Tag and branch the CVS HEAD using the following procedure:</p>
+ <ol>
+ <li>
+ Request all developers to refrain from committing. Offenders get commit
+ rights taken away (temporarily).
+ </li>
+
+ <li>
+ The Release Manager updates his/her llvm, llvm-test, and llvm-gcc source
+ trees with the
+ latest sources from mainline CVS. The Release Manager may want to consider
+ using a new working directory for this to keep current uncommitted work
+ separate from release work.
+ </li>
+
+ <li>
+ The Release Manager tags his/her llvm, llvm-test, and llvm-gcc working
+ directories with
+ "ROOT_RELEASE_XX" where XX is the major and minor
+ release numbers. So, for Release 1.2, XX=12 and for Release 1.10, XX=110.
+
+ <p>
+ <tt>cvs tag ROOT_RELEASE_XX</tt><br>
+ </p>
+ </li>
+
+ <li>
+ Immediately create cvs branches based on the ROOT_RELEASE_XX tag. The tag
+ should be "release_XX" (where XX matches that used for the ROOT_RELEASE_XX
+ tag). This is where the release distribution will be created.
+
+ <p>
+ <tt>cvs tag -b -r ROOT_RELEASE_XX release_XX</tt>
+ </p>
+ </li>
+
+ <li>
+ Advise developers they can work on CVS HEAD again.
+ </li>
+
+ <li>
+ The Release Manager and any developers working on the release should switch
+ to the release branch (as all changes to the release will now be done in
+ the branch). The easiest way to do this is to grab another working copy
+ using the following commands:
+
+ <p>
+ <tt>cvs -d &lt;CVS Repository&gt; co -r release_XX llvm</tt><br>
+ <tt>cvs -d &lt;CVS Repository&gt; co -r release_XX llvm-test</tt><br>
+ <tt>cvs -d &lt;CVS Repository&gt; co -r release_XX llvm-gcc</tt><br>
+ </p>
+ </li>
+ </ol>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="verchanges">Update LLVM Version</a></div>
+<div class="doc_text">
+ <p>
+ After creating the llvm release branch, update the release branch's autoconf/configure.ac
+ version from X.Xsvn to just X.X. Update it on mainline as well to be the next version
+ (X.X+1svn).
+ </p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="build">Build LLVM</a></div>
+<div class="doc_text">
+ <p>
+ Build both debug and release (optimized) versions of LLVM on all
+ platforms. Ensure the build is warning and error free on each platform.
+ </p>
+
+ <p>
+ Build a new version of the LLVM GCC front-end after building the LLVM tools.
+ Once that is complete, go back to the LLVM source tree and build and install
+ the <tt>llvm/runtime</tt> libraries.
+ </p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="check">Run 'make check'</a></div>
+<div class="doc_text">
+ <p>Run <tt>make check</tt> and ensure there are no unexpected failures. If
+ there are, resolve the failures, commit them back into the release branch,
+ and restart testing by <a href="#build">re-building LLVM</a>.
+ </p>
+
+ <p>
+ Ensure that 'make check' passes on all platforms for all targets. If certain
+ failures cannot be resolved before release time, determine if marking them
+ XFAIL is appropriate. If not, fix the bug and go back. The test suite must
+ complete with "0 unexpected failures" for release.
+ </p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="test">LLVM Test Suite</a></div>
+<div class="doc_text">
+ <p>Run the llvm-test suite and ensure there are no unacceptable failures.
+ If there are, resolve the failures and go back to
+ <a href="#build">re-building LLVM</a>. The test suite
+ should be run in Nightly Test mode. All tests must pass.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="dist">Build the LLVM Source Distributions</a></div>
+<div class="doc_text">
+ <p>
+ Create source distributions for LLVM, LLVM GCC, and the LLVM Test Suite by
+ exporting the source
+ from CVS and archiving it. This can be done with the following commands:
+ </p>
+
+ <p>
+ <tt>cvs -d &lt;CVS Repository&gt; export -r release_XX llvm</tt><br>
+ <tt>cvs -d &lt;CVS Repository&gt; export -r release_XX llvm-test</tt><br>
+ <tt>cvs -d &lt;CVS Repository&gt; export -r release_XX llvm-gcc</tt><br>
+ <tt>mkdir cfrontend; mv llvm-gcc cfrontend/src</tt><br>
+ <tt>tar -cvf - llvm | gzip &gt; llvm-X.X.tar.gz</tt><br>
+ <tt>tar -cvf - llvm-test | gzip &gt; llvm-test-X.X.tar.gz</tt><br>
+ <tt>tar -cvf - cfrontend/src | gzip &gt; cfrontend-X.X.source.tar.gz</tt><br>
+ </p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="rpm">Building RPM packages (optional)</a></div>
+<div class="doc_text">
+ <p>You can, optionally, create source and binary RPM packages for LLVM. These
+ may make it easier to get LLVM into a distribution. This can be done with
+ the following commands:</p>
+ <pre>
+ make dist # Build the distribution source tarball
+ make dist-check # Check that the source tarball can build itself.
+ cp llvm-M.m.tar.gz /usr/src/redhat/SOURCES # Required by rpmbuild
+ make srpm # for source rpm
+ make rpm # for binary rpm
+ </pre>
+ <p>First, use "make dist" to simply build the distribution. Any
+ failures need to be corrected (on the branch). Once "make dist" can be
+ successful, do "make dist-check". This target will do the same thing as the
+ 'dist' target but also test that distribution to make sure it can build itself
+ and runs "make check" as well. This ensures that needed files are not
+ missing and that the src tarball can be successfully unpacked, built,
+ installed, and cleaned. Once you have a reliable tarball, you need to copy
+ it to the /usr/src/redhat/SOURCES directory which is a requirement of the
+ rpmbuild tool. The last two "make" invocations just run rpmbuild to build
+ either a source (<tt>srpm</tt>) or binary (<tt>rpm</tt>) RPM package.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="llvmgccbin">Build the LLVM GCC Binary Distribution</a></div>
+<div class="doc_text">
+ <p>
+ Creating the LLVM GCC binary distribution requires performing the following
+ steps for each supported platform:
+ </p>
+
+ <ol>
+ <li>
+ Build the LLVM GCC front-end. The LLVM GCC front-end must be installed in
+ a directory named <tt>cfrontend/&lt;platform&gt;/llvm-gcc</tt>. For
+ example, the Sparc/Solaris directory is named
+ <tt>cfrontend/sparc/llvm-gcc</tt>.
+ </li>
+
+ <li>
+ Build the libraries in <tt>llvm/runtime</tt> and install them into the
+ created LLVM GCC installation directory.
+ </li>
+
+ <li>
+ For systems with non-distributable header files (e.g. Solaris), manually
+ remove header files that the GCC build process has "fixed." This process
+ is admittedly painful, but not as bad as it looks; these header files are
+ almost always easily identifiable with simple grep expressions and are
+ installed in only a few directories in the GCC installation directory.
+ </li>
+
+ <li>
+ Add the copyright files and header file fix script.
+ </li>
+
+ <li>
+ Archive and compress the installation directory. These can be found in
+ previous releases of the LLVM-GCC front-end.
+ </li>
+ </ol>
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="webupdates">Update the LLVM Website</a></div>
+<div class="doc_text">
+ <p>
+ Check out the <tt>website</tt> module from Subversion. Create a new
+ subdirectory X.X in the releases directory. Place the llvm, llvm-test,
+ llvm-gcc source, and llvm-gcc
+ binaries in this new directory. Copy the llvm/docs and LICENSE.txt files
+ into this new directory. Update the releases/download.html file with the new release.
+ Update the releases/index.html with the new release. Finally, update the main page (
+ index.html and sidebar) to point to the new release and release announcement. Make
+ sure this all gets committed back into cvs.
+ </p>
+</div>
+
+<!--
+<div class="doc_subsection"><a name="release">Release</a></div>
+<div class="doc_text">
+ <p>Release the distribution tarball to the public. This consists of generating
+ several tarballs. The first set, the source distributions, are automatically
+ generated by the "make dist" and "make dist-check". There are gzip, bzip2, and
+ zip versions of these bundles.</p>
+ <p>The second set of tarballs is the binary release. When "make dist-check"
+ succeeds, it will have created an _install directory into which it installed
+ the binary release. You need to rename that directory as "llvm" and then
+ create tarballs from the contents of that "llvm" directory.</p>
+ <p>Finally, use rpm to make an rpm package based on the llvm.spec file. Don't
+ forget to update the version number, documentation, etc. in the llvm.spec
+ file.</p>
+</div>
+-->
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="dist_targets">Distribution Targets</a></div>
+<!-- *********************************************************************** -->
+<!-- ======================================================================= -->
+<div class="doc_subsection">Overview</div>
+<div class="doc_text">
+<p>The first thing you need to understand is that there are multiple make
+targets to support this feature. Here's an overview, we'll delve into the
+details later.</p>
+<ul>
+ <li><b>distdir</b> - builds the distribution directory from which the
+ distribution will be packaged</li>
+ <li><b>dist</b> - builds each of the distribution tarballs (tar.gz,
+ tar.bzip2, .zip). These can be built individually as well, with separate
+ targets.</li>
+ <li><b>dist-check</b> - this is identical to <tt>dist</tt> but includes a
+ check on the distribution that ensures the tarball can: unpack successfully,
+ compile correctly, pass 'make check', and pass 'make clean'.</li>
+ <li><b>dist-clean</b> - this just does a normal clean but also cleans up the
+ stuff generated by the other three <tt>dist</tt> targets (above).</li>
+</ul>
+<p>Okay, that's the basic functionality. When making a release, we want to
+ensure that the tree you build the distribution from passes
+<tt>dist-check</tt>. Beyond fixing the usual bugs, there is generally one
+impediment to making the release in this fashion: missing files. The
+<tt>dist-check</tt> process guards against that possibility. It will either
+fail and that failure will indicate what's missing, or it will succeed
+meaning that it has proved that the tarballs can actually succeed in
+building LLVM correctly and that it passes <tt>make check</tt>.</p>
+<!-- ======================================================================= -->
+<div class="doc_subsection">distdir</div>
+<p>This target builds the distribution directory, which is the directory from
+which the tarballs are generated. The distribution directory has the same
+name as the release (e.g. LLVM-1.7). This target goes through the following
+process:</p>
+<ol>
+ <li>First, if there was an old distribution directory (for the current
+ release), it is removed in its entirety and you see <tt>Removing old
+ LLVM-1.7</tt></li>
+ <li>Second, it issues a <tt>make all ENABLE_OPTIMIZED=1</tt> to ensure
+ that everything in your tree can be built in release mode. Often times
+ there are discrepancies in building between debug and release modes so it
+ enforces release mode first. If that fails, the <tt>distdir</tt> target
+ fails too. This is preceded by the message <tt>Making 'all' to verify
+ build</tt>.</li>
+ <li>Next, it traverses your source tree and copies it to a new directory
+ that has the name of the release (<tt>LLVM-M.m</tt> in our current case).
+ This is the directory that will get tar'd. It contains all the software
+ that needs to be in the distribution. During the copying process, it omits
+ generated files, CVS directories, and any other "cruft" that's in your
+ build tree. This is done to eliminate the possibility of huge distribution
+ tarballs that include useless or irrelevant stuff in them. This is the
+ trickiest part of making the distribution. Done manually you will either
+ include stuff that shouldn't be in the distribution or exclude stuff that
+ should. This step is preceded by the message <tt>Building Distribution
+ Directory LLVM-1.7</tt></li>
+ <li>The distribution directory is then traversed and all <tt>CVS</tt> or
+ <tt>.svn</tt> directories are removed. You see: <tt>Eliminating CVS/.svn
+ directories from distribution</tt></li>
+ <li>The recursive <tt>dist-hook</tt> target is executed. This gives each
+ directory a chance to modify the distribution in some way (more on this
+ below).</li>
+ <li>The distribution directory is traversed and the correct file
+ permissions and modes are set based on the type of file.</li>
+</ol>
+<p>To control the process of making the distribution directory correctly,
+each Makefile can utilize two features:</p>
+<ol>
+ <li><b><tt>EXTRA_DIST</tt></b> - this make variable specifies which files
+ it should distribute. By default, all source files are automatically
+ included for distribution as well as certain <tt>well known</tt> files
+ (see DistAlways variable in Makefile.rules for details). Each Makefile
+ specifies, via the <tt>EXTRA_DIST</tt> variable, which additional files
+ need to be distributed. Only those files that are needed to build LLVM
+ should be added to <tt>EXTRA_DIST</tt>. <tt>EXTRA_DIST</tt> contains a
+ list of file or directory names that should be distributed. For example,
+ the top level Makefile contains
+ <tt>EXTRA_DIST := test llvm.spec include</tt>.
+ This means that in addition to regular things that are distributed at the
+ top level (<tt>CREDITS.txt, LICENSE.txt</tt>, etc.) the distribution should
+ contain the entire <tt>test</tt> and <tt>include</tt> directories as well
+ as the <tt>llvm.spec</tt> file.</li>
+ <li><b><tt>dist-hook</tt></b> - this make target can be used to alter the
+ content of the distribution directory. For example, in the top level
+ Makefile there is some logic to eliminate files in the <tt>include</tt>
+ subtree that are generated by the configure script. These should not be
+ distributed. Similarly, any <tt>dist-hook</tt> target found in any
+ directory can add or remove or modify things just before it gets packaged.
+ Any transformation is permitted. Generally, not much is needed.</li>
+</ol>
+<p>You will see various messages if things go wrong:</p>
+<ol>
+ <li>During the copying process, any files that are missing will be flagged
+ with: <tt>===== WARNING: Distribution Source 'dir/file' Not Found!</tt>
+ These must be corrected by either adding the file or removing it from
+ <tt>EXTRA_DIST</tt>.</li>
+ <li>If you build the distribution with <tt>VERBOSE=1</tt>, then you might
+ also see: <tt>Skipping non-existent 'dir/file'</tt> in certain cases where
+ it's okay to skip the file.</li>
+ <li>The target can fail if any of the things it does fail. Error messages
+ should indicate what went wrong.</li>
+</ol>
+<!-- ======================================================================= -->
+<div class="doc_subsection">dist</div>
+<p>This target does exactly what <tt>distdir</tt> target does, but also
+includes assembling the tarballs. There are actually four related targets
+here:</p>
+ <ul>
+ <li><b><tt>dist-gzip</tt></b>: package the gzipped distribution tar
+ file. The distribution directory is packaged into a single file ending in
+ <tt>.tar.gz</tt> which is gzip compressed.</li>
+ <li><b><tt>dist-bzip2</tt></b>: package the bzip2 distribution tar file.
+ The distribution directory is packaged into a single file ending in
+ <tt>.tar.bzip2</tt> which is bzip2 compressed.</li>
+ <li><b><tt>dist-zip</tt></b>: package the zip distribution file. The
+ distribution directory is packaged into a single file ending in
+ <tt>.zip</tt> which is zip compressed.</li>
+ <li><b><tt>dist</tt></b>: does all three, dist-gzip, dist-bzip2,
+ dist-zip</li>
+ </ul>
+<!-- ======================================================================= -->
+<div class="doc_subsection">dist-check</div>
+<p>This target checks the distribution. The basic idea is that it unpacks the
+distribution tarball and ensures that it can build. It takes the following
+actions:</p>
+<ol>
+ <li>It depends on the <tt>dist-gzip</tt> target which, if it hasn't already
+ been built, builds the gzip tar bundle (see dist and distdir above).</li>
+ <li>removes any pre-existing <tt>_distcheckdir</tt> at the top level.</li>
+ <li>creates a new <tt>_distcheckdir</tt> directory at the top level.</li>
+ <li>creates a <tt>build</tt> subdirectory and an <tt>install</tt>
+ subdirectory under <tt>_distcheckdir</tt>.</li>
+ <li>unzips and untars the release tarball into <tt>_distcheckdir</tt>,
+ creating <tt>LLVM-1.7</tt> directory (from the tarball).</li>
+ <li>in the build subdirectory, it configures with appropriate options to build
+ from the unpacked source tarball into the <tt>build</tt> directory with
+ installation in the <tt>install</tt> directory.</li>
+ <li>runs <tt>make all</tt></li>
+ <li>runs <tt>make check</tt></li>
+ <li>runs <tt>make install</tt></li>
+ <li>runs <tt>make uninstall</tt></li>
+ <li>runs <tt>make dist</tt></li>
+ <li>runs <tt>make clean</tt></li>
+ <li>runs <tt>make dist-clean</tt></li>
+</ol>
+<p>If it can pass all that, the distribution will be deemed distribution
+worthy and you will see:</p>
+<pre>===== LLVM-1.7.tar.gz Ready For Distribution =====</pre>
+<p>This means the tarball should then be tested on other platforms and have the
+nightly test run against it. If those all pass, THEN it is ready for
+distribution.</p>
+<p>
+A note about disk space: using <tt>dist-check</tt> will easily triple the
+amount of disk space your build tree is using. You might want to check
+available space before you begin.</p>
+<!-- ======================================================================= -->
+<div class="doc_subsection">dist-clean</div>
+<h2>dist-clean</h2>
+<p>In addition to doing a normal <tt>clean</tt>, this target will clean up the
+files and directories created by the distribution targets. In particular the
+distribution directory (<tt>LLVM-X.X</tt>), check directory
+(<tt>_distcheckdir</tt>), and the various tarballs will be removed. You do
+this after the release has shipped and you no longer need this stuff in your
+build tree.</p>
+</div>
+
+<!-- *********************************************************************** -->
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!" /></a>
+
+ <a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
+ <a href="http://llvm.cs.uiuc.edu">The LLVM Compiler Infrastructure</a>
+ <br/>
+ Last modified: $Date$
+</address>
+</body>
+</html>
diff --git a/docs/HowToSubmitABug.html b/docs/HowToSubmitABug.html
new file mode 100644
index 0000000..93e0d21
--- /dev/null
+++ b/docs/HowToSubmitABug.html
@@ -0,0 +1,355 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>How to submit an LLVM bug report</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<div class="doc_title">
+ How to submit an LLVM bug report
+</div>
+
+<table class="layout" style="width: 90%" >
+<tr class="layout">
+ <td class="left">
+<ol>
+ <li><a href="#introduction">Introduction - Got bugs?</a></li>
+ <li><a href="#crashers">Crashing Bugs</a>
+ <ul>
+ <li><a href="#front-end">Front-end bugs</a>
+ <li><a href="#ct_optimizer">Compile-time optimization bugs</a>
+ <li><a href="#ct_codegen">Code generator bugs</a>
+ </ul></li>
+ <li><a href="#miscompilations">Miscompilations</a></li>
+ <li><a href="#codegen">Incorrect code generation (JIT and LLC)</a></li>
+</ol>
+<div class="doc_author">
+ <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a> and
+ <a href="http://misha.brukman.net">Misha Brukman</a></p>
+</div>
+</td>
+<td class="right">
+ <img src="img/Debugging.gif" alt="Debugging" width="444" height="314">
+</td>
+</tr>
+</table>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="introduction">Introduction - Got bugs?</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>If you're working with LLVM and run into a bug, we definitely want to know
+about it. This document describes what you can do to increase the odds of
+getting it fixed quickly.</p>
+
+<p>Basically you have to do two things at a minimum. First, decide whether the
+bug <a href="#crashers">crashes the compiler</a> (or an LLVM pass), or if the
+compiler is <a href="#miscompilations">miscompiling</a> the program (i.e., the
+compiler successfully produces an executable, but it doesn't run right). Based
+on
+what type of bug it is, follow the instructions in the linked section to narrow
+down the bug so that the person who fixes it will be able to find the problem
+more easily.</p>
+
+<p>Once you have a reduced test-case, go to <a
+href="http://llvm.org/bugs/enter_bug.cgi">the LLVM Bug Tracking
+System</a> and fill out the form with the necessary details (note that you don't
+need to pick a category; just use the "new-bugs" category if you're not sure).
+The bug description should contain the following
+information:</p>
+
+<ul>
+ <li>All information necessary to reproduce the problem.</li>
+ <li>The reduced test-case that triggers the bug.</li>
+ <li>The location where you obtained LLVM (if not from our Subversion
+ repository).</li>
+</ul>
+
+<p>Thanks for helping us make LLVM better!</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="crashers">Crashing Bugs</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>More often than not, bugs in the compiler cause it to crash&mdash;often due
+to an assertion failure of some sort. The most important
+piece of the puzzle is to figure out if it is crashing in the GCC front-end
+or if it is one of the LLVM libraries (e.g. the optimizer or code generator)
+that has problems.</p>
+
+<p>To figure out which component is crashing (the front-end,
+optimizer or code generator), run the
+<tt><b>llvm-gcc</b></tt> command line as you were when the crash occurred, but
+with the following extra command line options:</p>
+
+<ul>
+ <li><tt><b>-O0 -emit-llvm</b></tt>: If <tt>llvm-gcc</tt> still crashes when
+ passed these options (which disable the optimizer and code generator), then
+ the crash is in the front-end. Jump ahead to the section on <a
+ href="#front-end">front-end bugs</a>.</li>
+
+ <li><tt><b>-emit-llvm</b></tt>: If <tt>llvm-gcc</tt> crashes with this option
+ (which disables the code generator), you found an optimizer bug. Jump ahead
+ to <a href="#ct_optimizer"> compile-time optimization bugs</a>.</li>
+
+ <li>Otherwise, you have a code generator crash. Jump ahead to <a
+ href="#ct_codegen">code generator bugs</a>.</li>
+
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="front-end">Front-end bugs</a>
+</div>
+
+<div class="doc_text">
+
+<p>If the problem is in the front-end, you should re-run the same
+<tt>llvm-gcc</tt> command that resulted in the crash, but add the
+<tt>-save-temps</tt> option. The compiler will crash again, but it will leave
+behind a <tt><i>foo</i>.i</tt> file (containing preprocessed C source code) and
+possibly <tt><i>foo</i>.s</tt> for each
+compiled <tt><i>foo</i>.c</tt> file. Send us the <tt><i>foo</i>.i</tt> file,
+along with the options you passed to llvm-gcc, and a brief description of the
+error it caused.</p>
+
+<p>The <a href="http://delta.tigris.org/">delta</a> tool helps to reduce the
+preprocessed file down to the smallest amount of code that still replicates the
+problem. You're encouraged to use delta to reduce the code to make the
+developers' lives easier. <a
+href="http://gcc.gnu.org/wiki/A_guide_to_testcase_reduction">This website</a>
+has instructions on the best way to use delta.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="ct_optimizer">Compile-time optimization bugs</a>
+</div>
+
+<div class="doc_text">
+
+<p>If you find that a bug crashes in the optimizer, compile your test-case to a
+<tt>.bc</tt> file by passing "<tt><b>-emit-llvm -O0 -c -o foo.bc</b></tt>".
+Then run:</p>
+
+<div class="doc_code">
+<p><tt><b>opt</b> -std-compile-opts -debug-pass=Arguments foo.bc
+ -disable-output</tt></p>
+</div>
+
+<p>This command should do two things: it should print out a list of passes, and
+then it should crash in the same way as llvm-gcc. If it doesn't crash, please
+follow the instructions for a <a href="#front-end">front-end bug</a>.</p>
+
+<p>If this does crash, then you should be able to debug this with the following
+bugpoint command:</p>
+
+<div class="doc_code">
+<p><tt><b>bugpoint</b> foo.bc &lt;list of passes printed by
+<b>opt</b>&gt;</tt></p>
+</div>
+
+<p>Please run this, then file a bug with the instructions and reduced .bc files
+that bugpoint emits. If something goes wrong with bugpoint, please submit the
+"foo.bc" file and the list of passes printed by <b>opt</b>.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="ct_codegen">Code generator bugs</a>
+</div>
+
+<div class="doc_text">
+
+<p>If you find a bug that crashes llvm-gcc in the code generator, compile your
+source file to a .bc file by passing "<tt><b>-emit-llvm -c -o foo.bc</b></tt>"
+to llvm-gcc (in addition to the options you already pass). Once you have
+foo.bc, one of the following commands should fail:</p>
+
+<ol>
+<li><tt><b>llc</b> foo.bc -f</tt></li>
+<li><tt><b>llc</b> foo.bc -f -relocation-model=pic</tt></li>
+<li><tt><b>llc</b> foo.bc -f -relocation-model=static</tt></li>
+<li><tt><b>llc</b> foo.bc -f -enable-eh</tt></li>
+<li><tt><b>llc</b> foo.bc -f -relocation-model=pic -enable-eh</tt></li>
+<li><tt><b>llc</b> foo.bc -f -relocation-model=static -enable-eh</tt></li>
+</ol>
+
+<p>If none of these crash, please follow the instructions for a
+<a href="#front-end">front-end bug</a>. If one of these does crash, you should
+be able to reduce this with one of the following bugpoint command lines (use
+the one corresponding to the command above that failed):</p>
+
+<ol>
+<li><tt><b>bugpoint</b> -run-llc foo.bc</tt></li>
+<li><tt><b>bugpoint</b> -run-llc foo.bc --tool-args
+ -relocation-model=pic</tt></li>
+<li><tt><b>bugpoint</b> -run-llc foo.bc --tool-args
+ -relocation-model=static</tt></li>
+<li><tt><b>bugpoint</b> -run-llc foo.bc --tool-args -enable-eh</tt></li>
+<li><tt><b>bugpoint</b> -run-llc foo.bc --tool-args
+ -relocation-model=pic -enable-eh</tt></li>
+<li><tt><b>bugpoint</b> -run-llc foo.bc --tool-args
+ -relocation-model=static -enable-eh</tt></li>
+</ol>
+
+<p>Please run this, then file a bug with the instructions and reduced .bc file
+that bugpoint emits. If something goes wrong with bugpoint, please submit the
+"foo.bc" file and the option that llc crashes with.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="miscompilations">Miscompilations</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>If llvm-gcc successfully produces an executable, but that executable doesn't
+run right, this is either a bug in the code or a bug in the
+compiler. The first thing to check is to make sure it is not using undefined
+behavior (e.g. reading a variable before it is defined). In particular, check
+to see if the program <a href="http://valgrind.org/">valgrind</a>s clean,
+passes purify, or passes some other memory checker tool. Many of the "LLVM bugs" that
+we have chased down ended up being bugs in the program being compiled, not
+ LLVM.</p>
+
+<p>Once you determine that the program itself is not buggy, you should choose
+which code generator you wish to compile the program with (e.g. C backend, the
+JIT, or LLC) and optionally a series of LLVM passes to run. For example:</p>
+
+<div class="doc_code">
+<p><tt>
+<b>bugpoint</b> -run-cbe [... optzn passes ...] file-to-test.bc --args -- [program arguments]</tt></p>
+</div>
+
+<p><tt>bugpoint</tt> will try to narrow down your list of passes to the one pass
+that causes an error, and simplify the bitcode file as much as it can to assist
+you. It will print a message letting you know how to reproduce the resulting
+error.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="codegen">Incorrect code generation</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>Similarly to debugging incorrect compilation by mis-behaving passes, you can
+debug incorrect code generation by either LLC or the JIT, using
+<tt>bugpoint</tt>. The process <tt>bugpoint</tt> follows in this case is to try
+to narrow the code down to a function that is miscompiled by one or the other
+method, but since for correctness, the entire program must be run,
+<tt>bugpoint</tt> will compile the code it deems to not be affected with the C
+Backend, and then link in the shared object it generates.</p>
+
+<p>To debug the JIT:</p>
+
+<div class="doc_code">
+<pre>
+bugpoint -run-jit -output=[correct output file] [bitcode file] \
+ --tool-args -- [arguments to pass to lli] \
+ --args -- [program arguments]
+</pre>
+</div>
+
+<p>Similarly, to debug the LLC, one would run:</p>
+
+<div class="doc_code">
+<pre>
+bugpoint -run-llc -output=[correct output file] [bitcode file] \
+ --tool-args -- [arguments to pass to llc] \
+ --args -- [program arguments]
+</pre>
+</div>
+
+<p><b>Special note:</b> if you are debugging MultiSource or SPEC tests that
+already exist in the <tt>llvm/test</tt> hierarchy, there is an easier way to
+debug the JIT, LLC, and CBE, using the pre-written Makefile targets, which
+will pass the program options specified in the Makefiles:</p>
+
+<div class="doc_code">
+<p><tt>
+cd llvm/test/../../program<br>
+make bugpoint-jit
+</tt></p>
+</div>
+
+<p>At the end of a successful <tt>bugpoint</tt> run, you will be presented
+with two bitcode files: a <em>safe</em> file which can be compiled with the C
+backend and the <em>test</em> file which either LLC or the JIT
+mis-codegenerates, and thus causes the error.</p>
+
+<p>To reproduce the error that <tt>bugpoint</tt> found, it is sufficient to do
+the following:</p>
+
+<ol>
+
+<li><p>Regenerate the shared object from the safe bitcode file:</p>
+
+<div class="doc_code">
+<p><tt>
+<b>llc</b> -march=c safe.bc -o safe.c<br>
+<b>gcc</b> -shared safe.c -o safe.so
+</tt></p>
+</div></li>
+
+<li><p>If debugging LLC, compile test bitcode native and link with the shared
+ object:</p>
+
+<div class="doc_code">
+<p><tt>
+<b>llc</b> test.bc -o test.s -f<br>
+<b>gcc</b> test.s safe.so -o test.llc<br>
+./test.llc [program options]
+</tt></p>
+</div></li>
+
+<li><p>If debugging the JIT, load the shared object and supply the test
+ bitcode:</p>
+
+<div class="doc_code">
+<p><tt><b>lli</b> -load=safe.so test.bc [program options]</tt></p>
+</div></li>
+
+</ol>
+
+</div>
+
+<!-- *********************************************************************** -->
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!" /></a>
+
+ <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
+ <a href="http://llvm.org">The LLVM Compiler Infrastructure</a>
+ <br>
+ Last modified: $Date$
+</address>
+
+</body>
+</html>
diff --git a/docs/LangRef.html b/docs/LangRef.html
new file mode 100644
index 0000000..a57f242
--- /dev/null
+++ b/docs/LangRef.html
@@ -0,0 +1,4882 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>LLVM Assembly Language Reference Manual</title>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+ <meta name="author" content="Chris Lattner">
+ <meta name="description"
+ content="LLVM Assembly Language Reference Manual.">
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+
+<body>
+
+<div class="doc_title"> LLVM Language Reference Manual </div>
+<ol>
+ <li><a href="#abstract">Abstract</a></li>
+ <li><a href="#introduction">Introduction</a></li>
+ <li><a href="#identifiers">Identifiers</a></li>
+ <li><a href="#highlevel">High Level Structure</a>
+ <ol>
+ <li><a href="#modulestructure">Module Structure</a></li>
+ <li><a href="#linkage">Linkage Types</a></li>
+ <li><a href="#callingconv">Calling Conventions</a></li>
+ <li><a href="#globalvars">Global Variables</a></li>
+ <li><a href="#functionstructure">Functions</a></li>
+ <li><a href="#aliasstructure">Aliases</a>
+ <li><a href="#paramattrs">Parameter Attributes</a></li>
+ <li><a href="#moduleasm">Module-Level Inline Assembly</a></li>
+ <li><a href="#datalayout">Data Layout</a></li>
+ </ol>
+ </li>
+ <li><a href="#typesystem">Type System</a>
+ <ol>
+ <li><a href="#t_primitive">Primitive Types</a>
+ <ol>
+ <li><a href="#t_classifications">Type Classifications</a></li>
+ </ol>
+ </li>
+ <li><a href="#t_derived">Derived Types</a>
+ <ol>
+ <li><a href="#t_array">Array Type</a></li>
+ <li><a href="#t_function">Function Type</a></li>
+ <li><a href="#t_pointer">Pointer Type</a></li>
+ <li><a href="#t_struct">Structure Type</a></li>
+ <li><a href="#t_pstruct">Packed Structure Type</a></li>
+ <li><a href="#t_vector">Vector Type</a></li>
+ <li><a href="#t_opaque">Opaque Type</a></li>
+ </ol>
+ </li>
+ </ol>
+ </li>
+ <li><a href="#constants">Constants</a>
+ <ol>
+ <li><a href="#simpleconstants">Simple Constants</a>
+ <li><a href="#aggregateconstants">Aggregate Constants</a>
+ <li><a href="#globalconstants">Global Variable and Function Addresses</a>
+ <li><a href="#undefvalues">Undefined Values</a>
+ <li><a href="#constantexprs">Constant Expressions</a>
+ </ol>
+ </li>
+ <li><a href="#othervalues">Other Values</a>
+ <ol>
+ <li><a href="#inlineasm">Inline Assembler Expressions</a>
+ </ol>
+ </li>
+ <li><a href="#instref">Instruction Reference</a>
+ <ol>
+ <li><a href="#terminators">Terminator Instructions</a>
+ <ol>
+ <li><a href="#i_ret">'<tt>ret</tt>' Instruction</a></li>
+ <li><a href="#i_br">'<tt>br</tt>' Instruction</a></li>
+ <li><a href="#i_switch">'<tt>switch</tt>' Instruction</a></li>
+ <li><a href="#i_invoke">'<tt>invoke</tt>' Instruction</a></li>
+ <li><a href="#i_unwind">'<tt>unwind</tt>' Instruction</a></li>
+ <li><a href="#i_unreachable">'<tt>unreachable</tt>' Instruction</a></li>
+ </ol>
+ </li>
+ <li><a href="#binaryops">Binary Operations</a>
+ <ol>
+ <li><a href="#i_add">'<tt>add</tt>' Instruction</a></li>
+ <li><a href="#i_sub">'<tt>sub</tt>' Instruction</a></li>
+ <li><a href="#i_mul">'<tt>mul</tt>' Instruction</a></li>
+ <li><a href="#i_udiv">'<tt>udiv</tt>' Instruction</a></li>
+ <li><a href="#i_sdiv">'<tt>sdiv</tt>' Instruction</a></li>
+ <li><a href="#i_fdiv">'<tt>fdiv</tt>' Instruction</a></li>
+ <li><a href="#i_urem">'<tt>urem</tt>' Instruction</a></li>
+ <li><a href="#i_srem">'<tt>srem</tt>' Instruction</a></li>
+ <li><a href="#i_frem">'<tt>frem</tt>' Instruction</a></li>
+ </ol>
+ </li>
+ <li><a href="#bitwiseops">Bitwise Binary Operations</a>
+ <ol>
+ <li><a href="#i_shl">'<tt>shl</tt>' Instruction</a></li>
+ <li><a href="#i_lshr">'<tt>lshr</tt>' Instruction</a></li>
+ <li><a href="#i_ashr">'<tt>ashr</tt>' Instruction</a></li>
+ <li><a href="#i_and">'<tt>and</tt>' Instruction</a></li>
+ <li><a href="#i_or">'<tt>or</tt>' Instruction</a></li>
+ <li><a href="#i_xor">'<tt>xor</tt>' Instruction</a></li>
+ </ol>
+ </li>
+ <li><a href="#vectorops">Vector Operations</a>
+ <ol>
+ <li><a href="#i_extractelement">'<tt>extractelement</tt>' Instruction</a></li>
+ <li><a href="#i_insertelement">'<tt>insertelement</tt>' Instruction</a></li>
+ <li><a href="#i_shufflevector">'<tt>shufflevector</tt>' Instruction</a></li>
+ </ol>
+ </li>
+ <li><a href="#memoryops">Memory Access and Addressing Operations</a>
+ <ol>
+ <li><a href="#i_malloc">'<tt>malloc</tt>' Instruction</a></li>
+ <li><a href="#i_free">'<tt>free</tt>' Instruction</a></li>
+ <li><a href="#i_alloca">'<tt>alloca</tt>' Instruction</a></li>
+ <li><a href="#i_load">'<tt>load</tt>' Instruction</a></li>
+ <li><a href="#i_store">'<tt>store</tt>' Instruction</a></li>
+ <li><a href="#i_getelementptr">'<tt>getelementptr</tt>' Instruction</a></li>
+ </ol>
+ </li>
+ <li><a href="#convertops">Conversion Operations</a>
+ <ol>
+ <li><a href="#i_trunc">'<tt>trunc .. to</tt>' Instruction</a></li>
+ <li><a href="#i_zext">'<tt>zext .. to</tt>' Instruction</a></li>
+ <li><a href="#i_sext">'<tt>sext .. to</tt>' Instruction</a></li>
+ <li><a href="#i_fptrunc">'<tt>fptrunc .. to</tt>' Instruction</a></li>
+ <li><a href="#i_fpext">'<tt>fpext .. to</tt>' Instruction</a></li>
+ <li><a href="#i_fptoui">'<tt>fptoui .. to</tt>' Instruction</a></li>
+ <li><a href="#i_fptosi">'<tt>fptosi .. to</tt>' Instruction</a></li>
+ <li><a href="#i_uitofp">'<tt>uitofp .. to</tt>' Instruction</a></li>
+ <li><a href="#i_sitofp">'<tt>sitofp .. to</tt>' Instruction</a></li>
+ <li><a href="#i_ptrtoint">'<tt>ptrtoint .. to</tt>' Instruction</a></li>
+ <li><a href="#i_inttoptr">'<tt>inttoptr .. to</tt>' Instruction</a></li>
+ <li><a href="#i_bitcast">'<tt>bitcast .. to</tt>' Instruction</a></li>
+ </ol>
+ <li><a href="#otherops">Other Operations</a>
+ <ol>
+ <li><a href="#i_icmp">'<tt>icmp</tt>' Instruction</a></li>
+ <li><a href="#i_fcmp">'<tt>fcmp</tt>' Instruction</a></li>
+ <li><a href="#i_phi">'<tt>phi</tt>' Instruction</a></li>
+ <li><a href="#i_select">'<tt>select</tt>' Instruction</a></li>
+ <li><a href="#i_call">'<tt>call</tt>' Instruction</a></li>
+ <li><a href="#i_va_arg">'<tt>va_arg</tt>' Instruction</a></li>
+ </ol>
+ </li>
+ </ol>
+ </li>
+ <li><a href="#intrinsics">Intrinsic Functions</a>
+ <ol>
+ <li><a href="#int_varargs">Variable Argument Handling Intrinsics</a>
+ <ol>
+ <li><a href="#int_va_start">'<tt>llvm.va_start</tt>' Intrinsic</a></li>
+ <li><a href="#int_va_end">'<tt>llvm.va_end</tt>' Intrinsic</a></li>
+ <li><a href="#int_va_copy">'<tt>llvm.va_copy</tt>' Intrinsic</a></li>
+ </ol>
+ </li>
+ <li><a href="#int_gc">Accurate Garbage Collection Intrinsics</a>
+ <ol>
+ <li><a href="#int_gcroot">'<tt>llvm.gcroot</tt>' Intrinsic</a></li>
+ <li><a href="#int_gcread">'<tt>llvm.gcread</tt>' Intrinsic</a></li>
+ <li><a href="#int_gcwrite">'<tt>llvm.gcwrite</tt>' Intrinsic</a></li>
+ </ol>
+ </li>
+ <li><a href="#int_codegen">Code Generator Intrinsics</a>
+ <ol>
+ <li><a href="#int_returnaddress">'<tt>llvm.returnaddress</tt>' Intrinsic</a></li>
+ <li><a href="#int_frameaddress">'<tt>llvm.frameaddress</tt>' Intrinsic</a></li>
+ <li><a href="#int_stacksave">'<tt>llvm.stacksave</tt>' Intrinsic</a></li>
+ <li><a href="#int_stackrestore">'<tt>llvm.stackrestore</tt>' Intrinsic</a></li>
+ <li><a href="#int_prefetch">'<tt>llvm.prefetch</tt>' Intrinsic</a></li>
+ <li><a href="#int_pcmarker">'<tt>llvm.pcmarker</tt>' Intrinsic</a></li>
+ <li><a href="#int_readcyclecounter">'<tt>llvm.readcyclecounter</tt>' Intrinsic</a></li>
+ </ol>
+ </li>
+ <li><a href="#int_libc">Standard C Library Intrinsics</a>
+ <ol>
+ <li><a href="#int_memcpy">'<tt>llvm.memcpy.*</tt>' Intrinsic</a></li>
+ <li><a href="#int_memmove">'<tt>llvm.memmove.*</tt>' Intrinsic</a></li>
+ <li><a href="#int_memset">'<tt>llvm.memset.*</tt>' Intrinsic</a></li>
+ <li><a href="#int_sqrt">'<tt>llvm.sqrt.*</tt>' Intrinsic</a></li>
+ <li><a href="#int_powi">'<tt>llvm.powi.*</tt>' Intrinsic</a></li>
+ </ol>
+ </li>
+ <li><a href="#int_manip">Bit Manipulation Intrinsics</a>
+ <ol>
+ <li><a href="#int_bswap">'<tt>llvm.bswap.*</tt>' Intrinsics</a></li>
+ <li><a href="#int_ctpop">'<tt>llvm.ctpop.*</tt>' Intrinsic </a></li>
+ <li><a href="#int_ctlz">'<tt>llvm.ctlz.*</tt>' Intrinsic </a></li>
+ <li><a href="#int_cttz">'<tt>llvm.cttz.*</tt>' Intrinsic </a></li>
+ <li><a href="#int_part_select">'<tt>llvm.part.select.*</tt>' Intrinsic </a></li>
+ <li><a href="#int_part_set">'<tt>llvm.part.set.*</tt>' Intrinsic </a></li>
+ </ol>
+ </li>
+ <li><a href="#int_debugger">Debugger intrinsics</a></li>
+ <li><a href="#int_eh">Exception Handling intrinsics</a></li>
+ <li><a href="#int_general">General intrinsics</a>
+ <ol>
+ <li><a href="#int_var_annotation">'<tt>llvm.var.annotation</tt>'
+ Intrinsic</a></li>
+ </ol>
+ </li>
+ </ol>
+ </li>
+</ol>
+
+<div class="doc_author">
+ <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>
+ and <a href="mailto:vadve@cs.uiuc.edu">Vikram Adve</a></p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"> <a name="abstract">Abstract </a></div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+<p>This document is a reference manual for the LLVM assembly language.
+LLVM is an SSA based representation that provides type safety,
+low-level operations, flexibility, and the capability of representing
+'all' high-level languages cleanly. It is the common code
+representation used throughout all phases of the LLVM compilation
+strategy.</p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"> <a name="introduction">Introduction</a> </div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>The LLVM code representation is designed to be used in three
+different forms: as an in-memory compiler IR, as an on-disk bitcode
+representation (suitable for fast loading by a Just-In-Time compiler),
+and as a human readable assembly language representation. This allows
+LLVM to provide a powerful intermediate representation for efficient
+compiler transformations and analysis, while providing a natural means
+to debug and visualize the transformations. The three different forms
+of LLVM are all equivalent. This document describes the human readable
+representation and notation.</p>
+
+<p>The LLVM representation aims to be light-weight and low-level
+while being expressive, typed, and extensible at the same time. It
+aims to be a "universal IR" of sorts, by being at a low enough level
+that high-level ideas may be cleanly mapped to it (similar to how
+microprocessors are "universal IR's", allowing many source languages to
+be mapped to them). By providing type information, LLVM can be used as
+the target of optimizations: for example, through pointer analysis, it
+can be proven that a C automatic variable is never accessed outside of
+the current function... allowing it to be promoted to a simple SSA
+value instead of a memory location.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="wellformed">Well-Formedness</a> </div>
+
+<div class="doc_text">
+
+<p>It is important to note that this document describes 'well formed'
+LLVM assembly language. There is a difference between what the parser
+accepts and what is considered 'well formed'. For example, the
+following instruction is syntactically okay, but not well formed:</p>
+
+<div class="doc_code">
+<pre>
+%x = <a href="#i_add">add</a> i32 1, %x
+</pre>
+</div>
+
+<p>...because the definition of <tt>%x</tt> does not dominate all of
+its uses. The LLVM infrastructure provides a verification pass that may
+be used to verify that an LLVM module is well formed. This pass is
+automatically run by the parser after parsing input assembly and by
+the optimizer before it outputs bitcode. The violations pointed out
+by the verifier pass indicate bugs in transformation passes or input to
+the parser.</p>
+</div>
+
+<!-- Describe the typesetting conventions here. -->
+
+<!-- *********************************************************************** -->
+<div class="doc_section"> <a name="identifiers">Identifiers</a> </div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>LLVM uses three different forms of identifiers, for different
+purposes:</p>
+
+<ol>
+ <li>Named values are represented as a string of characters with a '%' prefix.
+ For example, %foo, %DivisionByZero, %a.really.long.identifier. The actual
+ regular expression used is '<tt>%[a-zA-Z$._][a-zA-Z$._0-9]*</tt>'.
+ Identifiers which require other characters in their names can be surrounded
+ with quotes. In this way, anything except a <tt>&quot;</tt> character can be used
+ in a name.</li>
+
+ <li>Unnamed values are represented as an unsigned numeric value with a '%'
+ prefix. For example, %12, %2, %44.</li>
+
+ <li>Constants, which are described in a <a href="#constants">section about
+ constants</a>, below.</li>
+</ol>
+
+<p>LLVM requires that values start with a '%' sign for two reasons: Compilers
+don't need to worry about name clashes with reserved words, and the set of
+reserved words may be expanded in the future without penalty. Additionally,
+unnamed identifiers allow a compiler to quickly come up with a temporary
+variable without having to avoid symbol table conflicts.</p>
+
+<p>Reserved words in LLVM are very similar to reserved words in other
+languages. There are keywords for different opcodes
+('<tt><a href="#i_add">add</a></tt>',
+ '<tt><a href="#i_bitcast">bitcast</a></tt>',
+ '<tt><a href="#i_ret">ret</a></tt>', etc...), for primitive type names ('<tt><a
+href="#t_void">void</a></tt>', '<tt><a href="#t_primitive">i32</a></tt>', etc...),
+and others. These reserved words cannot conflict with variable names, because
+none of them start with a '%' character.</p>
+
+<p>Here is an example of LLVM code to multiply the integer variable
+'<tt>%X</tt>' by 8:</p>
+
+<p>The easy way:</p>
+
+<div class="doc_code">
+<pre>
+%result = <a href="#i_mul">mul</a> i32 %X, 8
+</pre>
+</div>
+
+<p>After strength reduction:</p>
+
+<div class="doc_code">
+<pre>
+%result = <a href="#i_shl">shl</a> i32 %X, i8 3
+</pre>
+</div>
+
+<p>And the hard way:</p>
+
+<div class="doc_code">
+<pre>
+<a href="#i_add">add</a> i32 %X, %X <i>; yields {i32}:%0</i>
+<a href="#i_add">add</a> i32 %0, %0 <i>; yields {i32}:%1</i>
+%result = <a href="#i_add">add</a> i32 %1, %1
+</pre>
+</div>
+
+<p>This last way of multiplying <tt>%X</tt> by 8 illustrates several
+important lexical features of LLVM:</p>
+
+<ol>
+
+ <li>Comments are delimited with a '<tt>;</tt>' and go until the end of
+ line.</li>
+
+ <li>Unnamed temporaries are created when the result of a computation is not
+ assigned to a named value.</li>
+
+ <li>Unnamed temporaries are numbered sequentially.</li>
+
+</ol>
+
+<p>...and it also shows a convention that we follow in this document. When
+demonstrating instructions, we will follow an instruction with a comment that
+defines the type and name of the value produced. Comments are shown in italic
+text.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"> <a name="highlevel">High Level Structure</a> </div>
+<!-- *********************************************************************** -->
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"> <a name="modulestructure">Module Structure</a>
+</div>
+
+<div class="doc_text">
+
+<p>LLVM programs are composed of "Module"s, each of which is a
+translation unit of the input programs. Each module consists of
+functions, global variables, and symbol table entries. Modules may be
+combined together with the LLVM linker, which merges function (and
+global variable) definitions, resolves forward declarations, and merges
+symbol table entries. Here is an example of the "hello world" module:</p>
+
+<div class="doc_code">
+<pre><i>; Declare the string constant as a global constant...</i>
+<a href="#identifiers">@.LC0</a> = <a href="#linkage_internal">internal</a> <a
+ href="#globalvars">constant</a> <a href="#t_array">[13 x i8]</a> c"hello world\0A\00" <i>; [13 x i8]*</i>
+
+<i>; External declaration of the puts function</i>
+<a href="#functionstructure">declare</a> i32 @puts(i8 *) <i>; i32(i8 *)* </i>
+
+<i>; Definition of main function</i>
+define i32 @main() { <i>; i32()* </i>
+ <i>; Convert [13x i8 ]* to i8 *...</i>
+ %cast210 = <a
+ href="#i_getelementptr">getelementptr</a> [13 x i8 ]* @.LC0, i64 0, i64 0 <i>; i8 *</i>
+
+ <i>; Call puts function to write out the string to stdout...</i>
+ <a
+ href="#i_call">call</a> i32 @puts(i8 * %cast210) <i>; i32</i>
+ <a
+ href="#i_ret">ret</a> i32 0<br>}<br>
+</pre>
+</div>
+
+<p>This example is made up of a <a href="#globalvars">global variable</a>
+named "<tt>.LC0</tt>", an external declaration of the "<tt>puts</tt>"
+function, and a <a href="#functionstructure">function definition</a>
+for "<tt>main</tt>".</p>
+
+<p>In general, a module is made up of a list of global values,
+where both functions and global variables are global values. Global values are
+represented by a pointer to a memory location (in this case, a pointer to an
+array of char, and a pointer to a function), and have one of the following <a
+href="#linkage">linkage types</a>.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="linkage">Linkage Types</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+All Global Variables and Functions have one of the following types of linkage:
+</p>
+
+<dl>
+
+ <dt><tt><b><a name="linkage_internal">internal</a></b></tt>: </dt>
+
+ <dd>Global values with internal linkage are only directly accessible by
+ objects in the current module. In particular, linking code into a module with
+ an internal global value may cause the internal to be renamed as necessary to
+ avoid collisions. Because the symbol is internal to the module, all
+ references can be updated. This corresponds to the notion of the
+ '<tt>static</tt>' keyword in C.
+ </dd>
+
+ <dt><tt><b><a name="linkage_linkonce">linkonce</a></b></tt>: </dt>
+
+ <dd>Globals with "<tt>linkonce</tt>" linkage are merged with other globals of
+ the same name when linkage occurs. This is typically used to implement
+ inline functions, templates, or other code which must be generated in each
+ translation unit that uses it. Unreferenced <tt>linkonce</tt> globals are
+ allowed to be discarded.
+ </dd>
+
+ <dt><tt><b><a name="linkage_weak">weak</a></b></tt>: </dt>
+
+ <dd>"<tt>weak</tt>" linkage is exactly the same as <tt>linkonce</tt> linkage,
+ except that unreferenced <tt>weak</tt> globals may not be discarded. This is
+ used for globals that may be emitted in multiple translation units, but that
+ are not guaranteed to be emitted into every translation unit that uses them.
+ One example of this is common globals in C, such as "<tt>int X;</tt>" at
+ global scope.
+ </dd>
+
+ <dt><tt><b><a name="linkage_appending">appending</a></b></tt>: </dt>
+
+ <dd>"<tt>appending</tt>" linkage may only be applied to global variables of
+ pointer to array type. When two global variables with appending linkage are
+ linked together, the two global arrays are appended together. This is the
+ LLVM, typesafe, equivalent of having the system linker append together
+ "sections" with identical names when .o files are linked.
+ </dd>
+
+ <dt><tt><b><a name="linkage_externweak">extern_weak</a></b></tt>: </dt>
+ <dd>The semantics of this linkage follow the ELF model: the symbol is weak
+ until linked; if not linked, the symbol becomes null instead of being an
+ undefined reference.
+ </dd>
+
+ <dt><tt><b><a name="linkage_external">externally visible</a></b></tt>:</dt>
+
+ <dd>If none of the above identifiers are used, the global is externally
+ visible, meaning that it participates in linkage and can be used to resolve
+ external symbol references.
+ </dd>
+</dl>
+
+ <p>
+ The next two types of linkage are targeted for the Microsoft Windows platform
+ only. They are designed to support importing (exporting) symbols from (to)
+ DLLs.
+ </p>
+
+ <dl>
+ <dt><tt><b><a name="linkage_dllimport">dllimport</a></b></tt>: </dt>
+
+ <dd>"<tt>dllimport</tt>" linkage causes the compiler to reference a function
+ or variable via a global pointer to a pointer that is set up by the DLL
+ exporting the symbol. On Microsoft Windows targets, the pointer name is
+ formed by combining <code>_imp__</code> and the function or variable name.
+ </dd>
+
+ <dt><tt><b><a name="linkage_dllexport">dllexport</a></b></tt>: </dt>
+
+ <dd>"<tt>dllexport</tt>" linkage causes the compiler to provide a global
+ pointer to a pointer in a DLL, so that it can be referenced with the
+ <tt>dllimport</tt> attribute. On Microsoft Windows targets, the pointer
+ name is formed by combining <code>_imp__</code> and the function or variable
+ name.
+ </dd>
+
+</dl>
+
+<p>For example, since the "<tt>.LC0</tt>"
+variable is defined to be internal, if another module defined a "<tt>.LC0</tt>"
+variable and was linked with this one, one of the two would be renamed,
+preventing a collision. Since "<tt>main</tt>" and "<tt>puts</tt>" are
+external (i.e., lacking any linkage declarations), they are accessible
+outside of the current module.</p>
+<p>It is illegal for a function <i>declaration</i>
+to have any linkage type other than "externally visible", <tt>dllimport</tt>,
+or <tt>extern_weak</tt>.</p>
+<p>Aliases can have only <tt>external</tt>, <tt>internal</tt> and <tt>weak</tt>
+linkages.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="callingconv">Calling Conventions</a>
+</div>
+
+<div class="doc_text">
+
+<p>LLVM <a href="#functionstructure">functions</a>, <a href="#i_call">calls</a>
+and <a href="#i_invoke">invokes</a> can all have an optional calling convention
+specified for the call. The calling convention of any pair of dynamic
+caller/callee must match, or the behavior of the program is undefined. The
+following calling conventions are supported by LLVM, and more may be added in
+the future:</p>
+
+<dl>
+ <dt><b>"<tt>ccc</tt>" - The C calling convention</b>:</dt>
+
+ <dd>This calling convention (the default if no other calling convention is
+ specified) matches the target C calling conventions. This calling convention
+ supports varargs function calls and tolerates some mismatch in the declared
+ prototype and implemented declaration of the function (as does normal C).
+ </dd>
+
+ <dt><b>"<tt>fastcc</tt>" - The fast calling convention</b>:</dt>
+
+ <dd>This calling convention attempts to make calls as fast as possible
+ (e.g. by passing things in registers). This calling convention allows the
+ target to use whatever tricks it wants to produce fast code for the target,
+ without having to conform to an externally specified ABI. Implementations of
+ this convention should allow arbitrary tail call optimization to be supported.
+ This calling convention does not support varargs and requires the prototype of
+ all callees to exactly match the prototype of the function definition.
+ </dd>
+
+ <dt><b>"<tt>coldcc</tt>" - The cold calling convention</b>:</dt>
+
+ <dd>This calling convention attempts to make code in the caller as efficient
+ as possible under the assumption that the call is not commonly executed. As
+ such, these calls often preserve all registers so that the call does not break
+ any live ranges in the caller side. This calling convention does not support
+ varargs and requires the prototype of all callees to exactly match the
+ prototype of the function definition.
+ </dd>
+
+ <dt><b>"<tt>cc &lt;<em>n</em>&gt;</tt>" - Numbered convention</b>:</dt>
+
+ <dd>Any calling convention may be specified by number, allowing
+ target-specific calling conventions to be used. Target specific calling
+ conventions start at 64.
+ </dd>
+</dl>
+
+<p>More calling conventions can be added/defined on an as-needed basis, to
+support pascal conventions or any other well-known target-independent
+convention.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="visibility">Visibility Styles</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+All Global Variables and Functions have one of the following visibility styles:
+</p>
+
+<dl>
+ <dt><b>"<tt>default</tt>" - Default style</b>:</dt>
+
+ <dd>On ELF, default visibility means that the declaration is visible to other
+ modules and, in shared libraries, means that the declared entity may be
+ overridden. On Darwin, default visibility means that the declaration is
+ visible to other modules. Default visibility corresponds to "external
+ linkage" in the language.
+ </dd>
+
+ <dt><b>"<tt>hidden</tt>" - Hidden style</b>:</dt>
+
+ <dd>Two declarations of an object with hidden visibility refer to the same
+ object if they are in the same shared object. Usually, hidden visibility
+ indicates that the symbol will not be placed into the dynamic symbol table,
+ so no other module (executable or shared library) can reference it
+ directly.
+ </dd>
+
+ <dt><b>"<tt>protected</tt>" - Protected style</b>:</dt>
+
+ <dd>On ELF, protected visibility indicates that the symbol will be placed in
+ the dynamic symbol table, but that references within the defining module will
+ bind to the local symbol. That is, the symbol cannot be overridden by another
+ module.
+ </dd>
+</dl>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="globalvars">Global Variables</a>
+</div>
+
+<div class="doc_text">
+
+<p>Global variables define regions of memory allocated at compilation time
+instead of run-time. Global variables may optionally be initialized, may have
+an explicit section to be placed in, and may have an optional explicit alignment
+specified. A variable may be defined as "thread_local", which means that it
+will not be shared by threads (each thread will have a separate copy of the
+variable). A variable may be defined as a global "constant," which indicates
+that the contents of the variable will <b>never</b> be modified (enabling better
+optimization, allowing the global data to be placed in the read-only section of
+an executable, etc). Note that variables that need runtime initialization
+cannot be marked "constant" as there is a store to the variable.</p>
+
+<p>
+LLVM explicitly allows <em>declarations</em> of global variables to be marked
+constant, even if the final definition of the global is not. This capability
+can be used to enable slightly better optimization of the program, but requires
+the language definition to guarantee that optimizations based on the
+'constantness' are valid for the translation units that do not include the
+definition.
+</p>
+
+<p>As SSA values, global variables define pointer values that are in
+scope (i.e. they dominate) all basic blocks in the program. Global
+variables always define a pointer to their "content" type because they
+describe a region of memory, and all memory objects in LLVM are
+accessed through pointers.</p>
+
+<p>LLVM allows an explicit section to be specified for globals. If the target
+supports it, it will emit globals to the section specified.</p>
+
+<p>An explicit alignment may be specified for a global. If not present, or if
+the alignment is set to zero, the alignment of the global is set by the target
+to whatever it feels convenient. If an explicit alignment is specified, the
+global is forced to have at least that much alignment. All alignments must be
+a power of 2.</p>
+
+<p>For example, the following defines a global with an initializer, section,
+ and alignment:</p>
+
+<div class="doc_code">
+<pre>
+@G = constant float 1.0, section "foo", align 4
+</pre>
+</div>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="functionstructure">Functions</a>
+</div>
+
+<div class="doc_text">
+
+<p>LLVM function definitions consist of the "<tt>define</tt>" keyword,
+an optional <a href="#linkage">linkage type</a>, an optional
+<a href="#visibility">visibility style</a>, an optional
+<a href="#callingconv">calling convention</a>, a return type, an optional
+<a href="#paramattrs">parameter attribute</a> for the return type, a function
+name, a (possibly empty) argument list (each with optional
+<a href="#paramattrs">parameter attributes</a>), an optional section, an
+optional alignment, an opening curly brace, a list of basic blocks, and a
+closing curly brace.
+
+LLVM function declarations consist of the "<tt>declare</tt>" keyword, an
+optional <a href="#linkage">linkage type</a>, an optional
+<a href="#visibility">visibility style</a>, an optional
+<a href="#callingconv">calling convention</a>, a return type, an optional
+<a href="#paramattrs">parameter attribute</a> for the return type, a function
+name, a possibly empty list of arguments, and an optional alignment.</p>
+
+<p>A function definition contains a list of basic blocks, forming the CFG for
+the function. Each basic block may optionally start with a label (giving the
+basic block a symbol table entry), contains a list of instructions, and ends
+with a <a href="#terminators">terminator</a> instruction (such as a branch or
+function return).</p>
+
+<p>The first basic block in a function is special in two ways: it is immediately
+executed on entrance to the function, and it is not allowed to have predecessor
+basic blocks (i.e. there can not be any branches to the entry block of a
+function). Because the block can have no predecessors, it also cannot have any
+<a href="#i_phi">PHI nodes</a>.</p>
+
+<p>LLVM allows an explicit section to be specified for functions. If the target
+supports it, it will emit functions to the section specified.</p>
+
+<p>An explicit alignment may be specified for a function. If not present, or if
+the alignment is set to zero, the alignment of the function is set by the target
+to whatever it feels convenient. If an explicit alignment is specified, the
+function is forced to have at least that much alignment. All alignments must be
+a power of 2.</p>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="aliasstructure">Aliases</a>
+</div>
+<div class="doc_text">
+ <p>Aliases act as a "second name" for the aliasee value (which can be a
+ function, a global variable, or a bitcast of a global value). Aliases may have an
+ optional <a href="#linkage">linkage type</a>, and an
+ optional <a href="#visibility">visibility style</a>.</p>
+
+ <h5>Syntax:</h5>
+
+<div class="doc_code">
+<pre>
+@&lt;Name&gt; = [Linkage] [Visibility] alias &lt;AliaseeTy&gt; @&lt;Aliasee&gt;
+</pre>
+</div>
+
+</div>
+
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="paramattrs">Parameter Attributes</a></div>
+<div class="doc_text">
+ <p>The return type and each parameter of a function type may have a set of
+ <i>parameter attributes</i> associated with them. Parameter attributes are
+ used to communicate additional information about the result or parameters of
+ a function. Parameter attributes are considered to be part of the function
+ type so two function types that differ only by the parameter attributes
+ are different function types.</p>
+
+ <p>Parameter attributes are simple keywords that follow the type specified. If
+ multiple parameter attributes are needed, they are space separated. For
+ example:</p>
+
+<div class="doc_code">
+<pre>
+%someFunc = i16 (i8 sext %someParam) zext
+%someFunc = i16 (i8 zext %someParam) zext
+</pre>
+</div>
+
+ <p>Note that the two function types above are unique because the parameter has
+ a different attribute (sext in the first one, zext in the second). Also note
+ that the attribute for the function result (zext) comes immediately after the
+ argument list.</p>
+
+ <p>Currently, only the following parameter attributes are defined:</p>
+ <dl>
+ <dt><tt>zext</tt></dt>
+ <dd>This indicates that the parameter should be zero extended just before
+ a call to this function.</dd>
+ <dt><tt>sext</tt></dt>
+ <dd>This indicates that the parameter should be sign extended just before
+ a call to this function.</dd>
+ <dt><tt>inreg</tt></dt>
+ <dd>This indicates that the parameter should be placed in a register (if
+ possible) when assembling the function call. Support for this attribute is
+ target-specific.</dd>
+ <dt><tt>sret</tt></dt>
+ <dd>This indicates that the parameter specifies the address of a structure
+ that is the return value of the function in the source program.</dd>
+ <dt><tt>noalias</tt></dt>
+ <dd>This indicates that the parameter does not alias any other object or any
+ other "noalias" objects during the function call.</dd>
+ <dt><tt>noreturn</tt></dt>
+ <dd>This function attribute indicates that the function never returns. This
+ indicates to LLVM that every call to this function should be treated as if
+ an <tt>unreachable</tt> instruction immediately followed the call.</dd>
+ <dt><tt>nounwind</tt></dt>
+ <dd>This function attribute indicates that the function type does not use
+ the unwind instruction and does not allow stack unwinding to propagate
+ through it.</dd>
+ </dl>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="moduleasm">Module-Level Inline Assembly</a>
+</div>
+
+<div class="doc_text">
+<p>
+Modules may contain "module-level inline asm" blocks, which correspond to the
+GCC "file scope inline asm" blocks. These blocks are internally concatenated by
+LLVM and treated as a single unit, but may be separated in the .ll file if
+desired. The syntax is very simple:
+</p>
+
+<div class="doc_code">
+<pre>
+module asm "inline asm code goes here"
+module asm "more can go here"
+</pre>
+</div>
+
+<p>The strings can contain any character by escaping non-printable characters.
+ The escape sequence used is simply "\xx" where "xx" is the two digit hex code
+ for the number.
+</p>
+
+<p>
+ The inline asm code is simply printed to the machine code .s file when
+ assembly code is generated.
+</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="datalayout">Data Layout</a>
+</div>
+
+<div class="doc_text">
+<p>A module may specify a target specific data layout string that specifies how
+data is to be laid out in memory. The syntax for the data layout is simply:</p>
+<pre> target datalayout = "<i>layout specification</i>"</pre>
+<p>The <i>layout specification</i> consists of a list of specifications
+separated by the minus sign character ('-'). Each specification starts with a
+letter and may include other information after the letter to define some
+aspect of the data layout. The specifications accepted are as follows: </p>
+<dl>
+ <dt><tt>E</tt></dt>
+ <dd>Specifies that the target lays out data in big-endian form. That is, the
+ bits with the most significance have the lowest address location.</dd>
+ <dt><tt>e</tt></dt>
+ <dd>Specifies that the target lays out data in little-endian form. That is,
+ the bits with the least significance have the lowest address location.</dd>
+ <dt><tt>p:<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
+ <dd>This specifies the <i>size</i> of a pointer and its <i>abi</i> and
+ <i>preferred</i> alignments. All sizes are in bits. Specifying the <i>pref</i>
+ alignment is optional. If omitted, the preceding <tt>:</tt> should be omitted
+ too.</dd>
+ <dt><tt>i<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
+ <dd>This specifies the alignment for an integer type of a given bit
+ <i>size</i>. The value of <i>size</i> must be in the range [1,2^23).</dd>
+ <dt><tt>v<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
+ <dd>This specifies the alignment for a vector type of a given bit
+ <i>size</i>.</dd>
+ <dt><tt>f<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
+ <dd>This specifies the alignment for a floating point type of a given bit
+ <i>size</i>. The value of <i>size</i> must be either 32 (float) or 64
+ (double).</dd>
+ <dt><tt>a<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
+ <dd>This specifies the alignment for an aggregate type of a given bit
+ <i>size</i>.</dd>
+</dl>
+<p>When constructing the data layout for a given target, LLVM starts with a
+default set of specifications which are then (possibly) overridden by the
+specifications in the <tt>datalayout</tt> keyword. The default specifications
+are given in this list:</p>
+<ul>
+ <li><tt>E</tt> - big endian</li>
+ <li><tt>p:32:64:64</tt> - 32-bit pointers with 64-bit alignment</li>
+ <li><tt>i1:8:8</tt> - i1 is 8-bit (byte) aligned</li>
+ <li><tt>i8:8:8</tt> - i8 is 8-bit (byte) aligned</li>
+ <li><tt>i16:16:16</tt> - i16 is 16-bit aligned</li>
+ <li><tt>i32:32:32</tt> - i32 is 32-bit aligned</li>
+ <li><tt>i64:32:64</tt> - i64 has abi alignment of 32-bits but preferred
+ alignment of 64-bits</li>
+ <li><tt>f32:32:32</tt> - float is 32-bit aligned</li>
+ <li><tt>f64:64:64</tt> - double is 64-bit aligned</li>
+ <li><tt>v64:64:64</tt> - 64-bit vector is 64-bit aligned</li>
+ <li><tt>v128:128:128</tt> - 128-bit vector is 128-bit aligned</li>
+ <li><tt>a0:0:1</tt> - aggregates are 8-bit aligned</li>
+</ul>
+<p>When LLVM is determining the alignment for a given type, it uses the
+following rules:</p>
+<ol>
+ <li>If the type sought is an exact match for one of the specifications, that
+ specification is used.</li>
+ <li>If no match is found, and the type sought is an integer type, then the
+ smallest integer type that is larger than the bitwidth of the sought type is
+ used. If none of the specifications are larger than the bitwidth then the
+ largest integer type is used. For example, given the default specifications
+ above, the i7 type will use the alignment of i8 (next largest) while both
+ i65 and i256 will use the alignment of i64 (largest specified).</li>
+ <li>If no match is found, and the type sought is a vector type, then the
+ largest vector type that is smaller than the sought vector type will be used
+ as a fall back. This happens because &lt;128 x double&gt; can be implemented in
+ terms of 64 &lt;2 x double&gt;, for example.</li>
+</ol>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"> <a name="typesystem">Type System</a> </div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>The LLVM type system is one of the most important features of the
+intermediate representation. Being typed enables a number of
+optimizations to be performed on the IR directly, without having to do
+extra analyses on the side before the transformation. A strong type
+system makes it easier to read the generated code and enables novel
+analyses and transformations that are not feasible to perform on normal
+three address code representations.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"> <a name="t_primitive">Primitive Types</a> </div>
+<div class="doc_text">
+<p>The primitive types are the fundamental building blocks of the LLVM
+system. The current set of primitive types is as follows:</p>
+
+<table class="layout">
+ <tr class="layout">
+ <td class="left">
+ <table>
+ <tbody>
+ <tr><th>Type</th><th>Description</th></tr>
+ <tr><td><tt><a name="t_void">void</a></tt></td><td>No value</td></tr>
+ <tr><td><tt>label</tt></td><td>Branch destination</td></tr>
+ </tbody>
+ </table>
+ </td>
+ <td class="right">
+ <table>
+ <tbody>
+ <tr><th>Type</th><th>Description</th></tr>
+ <tr><td><tt>float</tt></td><td>32-bit floating point value</td></tr>
+ <tr><td><tt>double</tt></td><td>64-bit floating point value</td></tr>
+ </tbody>
+ </table>
+ </td>
+ </tr>
+</table>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="t_classifications">Type
+Classifications</a> </div>
+<div class="doc_text">
+<p>These different primitive types fall into a few useful
+classifications:</p>
+
+<table border="1" cellspacing="0" cellpadding="4">
+ <tbody>
+ <tr><th>Classification</th><th>Types</th></tr>
+ <tr>
+ <td><a name="t_integer">integer</a></td>
+ <td><tt>i1, i2, i3, ... i8, ... i16, ... i32, ... i64, ... </tt></td>
+ </tr>
+ <tr>
+ <td><a name="t_floating">floating point</a></td>
+ <td><tt>float, double</tt></td>
+ </tr>
+ <tr>
+ <td><a name="t_firstclass">first class</a></td>
+ <td><tt>i1, ..., float, double, <br/>
+ <a href="#t_pointer">pointer</a>,<a href="#t_vector">vector</a></tt>
+ </td>
+ </tr>
+ </tbody>
+</table>
+
+<p>The <a href="#t_firstclass">first class</a> types are perhaps the
+most important. Values of these types are the only ones which can be
+produced by instructions, passed as arguments, or used as operands to
+instructions. This means that all structures and arrays must be
+manipulated either by pointer or by component.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"> <a name="t_derived">Derived Types</a> </div>
+
+<div class="doc_text">
+
+<p>The real power in LLVM comes from the derived types in the system.
+This is what allows a programmer to represent arrays, functions,
+pointers, and other useful types. Note that these derived types may be
+recursive: For example, it is possible to have a two dimensional array.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="t_integer">Integer Type</a> </div>
+
+<div class="doc_text">
+
+<h5>Overview:</h5>
+<p>The integer type is a very simple derived type that simply specifies an
+arbitrary bit width for the integer type desired. Any bit width from 1 bit to
+2^23-1 (about 8 million) can be specified.</p>
+
+<h5>Syntax:</h5>
+
+<pre>
+ iN
+</pre>
+
+<p>The number of bits the integer will occupy is specified by the <tt>N</tt>
+value.</p>
+
+<h5>Examples:</h5>
+<table class="layout">
+ <tr class="layout">
+ <td class="left">
+ <tt>i1</tt><br/>
+ <tt>i4</tt><br/>
+ <tt>i8</tt><br/>
+ <tt>i16</tt><br/>
+ <tt>i32</tt><br/>
+ <tt>i42</tt><br/>
+ <tt>i64</tt><br/>
+ <tt>i1942652</tt><br/>
+ </td>
+ <td class="left">
+ A boolean integer of 1 bit<br/>
+ A nibble sized integer of 4 bits.<br/>
+ A byte sized integer of 8 bits.<br/>
+ A half word sized integer of 16 bits.<br/>
+ A word sized integer of 32 bits.<br/>
+ An integer whose bit width is the answer. <br/>
+ A double word sized integer of 64 bits.<br/>
+ A really big integer of over 1 million bits.<br/>
+ </td>
+ </tr>
+</table>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="t_array">Array Type</a> </div>
+
+<div class="doc_text">
+
+<h5>Overview:</h5>
+
+<p>The array type is a very simple derived type that arranges elements
+sequentially in memory. The array type requires a size (number of
+elements) and an underlying data type.</p>
+
+<h5>Syntax:</h5>
+
+<pre>
+ [&lt;# elements&gt; x &lt;elementtype&gt;]
+</pre>
+
+<p>The number of elements is a constant integer value; elementtype may
+be any type with a size.</p>
+
+<h5>Examples:</h5>
+<table class="layout">
+ <tr class="layout">
+ <td class="left">
+ <tt>[40 x i32 ]</tt><br/>
+ <tt>[41 x i32 ]</tt><br/>
+ <tt>[40 x i8]</tt><br/>
+ </td>
+ <td class="left">
+ Array of 40 32-bit integer values.<br/>
+ Array of 41 32-bit integer values.<br/>
+ Array of 40 8-bit integer values.<br/>
+ </td>
+ </tr>
+</table>
+<p>Here are some examples of multidimensional arrays:</p>
+<table class="layout">
+ <tr class="layout">
+ <td class="left">
+ <tt>[3 x [4 x i32]]</tt><br/>
+ <tt>[12 x [10 x float]]</tt><br/>
+ <tt>[2 x [3 x [4 x i16]]]</tt><br/>
+ </td>
+ <td class="left">
+ 3x4 array of 32-bit integer values.<br/>
+ 12x10 array of single precision floating point values.<br/>
+ 2x3x4 array of 16-bit integer values.<br/>
+ </td>
+ </tr>
+</table>
+
+<p>Note that 'variable sized arrays' can be implemented in LLVM with a zero
+length array. Normally, accesses past the end of an array are undefined in
+LLVM (e.g. it is illegal to access the 5th element of a 3 element array).
+As a special case, however, zero length arrays are recognized to be variable
+length. This allows implementation of 'pascal style arrays' with the LLVM
+type "{ i32, [0 x float]}", for example.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="t_function">Function Type</a> </div>
+<div class="doc_text">
+<h5>Overview:</h5>
+<p>The function type can be thought of as a function signature. It
+consists of a return type and a list of formal parameter types.
+Function types are usually used to build virtual function tables
+(which are structures of pointers to functions), for indirect function
+calls, and when defining a function.</p>
+<p>
+The return type of a function type cannot be an aggregate type.
+</p>
+<h5>Syntax:</h5>
+<pre> &lt;returntype&gt; (&lt;parameter list&gt;)<br></pre>
+<p>...where '<tt>&lt;parameter list&gt;</tt>' is a comma-separated list of type
+specifiers. Optionally, the parameter list may include a type <tt>...</tt>,
+which indicates that the function takes a variable number of arguments.
+Variable argument functions can access their arguments with the <a
+ href="#int_varargs">variable argument handling intrinsic</a> functions.</p>
+<h5>Examples:</h5>
+<table class="layout">
+ <tr class="layout">
+ <td class="left"><tt>i32 (i32)</tt></td>
+ <td class="left">function taking an <tt>i32</tt>, returning an <tt>i32</tt>
+ </td>
+ </tr><tr class="layout">
+ <td class="left"><tt>float&nbsp;(i16&nbsp;sext,&nbsp;i32&nbsp;*)&nbsp;*
+ </tt></td>
+ <td class="left"><a href="#t_pointer">Pointer</a> to a function that takes
+ an <tt>i16</tt> that should be sign extended and a
+ <a href="#t_pointer">pointer</a> to <tt>i32</tt>, returning
+ <tt>float</tt>.
+ </td>
+ </tr><tr class="layout">
+ <td class="left"><tt>i32 (i8*, ...)</tt></td>
+ <td class="left">A vararg function that takes at least one
+ <a href="#t_pointer">pointer</a> to <tt>i8 </tt> (char in C),
+ which returns an integer. This is the signature for <tt>printf</tt> in
+ LLVM.
+ </td>
+ </tr>
+</table>
+
+</div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="t_struct">Structure Type</a> </div>
+<div class="doc_text">
+<h5>Overview:</h5>
+<p>The structure type is used to represent a collection of data members
+together in memory. The packing of the field types is defined to match
+the ABI of the underlying processor. The elements of a structure may
+be any type that has a size.</p>
+<p>Structures are accessed using '<tt><a href="#i_load">load</a></tt>'
+and '<tt><a href="#i_store">store</a></tt>' by getting a pointer to a
+field with the '<tt><a href="#i_getelementptr">getelementptr</a></tt>'
+instruction.</p>
+<h5>Syntax:</h5>
+<pre> { &lt;type list&gt; }<br></pre>
+<h5>Examples:</h5>
+<table class="layout">
+ <tr class="layout">
+ <td class="left"><tt>{ i32, i32, i32 }</tt></td>
+ <td class="left">A triple of three <tt>i32</tt> values</td>
+ </tr><tr class="layout">
+ <td class="left"><tt>{&nbsp;float,&nbsp;i32&nbsp;(i32)&nbsp;*&nbsp;}</tt></td>
+ <td class="left">A pair, where the first element is a <tt>float</tt> and the
+ second element is a <a href="#t_pointer">pointer</a> to a
+ <a href="#t_function">function</a> that takes an <tt>i32</tt>, returning
+ an <tt>i32</tt>.</td>
+ </tr>
+</table>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="t_pstruct">Packed Structure Type</a>
+</div>
+<div class="doc_text">
+<h5>Overview:</h5>
+<p>The packed structure type is used to represent a collection of data members
+together in memory. There is no padding between fields. Further, the alignment
+of a packed structure is 1 byte. The elements of a packed structure may
+be any type that has a size.</p>
+<p>Structures are accessed using '<tt><a href="#i_load">load</a></tt>'
+and '<tt><a href="#i_store">store</a></tt>' by getting a pointer to a
+field with the '<tt><a href="#i_getelementptr">getelementptr</a></tt>'
+instruction.</p>
+<h5>Syntax:</h5>
+<pre> &lt; { &lt;type list&gt; } &gt; <br></pre>
+<h5>Examples:</h5>
+<table class="layout">
+ <tr class="layout">
+ <td class="left"><tt>&lt; { i32, i32, i32 } &gt;</tt></td>
+ <td class="left">A triple of three <tt>i32</tt> values</td>
+ </tr><tr class="layout">
+ <td class="left"><tt>&lt;&nbsp;{&nbsp;float,&nbsp;i32&nbsp;(i32)&nbsp;*&nbsp;}&nbsp;&gt;</tt></td>
+ <td class="left">A pair, where the first element is a <tt>float</tt> and the
+ second element is a <a href="#t_pointer">pointer</a> to a
+ <a href="#t_function">function</a> that takes an <tt>i32</tt>, returning
+ an <tt>i32</tt>.</td>
+ </tr>
+</table>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="t_pointer">Pointer Type</a> </div>
+<div class="doc_text">
+<h5>Overview:</h5>
+<p>As in many languages, the pointer type represents a pointer or
+reference to another object, which must live in memory.</p>
+<h5>Syntax:</h5>
+<pre> &lt;type&gt; *<br></pre>
+<h5>Examples:</h5>
+<table class="layout">
+ <tr class="layout">
+ <td class="left">
+ <tt>[4 x i32]*</tt><br/>
+ <tt>i32 (i32 *) *</tt><br/>
+ </td>
+ <td class="left">
+ A <a href="#t_pointer">pointer</a> to <a href="#t_array">array</a> of
+ four <tt>i32</tt> values<br/>
+ A <a href="#t_pointer">pointer</a> to a <a
+ href="#t_function">function</a> that takes an <tt>i32*</tt>, returning an
+ <tt>i32</tt>.<br/>
+ </td>
+ </tr>
+</table>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="t_vector">Vector Type</a> </div>
+<div class="doc_text">
+
+<h5>Overview:</h5>
+
+<p>A vector type is a simple derived type that represents a vector
+of elements. Vector types are used when multiple primitive data
+are operated on in parallel using a single instruction (SIMD).
+A vector type requires a size (number of
+elements) and an underlying primitive data type. Vectors must have a power
+of two length (1, 2, 4, 8, 16 ...). Vector types are
+considered <a href="#t_firstclass">first class</a>.</p>
+
+<h5>Syntax:</h5>
+
+<pre>
+ &lt; &lt;# elements&gt; x &lt;elementtype&gt; &gt;
+</pre>
+
+<p>The number of elements is a constant integer value; elementtype may
+be any integer or floating point type.</p>
+
+<h5>Examples:</h5>
+
+<table class="layout">
+ <tr class="layout">
+ <td class="left">
+ <tt>&lt;4 x i32&gt;</tt><br/>
+ <tt>&lt;8 x float&gt;</tt><br/>
+ <tt>&lt;2 x i64&gt;</tt><br/>
+ </td>
+ <td class="left">
+ Vector of 4 32-bit integer values.<br/>
+ Vector of 8 floating-point values.<br/>
+ Vector of 2 64-bit integer values.<br/>
+ </td>
+ </tr>
+</table>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="t_opaque">Opaque Type</a> </div>
+<div class="doc_text">
+
+<h5>Overview:</h5>
+
+<p>Opaque types are used to represent unknown types in the system. This
+corresponds (for example) to the C notion of a forward declared structure type.
+In LLVM, opaque types can eventually be resolved to any type (not just a
+structure type).</p>
+
+<h5>Syntax:</h5>
+
+<pre>
+ opaque
+</pre>
+
+<h5>Examples:</h5>
+
+<table class="layout">
+ <tr class="layout">
+ <td class="left">
+ <tt>opaque</tt>
+ </td>
+ <td class="left">
+ An opaque type.<br/>
+ </td>
+ </tr>
+</table>
+</div>
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section"> <a name="constants">Constants</a> </div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>LLVM has several different basic types of constants. This section describes
+them all and their syntax.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="simpleconstants">Simple Constants</a></div>
+
+<div class="doc_text">
+
+<dl>
+ <dt><b>Boolean constants</b></dt>
+
+ <dd>The two strings '<tt>true</tt>' and '<tt>false</tt>' are both valid
+ constants of the <tt><a href="#t_primitive">i1</a></tt> type.
+ </dd>
+
+ <dt><b>Integer constants</b></dt>
+
+ <dd>Standard integers (such as '4') are constants of the <a
+ href="#t_integer">integer</a> type. Negative numbers may be used with
+ integer types.
+ </dd>
+
+ <dt><b>Floating point constants</b></dt>
+
+ <dd>Floating point constants use standard decimal notation (e.g. 123.421),
+ exponential notation (e.g. 1.23421e+2), or a more precise hexadecimal
+ notation (see below). Floating point constants must have a <a
+ href="#t_floating">floating point</a> type. </dd>
+
+ <dt><b>Null pointer constants</b></dt>
+
+ <dd>The identifier '<tt>null</tt>' is recognized as a null pointer constant
+ and must be of <a href="#t_pointer">pointer type</a>.</dd>
+
+</dl>
+
+<p>The one non-intuitive notation for constants is the optional hexadecimal form
+of floating point constants. For example, the form '<tt>double
+0x432ff973cafa8000</tt>' is equivalent to (but harder to read than) '<tt>double
+4.5e+15</tt>'. The only time hexadecimal floating point constants are required
+(and the only time that they are generated by the disassembler) is when a
+floating point constant must be emitted but it cannot be represented as a
+decimal floating point number. For example, NaN's, infinities, and other
+special values are represented in their IEEE hexadecimal format so that
+assembly and disassembly do not cause any bits to change in the constants.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="aggregateconstants">Aggregate Constants</a>
+</div>
+
+<div class="doc_text">
+<p>Aggregate constants arise from aggregation of simple constants
+and smaller aggregate constants.</p>
+
+<dl>
+ <dt><b>Structure constants</b></dt>
+
+ <dd>Structure constants are represented with notation similar to structure
+ type definitions (a comma separated list of elements, surrounded by braces
+ (<tt>{}</tt>)). For example: "<tt>{ i32 4, float 17.0, i32* @G }</tt>",
+ where "<tt>@G</tt>" is declared as "<tt>@G = external global i32</tt>". Structure constants
+ must have <a href="#t_struct">structure type</a>, and the number and
+ types of elements must match those specified by the type.
+ </dd>
+
+ <dt><b>Array constants</b></dt>
+
+ <dd>Array constants are represented with notation similar to array type
+ definitions (a comma separated list of elements, surrounded by square brackets
+ (<tt>[]</tt>)). For example: "<tt>[ i32 42, i32 11, i32 74 ]</tt>". Array
+ constants must have <a href="#t_array">array type</a>, and the number and
+ types of elements must match those specified by the type.
+ </dd>
+
+ <dt><b>Vector constants</b></dt>
+
+ <dd>Vector constants are represented with notation similar to vector type
+ definitions (a comma separated list of elements, surrounded by
+ less-than/greater-than's (<tt>&lt;&gt;</tt>)). For example: "<tt>&lt; i32 42,
+ i32 11, i32 74, i32 100 &gt;</tt>". Vector constants must have <a
+ href="#t_vector">vector type</a>, and the number and types of elements must
+ match those specified by the type.
+ </dd>
+
+ <dt><b>Zero initialization</b></dt>
+
+ <dd>The string '<tt>zeroinitializer</tt>' can be used to zero initialize a
+ value of <em>any</em> type, including scalar and aggregate types.
+ This is often used to avoid having to print large zero initializers (e.g. for
+ large arrays) and is always exactly equivalent to using explicit zero
+ initializers.
+ </dd>
+</dl>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="globalconstants">Global Variable and Function Addresses</a>
+</div>
+
+<div class="doc_text">
+
+<p>The addresses of <a href="#globalvars">global variables</a> and <a
+href="#functionstructure">functions</a> are always implicitly valid (link-time)
+constants. These constants are explicitly referenced when the <a
+href="#identifiers">identifier for the global</a> is used and always have <a
+href="#t_pointer">pointer</a> type. For example, the following is a legal LLVM
+file:</p>
+
+<div class="doc_code">
+<pre>
+@X = global i32 17
+@Y = global i32 42
+@Z = global [2 x i32*] [ i32* @X, i32* @Y ]
+</pre>
+</div>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="undefvalues">Undefined Values</a></div>
+<div class="doc_text">
+ <p>The string '<tt>undef</tt>' is recognized as a type-less constant that has
+ no specific value. Undefined values may be of any type and be used anywhere
+ a constant is permitted.</p>
+
+ <p>Undefined values indicate to the compiler that the program is well defined
+ no matter what value is used, giving the compiler more freedom to optimize.
+ </p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="constantexprs">Constant Expressions</a>
+</div>
+
+<div class="doc_text">
+
+<p>Constant expressions are used to allow expressions involving other constants
+to be used as constants. Constant expressions may be of any <a
+href="#t_firstclass">first class</a> type and may involve any LLVM operation
+that does not have side effects (e.g. load and call are not supported). The
+following is the syntax for constant expressions:</p>
+
+<dl>
+ <dt><b><tt>trunc ( CST to TYPE )</tt></b></dt>
+ <dd>Truncate a constant to another type. The bit size of CST must be larger
+ than the bit size of TYPE. Both types must be integers.</dd>
+
+ <dt><b><tt>zext ( CST to TYPE )</tt></b></dt>
+ <dd>Zero extend a constant to another type. The bit size of CST must be
+ smaller or equal to the bit size of TYPE. Both types must be integers.</dd>
+
+ <dt><b><tt>sext ( CST to TYPE )</tt></b></dt>
+ <dd>Sign extend a constant to another type. The bit size of CST must be
+ smaller or equal to the bit size of TYPE. Both types must be integers.</dd>
+
+ <dt><b><tt>fptrunc ( CST to TYPE )</tt></b></dt>
+ <dd>Truncate a floating point constant to another floating point type. The
+ size of CST must be larger than the size of TYPE. Both types must be
+ floating point.</dd>
+
+ <dt><b><tt>fpext ( CST to TYPE )</tt></b></dt>
+ <dd>Floating point extend a constant to another type. The size of CST must be
+ smaller or equal to the size of TYPE. Both types must be floating point.</dd>
+
+ <dt><b><tt>fptoui ( CST to TYPE )</tt></b></dt>
+ <dd>Convert a floating point constant to the corresponding unsigned integer
+ constant. TYPE must be an integer type. CST must be floating point. If the
+ value won't fit in the integer type, the results are undefined.</dd>
+
+ <dt><b><tt>fptosi ( CST to TYPE )</tt></b></dt>
+ <dd>Convert a floating point constant to the corresponding signed integer
+ constant. TYPE must be an integer type. CST must be floating point. If the
+ value won't fit in the integer type, the results are undefined.</dd>
+
+ <dt><b><tt>uitofp ( CST to TYPE )</tt></b></dt>
+ <dd>Convert an unsigned integer constant to the corresponding floating point
+ constant. TYPE must be floating point. CST must be of integer type. If the
+ value won't fit in the floating point type, the results are undefined.</dd>
+
+ <dt><b><tt>sitofp ( CST to TYPE )</tt></b></dt>
+ <dd>Convert a signed integer constant to the corresponding floating point
+ constant. TYPE must be floating point. CST must be of integer type. If the
+ value won't fit in the floating point type, the results are undefined.</dd>
+
+ <dt><b><tt>ptrtoint ( CST to TYPE )</tt></b></dt>
+ <dd>Convert a pointer typed constant to the corresponding integer constant.
+ TYPE must be an integer type. CST must be of pointer type. The CST value is
+ zero extended, truncated, or unchanged to make it fit in TYPE.</dd>
+
+ <dt><b><tt>inttoptr ( CST to TYPE )</tt></b></dt>
+ <dd>Convert an integer constant to a pointer constant. TYPE must be a
+ pointer type. CST must be of integer type. The CST value is zero extended,
+ truncated, or unchanged to make it fit in a pointer size. This one is
+ <i>really</i> dangerous!</dd>
+
+ <dt><b><tt>bitcast ( CST to TYPE )</tt></b></dt>
+ <dd>Convert a constant, CST, to another TYPE. The size of CST and TYPE must be
+ identical (same number of bits). The conversion is done as if the CST value
+ was stored to memory and read back as TYPE. In other words, no bits change
+ with this operator, just the type. This can be used for conversion of
+ vector types to any other type, as long as they have the same bit width. For
+ pointers it is only valid to cast to another pointer type.
+ </dd>
+
+ <dt><b><tt>getelementptr ( CSTPTR, IDX0, IDX1, ... )</tt></b></dt>
+
+ <dd>Perform the <a href="#i_getelementptr">getelementptr operation</a> on
+ constants. As with the <a href="#i_getelementptr">getelementptr</a>
+ instruction, the index list may have zero or more indexes, which are required
+ to make sense for the type of "CSTPTR".</dd>
+
+ <dt><b><tt>select ( COND, VAL1, VAL2 )</tt></b></dt>
+
+ <dd>Perform the <a href="#i_select">select operation</a> on
+ constants.</dd>
+
+ <dt><b><tt>icmp COND ( VAL1, VAL2 )</tt></b></dt>
+ <dd>Performs the <a href="#i_icmp">icmp operation</a> on constants.</dd>
+
+ <dt><b><tt>fcmp COND ( VAL1, VAL2 )</tt></b></dt>
+ <dd>Performs the <a href="#i_fcmp">fcmp operation</a> on constants.</dd>
+
+ <dt><b><tt>extractelement ( VAL, IDX )</tt></b></dt>
+
+ <dd>Perform the <a href="#i_extractelement">extractelement
+ operation</a> on constants.</dd>
+
+ <dt><b><tt>insertelement ( VAL, ELT, IDX )</tt></b></dt>
+
+ <dd>Perform the <a href="#i_insertelement">insertelement
+ operation</a> on constants.</dd>
+
+
+ <dt><b><tt>shufflevector ( VEC1, VEC2, IDXMASK )</tt></b></dt>
+
+ <dd>Perform the <a href="#i_shufflevector">shufflevector
+ operation</a> on constants.</dd>
+
+ <dt><b><tt>OPCODE ( LHS, RHS )</tt></b></dt>
+
+ <dd>Perform the specified operation on the LHS and RHS constants. OPCODE may
+ be any of the <a href="#binaryops">binary</a> or <a href="#bitwiseops">bitwise
+ binary</a> operations. The constraints on operands are the same as those for
+ the corresponding instruction (e.g. no bitwise operations on floating point
+ values are allowed).</dd>
+</dl>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"> <a name="othervalues">Other Values</a> </div>
+<!-- *********************************************************************** -->
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+<a name="inlineasm">Inline Assembler Expressions</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+LLVM supports inline assembler expressions (as opposed to <a href="#moduleasm">
+Module-Level Inline Assembly</a>) through the use of a special value. This
+value represents the inline assembler as a string (containing the instructions
+to emit), a list of operand constraints (stored as a string), and a flag that
+indicates whether or not the inline asm expression has side effects. An example
+inline assembler expression is:
+</p>
+
+<div class="doc_code">
+<pre>
+i32 (i32) asm "bswap $0", "=r,r"
+</pre>
+</div>
+
+<p>
+Inline assembler expressions may <b>only</b> be used as the callee operand of
+a <a href="#i_call"><tt>call</tt> instruction</a>. Thus, typically we have:
+</p>
+
+<div class="doc_code">
+<pre>
+%X = call i32 asm "<a href="#int_bswap">bswap</a> $0", "=r,r"(i32 %Y)
+</pre>
+</div>
+
+<p>
+Inline asms with side effects not visible in the constraint list must be marked
+as having side effects. This is done through the use of the
+'<tt>sideeffect</tt>' keyword, like so:
+</p>
+
+<div class="doc_code">
+<pre>
+call void asm sideeffect "eieio", ""()
+</pre>
+</div>
+
+<p>TODO: The format of the asm and constraint strings still needs to be
+documented here. Constraints on what can be done (e.g. duplication, moving,
+etc.) need to be documented.
+</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"> <a name="instref">Instruction Reference</a> </div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>The LLVM instruction set consists of several different
+classifications of instructions: <a href="#terminators">terminator
+instructions</a>, <a href="#binaryops">binary instructions</a>,
+<a href="#bitwiseops">bitwise binary instructions</a>, <a
+ href="#memoryops">memory instructions</a>, and <a href="#otherops">other
+instructions</a>.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"> <a name="terminators">Terminator
+Instructions</a> </div>
+
+<div class="doc_text">
+
+<p>As mentioned <a href="#functionstructure">previously</a>, every
+basic block in a program ends with a "Terminator" instruction, which
+indicates which block should be executed after the current block is
+finished. These terminator instructions typically yield a '<tt>void</tt>'
+value: they produce control flow, not values (the one exception being
+the '<a href="#i_invoke"><tt>invoke</tt></a>' instruction).</p>
+<p>There are six different terminator instructions: the '<a
+ href="#i_ret"><tt>ret</tt></a>' instruction, the '<a href="#i_br"><tt>br</tt></a>'
+instruction, the '<a href="#i_switch"><tt>switch</tt></a>' instruction,
+the '<a href="#i_invoke"><tt>invoke</tt></a>' instruction, the '<a
+ href="#i_unwind"><tt>unwind</tt></a>' instruction, and the '<a
+ href="#i_unreachable"><tt>unreachable</tt></a>' instruction.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="i_ret">'<tt>ret</tt>'
+Instruction</a> </div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre> ret &lt;type&gt; &lt;value&gt; <i>; Return a value from a non-void function</i>
+ ret void <i>; Return from void function</i>
+</pre>
+<h5>Overview:</h5>
+<p>The '<tt>ret</tt>' instruction is used to return control flow (and a
+value) from a function back to the caller.</p>
+<p>There are two forms of the '<tt>ret</tt>' instruction: one that
+returns a value and then causes control flow, and one that just causes
+control flow to occur.</p>
+<h5>Arguments:</h5>
+<p>The '<tt>ret</tt>' instruction may return any '<a
+ href="#t_firstclass">first class</a>' type. Notice that a function is
+not <a href="#wellformed">well formed</a> if there exists a '<tt>ret</tt>'
+instruction inside of the function that returns a value that does not
+match the return type of the function.</p>
+<h5>Semantics:</h5>
+<p>When the '<tt>ret</tt>' instruction is executed, control flow
+returns back to the calling function's context. If the caller is a "<a
+ href="#i_call"><tt>call</tt></a>" instruction, execution continues at
+the instruction after the call. If the caller was an "<a
+ href="#i_invoke"><tt>invoke</tt></a>" instruction, execution continues
+at the beginning of the "normal" destination block. If the instruction
+returns a value, that value shall set the call or invoke instruction's
+return value.</p>
+<h5>Example:</h5>
+<pre> ret i32 5 <i>; Return an integer value of 5</i>
+ ret void <i>; Return from a void function</i>
+</pre>
+</div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="i_br">'<tt>br</tt>' Instruction</a> </div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre> br i1 &lt;cond&gt;, label &lt;iftrue&gt;, label &lt;iffalse&gt;<br> br label &lt;dest&gt; <i>; Unconditional branch</i>
+</pre>
+<h5>Overview:</h5>
+<p>The '<tt>br</tt>' instruction is used to cause control flow to
+transfer to a different basic block in the current function. There are
+two forms of this instruction, corresponding to a conditional branch
+and an unconditional branch.</p>
+<h5>Arguments:</h5>
+<p>The conditional branch form of the '<tt>br</tt>' instruction takes a
+single '<tt>i1</tt>' value and two '<tt>label</tt>' values. The
+unconditional form of the '<tt>br</tt>' instruction takes a single
+'<tt>label</tt>' value as a target.</p>
+<h5>Semantics:</h5>
+<p>Upon execution of a conditional '<tt>br</tt>' instruction, the '<tt>i1</tt>'
+argument is evaluated. If the value is <tt>true</tt>, control flows
+to the '<tt>iftrue</tt>' <tt>label</tt> argument. If "cond" is <tt>false</tt>,
+control flows to the '<tt>iffalse</tt>' <tt>label</tt> argument.</p>
+<h5>Example:</h5>
+<pre>Test:<br> %cond = <a href="#i_icmp">icmp</a> eq i32 %a, %b<br> br i1 %cond, label %IfEqual, label %IfUnequal<br>IfEqual:<br> <a
+ href="#i_ret">ret</a> i32 1<br>IfUnequal:<br> <a href="#i_ret">ret</a> i32 0<br></pre>
+</div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_switch">'<tt>switch</tt>' Instruction</a>
+</div>
+
+<div class="doc_text">
+<h5>Syntax:</h5>
+
+<pre>
+ switch &lt;intty&gt; &lt;value&gt;, label &lt;defaultdest&gt; [ &lt;intty&gt; &lt;val&gt;, label &lt;dest&gt; ... ]
+</pre>
+
+<h5>Overview:</h5>
+
+<p>The '<tt>switch</tt>' instruction is used to transfer control flow to one of
+several different places. It is a generalization of the '<tt>br</tt>'
+instruction, allowing a branch to occur to one of many possible
+destinations.</p>
+
+
+<h5>Arguments:</h5>
+
+<p>The '<tt>switch</tt>' instruction uses three parameters: an integer
+comparison value '<tt>value</tt>', a default '<tt>label</tt>' destination, and
+an array of pairs of comparison value constants and '<tt>label</tt>'s. The
+table is not allowed to contain duplicate constant entries.</p>
+
+<h5>Semantics:</h5>
+
+<p>The <tt>switch</tt> instruction specifies a table of values and
+destinations. When the '<tt>switch</tt>' instruction is executed, this
+table is searched for the given value. If the value is found, control flow is
+transferred to the corresponding destination; otherwise, control flow is
+transferred to the default destination.</p>
+
+<h5>Implementation:</h5>
+
+<p>Depending on properties of the target machine and the particular
+<tt>switch</tt> instruction, this instruction may be code generated in different
+ways. For example, it could be generated as a series of chained conditional
+branches or with a lookup table.</p>
+
+<h5>Example:</h5>
+
+<pre>
+ <i>; Emulate a conditional br instruction</i>
+ %Val = <a href="#i_zext">zext</a> i1 %value to i32
+ switch i32 %Val, label %truedest [i32 0, label %falsedest ]
+
+ <i>; Emulate an unconditional br instruction</i>
+ switch i32 0, label %dest [ ]
+
+ <i>; Implement a jump table:</i>
+ switch i32 %val, label %otherwise [ i32 0, label %onzero
+ i32 1, label %onone
+ i32 2, label %ontwo ]
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_invoke">'<tt>invoke</tt>' Instruction</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+
+<pre>
+ &lt;result&gt; = invoke [<a href="#callingconv">cconv</a>] &lt;ptr to function ty&gt; %&lt;function ptr val&gt;(&lt;function args&gt;)
+ to label &lt;normal label&gt; unwind label &lt;exception label&gt;
+</pre>
+
+<h5>Overview:</h5>
+
+<p>The '<tt>invoke</tt>' instruction causes control to transfer to a specified
+function, with the possibility of control flow transfer to either the
+'<tt>normal</tt>' label or the
+'<tt>exception</tt>' label. If the callee function returns with the
+"<tt><a href="#i_ret">ret</a></tt>" instruction, control flow will return to the
+"normal" label. If the callee (or any indirect callees) returns with the "<a
+href="#i_unwind"><tt>unwind</tt></a>" instruction, control is interrupted and
+continued at the dynamically nearest "exception" label.</p>
+
+<h5>Arguments:</h5>
+
+<p>This instruction requires several arguments:</p>
+
+<ol>
+ <li>
+ The optional "cconv" marker indicates which <a href="#callingconv">calling
+ convention</a> the call should use. If none is specified, the call defaults
+ to using C calling conventions.
+ </li>
+ <li>'<tt>ptr to function ty</tt>': shall be the signature of the pointer to
+ function value being invoked. In most cases, this is a direct function
+ invocation, but indirect <tt>invoke</tt>s are just as possible, branching off
+ an arbitrary pointer to function value.
+ </li>
+
+ <li>'<tt>function ptr val</tt>': An LLVM value containing a pointer to a
+ function to be invoked. </li>
+
+ <li>'<tt>function args</tt>': argument list whose types match the function
+ signature argument types. If the function signature indicates the function
+ accepts a variable number of arguments, the extra arguments can be
+ specified. </li>
+
+ <li>'<tt>normal label</tt>': the label reached when the called function
+ executes a '<tt><a href="#i_ret">ret</a></tt>' instruction. </li>
+
+ <li>'<tt>exception label</tt>': the label reached when a callee returns with
+ the <a href="#i_unwind"><tt>unwind</tt></a> instruction. </li>
+
+</ol>
+
+<h5>Semantics:</h5>
+
+<p>This instruction is designed to operate as a standard '<tt><a
+href="#i_call">call</a></tt>' instruction in most regards. The primary
+difference is that it establishes an association with a label, which is used by
+the runtime library to unwind the stack.</p>
+
+<p>This instruction is used in languages with destructors to ensure that proper
+cleanup is performed in the case of either a <tt>longjmp</tt> or a thrown
+exception. Additionally, this is important for implementation of
+'<tt>catch</tt>' clauses in high-level languages that support them.</p>
+
+<h5>Example:</h5>
+<pre>
+ %retval = invoke i32 %Test(i32 15) to label %Continue
+ unwind label %TestCleanup <i>; {i32}:retval set</i>
+ %retval = invoke <a href="#callingconv">coldcc</a> i32 %Test(i32 15) to label %Continue
+ unwind label %TestCleanup <i>; {i32}:retval set</i>
+</pre>
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+
+<div class="doc_subsubsection"> <a name="i_unwind">'<tt>unwind</tt>'
+Instruction</a> </div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ unwind
+</pre>
+
+<h5>Overview:</h5>
+
+<p>The '<tt>unwind</tt>' instruction unwinds the stack, continuing control flow
+at the first caller in the dynamic call stack which used an <a
+href="#i_invoke"><tt>invoke</tt></a> instruction to perform the call. This is
+primarily used to implement exception handling.</p>
+
+<h5>Semantics:</h5>
+
+<p>The '<tt>unwind</tt>' instruction causes execution of the current function to
+immediately halt. The dynamic call stack is then searched for the first <a
+href="#i_invoke"><tt>invoke</tt></a> instruction on the call stack. Once found,
+execution continues at the "exceptional" destination block specified by the
+<tt>invoke</tt> instruction. If there is no <tt>invoke</tt> instruction in the
+dynamic call chain, undefined behavior results.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+
+<div class="doc_subsubsection"> <a name="i_unreachable">'<tt>unreachable</tt>'
+Instruction</a> </div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ unreachable
+</pre>
+
+<h5>Overview:</h5>
+
+<p>The '<tt>unreachable</tt>' instruction has no defined semantics. This
+instruction is used to inform the optimizer that a particular portion of the
+code is not reachable. This can be used to indicate that the code after a
+no-return function cannot be reached, and other facts.</p>
+
+<h5>Semantics:</h5>
+
+<p>The '<tt>unreachable</tt>' instruction has no defined semantics.</p>
+</div>
+
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"> <a name="binaryops">Binary Operations</a> </div>
+<div class="doc_text">
+<p>Binary operators are used to do most of the computation in a
+program. They require two operands, execute an operation on them, and
+produce a single value. The operands might represent
+multiple data, as is the case with the <a href="#t_vector">vector</a> data type.
+The result value of a binary operator is not
+necessarily the same type as its operands.</p>
+<p>There are several different binary operators:</p>
+</div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="i_add">'<tt>add</tt>'
+Instruction</a> </div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre> &lt;result&gt; = add &lt;ty&gt; &lt;var1&gt;, &lt;var2&gt; <i>; yields {ty}:result</i>
+</pre>
+<h5>Overview:</h5>
+<p>The '<tt>add</tt>' instruction returns the sum of its two operands.</p>
+<h5>Arguments:</h5>
+<p>The two arguments to the '<tt>add</tt>' instruction must be either <a
+ href="#t_integer">integer</a> or <a href="#t_floating">floating point</a> values.
+ This instruction can also take <a href="#t_vector">vector</a> versions of the values.
+Both arguments must have identical types.</p>
+<h5>Semantics:</h5>
+<p>The value produced is the integer or floating point sum of the two
+operands.</p>
+<h5>Example:</h5>
+<pre> &lt;result&gt; = add i32 4, %var <i>; yields {i32}:result = 4 + %var</i>
+</pre>
+</div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="i_sub">'<tt>sub</tt>'
+Instruction</a> </div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre> &lt;result&gt; = sub &lt;ty&gt; &lt;var1&gt;, &lt;var2&gt; <i>; yields {ty}:result</i>
+</pre>
+<h5>Overview:</h5>
+<p>The '<tt>sub</tt>' instruction returns the difference of its two
+operands.</p>
+<p>Note that the '<tt>sub</tt>' instruction is used to represent the '<tt>neg</tt>'
+instruction present in most other intermediate representations.</p>
+<h5>Arguments:</h5>
+<p>The two arguments to the '<tt>sub</tt>' instruction must be either <a
+ href="#t_integer">integer</a> or <a href="#t_floating">floating point</a>
+values.
+This instruction can also take <a href="#t_vector">vector</a> versions of the values.
+Both arguments must have identical types.</p>
+<h5>Semantics:</h5>
+<p>The value produced is the integer or floating point difference of
+the two operands.</p>
+<h5>Example:</h5>
+<pre>
+ &lt;result&gt; = sub i32 4, %var <i>; yields {i32}:result = 4 - %var</i>
+ &lt;result&gt; = sub i32 0, %val <i>; yields {i32}:result = -%val</i>
+</pre>
+</div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="i_mul">'<tt>mul</tt>'
+Instruction</a> </div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre> &lt;result&gt; = mul &lt;ty&gt; &lt;var1&gt;, &lt;var2&gt; <i>; yields {ty}:result</i>
+</pre>
+<h5>Overview:</h5>
+<p>The '<tt>mul</tt>' instruction returns the product of its two
+operands.</p>
+<h5>Arguments:</h5>
+<p>The two arguments to the '<tt>mul</tt>' instruction must be either <a
+ href="#t_integer">integer</a> or <a href="#t_floating">floating point</a>
+values.
+This instruction can also take <a href="#t_vector">vector</a> versions of the values.
+Both arguments must have identical types.</p>
+<h5>Semantics:</h5>
+<p>The value produced is the integer or floating point product of the
+two operands.</p>
+<p>Because the operands are the same width, the result of an integer
+multiplication is the same whether the operands should be deemed unsigned or
+signed.</p>
+<h5>Example:</h5>
+<pre> &lt;result&gt; = mul i32 4, %var <i>; yields {i32}:result = 4 * %var</i>
+</pre>
+</div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="i_udiv">'<tt>udiv</tt>' Instruction
+</a></div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre> &lt;result&gt; = udiv &lt;ty&gt; &lt;var1&gt;, &lt;var2&gt; <i>; yields {ty}:result</i>
+</pre>
+<h5>Overview:</h5>
+<p>The '<tt>udiv</tt>' instruction returns the quotient of its two
+operands.</p>
+<h5>Arguments:</h5>
+<p>The two arguments to the '<tt>udiv</tt>' instruction must be
+<a href="#t_integer">integer</a> values. Both arguments must have identical
+types. This instruction can also take <a href="#t_vector">vector</a> versions
+of the values in which case the elements must be integers.</p>
+<h5>Semantics:</h5>
+<p>The value produced is the unsigned integer quotient of the two operands. This
+instruction always performs an unsigned division operation, regardless of
+whether the arguments are unsigned or not.</p>
+<h5>Example:</h5>
+<pre> &lt;result&gt; = udiv i32 4, %var <i>; yields {i32}:result = 4 / %var</i>
+</pre>
+</div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="i_sdiv">'<tt>sdiv</tt>' Instruction
+</a> </div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre> &lt;result&gt; = sdiv &lt;ty&gt; &lt;var1&gt;, &lt;var2&gt; <i>; yields {ty}:result</i>
+</pre>
+<h5>Overview:</h5>
+<p>The '<tt>sdiv</tt>' instruction returns the quotient of its two
+operands.</p>
+<h5>Arguments:</h5>
+<p>The two arguments to the '<tt>sdiv</tt>' instruction must be
+<a href="#t_integer">integer</a> values. Both arguments must have identical
+types. This instruction can also take <a href="#t_vector">vector</a> versions
+of the values in which case the elements must be integers.</p>
+<h5>Semantics:</h5>
+<p>The value produced is the signed integer quotient of the two operands. This
+instruction always performs a signed division operation, regardless of whether
+the arguments are signed or not.</p>
+<h5>Example:</h5>
+<pre> &lt;result&gt; = sdiv i32 4, %var <i>; yields {i32}:result = 4 / %var</i>
+</pre>
+</div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="i_fdiv">'<tt>fdiv</tt>'
+Instruction</a> </div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre> &lt;result&gt; = fdiv &lt;ty&gt; &lt;var1&gt;, &lt;var2&gt; <i>; yields {ty}:result</i>
+</pre>
+<h5>Overview:</h5>
+<p>The '<tt>fdiv</tt>' instruction returns the quotient of its two
+operands.</p>
+<h5>Arguments:</h5>
+<p>The two arguments to the '<tt>fdiv</tt>' instruction must be
+<a href="#t_floating">floating point</a> values. Both arguments must have
+identical types. This instruction can also take <a href="#t_vector">vector</a>
+versions of floating point values.</p>
+<h5>Semantics:</h5>
+<p>The value produced is the floating point quotient of the two operands.</p>
+<h5>Example:</h5>
+<pre> &lt;result&gt; = fdiv float 4.0, %var <i>; yields {float}:result = 4.0 / %var</i>
+</pre>
+</div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="i_urem">'<tt>urem</tt>' Instruction</a>
+</div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre> &lt;result&gt; = urem &lt;ty&gt; &lt;var1&gt;, &lt;var2&gt; <i>; yields {ty}:result</i>
+</pre>
+<h5>Overview:</h5>
+<p>The '<tt>urem</tt>' instruction returns the remainder from the
+unsigned division of its two arguments.</p>
+<h5>Arguments:</h5>
+<p>The two arguments to the '<tt>urem</tt>' instruction must be
+<a href="#t_integer">integer</a> values. Both arguments must have identical
+types.</p>
+<h5>Semantics:</h5>
+<p>This instruction returns the unsigned integer <i>remainder</i> of a division.
+This instruction always performs an unsigned division to get the remainder,
+regardless of whether the arguments are unsigned or not.</p>
+<h5>Example:</h5>
+<pre> &lt;result&gt; = urem i32 4, %var <i>; yields {i32}:result = 4 % %var</i>
+</pre>
+
+</div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="i_srem">'<tt>srem</tt>'
+Instruction</a> </div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre> &lt;result&gt; = srem &lt;ty&gt; &lt;var1&gt;, &lt;var2&gt; <i>; yields {ty}:result</i>
+</pre>
+<h5>Overview:</h5>
+<p>The '<tt>srem</tt>' instruction returns the remainder from the
+signed division of its two operands.</p>
+<h5>Arguments:</h5>
+<p>The two arguments to the '<tt>srem</tt>' instruction must be
+<a href="#t_integer">integer</a> values. Both arguments must have identical
+types.</p>
+<h5>Semantics:</h5>
+<p>This instruction returns the <i>remainder</i> of a division (where the result
+has the same sign as the dividend, <tt>var1</tt>), not the <i>modulo</i>
+operator (where the result has the same sign as the divisor, <tt>var2</tt>) of
+a value. For more information about the difference, see <a
+ href="http://mathforum.org/dr.math/problems/anne.4.28.99.html">The
+Math Forum</a>. For a table of how this is implemented in various languages,
+please see <a href="http://en.wikipedia.org/wiki/Modulo_operation">
+Wikipedia: modulo operation</a>.</p>
+<h5>Example:</h5>
+<pre> &lt;result&gt; = srem i32 4, %var <i>; yields {i32}:result = 4 % %var</i>
+</pre>
+
+</div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="i_frem">'<tt>frem</tt>'
+Instruction</a> </div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre> &lt;result&gt; = frem &lt;ty&gt; &lt;var1&gt;, &lt;var2&gt; <i>; yields {ty}:result</i>
+</pre>
+<h5>Overview:</h5>
+<p>The '<tt>frem</tt>' instruction returns the remainder from the
+division of its two operands.</p>
+<h5>Arguments:</h5>
+<p>The two arguments to the '<tt>frem</tt>' instruction must be
+<a href="#t_floating">floating point</a> values. Both arguments must have
+identical types.</p>
+<h5>Semantics:</h5>
+<p>This instruction returns the <i>remainder</i> of a division.</p>
+<h5>Example:</h5>
+<pre> &lt;result&gt; = frem float 4.0, %var <i>; yields {float}:result = 4.0 % %var</i>
+</pre>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"> <a name="bitwiseops">Bitwise Binary
+Operations</a> </div>
+<div class="doc_text">
+<p>Bitwise binary operators are used to do various forms of
+bit-twiddling in a program. They are generally very efficient
+instructions and can commonly be strength reduced from other
+instructions. They require two operands, execute an operation on them,
+and produce a single value. The resulting value of the bitwise binary
+operators is always the same type as its first operand.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="i_shl">'<tt>shl</tt>'
+Instruction</a> </div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre> &lt;result&gt; = shl &lt;ty&gt; &lt;var1&gt;, &lt;var2&gt; <i>; yields {ty}:result</i>
+</pre>
+<h5>Overview:</h5>
+<p>The '<tt>shl</tt>' instruction returns the first operand shifted to
+the left a specified number of bits.</p>
+<h5>Arguments:</h5>
+<p>Both arguments to the '<tt>shl</tt>' instruction must be the same <a
+ href="#t_integer">integer</a> type.</p>
+<h5>Semantics:</h5>
+<p>The value produced is <tt>var1</tt> * 2<sup><tt>var2</tt></sup>.</p>
+<h5>Example:</h5><pre>
+ &lt;result&gt; = shl i32 4, %var <i>; yields {i32}: 4 &lt;&lt; %var</i>
+ &lt;result&gt; = shl i32 4, 2 <i>; yields {i32}: 16</i>
+ &lt;result&gt; = shl i32 1, 10 <i>; yields {i32}: 1024</i>
+</pre>
+</div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="i_lshr">'<tt>lshr</tt>'
+Instruction</a> </div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre> &lt;result&gt; = lshr &lt;ty&gt; &lt;var1&gt;, &lt;var2&gt; <i>; yields {ty}:result</i>
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>lshr</tt>' instruction (logical shift right) returns the first
+operand shifted to the right a specified number of bits with zero fill.</p>
+
+<h5>Arguments:</h5>
+<p>Both arguments to the '<tt>lshr</tt>' instruction must be the same
+<a href="#t_integer">integer</a> type.</p>
+
+<h5>Semantics:</h5>
+<p>This instruction always performs a logical shift right operation. The most
+significant bits of the result will be filled with zero bits after the
+shift.</p>
+
+<h5>Example:</h5>
+<pre>
+ &lt;result&gt; = lshr i32 4, 1 <i>; yields {i32}:result = 2</i>
+ &lt;result&gt; = lshr i32 4, 2 <i>; yields {i32}:result = 1</i>
+ &lt;result&gt; = lshr i8 4, 3 <i>; yields {i8}:result = 0</i>
+ &lt;result&gt; = lshr i8 -2, 1 <i>; yields {i8}:result = 0x7F</i>
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="i_ashr">'<tt>ashr</tt>'
+Instruction</a> </div>
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre> &lt;result&gt; = ashr &lt;ty&gt; &lt;var1&gt;, &lt;var2&gt; <i>; yields {ty}:result</i>
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>ashr</tt>' instruction (arithmetic shift right) returns the first
+operand shifted to the right a specified number of bits with sign extension.</p>
+
+<h5>Arguments:</h5>
+<p>Both arguments to the '<tt>ashr</tt>' instruction must be the same
+<a href="#t_integer">integer</a> type.</p>
+
+<h5>Semantics:</h5>
+<p>This instruction always performs an arithmetic shift right operation.
+The most significant bits of the result will be filled with the sign bit
+of <tt>var1</tt>.</p>
+
+<h5>Example:</h5>
+<pre>
+ &lt;result&gt; = ashr i32 4, 1 <i>; yields {i32}:result = 2</i>
+ &lt;result&gt; = ashr i32 4, 2 <i>; yields {i32}:result = 1</i>
+ &lt;result&gt; = ashr i8 4, 3 <i>; yields {i8}:result = 0</i>
+ &lt;result&gt; = ashr i8 -2, 1 <i>; yields {i8}:result = -1</i>
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="i_and">'<tt>and</tt>'
+Instruction</a> </div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre> &lt;result&gt; = and &lt;ty&gt; &lt;var1&gt;, &lt;var2&gt; <i>; yields {ty}:result</i>
+</pre>
+<h5>Overview:</h5>
+<p>The '<tt>and</tt>' instruction returns the bitwise logical and of
+its two operands.</p>
+<h5>Arguments:</h5>
+<p>The two arguments to the '<tt>and</tt>' instruction must be <a
+ href="#t_integer">integer</a> values. Both arguments must have
+identical types.</p>
+<h5>Semantics:</h5>
+<p>The truth table used for the '<tt>and</tt>' instruction is:</p>
+<p> </p>
+<div style="align: center">
+<table border="1" cellspacing="0" cellpadding="4">
+ <tbody>
+ <tr>
+ <td>In0</td>
+ <td>In1</td>
+ <td>Out</td>
+ </tr>
+ <tr>
+ <td>0</td>
+ <td>0</td>
+ <td>0</td>
+ </tr>
+ <tr>
+ <td>0</td>
+ <td>1</td>
+ <td>0</td>
+ </tr>
+ <tr>
+ <td>1</td>
+ <td>0</td>
+ <td>0</td>
+ </tr>
+ <tr>
+ <td>1</td>
+ <td>1</td>
+ <td>1</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<h5>Example:</h5>
+<pre> &lt;result&gt; = and i32 4, %var <i>; yields {i32}:result = 4 &amp; %var</i>
+ &lt;result&gt; = and i32 15, 40 <i>; yields {i32}:result = 8</i>
+ &lt;result&gt; = and i32 4, 8 <i>; yields {i32}:result = 0</i>
+</pre>
+</div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="i_or">'<tt>or</tt>' Instruction</a> </div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre> &lt;result&gt; = or &lt;ty&gt; &lt;var1&gt;, &lt;var2&gt; <i>; yields {ty}:result</i>
+</pre>
+<h5>Overview:</h5>
+<p>The '<tt>or</tt>' instruction returns the bitwise logical inclusive
+or of its two operands.</p>
+<h5>Arguments:</h5>
+<p>The two arguments to the '<tt>or</tt>' instruction must be <a
+ href="#t_integer">integer</a> values. Both arguments must have
+identical types.</p>
+<h5>Semantics:</h5>
+<p>The truth table used for the '<tt>or</tt>' instruction is:</p>
+<p> </p>
+<div style="align: center">
+<table border="1" cellspacing="0" cellpadding="4">
+ <tbody>
+ <tr>
+ <td>In0</td>
+ <td>In1</td>
+ <td>Out</td>
+ </tr>
+ <tr>
+ <td>0</td>
+ <td>0</td>
+ <td>0</td>
+ </tr>
+ <tr>
+ <td>0</td>
+ <td>1</td>
+ <td>1</td>
+ </tr>
+ <tr>
+ <td>1</td>
+ <td>0</td>
+ <td>1</td>
+ </tr>
+ <tr>
+ <td>1</td>
+ <td>1</td>
+ <td>1</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<h5>Example:</h5>
+<pre> &lt;result&gt; = or i32 4, %var <i>; yields {i32}:result = 4 | %var</i>
+ &lt;result&gt; = or i32 15, 40 <i>; yields {i32}:result = 47</i>
+ &lt;result&gt; = or i32 4, 8 <i>; yields {i32}:result = 12</i>
+</pre>
+</div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="i_xor">'<tt>xor</tt>'
+Instruction</a> </div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre> &lt;result&gt; = xor &lt;ty&gt; &lt;var1&gt;, &lt;var2&gt; <i>; yields {ty}:result</i>
+</pre>
+<h5>Overview:</h5>
+<p>The '<tt>xor</tt>' instruction returns the bitwise logical exclusive
+or of its two operands. The <tt>xor</tt> is used to implement the
+"one's complement" operation, which is the "~" operator in C.</p>
+<h5>Arguments:</h5>
+<p>The two arguments to the '<tt>xor</tt>' instruction must be <a
+ href="#t_integer">integer</a> values. Both arguments must have
+identical types.</p>
+<h5>Semantics:</h5>
+<p>The truth table used for the '<tt>xor</tt>' instruction is:</p>
+<p> </p>
+<div style="align: center">
+<table border="1" cellspacing="0" cellpadding="4">
+ <tbody>
+ <tr>
+ <td>In0</td>
+ <td>In1</td>
+ <td>Out</td>
+ </tr>
+ <tr>
+ <td>0</td>
+ <td>0</td>
+ <td>0</td>
+ </tr>
+ <tr>
+ <td>0</td>
+ <td>1</td>
+ <td>1</td>
+ </tr>
+ <tr>
+ <td>1</td>
+ <td>0</td>
+ <td>1</td>
+ </tr>
+ <tr>
+ <td>1</td>
+ <td>1</td>
+ <td>0</td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<p> </p>
+<h5>Example:</h5>
+<pre> &lt;result&gt; = xor i32 4, %var <i>; yields {i32}:result = 4 ^ %var</i>
+ &lt;result&gt; = xor i32 15, 40 <i>; yields {i32}:result = 39</i>
+ &lt;result&gt; = xor i32 4, 8 <i>; yields {i32}:result = 12</i>
+ &lt;result&gt; = xor i32 %V, -1 <i>; yields {i32}:result = ~%V</i>
+</pre>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="vectorops">Vector Operations</a>
+</div>
+
+<div class="doc_text">
+
+<p>LLVM supports several instructions to represent vector operations in a
+target-independent manner. These instructions cover the element-access and
+vector-specific operations needed to process vectors effectively. While LLVM
+does directly support these vector operations, many sophisticated algorithms
+will want to use target-specific intrinsics to take full advantage of a specific
+target.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_extractelement">'<tt>extractelement</tt>' Instruction</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+
+<pre>
+ &lt;result&gt; = extractelement &lt;n x &lt;ty&gt;&gt; &lt;val&gt;, i32 &lt;idx&gt; <i>; yields &lt;ty&gt;</i>
+</pre>
+
+<h5>Overview:</h5>
+
+<p>
+The '<tt>extractelement</tt>' instruction extracts a single scalar
+element from a vector at a specified index.
+</p>
+
+
+<h5>Arguments:</h5>
+
+<p>
+The first operand of an '<tt>extractelement</tt>' instruction is a
+value of <a href="#t_vector">vector</a> type. The second operand is
+an index indicating the position from which to extract the element.
+The index may be a variable.</p>
+
+<h5>Semantics:</h5>
+
+<p>
+The result is a scalar of the same type as the element type of
+<tt>val</tt>. Its value is the value at position <tt>idx</tt> of
+<tt>val</tt>. If <tt>idx</tt> exceeds the length of <tt>val</tt>, the
+results are undefined.
+</p>
+
+<h5>Example:</h5>
+
+<pre>
+ %result = extractelement &lt;4 x i32&gt; %vec, i32 0 <i>; yields i32</i>
+</pre>
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_insertelement">'<tt>insertelement</tt>' Instruction</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+
+<pre>
+ &lt;result&gt; = insertelement &lt;n x &lt;ty&gt;&gt; &lt;val&gt;, &lt;ty&gt; &lt;elt&gt;, i32 &lt;idx&gt; <i>; yields &lt;n x &lt;ty&gt;&gt;</i>
+</pre>
+
+<h5>Overview:</h5>
+
+<p>
+The '<tt>insertelement</tt>' instruction inserts a scalar
+element into a vector at a specified index.
+</p>
+
+
+<h5>Arguments:</h5>
+
+<p>
+The first operand of an '<tt>insertelement</tt>' instruction is a
+value of <a href="#t_vector">vector</a> type. The second operand is a
+scalar value whose type must equal the element type of the first
+operand. The third operand is an index indicating the position at
+which to insert the value. The index may be a variable.</p>
+
+<h5>Semantics:</h5>
+
+<p>
+The result is a vector of the same type as <tt>val</tt>. Its
+element values are those of <tt>val</tt> except at position
+<tt>idx</tt>, where it gets the value <tt>elt</tt>. If <tt>idx</tt>
+exceeds the length of <tt>val</tt>, the results are undefined.
+</p>
+
+<h5>Example:</h5>
+
+<pre>
+ %result = insertelement &lt;4 x i32&gt; %vec, i32 1, i32 0 <i>; yields &lt;4 x i32&gt;</i>
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_shufflevector">'<tt>shufflevector</tt>' Instruction</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+
+<pre>
+ &lt;result&gt; = shufflevector &lt;n x &lt;ty&gt;&gt; &lt;v1&gt;, &lt;n x &lt;ty&gt;&gt; &lt;v2&gt;, &lt;n x i32&gt; &lt;mask&gt; <i>; yields &lt;n x &lt;ty&gt;&gt;</i>
+</pre>
+
+<h5>Overview:</h5>
+
+<p>
+The '<tt>shufflevector</tt>' instruction constructs a permutation of elements
+from two input vectors, returning a vector of the same type.
+</p>
+
+<h5>Arguments:</h5>
+
+<p>
+The first two operands of a '<tt>shufflevector</tt>' instruction are vectors
+with types that match each other and types that match the result of the
+instruction. The third argument is a shuffle mask, which has the same number
+of elements as the other vector type, but whose element type is always 'i32'.
+</p>
+
+<p>
+The shuffle mask operand is required to be a constant vector with either
+constant integer or undef values.
+</p>
+
+<h5>Semantics:</h5>
+
+<p>
+The elements of the two input vectors are numbered from left to right across
+both of the vectors. The shuffle mask operand specifies, for each element of
+the result vector, which element of the two input vectors the result element
+gets. The element selector may be undef (meaning "don't care") and the second
+operand may be undef if performing a shuffle from only one vector.
+</p>
+
+<h5>Example:</h5>
+
+<pre>
+ %result = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; %v2,
+ &lt;4 x i32&gt; &lt;i32 0, i32 4, i32 1, i32 5&gt; <i>; yields &lt;4 x i32&gt;</i>
+ %result = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; undef,
+ &lt;4 x i32&gt; &lt;i32 0, i32 1, i32 2, i32 3&gt; <i>; yields &lt;4 x i32&gt;</i> - Identity shuffle.
+</pre>
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="memoryops">Memory Access and Addressing Operations</a>
+</div>
+
+<div class="doc_text">
+
+<p>A key design point of an SSA-based representation is how it
+represents memory. In LLVM, no memory locations are in SSA form, which
+makes things very simple. This section describes how to read, write,
+allocate, and free memory in LLVM.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_malloc">'<tt>malloc</tt>' Instruction</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+
+<pre>
+ &lt;result&gt; = malloc &lt;type&gt;[, i32 &lt;NumElements&gt;][, align &lt;alignment&gt;] <i>; yields {type*}:result</i>
+</pre>
+
+<h5>Overview:</h5>
+
+<p>The '<tt>malloc</tt>' instruction allocates memory from the system
+heap and returns a pointer to it.</p>
+
+<h5>Arguments:</h5>
+
+<p>The '<tt>malloc</tt>' instruction allocates
+<tt>sizeof(&lt;type&gt;)*NumElements</tt>
+bytes of memory from the operating system and returns a pointer of the
+appropriate type to the program. If "NumElements" is specified, it is the
+number of elements allocated. If an alignment is specified, the value result
+of the allocation is guaranteed to be aligned to at least that boundary. If
+not specified, or if zero, the target can choose to align the allocation on any
+convenient boundary.</p>
+
+<p>'<tt>type</tt>' must be a sized type.</p>
+
+<h5>Semantics:</h5>
+
+<p>Memory is allocated using the system "<tt>malloc</tt>" function, and
+a pointer is returned.</p>
+
+<h5>Example:</h5>
+
+<pre>
+ %array = malloc [4 x i8 ] <i>; yields {[4 x i8]*}:array</i>
+
+ %size = <a href="#i_add">add</a> i32 2, 2 <i>; yields {i32}:size = i32 4</i>
+ %array1 = malloc i8, i32 4 <i>; yields {i8*}:array1</i>
+ %array2 = malloc [12 x i8], i32 %size <i>; yields {[12 x i8]*}:array2</i>
+ %array3 = malloc i32, i32 4, align 1024 <i>; yields {i32*}:array3</i>
+ %array4 = malloc i32, align 1024 <i>; yields {i32*}:array4</i>
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_free">'<tt>free</tt>' Instruction</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+
+<pre>
+ free &lt;type&gt; &lt;value&gt; <i>; yields {void}</i>
+</pre>
+
+<h5>Overview:</h5>
+
+<p>The '<tt>free</tt>' instruction returns memory back to the unused
+memory heap to be reallocated in the future.</p>
+
+<h5>Arguments:</h5>
+
+<p>'<tt>value</tt>' shall be a pointer value that points to a value
+that was allocated with the '<tt><a href="#i_malloc">malloc</a></tt>'
+instruction.</p>
+
+<h5>Semantics:</h5>
+
+<p>Access to the memory pointed to by the pointer is no longer defined
+after this instruction executes.</p>
+
+<h5>Example:</h5>
+
+<pre>
+ %array = <a href="#i_malloc">malloc</a> [4 x i8] <i>; yields {[4 x i8]*}:array</i>
+ free [4 x i8]* %array
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_alloca">'<tt>alloca</tt>' Instruction</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+
+<pre>
+ &lt;result&gt; = alloca &lt;type&gt;[, i32 &lt;NumElements&gt;][, align &lt;alignment&gt;] <i>; yields {type*}:result</i>
+</pre>
+
+<h5>Overview:</h5>
+
+<p>The '<tt>alloca</tt>' instruction allocates memory on the stack frame of the
+currently executing function, to be automatically released when this function
+returns to its caller.</p>
+
+<h5>Arguments:</h5>
+
+<p>The '<tt>alloca</tt>' instruction allocates <tt>sizeof(&lt;type&gt;)*NumElements</tt>
+bytes of memory on the runtime stack, returning a pointer of the
+appropriate type to the program. If "NumElements" is specified, it is the
+number of elements allocated. If an alignment is specified, the value result
+of the allocation is guaranteed to be aligned to at least that boundary. If
+not specified, or if zero, the target can choose to align the allocation on any
+convenient boundary.</p>
+
+<p>'<tt>type</tt>' may be any sized type.</p>
+
+<h5>Semantics:</h5>
+
+<p>Memory is allocated; a pointer is returned. '<tt>alloca</tt>'d
+memory is automatically released when the function returns. The '<tt>alloca</tt>'
+instruction is commonly used to represent automatic variables that must
+have an address available. When the function returns (either with the <tt><a
+ href="#i_ret">ret</a></tt> or <tt><a href="#i_unwind">unwind</a></tt>
+instructions), the memory is reclaimed.</p>
+
+<h5>Example:</h5>
+
+<pre>
+ %ptr = alloca i32 <i>; yields {i32*}:ptr</i>
+ %ptr = alloca i32, i32 4 <i>; yields {i32*}:ptr</i>
+ %ptr = alloca i32, i32 4, align 1024 <i>; yields {i32*}:ptr</i>
+ %ptr = alloca i32, align 1024 <i>; yields {i32*}:ptr</i>
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="i_load">'<tt>load</tt>'
+Instruction</a> </div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre> &lt;result&gt; = load &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;]<br> &lt;result&gt; = volatile load &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;]<br></pre>
+<h5>Overview:</h5>
+<p>The '<tt>load</tt>' instruction is used to read from memory.</p>
+<h5>Arguments:</h5>
+<p>The argument to the '<tt>load</tt>' instruction specifies the memory
+address from which to load. The pointer must point to a <a
+ href="#t_firstclass">first class</a> type. If the <tt>load</tt> is
+marked as <tt>volatile</tt>, then the optimizer is not allowed to modify
+the number or order of execution of this <tt>load</tt> with other
+volatile <tt>load</tt> and <tt><a href="#i_store">store</a></tt>
+instructions. </p>
+<h5>Semantics:</h5>
+<p>The location of memory pointed to is loaded.</p>
+<h5>Examples:</h5>
+<pre> %ptr = <a href="#i_alloca">alloca</a> i32 <i>; yields {i32*}:ptr</i>
+ <a
+ href="#i_store">store</a> i32 3, i32* %ptr <i>; yields {void}</i>
+ %val = load i32* %ptr <i>; yields {i32}:val = i32 3</i>
+</pre>
+</div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="i_store">'<tt>store</tt>'
+Instruction</a> </div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre> store &lt;ty&gt; &lt;value&gt;, &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;] <i>; yields {void}</i>
+ volatile store &lt;ty&gt; &lt;value&gt;, &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;] <i>; yields {void}</i>
+</pre>
+<h5>Overview:</h5>
+<p>The '<tt>store</tt>' instruction is used to write to memory.</p>
+<h5>Arguments:</h5>
+<p>There are two arguments to the '<tt>store</tt>' instruction: a value
+to store and an address at which to store it. The type of the '<tt>&lt;pointer&gt;</tt>'
+operand must be a pointer to the type of the '<tt>&lt;value&gt;</tt>'
+operand. If the <tt>store</tt> is marked as <tt>volatile</tt>, then the
+optimizer is not allowed to modify the number or order of execution of
+this <tt>store</tt> with other volatile <tt>load</tt> and <tt><a
+ href="#i_store">store</a></tt> instructions.</p>
+<h5>Semantics:</h5>
+<p>The contents of memory are updated to contain '<tt>&lt;value&gt;</tt>'
+at the location specified by the '<tt>&lt;pointer&gt;</tt>' operand.</p>
+<h5>Example:</h5>
+<pre> %ptr = <a href="#i_alloca">alloca</a> i32 <i>; yields {i32*}:ptr</i>
+ <a
+ href="#i_store">store</a> i32 3, i32* %ptr <i>; yields {void}</i>
+ %val = load i32* %ptr <i>; yields {i32}:val = i32 3</i>
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_getelementptr">'<tt>getelementptr</tt>' Instruction</a>
+</div>
+
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre>
+ &lt;result&gt; = getelementptr &lt;ty&gt;* &lt;ptrval&gt;{, &lt;ty&gt; &lt;idx&gt;}*
+</pre>
+
+<h5>Overview:</h5>
+
+<p>
+The '<tt>getelementptr</tt>' instruction is used to get the address of a
+subelement of an aggregate data structure.</p>
+
+<h5>Arguments:</h5>
+
+<p>This instruction takes a list of integer operands that indicate what
+elements of the aggregate object to index to. The actual types of the arguments
+provided depend on the type of the first pointer argument. The
+'<tt>getelementptr</tt>' instruction is used to index down through the type
+levels of a structure or to a specific index in an array. When indexing into a
+structure, only <tt>i32</tt> integer constants are allowed. When indexing
+into an array or pointer, only integers of 32 or 64 bits are allowed, and will
+be sign extended to 64-bit values.</p>
+
+<p>For example, let's consider a C code fragment and how it gets
+compiled to LLVM:</p>
+
+<div class="doc_code">
+<pre>
+struct RT {
+ char A;
+ int B[10][20];
+ char C;
+};
+struct ST {
+ int X;
+ double Y;
+ struct RT Z;
+};
+
+int *foo(struct ST *s) {
+ return &amp;s[1].Z.B[5][13];
+}
+</pre>
+</div>
+
+<p>The LLVM code generated by the GCC frontend is:</p>
+
+<div class="doc_code">
+<pre>
+%RT = type { i8 , [10 x [20 x i32]], i8 }
+%ST = type { i32, double, %RT }
+
+define i32* %foo(%ST* %s) {
+entry:
+ %reg = getelementptr %ST* %s, i32 1, i32 2, i32 1, i32 5, i32 13
+ ret i32* %reg
+}
+</pre>
+</div>
+
+<h5>Semantics:</h5>
+
+<p>The index types specified for the '<tt>getelementptr</tt>' instruction depend
+on the pointer type that is being indexed into. <a href="#t_pointer">Pointer</a>
+and <a href="#t_array">array</a> types can use a 32-bit or 64-bit
+<a href="#t_integer">integer</a> type but the value will always be sign extended
+to 64 bits. <a href="#t_struct">Structure</a> types require <tt>i32</tt>
+<b>constants</b>.</p>
+
+<p>In the example above, the first index is indexing into the '<tt>%ST*</tt>'
+type, which is a pointer, yielding a '<tt>%ST</tt>' = '<tt>{ i32, double, %RT
+}</tt>' type, a structure. The second index indexes into the third element of
+the structure, yielding a '<tt>%RT</tt>' = '<tt>{ i8 , [10 x [20 x i32]],
+i8 }</tt>' type, another structure. The third index indexes into the second
+element of the structure, yielding a '<tt>[10 x [20 x i32]]</tt>' type, an
+array. The two dimensions of the array are subscripted into, yielding an
+'<tt>i32</tt>' type. The '<tt>getelementptr</tt>' instruction returns a pointer
+to this element, thus computing a value of '<tt>i32*</tt>' type.</p>
+
+<p>Note that it is perfectly legal to index partially through a
+structure, returning a pointer to an inner element. Because of this,
+the LLVM code for the given testcase is equivalent to:</p>
+
+<pre>
+ define i32* %foo(%ST* %s) {
+ %t1 = getelementptr %ST* %s, i32 1 <i>; yields %ST*:%t1</i>
+ %t2 = getelementptr %ST* %t1, i32 0, i32 2 <i>; yields %RT*:%t2</i>
+ %t3 = getelementptr %RT* %t2, i32 0, i32 1 <i>; yields [10 x [20 x i32]]*:%t3</i>
+ %t4 = getelementptr [10 x [20 x i32]]* %t3, i32 0, i32 5 <i>; yields [20 x i32]*:%t4</i>
+ %t5 = getelementptr [20 x i32]* %t4, i32 0, i32 13 <i>; yields i32*:%t5</i>
+ ret i32* %t5
+ }
+</pre>
+
+<p>Note that it is undefined to access an array out of bounds: array and
+pointer indexes must always be within the defined bounds of the array type.
+The one exception to this rule is zero-length arrays. These arrays are
+defined to be accessible as variable length arrays, which requires access
+beyond the zero'th element.</p>
+
+<p>The getelementptr instruction is often confusing. For some more insight
+into how it works, see <a href="GetElementPtr.html">the getelementptr
+FAQ</a>.</p>
+
+<h5>Example:</h5>
+
+<pre>
+ <i>; yields [12 x i8]*:aptr</i>
+ %aptr = getelementptr {i32, [12 x i8]}* %sptr, i64 0, i32 1
+</pre>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"> <a name="convertops">Conversion Operations</a>
+</div>
+<div class="doc_text">
+<p>The instructions in this category are the conversion instructions (casting)
+which all take a single operand and a type. They perform various bit conversions
+on the operand.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_trunc">'<tt>trunc .. to</tt>' Instruction</a>
+</div>
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ &lt;result&gt; = trunc &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt; <i>; yields ty2</i>
+</pre>
+
+<h5>Overview:</h5>
+<p>
+The '<tt>trunc</tt>' instruction truncates its operand to the type <tt>ty2</tt>.
+</p>
+
+<h5>Arguments:</h5>
+<p>
+The '<tt>trunc</tt>' instruction takes a <tt>value</tt> to truncate, which must
+be an <a href="#t_integer">integer</a> type, and a type that specifies the size
+and type of the result, which must be an <a href="#t_integer">integer</a>
+type. The bit size of <tt>value</tt> must be larger than the bit size of
+<tt>ty2</tt>. Equal sized types are not allowed.</p>
+
+<h5>Semantics:</h5>
+<p>
+The '<tt>trunc</tt>' instruction truncates the high order bits in <tt>value</tt>
+and converts the remaining bits to <tt>ty2</tt>. Since the source size must be
+larger than the destination size, <tt>trunc</tt> cannot be a <i>no-op cast</i>.
+It will always truncate bits.</p>
+
+<h5>Example:</h5>
+<pre>
+ %X = trunc i32 257 to i8 <i>; yields i8:1</i>
+ %Y = trunc i32 123 to i1 <i>; yields i1:true</i>
+ %Z = trunc i32 122 to i1 <i>; yields i1:false</i>
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_zext">'<tt>zext .. to</tt>' Instruction</a>
+</div>
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ &lt;result&gt; = zext &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt; <i>; yields ty2</i>
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>zext</tt>' instruction zero extends its operand to type
+<tt>ty2</tt>.</p>
+
+
+<h5>Arguments:</h5>
+<p>The '<tt>zext</tt>' instruction takes a value to cast, which must be of
+<a href="#t_integer">integer</a> type, and a type to cast it to, which must
+also be of <a href="#t_integer">integer</a> type. The bit size of the
+<tt>value</tt> must be smaller than the bit size of the destination type,
+<tt>ty2</tt>.</p>
+
+<h5>Semantics:</h5>
+<p>The <tt>zext</tt> fills the high order bits of the <tt>value</tt> with zero
+bits until it reaches the size of the destination type, <tt>ty2</tt>.</p>
+
+<p>When zero extending from i1, the result will always be either 0 or 1.</p>
+
+<h5>Example:</h5>
+<pre>
+ %X = zext i32 257 to i64 <i>; yields i64:257</i>
+ %Y = zext i1 true to i32 <i>; yields i32:1</i>
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_sext">'<tt>sext .. to</tt>' Instruction</a>
+</div>
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ &lt;result&gt; = sext &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt; <i>; yields ty2</i>
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>sext</tt>' sign extends <tt>value</tt> to the type <tt>ty2</tt>.</p>
+
+<h5>Arguments:</h5>
+<p>
+The '<tt>sext</tt>' instruction takes a value to cast, which must be of
+<a href="#t_integer">integer</a> type, and a type to cast it to, which must
+also be of <a href="#t_integer">integer</a> type. The bit size of the
+<tt>value</tt> must be smaller than the bit size of the destination type,
+<tt>ty2</tt>.</p>
+
+<h5>Semantics:</h5>
+<p>
+The '<tt>sext</tt>' instruction performs a sign extension by copying the sign
+bit (highest order bit) of the <tt>value</tt> until it reaches the bit size of
+the type <tt>ty2</tt>.</p>
+
+<p>When sign extending from i1, the extension always results in -1 or 0.</p>
+
+<h5>Example:</h5>
+<pre>
+ %X = sext i8 -1 to i16 <i>; yields i16 :65535</i>
+ %Y = sext i1 true to i32 <i>; yields i32:-1</i>
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_fptrunc">'<tt>fptrunc .. to</tt>' Instruction</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+
+<pre>
+ &lt;result&gt; = fptrunc &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt; <i>; yields ty2</i>
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>fptrunc</tt>' instruction truncates <tt>value</tt> to type
+<tt>ty2</tt>.</p>
+
+
+<h5>Arguments:</h5>
+<p>The '<tt>fptrunc</tt>' instruction takes a <a href="#t_floating">floating
+ point</a> value to cast and a <a href="#t_floating">floating point</a> type to
+cast it to. The size of <tt>value</tt> must be larger than the size of
+<tt>ty2</tt>. This implies that <tt>fptrunc</tt> cannot be used to make a
+<i>no-op cast</i>.</p>
+
+<h5>Semantics:</h5>
+<p> The '<tt>fptrunc</tt>' instruction truncates a <tt>value</tt> from a larger
+<a href="#t_floating">floating point</a> type to a smaller
+<a href="#t_floating">floating point</a> type. If the value cannot fit within
+the destination type, <tt>ty2</tt>, then the results are undefined.</p>
+
+<h5>Example:</h5>
+<pre>
+ %X = fptrunc double 123.0 to float <i>; yields float:123.0</i>
+ %Y = fptrunc double 1.0E+300 to float <i>; yields undefined</i>
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_fpext">'<tt>fpext .. to</tt>' Instruction</a>
+</div>
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ &lt;result&gt; = fpext &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt; <i>; yields ty2</i>
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>fpext</tt>' extends a floating point <tt>value</tt> to a larger
+floating point value.</p>
+
+<h5>Arguments:</h5>
+<p>The '<tt>fpext</tt>' instruction takes a
+<a href="#t_floating">floating point</a> <tt>value</tt> to cast,
+and a <a href="#t_floating">floating point</a> type to cast it to. The source
+type must be smaller than the destination type.</p>
+
+<h5>Semantics:</h5>
+<p>The '<tt>fpext</tt>' instruction extends the <tt>value</tt> from a smaller
+<a href="#t_floating">floating point</a> type to a larger
+<a href="#t_floating">floating point</a> type. The <tt>fpext</tt> cannot be
+used to make a <i>no-op cast</i> because it always changes bits. Use
+<tt>bitcast</tt> to make a <i>no-op cast</i> for a floating point cast.</p>
+
+<h5>Example:</h5>
+<pre>
+ %X = fpext float 3.1415 to double <i>; yields double:3.1415</i>
+ %Y = fpext float 1.0 to double <i>; yields double:1.0</i>
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_fptoui">'<tt>fptoui .. to</tt>' Instruction</a>
+</div>
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ &lt;result&gt; = fptoui &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt; <i>; yields ty2</i>
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>fptoui</tt>' converts a floating point <tt>value</tt> to its
+unsigned integer equivalent of type <tt>ty2</tt>.
+</p>
+
+<h5>Arguments:</h5>
+<p>The '<tt>fptoui</tt>' instruction takes a value to cast, which must be a
+<a href="#t_floating">floating point</a> value, and a type to cast it to, which
+must be an <a href="#t_integer">integer</a> type.</p>
+
+<h5>Semantics:</h5>
+<p> The '<tt>fptoui</tt>' instruction converts its
+<a href="#t_floating">floating point</a> operand into the nearest (rounding
+towards zero) unsigned integer value. If the value cannot fit in <tt>ty2</tt>,
+the results are undefined.</p>
+
+<p>When converting to i1, the conversion is done as a comparison against
+zero. If the <tt>value</tt> was zero, the i1 result will be <tt>false</tt>.
+If the <tt>value</tt> was non-zero, the i1 result will be <tt>true</tt>.</p>
+
+<h5>Example:</h5>
+<pre>
+ %X = fptoui double 123.0 to i32 <i>; yields i32:123</i>
+ %Y = fptoui float 1.0E+300 to i1 <i>; yields i1:true</i>
+ %X = fptoui float 1.04E+17 to i8 <i>; yields undefined:1</i>
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_fptosi">'<tt>fptosi .. to</tt>' Instruction</a>
+</div>
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ &lt;result&gt; = fptosi &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt; <i>; yields ty2</i>
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>fptosi</tt>' instruction converts
+<a href="#t_floating">floating point</a> <tt>value</tt> to type <tt>ty2</tt>.
+</p>
+
+
+<h5>Arguments:</h5>
+<p> The '<tt>fptosi</tt>' instruction takes a value to cast, which must be a
+<a href="#t_floating">floating point</a> value, and a type to cast it to, which
+must be an <a href="#t_integer">integer</a> type.</p>
+
+<h5>Semantics:</h5>
+<p>The '<tt>fptosi</tt>' instruction converts its
+<a href="#t_floating">floating point</a> operand into the nearest (rounding
+towards zero) signed integer value. If the value cannot fit in <tt>ty2</tt>,
+the results are undefined.</p>
+
+<p>When converting to i1, the conversion is done as a comparison against
+zero. If the <tt>value</tt> was zero, the i1 result will be <tt>false</tt>.
+If the <tt>value</tt> was non-zero, the i1 result will be <tt>true</tt>.</p>
+
+<h5>Example:</h5>
+<pre>
+ %X = fptosi double -123.0 to i32 <i>; yields i32:-123</i>
+ %Y = fptosi float 1.0E-247 to i1 <i>; yields i1:true</i>
+ %X = fptosi float 1.04E+17 to i8 <i>; yields undefined:1</i>
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_uitofp">'<tt>uitofp .. to</tt>' Instruction</a>
+</div>
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ &lt;result&gt; = uitofp &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt; <i>; yields ty2</i>
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>uitofp</tt>' instruction regards <tt>value</tt> as an unsigned
+integer and converts that value to the <tt>ty2</tt> type.</p>
+
+
+<h5>Arguments:</h5>
+<p>The '<tt>uitofp</tt>' instruction takes a value to cast, which must be an
+<a href="#t_integer">integer</a> value, and a type to cast it to, which must
+be a <a href="#t_floating">floating point</a> type.</p>
+
+<h5>Semantics:</h5>
+<p>The '<tt>uitofp</tt>' instruction interprets its operand as an unsigned
+integer quantity and converts it to the corresponding floating point value. If
+the value cannot fit in the floating point value, the results are undefined.</p>
+
+
+<h5>Example:</h5>
+<pre>
+ %X = uitofp i32 257 to float <i>; yields float:257.0</i>
+ %Y = uitofp i8 -1 to double <i>; yields double:255.0</i>
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_sitofp">'<tt>sitofp .. to</tt>' Instruction</a>
+</div>
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ &lt;result&gt; = sitofp &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt; <i>; yields ty2</i>
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>sitofp</tt>' instruction regards <tt>value</tt> as a signed
+integer and converts that value to the <tt>ty2</tt> type.</p>
+
+<h5>Arguments:</h5>
+<p>The '<tt>sitofp</tt>' instruction takes a value to cast, which must be an
+<a href="#t_integer">integer</a> value, and a type to cast it to, which must be
+a <a href="#t_floating">floating point</a> type.</p>
+
+<h5>Semantics:</h5>
+<p>The '<tt>sitofp</tt>' instruction interprets its operand as a signed
+integer quantity and converts it to the corresponding floating point value. If
+the value cannot fit in the floating point value, the results are undefined.</p>
+
+<h5>Example:</h5>
+<pre>
+ %X = sitofp i32 257 to float <i>; yields float:257.0</i>
+ %Y = sitofp i8 -1 to double <i>; yields double:-1.0</i>
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_ptrtoint">'<tt>ptrtoint .. to</tt>' Instruction</a>
+</div>
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ &lt;result&gt; = ptrtoint &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt; <i>; yields ty2</i>
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>ptrtoint</tt>' instruction converts the pointer <tt>value</tt> to
+the integer type <tt>ty2</tt>.</p>
+
+<h5>Arguments:</h5>
+<p>The '<tt>ptrtoint</tt>' instruction takes a <tt>value</tt> to cast, which
+must be a <a href="#t_pointer">pointer</a> value, and a type to cast it to,
+<tt>ty2</tt>, which must be an <a href="#t_integer">integer</a> type.</p>
+
+<h5>Semantics:</h5>
+<p>The '<tt>ptrtoint</tt>' instruction converts <tt>value</tt> to integer type
+<tt>ty2</tt> by interpreting the pointer value as an integer and either
+truncating or zero extending that value to the size of the integer type. If
+<tt>value</tt> is smaller than <tt>ty2</tt> then a zero extension is done. If
+<tt>value</tt> is larger than <tt>ty2</tt> then a truncation is done. If they
+are the same size, then nothing is done (<i>no-op cast</i>) other than a type
+change.</p>
+
+<h5>Example:</h5>
+<pre>
+ %X = ptrtoint i32* %X to i8 <i>; yields truncation on 32-bit architecture</i>
+ %Y = ptrtoint i32* %x to i64 <i>; yields zero extension on 32-bit architecture</i>
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_inttoptr">'<tt>inttoptr .. to</tt>' Instruction</a>
+</div>
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ &lt;result&gt; = inttoptr &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt; <i>; yields ty2</i>
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>inttoptr</tt>' instruction converts an integer <tt>value</tt> to
+a pointer type, <tt>ty2</tt>.</p>
+
+<h5>Arguments:</h5>
+<p>The '<tt>inttoptr</tt>' instruction takes an <a href="#t_integer">integer</a>
+value to cast, and a type to cast it to, which must be a
+<a href="#t_pointer">pointer</a> type.</p>
+
+<h5>Semantics:</h5>
+<p>The '<tt>inttoptr</tt>' instruction converts <tt>value</tt> to type
+<tt>ty2</tt> by applying either a zero extension or a truncation depending on
+the size of the integer <tt>value</tt>. If <tt>value</tt> is larger than the
+size of a pointer then a truncation is done. If <tt>value</tt> is smaller than
+the size of a pointer then a zero extension is done. If they are the same size,
+nothing is done (<i>no-op cast</i>).</p>
+
+<h5>Example:</h5>
+<pre>
+ %X = inttoptr i32 255 to i32* <i>; yields zero extension on 64-bit architecture</i>
+ %X = inttoptr i32 255 to i32* <i>; yields no-op on 32-bit architecture</i>
+ %Y = inttoptr i64 0 to i32* <i>; yields truncation on 32-bit architecture</i>
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_bitcast">'<tt>bitcast .. to</tt>' Instruction</a>
+</div>
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ &lt;result&gt; = bitcast &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt; <i>; yields ty2</i>
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>bitcast</tt>' instruction converts <tt>value</tt> to type
+<tt>ty2</tt> without changing any bits.</p>
+
+<h5>Arguments:</h5>
+<p>The '<tt>bitcast</tt>' instruction takes a value to cast, which must be
+a first class value, and a type to cast it to, which must also be a <a
+ href="#t_firstclass">first class</a> type. The bit sizes of <tt>value</tt>
+and the destination type, <tt>ty2</tt>, must be identical. If the source
+type is a pointer, the destination type must also be a pointer.</p>
+
+<h5>Semantics:</h5>
+<p>The '<tt>bitcast</tt>' instruction converts <tt>value</tt> to type
+<tt>ty2</tt>. It is always a <i>no-op cast</i> because no bits change with
+this conversion. The conversion is done as if the <tt>value</tt> had been
+stored to memory and read back as type <tt>ty2</tt>. Pointer types may only be
+converted to other pointer types with this instruction. To convert pointers to
+other types, use the <a href="#i_inttoptr">inttoptr</a> or
+<a href="#i_ptrtoint">ptrtoint</a> instructions first.</p>
+
+<h5>Example:</h5>
+<pre>
+ %X = bitcast i8 255 to i8 <i>; yields i8 :-1</i>
+ %Y = bitcast i32* %x to i32* <i>; yields i32*:%x</i>
+ %Z = bitcast &lt;2 x i32&gt; %V to i64 <i>; yields i64: %V</i>
+</pre>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"> <a name="otherops">Other Operations</a> </div>
+<div class="doc_text">
+<p>The instructions in this category are the "miscellaneous"
+instructions, which defy better classification.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="i_icmp">'<tt>icmp</tt>' Instruction</a>
+</div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre> &lt;result&gt; = icmp &lt;cond&gt; &lt;ty&gt; &lt;var1&gt;, &lt;var2&gt; <i>; yields {i1}:result</i>
+</pre>
+<h5>Overview:</h5>
+<p>The '<tt>icmp</tt>' instruction returns a boolean value based on comparison
+of its two integer operands.</p>
+<h5>Arguments:</h5>
+<p>The '<tt>icmp</tt>' instruction takes three operands. The first operand is
+the condition code indicating the kind of comparison to perform. It is not
+a value, just a keyword. The possible condition codes are:</p>
+<ol>
+ <li><tt>eq</tt>: equal</li>
+ <li><tt>ne</tt>: not equal </li>
+ <li><tt>ugt</tt>: unsigned greater than</li>
+ <li><tt>uge</tt>: unsigned greater or equal</li>
+ <li><tt>ult</tt>: unsigned less than</li>
+ <li><tt>ule</tt>: unsigned less or equal</li>
+ <li><tt>sgt</tt>: signed greater than</li>
+ <li><tt>sge</tt>: signed greater or equal</li>
+ <li><tt>slt</tt>: signed less than</li>
+ <li><tt>sle</tt>: signed less or equal</li>
+</ol>
+<p>The remaining two arguments must be <a href="#t_integer">integer</a> or
+<a href="#t_pointer">pointer</a> typed. They must also be identical types.</p>
+<h5>Semantics:</h5>
+<p>The '<tt>icmp</tt>' compares <tt>var1</tt> and <tt>var2</tt> according to
+the condition code given as <tt>cond</tt>. The comparison performed always
+yields an <a href="#t_primitive">i1</a> result, as follows:</p>
+<ol>
+ <li><tt>eq</tt>: yields <tt>true</tt> if the operands are equal,
+ <tt>false</tt> otherwise. No sign interpretation is necessary or performed.
+ </li>
+ <li><tt>ne</tt>: yields <tt>true</tt> if the operands are unequal,
+ <tt>false</tt> otherwise. No sign interpretation is necessary or performed.
+ </li>
+ <li><tt>ugt</tt>: interprets the operands as unsigned values and yields
+ <tt>true</tt> if <tt>var1</tt> is greater than <tt>var2</tt>.</li>
+ <li><tt>uge</tt>: interprets the operands as unsigned values and yields
+ <tt>true</tt> if <tt>var1</tt> is greater than or equal to <tt>var2</tt>.</li>
+ <li><tt>ult</tt>: interprets the operands as unsigned values and yields
+ <tt>true</tt> if <tt>var1</tt> is less than <tt>var2</tt>.</li>
+ <li><tt>ule</tt>: interprets the operands as unsigned values and yields
+ <tt>true</tt> if <tt>var1</tt> is less than or equal to <tt>var2</tt>.</li>
+ <li><tt>sgt</tt>: interprets the operands as signed values and yields
+ <tt>true</tt> if <tt>var1</tt> is greater than <tt>var2</tt>.</li>
+ <li><tt>sge</tt>: interprets the operands as signed values and yields
+ <tt>true</tt> if <tt>var1</tt> is greater than or equal to <tt>var2</tt>.</li>
+ <li><tt>slt</tt>: interprets the operands as signed values and yields
+ <tt>true</tt> if <tt>var1</tt> is less than <tt>var2</tt>.</li>
+ <li><tt>sle</tt>: interprets the operands as signed values and yields
+ <tt>true</tt> if <tt>var1</tt> is less than or equal to <tt>var2</tt>.</li>
+</ol>
+<p>If the operands are <a href="#t_pointer">pointer</a> typed, the pointer
+values are compared as if they were integers.</p>
+
+<h5>Example:</h5>
+<pre> &lt;result&gt; = icmp eq i32 4, 5 <i>; yields: result=false</i>
+ &lt;result&gt; = icmp ne float* %X, %X <i>; yields: result=false</i>
+ &lt;result&gt; = icmp ult i16 4, 5 <i>; yields: result=true</i>
+ &lt;result&gt; = icmp sgt i16 4, 5 <i>; yields: result=false</i>
+ &lt;result&gt; = icmp ule i16 -4, 5 <i>; yields: result=false</i>
+ &lt;result&gt; = icmp sge i16 4, 5 <i>; yields: result=false</i>
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"><a name="i_fcmp">'<tt>fcmp</tt>' Instruction</a>
+</div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre> &lt;result&gt; = fcmp &lt;cond&gt; &lt;ty&gt; &lt;var1&gt;, &lt;var2&gt; <i>; yields {i1}:result</i>
+</pre>
+<h5>Overview:</h5>
+<p>The '<tt>fcmp</tt>' instruction returns a boolean value based on comparison
+of its floating point operands.</p>
+<h5>Arguments:</h5>
+<p>The '<tt>fcmp</tt>' instruction takes three operands. The first operand is
+the condition code indicating the kind of comparison to perform. It is not
+a value, just a keyword. The possible condition codes are:</p>
+<ol>
+ <li><tt>false</tt>: no comparison, always returns false</li>
+ <li><tt>oeq</tt>: ordered and equal</li>
+ <li><tt>ogt</tt>: ordered and greater than </li>
+ <li><tt>oge</tt>: ordered and greater than or equal</li>
+ <li><tt>olt</tt>: ordered and less than </li>
+ <li><tt>ole</tt>: ordered and less than or equal</li>
+ <li><tt>one</tt>: ordered and not equal</li>
+ <li><tt>ord</tt>: ordered (no nans)</li>
+ <li><tt>ueq</tt>: unordered or equal</li>
+ <li><tt>ugt</tt>: unordered or greater than </li>
+ <li><tt>uge</tt>: unordered or greater than or equal</li>
+ <li><tt>ult</tt>: unordered or less than </li>
+ <li><tt>ule</tt>: unordered or less than or equal</li>
+ <li><tt>une</tt>: unordered or not equal</li>
+ <li><tt>uno</tt>: unordered (either nans)</li>
+ <li><tt>true</tt>: no comparison, always returns true</li>
+</ol>
+<p><i>Ordered</i> means that neither operand is a QNAN while
+<i>unordered</i> means that either operand may be a QNAN.</p>
+<p>The <tt>var1</tt> and <tt>var2</tt> arguments must be
+<a href="#t_floating">floating point</a> typed. They must have identical
+types.</p>
+<h5>Semantics:</h5>
+<p>The '<tt>fcmp</tt>' compares <tt>var1</tt> and <tt>var2</tt> according to
+the condition code given as <tt>cond</tt>. The comparison performed always
+yields an <a href="#t_primitive">i1</a> result, as follows:</p>
+<ol>
+ <li><tt>false</tt>: always yields <tt>false</tt>, regardless of operands.</li>
+ <li><tt>oeq</tt>: yields <tt>true</tt> if both operands are not a QNAN and
+ <tt>var1</tt> is equal to <tt>var2</tt>.</li>
+ <li><tt>ogt</tt>: yields <tt>true</tt> if both operands are not a QNAN and
+ <tt>var1</tt> is greater than <tt>var2</tt>.</li>
+ <li><tt>oge</tt>: yields <tt>true</tt> if both operands are not a QNAN and
+ <tt>var1</tt> is greater than or equal to <tt>var2</tt>.</li>
+ <li><tt>olt</tt>: yields <tt>true</tt> if both operands are not a QNAN and
+ <tt>var1</tt> is less than <tt>var2</tt>.</li>
+ <li><tt>ole</tt>: yields <tt>true</tt> if both operands are not a QNAN and
+ <tt>var1</tt> is less than or equal to <tt>var2</tt>.</li>
+ <li><tt>one</tt>: yields <tt>true</tt> if both operands are not a QNAN and
+ <tt>var1</tt> is not equal to <tt>var2</tt>.</li>
+ <li><tt>ord</tt>: yields <tt>true</tt> if both operands are not a QNAN.</li>
+ <li><tt>ueq</tt>: yields <tt>true</tt> if either operand is a QNAN or
+ <tt>var1</tt> is equal to <tt>var2</tt>.</li>
+ <li><tt>ugt</tt>: yields <tt>true</tt> if either operand is a QNAN or
+ <tt>var1</tt> is greater than <tt>var2</tt>.</li>
+ <li><tt>uge</tt>: yields <tt>true</tt> if either operand is a QNAN or
+ <tt>var1</tt> is greater than or equal to <tt>var2</tt>.</li>
+ <li><tt>ult</tt>: yields <tt>true</tt> if either operand is a QNAN or
+ <tt>var1</tt> is less than <tt>var2</tt>.</li>
+ <li><tt>ule</tt>: yields <tt>true</tt> if either operand is a QNAN or
+ <tt>var1</tt> is less than or equal to <tt>var2</tt>.</li>
+ <li><tt>une</tt>: yields <tt>true</tt> if either operand is a QNAN or
+ <tt>var1</tt> is not equal to <tt>var2</tt>.</li>
+ <li><tt>uno</tt>: yields <tt>true</tt> if either operand is a QNAN.</li>
+ <li><tt>true</tt>: always yields <tt>true</tt>, regardless of operands.</li>
+</ol>
+
+<h5>Example:</h5>
+<pre> &lt;result&gt; = fcmp oeq float 4.0, 5.0 <i>; yields: result=false</i>
+ &lt;result&gt; = fcmp one float 4.0, 5.0 <i>; yields: result=true</i>
+ &lt;result&gt; = fcmp olt float 4.0, 5.0 <i>; yields: result=true</i>
+ &lt;result&gt; = fcmp ueq double 1.0, 2.0 <i>; yields: result=false</i>
+</pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection"> <a name="i_phi">'<tt>phi</tt>'
+Instruction</a> </div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre> &lt;result&gt; = phi &lt;ty&gt; [ &lt;val0&gt;, &lt;label0&gt;], ...<br></pre>
+<h5>Overview:</h5>
+<p>The '<tt>phi</tt>' instruction is used to implement the &#966; node in
+the SSA graph representing the function.</p>
+<h5>Arguments:</h5>
+<p>The type of the incoming values is specified with the first type
+field. After this, the '<tt>phi</tt>' instruction takes a list of pairs
+as arguments, with one pair for each predecessor basic block of the
+current block. Only values of <a href="#t_firstclass">first class</a>
+type may be used as the value arguments to the PHI node. Only labels
+may be used as the label arguments.</p>
+<p>There must be no non-phi instructions between the start of a basic
+block and the PHI instructions: i.e. PHI instructions must be first in
+a basic block.</p>
+<h5>Semantics:</h5>
+<p>At runtime, the '<tt>phi</tt>' instruction logically takes on the value
+specified by the pair corresponding to the predecessor basic block that executed
+just prior to the current block.</p>
+<h5>Example:</h5>
+<pre>Loop: ; Infinite loop that counts from 0 on up...<br> %indvar = phi i32 [ 0, %LoopHeader ], [ %nextindvar, %Loop ]<br> %nextindvar = add i32 %indvar, 1<br> br label %Loop<br></pre>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_select">'<tt>select</tt>' Instruction</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+
+<pre>
+ &lt;result&gt; = select i1 &lt;cond&gt;, &lt;ty&gt; &lt;val1&gt;, &lt;ty&gt; &lt;val2&gt; <i>; yields ty</i>
+</pre>
+
+<h5>Overview:</h5>
+
+<p>
+The '<tt>select</tt>' instruction is used to choose one value based on a
+condition, without branching.
+</p>
+
+
+<h5>Arguments:</h5>
+
+<p>
+The '<tt>select</tt>' instruction requires a boolean value indicating the condition, and two values of the same <a href="#t_firstclass">first class</a> type.
+</p>
+
+<h5>Semantics:</h5>
+
+<p>
+If the boolean condition evaluates to true, the instruction returns the first
+value argument; otherwise, it returns the second value argument.
+</p>
+
+<h5>Example:</h5>
+
+<pre>
+ %X = select i1 true, i8 17, i8 42 <i>; yields i8:17</i>
+</pre>
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_call">'<tt>call</tt>' Instruction</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ &lt;result&gt; = [tail] call [<a href="#callingconv">cconv</a>] &lt;ty&gt;* &lt;fnptrval&gt;(&lt;param list&gt;)
+</pre>
+
+<h5>Overview:</h5>
+
+<p>The '<tt>call</tt>' instruction represents a simple function call.</p>
+
+<h5>Arguments:</h5>
+
+<p>This instruction requires several arguments:</p>
+
+<ol>
+ <li>
+ <p>The optional "tail" marker indicates whether the callee function accesses
+ any allocas or varargs in the caller. If the "tail" marker is present, the
+ function call is eligible for tail call optimization. Note that calls may
+ be marked "tail" even if they do not occur before a <a
+ href="#i_ret"><tt>ret</tt></a> instruction.</p>
+ </li>
+ <li>
+ <p>The optional "cconv" marker indicates which <a href="#callingconv">calling
+ convention</a> the call should use. If none is specified, the call defaults
+ to using C calling conventions.</p>
+ </li>
+ <li>
+ <p>'<tt>ty</tt>': shall be the signature of the pointer to function value
+ being invoked. The argument types must match the types implied by this
+ signature. This type can be omitted if the function is not varargs and
+ if the function type does not return a pointer to a function.</p>
+ </li>
+ <li>
+ <p>'<tt>fnptrval</tt>': An LLVM value containing a pointer to a function to
+ be invoked. In most cases, this is a direct function invocation, but
+ indirect <tt>call</tt>s are just as possible, calling an arbitrary pointer
+ to function value.</p>
+ </li>
+ <li>
+ <p>'<tt>function args</tt>': argument list whose types match the
+ function signature argument types. All arguments must be of
+ <a href="#t_firstclass">first class</a> type. If the function signature
+ indicates the function accepts a variable number of arguments, the extra
+ arguments can be specified.</p>
+ </li>
+</ol>
+
+<h5>Semantics:</h5>
+
+<p>The '<tt>call</tt>' instruction is used to cause control flow to
+transfer to a specified function, with its incoming arguments bound to
+the specified values. Upon a '<tt><a href="#i_ret">ret</a></tt>'
+instruction in the called function, control flow continues with the
+instruction after the function call, and the return value of the
+function is bound to the result argument. This is a simpler case of
+the <a href="#i_invoke">invoke</a> instruction.</p>
+
+<h5>Example:</h5>
+
+<pre>
+ %retval = call i32 %test(i32 %argc)
+ call i32(i8 *, ...) *%printf(i8 * %msg, i32 12, i8 42);
+ %X = tail call i32 %foo()
+ %Y = tail call <a href="#callingconv">fastcc</a> i32 %foo()
+</pre>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="i_va_arg">'<tt>va_arg</tt>' Instruction</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+
+<pre>
+ &lt;resultval&gt; = va_arg &lt;va_list*&gt; &lt;arglist&gt;, &lt;argty&gt;
+</pre>
+
+<h5>Overview:</h5>
+
+<p>The '<tt>va_arg</tt>' instruction is used to access arguments passed through
+the "variable argument" area of a function call. It is used to implement the
+<tt>va_arg</tt> macro in C.</p>
+
+<h5>Arguments:</h5>
+
+<p>This instruction takes a <tt>va_list*</tt> value and the type of
+the argument. It returns a value of the specified argument type and
+increments the <tt>va_list</tt> to point to the next argument. The
+actual type of <tt>va_list</tt> is target specific.</p>
+
+<h5>Semantics:</h5>
+
+<p>The '<tt>va_arg</tt>' instruction loads an argument of the specified
+type from the specified <tt>va_list</tt> and causes the
+<tt>va_list</tt> to point to the next argument. For more information,
+see the variable argument handling <a href="#int_varargs">Intrinsic
+Functions</a>.</p>
+
+<p>It is legal for this instruction to be called in a function which does not
+take a variable number of arguments, for example, the <tt>vfprintf</tt>
+function.</p>
+
+<p><tt>va_arg</tt> is an LLVM instruction instead of an <a
+href="#intrinsics">intrinsic function</a> because it takes a type as an
+argument.</p>
+
+<h5>Example:</h5>
+
+<p>See the <a href="#int_varargs">variable argument processing</a> section.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"> <a name="intrinsics">Intrinsic Functions</a> </div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>LLVM supports the notion of an "intrinsic function". These functions have
+well known names and semantics and are required to follow certain restrictions.
+Overall, these intrinsics represent an extension mechanism for the LLVM
+language that does not require changing all of the transformations in LLVM when
+adding to the language (or the bitcode reader/writer, the parser, etc...).</p>
+
+<p>Intrinsic function names must all start with an "<tt>llvm.</tt>" prefix. This
+prefix is reserved in LLVM for intrinsic names; thus, function names may not
+begin with this prefix. Intrinsic functions must always be external functions:
+you cannot define the body of intrinsic functions. Intrinsic functions may
+only be used in call or invoke instructions: it is illegal to take the address
+of an intrinsic function. Additionally, because intrinsic functions are part
+of the LLVM language, it is required if any are added that they be documented
+here.</p>
+
+<p>Some intrinsic functions can be overloaded, i.e., the intrinsic represents
+a family of functions that perform the same operation but on different data
+types. This is most frequent with the integer types. Since LLVM can represent
+over 8 million different integer types, there is a way to declare an intrinsic
+that can be overloaded based on its arguments. Such an intrinsic will have the
+names of its argument types encoded into its function name, each
+preceded by a period. For example, the <tt>llvm.ctpop</tt> function can take an
+integer of any width. This leads to a family of functions such as
+<tt>i32 @llvm.ctpop.i8(i8 %val)</tt> and <tt>i32 @llvm.ctpop.i29(i29 %val)</tt>.
+</p>
+
+
+<p>To learn how to add an intrinsic function, please see the
+<a href="ExtendingLLVM.html">Extending LLVM Guide</a>.
+</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="int_varargs">Variable Argument Handling Intrinsics</a>
+</div>
+
+<div class="doc_text">
+
+<p>Variable argument support is defined in LLVM with the <a
+ href="#i_va_arg"><tt>va_arg</tt></a> instruction and these three
+intrinsic functions. These functions are related to the similarly
+named macros defined in the <tt>&lt;stdarg.h&gt;</tt> header file.</p>
+
+<p>All of these functions operate on arguments that use a
+target-specific value type "<tt>va_list</tt>". The LLVM assembly
+language reference manual does not define what this type is, so all
+transformations should be prepared to handle these functions regardless of
+the type used.</p>
+
+<p>This example shows how the <a href="#i_va_arg"><tt>va_arg</tt></a>
+instruction and the variable argument handling intrinsic functions are
+used.</p>
+
+<div class="doc_code">
+<pre>
+define i32 @test(i32 %X, ...) {
+ ; Initialize variable argument processing
+ %ap = alloca i8*
+ %ap2 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap2)
+
+ ; Read a single integer argument
+ %tmp = va_arg i8** %ap, i32
+
+ ; Demonstrate usage of llvm.va_copy and llvm.va_end
+ %aq = alloca i8*
+ %aq2 = bitcast i8** %aq to i8*
+ call void @llvm.va_copy(i8* %aq2, i8* %ap2)
+ call void @llvm.va_end(i8* %aq2)
+
+ ; Stop processing of arguments.
+ call void @llvm.va_end(i8* %ap2)
+ ret i32 %tmp
+}
+
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_copy(i8*, i8*)
+declare void @llvm.va_end(i8*)
+</pre>
+</div>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_va_start">'<tt>llvm.va_start</tt>' Intrinsic</a>
+</div>
+
+
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre> declare void @llvm.va_start(i8* &lt;arglist&gt;)<br></pre>
+<h5>Overview:</h5>
+<p>The '<tt>llvm.va_start</tt>' intrinsic initializes
+<tt>*&lt;arglist&gt;</tt> for subsequent use by <tt><a
+href="#i_va_arg">va_arg</a></tt>.</p>
+
+<h5>Arguments:</h5>
+
+<p>The argument is a pointer to a <tt>va_list</tt> element to initialize.</p>
+
+<h5>Semantics:</h5>
+
+<p>The '<tt>llvm.va_start</tt>' intrinsic works just like the <tt>va_start</tt>
+macro available in C. In a target-dependent way, it initializes the
+<tt>va_list</tt> element to which the argument points, so that the next call to
+<tt>va_arg</tt> will produce the first variable argument passed to the function.
+Unlike the C <tt>va_start</tt> macro, this intrinsic does not need to know the
+last argument of the function as the compiler can figure that out.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_va_end">'<tt>llvm.va_end</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre> declare void @llvm.va_end(i8* &lt;arglist&gt;)<br></pre>
+<h5>Overview:</h5>
+
+<p>The '<tt>llvm.va_end</tt>' intrinsic destroys <tt>*&lt;arglist&gt;</tt>,
+which has been initialized previously with <tt><a href="#int_va_start">llvm.va_start</a></tt>
+or <tt><a href="#int_va_copy">llvm.va_copy</a></tt>.</p>
+
+<h5>Arguments:</h5>
+
+<p>The argument is a pointer to a <tt>va_list</tt> to destroy.</p>
+
+<h5>Semantics:</h5>
+
+<p>The '<tt>llvm.va_end</tt>' intrinsic works just like the <tt>va_end</tt>
+macro available in C. In a target-dependent way, it destroys the
+<tt>va_list</tt> element to which the argument points. Calls to <a
+href="#int_va_start"><tt>llvm.va_start</tt></a> and <a href="#int_va_copy">
+<tt>llvm.va_copy</tt></a> must be matched exactly with calls to
+<tt>llvm.va_end</tt>.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_va_copy">'<tt>llvm.va_copy</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+
+<pre>
+ declare void @llvm.va_copy(i8* &lt;destarglist&gt;, i8* &lt;srcarglist&gt;)
+</pre>
+
+<h5>Overview:</h5>
+
+<p>The '<tt>llvm.va_copy</tt>' intrinsic copies the current argument position
+from the source argument list to the destination argument list.</p>
+
+<h5>Arguments:</h5>
+
+<p>The first argument is a pointer to a <tt>va_list</tt> element to initialize.
+The second argument is a pointer to a <tt>va_list</tt> element to copy from.</p>
+
+
+<h5>Semantics:</h5>
+
+<p>The '<tt>llvm.va_copy</tt>' intrinsic works just like the <tt>va_copy</tt>
+macro available in C. In a target-dependent way, it copies the source
+<tt>va_list</tt> element into the destination <tt>va_list</tt> element. This
+intrinsic is necessary because the <tt><a href="#int_va_start">
+llvm.va_start</a></tt> intrinsic may be arbitrarily complex and require, for
+example, memory allocation.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="int_gc">Accurate Garbage Collection Intrinsics</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+LLVM support for <a href="GarbageCollection.html">Accurate Garbage
+Collection</a> requires the implementation and generation of these intrinsics.
+These intrinsics allow identification of <a href="#int_gcroot">GC roots on the
+stack</a>, as well as garbage collector implementations that require <a
+href="#int_gcread">read</a> and <a href="#int_gcwrite">write</a> barriers.
+Front-ends for type-safe garbage collected languages should generate these
+intrinsics to make use of the LLVM garbage collectors. For more details, see <a
+href="GarbageCollection.html">Accurate Garbage Collection with LLVM</a>.
+</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_gcroot">'<tt>llvm.gcroot</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+
+<pre>
+ declare void @llvm.gcroot(&lt;ty&gt;** %ptrloc, &lt;ty2&gt;* %metadata)
+</pre>
+
+<h5>Overview:</h5>
+
+<p>The '<tt>llvm.gcroot</tt>' intrinsic declares the existence of a GC root to
+the code generator, and allows some metadata to be associated with it.</p>
+
+<h5>Arguments:</h5>
+
+<p>The first argument specifies the address of a stack object that contains the
+root pointer. The second pointer (which must be either a constant or a global
+value address) contains the meta-data to be associated with the root.</p>
+
+<h5>Semantics:</h5>
+
+<p>At runtime, a call to this intrinsic stores a null pointer into the "ptrloc"
+location. At compile-time, the code generator generates information to allow
+the runtime to find the pointer at GC safe points.
+</p>
+
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_gcread">'<tt>llvm.gcread</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+
+<pre>
+ declare i8 * @llvm.gcread(i8 * %ObjPtr, i8 ** %Ptr)
+</pre>
+
+<h5>Overview:</h5>
+
+<p>The '<tt>llvm.gcread</tt>' intrinsic identifies reads of references from heap
+locations, allowing garbage collector implementations that require read
+barriers.</p>
+
+<h5>Arguments:</h5>
+
+<p>The second argument is the address to read from, which should be an address
+allocated from the garbage collector. The first argument is a pointer to the
+start of the referenced object, if needed by the language runtime (otherwise
+null).</p>
+
+<h5>Semantics:</h5>
+
+<p>The '<tt>llvm.gcread</tt>' intrinsic has the same semantics as a load
+instruction, but may be replaced with substantially more complex code by the
+garbage collector runtime, as needed.</p>
+
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_gcwrite">'<tt>llvm.gcwrite</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+
+<pre>
+ declare void @llvm.gcwrite(i8 * %P1, i8 * %Obj, i8 ** %P2)
+</pre>
+
+<h5>Overview:</h5>
+
+<p>The '<tt>llvm.gcwrite</tt>' intrinsic identifies writes of references to heap
+locations, allowing garbage collector implementations that require write
+barriers (such as generational or reference counting collectors).</p>
+
+<h5>Arguments:</h5>
+
+<p>The first argument is the reference to store, the second is the start of the
+object to store it to, and the third is the address of the field of Obj to
+store to. If the runtime does not require a pointer to the object, Obj may be
+null.</p>
+
+<h5>Semantics:</h5>
+
+<p>The '<tt>llvm.gcwrite</tt>' intrinsic has the same semantics as a store
+instruction, but may be replaced with substantially more complex code by the
+garbage collector runtime, as needed.</p>
+
+</div>
+
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="int_codegen">Code Generator Intrinsics</a>
+</div>
+
+<div class="doc_text">
+<p>
+These intrinsics are provided by LLVM to expose special features that may only
+be implemented with code generator support.
+</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_returnaddress">'<tt>llvm.returnaddress</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ declare i8 *@llvm.returnaddress(i32 &lt;level&gt;)
+</pre>
+
+<h5>Overview:</h5>
+
+<p>
+The '<tt>llvm.returnaddress</tt>' intrinsic attempts to compute a
+target-specific value indicating the return address of the current function
+or one of its callers.
+</p>
+
+<h5>Arguments:</h5>
+
+<p>
+The argument to this intrinsic indicates which function to return the address
+for. Zero indicates the calling function, one indicates its caller, etc. The
+argument is <b>required</b> to be a constant integer value.
+</p>
+
+<h5>Semantics:</h5>
+
+<p>
+The '<tt>llvm.returnaddress</tt>' intrinsic either returns a pointer indicating
+the return address of the specified call frame, or zero if it cannot be
+identified. The value returned by this intrinsic is likely to be incorrect or 0
+for arguments other than zero, so it should only be used for debugging purposes.
+</p>
+
+<p>
+Note that calling this intrinsic does not prevent function inlining or other
+aggressive transformations, so the value returned may not be that of the obvious
+source-language caller.
+</p>
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_frameaddress">'<tt>llvm.frameaddress</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ declare i8 *@llvm.frameaddress(i32 &lt;level&gt;)
+</pre>
+
+<h5>Overview:</h5>
+
+<p>
+The '<tt>llvm.frameaddress</tt>' intrinsic attempts to return the
+target-specific frame pointer value for the specified stack frame.
+</p>
+
+<h5>Arguments:</h5>
+
+<p>
+The argument to this intrinsic indicates which function to return the frame
+pointer for. Zero indicates the calling function, one indicates its caller,
+etc. The argument is <b>required</b> to be a constant integer value.
+</p>
+
+<h5>Semantics:</h5>
+
+<p>
+The '<tt>llvm.frameaddress</tt>' intrinsic either returns a pointer indicating
+the frame address of the specified call frame, or zero if it cannot be
+identified. The value returned by this intrinsic is likely to be incorrect or 0
+for arguments other than zero, so it should only be used for debugging purposes.
+</p>
+
+<p>
+Note that calling this intrinsic does not prevent function inlining or other
+aggressive transformations, so the value returned may not be that of the obvious
+source-language caller.
+</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_stacksave">'<tt>llvm.stacksave</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ declare i8 *@llvm.stacksave()
+</pre>
+
+<h5>Overview:</h5>
+
+<p>
+The '<tt>llvm.stacksave</tt>' intrinsic is used to remember the current state of
+the function stack, for use with <a href="#int_stackrestore">
+<tt>llvm.stackrestore</tt></a>. This is useful for implementing language
+features like scoped automatic variable sized arrays in C99.
+</p>
+
+<h5>Semantics:</h5>
+
+<p>
+This intrinsic returns an opaque pointer value that can be passed to <a
+href="#int_stackrestore"><tt>llvm.stackrestore</tt></a>. When an
+<tt>llvm.stackrestore</tt> intrinsic is executed with a value saved from
+<tt>llvm.stacksave</tt>, it effectively restores the state of the stack to the
+state it was in when the <tt>llvm.stacksave</tt> intrinsic executed. In
+practice, this pops any <a href="#i_alloca">alloca</a> blocks from the stack
+that were allocated after the <tt>llvm.stacksave</tt> was executed.
+</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_stackrestore">'<tt>llvm.stackrestore</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ declare void @llvm.stackrestore(i8 * %ptr)
+</pre>
+
+<h5>Overview:</h5>
+
+<p>
+The '<tt>llvm.stackrestore</tt>' intrinsic is used to restore the state of
+the function stack to the state it was in when the corresponding <a
+href="#int_stacksave"><tt>llvm.stacksave</tt></a> intrinsic executed. This is
+useful for implementing language features like scoped automatic variable sized
+arrays in C99.
+</p>
+
+<h5>Semantics:</h5>
+
+<p>
+See the description for <a href="#int_stacksave"><tt>llvm.stacksave</tt></a>.
+</p>
+
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_prefetch">'<tt>llvm.prefetch</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ declare void @llvm.prefetch(i8 * &lt;address&gt;,
+ i32 &lt;rw&gt;, i32 &lt;locality&gt;)
+</pre>
+
+<h5>Overview:</h5>
+
+
+<p>
+The '<tt>llvm.prefetch</tt>' intrinsic is a hint to the code generator to insert
+a prefetch instruction if supported; otherwise, it is a noop. Prefetches have
+no
+effect on the behavior of the program but can change its performance
+characteristics.
+</p>
+
+<h5>Arguments:</h5>
+
+<p>
+<tt>address</tt> is the address to be prefetched, <tt>rw</tt> is the specifier
+determining if the fetch should be for a read (0) or write (1), and
+<tt>locality</tt> is a temporal locality specifier ranging from (0) - no
+locality, to (3) - extremely local, keep in cache. The <tt>rw</tt> and
+<tt>locality</tt> arguments must be constant integers.
+</p>
+
+<h5>Semantics:</h5>
+
+<p>
+This intrinsic does not modify the behavior of the program. In particular,
+prefetches cannot trap and do not produce a value. On targets that support this
+intrinsic, the prefetch can provide hints to the processor cache for better
+performance.
+</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_pcmarker">'<tt>llvm.pcmarker</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ declare void @llvm.pcmarker( i32 &lt;id&gt; )
+</pre>
+
+<h5>Overview:</h5>
+
+
+<p>
+The '<tt>llvm.pcmarker</tt>' intrinsic is a method to export a Program Counter
+(PC) in a region of
+code to simulators and other tools. The method is target specific, but it is
+expected that the marker will use exported symbols to transmit the PC of the marker.
+The marker makes no guarantees that it will remain with any specific instruction
+after optimizations. It is possible that the presence of a marker will inhibit
+optimizations. The intended use is to be inserted after optimizations to allow
+correlations of simulation runs.
+</p>
+
+<h5>Arguments:</h5>
+
+<p>
+<tt>id</tt> is a numerical id identifying the marker.
+</p>
+
+<h5>Semantics:</h5>
+
+<p>
+This intrinsic does not modify the behavior of the program. Backends that do not
+support this intrinsic may ignore it.
+</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_readcyclecounter">'<tt>llvm.readcyclecounter</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ declare i64 @llvm.readcyclecounter( )
+</pre>
+
+<h5>Overview:</h5>
+
+
+<p>
+The '<tt>llvm.readcyclecounter</tt>' intrinsic provides access to the cycle
+counter register (or similar low latency, high accuracy clocks) on those targets
+that support it. On X86, it should map to RDTSC. On Alpha, it should map to RPCC.
+As the backing counters overflow quickly (on the order of 9 seconds on Alpha), this
+should only be used for small timings.
+</p>
+
+<h5>Semantics:</h5>
+
+<p>
+When directly supported, reading the cycle counter should not modify any memory.
+Implementations are allowed to either return an application specific value or a
+system wide value. On backends without support, this is lowered to a constant 0.
+</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="int_libc">Standard C Library Intrinsics</a>
+</div>
+
+<div class="doc_text">
+<p>
+LLVM provides intrinsics for a few important standard C library functions.
+These intrinsics allow source-language front-ends to pass information about the
+alignment of the pointer arguments to the code generator, providing opportunity
+for more efficient code generation.
+</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_memcpy">'<tt>llvm.memcpy</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ declare void @llvm.memcpy.i32(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
+ i32 &lt;len&gt;, i32 &lt;align&gt;)
+ declare void @llvm.memcpy.i64(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
+ i64 &lt;len&gt;, i32 &lt;align&gt;)
+</pre>
+
+<h5>Overview:</h5>
+
+<p>
+The '<tt>llvm.memcpy.*</tt>' intrinsics copy a block of memory from the source
+location to the destination location.
+</p>
+
+<p>
+Note that, unlike the standard libc function, the <tt>llvm.memcpy.*</tt>
+intrinsics do not return a value, and take an extra alignment argument.
+</p>
+
+<h5>Arguments:</h5>
+
+<p>
+The first argument is a pointer to the destination, the second is a pointer to
+the source. The third argument is an integer argument
+specifying the number of bytes to copy, and the fourth argument is the alignment
+of the source and destination locations.
+</p>
+
+<p>
+If the call to this intrinsic has an alignment value that is not 0 or 1, then
+the caller guarantees that both the source and destination pointers are aligned
+to that boundary.
+</p>
+
+<h5>Semantics:</h5>
+
+<p>
+The '<tt>llvm.memcpy.*</tt>' intrinsics copy a block of memory from the source
+location to the destination location, which are not allowed to overlap. It
+copies "len" bytes of memory over. If the argument is known to be aligned to
+some boundary, this can be specified as the fourth argument, otherwise it should
+be set to 0 or 1.
+</p>
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_memmove">'<tt>llvm.memmove</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ declare void @llvm.memmove.i32(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
+ i32 &lt;len&gt;, i32 &lt;align&gt;)
+ declare void @llvm.memmove.i64(i8 * &lt;dest&gt;, i8 * &lt;src&gt;,
+ i64 &lt;len&gt;, i32 &lt;align&gt;)
+</pre>
+
+<h5>Overview:</h5>
+
+<p>
+The '<tt>llvm.memmove.*</tt>' intrinsics move a block of memory from the source
+location to the destination location. It is similar to the
+'<tt>llvm.memcpy</tt>' intrinsic but allows the two memory locations to overlap.
+</p>
+
+<p>
+Note that, unlike the standard libc function, the <tt>llvm.memmove.*</tt>
+intrinsics do not return a value, and take an extra alignment argument.
+</p>
+
+<h5>Arguments:</h5>
+
+<p>
+The first argument is a pointer to the destination, the second is a pointer to
+the source. The third argument is an integer argument
+specifying the number of bytes to copy, and the fourth argument is the alignment
+of the source and destination locations.
+</p>
+
+<p>
+If the call to this intrinsic has an alignment value that is not 0 or 1, then
+the caller guarantees that the source and destination pointers are aligned to
+that boundary.
+</p>
+
+<h5>Semantics:</h5>
+
+<p>
+The '<tt>llvm.memmove.*</tt>' intrinsics copy a block of memory from the source
+location to the destination location, which may overlap. It
+copies "len" bytes of memory over. If the argument is known to be aligned to
+some boundary, this can be specified as the fourth argument, otherwise it should
+be set to 0 or 1.
+</p>
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_memset">'<tt>llvm.memset.*</tt>' Intrinsics</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ declare void @llvm.memset.i32(i8 * &lt;dest&gt;, i8 &lt;val&gt;,
+ i32 &lt;len&gt;, i32 &lt;align&gt;)
+ declare void @llvm.memset.i64(i8 * &lt;dest&gt;, i8 &lt;val&gt;,
+ i64 &lt;len&gt;, i32 &lt;align&gt;)
+</pre>
+
+<h5>Overview:</h5>
+
+<p>
+The '<tt>llvm.memset.*</tt>' intrinsics fill a block of memory with a particular
+byte value.
+</p>
+
+<p>
+Note that, unlike the standard libc function, the <tt>llvm.memset</tt> intrinsic
+does not return a value, and takes an extra alignment argument.
+</p>
+
+<h5>Arguments:</h5>
+
+<p>
+The first argument is a pointer to the destination to fill, the second is the
+byte value to fill it with, the third argument is an integer
+argument specifying the number of bytes to fill, and the fourth argument is the
+known alignment of the destination location.
+</p>
+
+<p>
+If the call to this intrinsic has an alignment value that is not 0 or 1, then
+the caller guarantees that the destination pointer is aligned to that boundary.
+</p>
+
+<h5>Semantics:</h5>
+
+<p>
+The '<tt>llvm.memset.*</tt>' intrinsics fill "len" bytes of memory starting at
+the
+destination location. If the argument is known to be aligned to some boundary,
+this can be specified as the fourth argument, otherwise it should be set to 0 or
+1.
+</p>
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_sqrt">'<tt>llvm.sqrt.*</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ declare float @llvm.sqrt.f32(float %Val)
+ declare double @llvm.sqrt.f64(double %Val)
+</pre>
+
+<h5>Overview:</h5>
+
+<p>
+The '<tt>llvm.sqrt</tt>' intrinsics return the sqrt of the specified operand,
+returning the same value as the libm '<tt>sqrt</tt>' function would. Unlike
+<tt>sqrt</tt> in libm, however, <tt>llvm.sqrt</tt> has undefined behavior for
+negative numbers (which allows for better optimization).
+</p>
+
+<h5>Arguments:</h5>
+
+<p>
+The argument and return value are floating point numbers of the same type.
+</p>
+
+<h5>Semantics:</h5>
+
+<p>
+This function returns the sqrt of the specified operand if it is a nonnegative
+floating point number.
+</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_powi">'<tt>llvm.powi.*</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ declare float @llvm.powi.f32(float %Val, i32 %power)
+ declare double @llvm.powi.f64(double %Val, i32 %power)
+</pre>
+
+<h5>Overview:</h5>
+
+<p>
+The '<tt>llvm.powi.*</tt>' intrinsics return the first operand raised to the
+specified (positive or negative) power. The order of evaluation of
+multiplications is not defined.
+</p>
+
+<h5>Arguments:</h5>
+
+<p>
+The second argument is an integer power, and the first is a value to raise to
+that power.
+</p>
+
+<h5>Semantics:</h5>
+
+<p>
+This function returns the first value raised to the second power with an
+unspecified sequence of rounding operations.</p>
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="int_manip">Bit Manipulation Intrinsics</a>
+</div>
+
+<div class="doc_text">
+<p>
+LLVM provides intrinsics for a few important bit manipulation operations.
+These allow efficient code generation for some algorithms.
+</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_bswap">'<tt>llvm.bswap.*</tt>' Intrinsics</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<p>This is an overloaded intrinsic function. You can use bswap on any integer
+type that is an even number of bytes (i.e. BitWidth % 16 == 0). Note the suffix
+that includes the type for the result and the operand.</p>
+<pre>
+ declare i16 @llvm.bswap.i16.i16(i16 &lt;id&gt;)
+ declare i32 @llvm.bswap.i32.i32(i32 &lt;id&gt;)
+ declare i64 @llvm.bswap.i64.i64(i64 &lt;id&gt;)
+</pre>
+
+<h5>Overview:</h5>
+
+<p>
+The '<tt>llvm.bswap</tt>' family of intrinsics is used to byte swap integer
+values with an even number of bytes (positive multiple of 16 bits). These are
+useful for performing operations on data that is not in the target's native
+byte order.
+</p>
+
+<h5>Semantics:</h5>
+
+<p>
+The <tt>llvm.bswap.i16.i16</tt> intrinsic returns an i16 value that has the high
+and low byte of the input i16 swapped. Similarly, the <tt>llvm.bswap.i32.i32</tt>
+intrinsic returns an i32 value that has the four bytes of the input i32
+swapped, so that if the input bytes are numbered 0, 1, 2, 3 then the returned
+i32 will have its bytes in 3, 2, 1, 0 order. The <tt>llvm.bswap.i48.i48</tt>,
+<tt>llvm.bswap.i64.i64</tt> and other intrinsics extend this concept to
+additional even-byte lengths (6 bytes, 8 bytes and more, respectively).
+</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_ctpop">'<tt>llvm.ctpop.*</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<p>This is an overloaded intrinsic. You can use llvm.ctpop on any integer bit
+width. Not all targets support all bit widths however.</p>
+<pre>
+ declare i32 @llvm.ctpop.i8 (i8 &lt;src&gt;)
+ declare i32 @llvm.ctpop.i16(i16 &lt;src&gt;)
+ declare i32 @llvm.ctpop.i32(i32 &lt;src&gt;)
+ declare i32 @llvm.ctpop.i64(i64 &lt;src&gt;)
+ declare i32 @llvm.ctpop.i256(i256 &lt;src&gt;)
+</pre>
+
+<h5>Overview:</h5>
+
+<p>
+The '<tt>llvm.ctpop</tt>' family of intrinsics counts the number of bits set in a
+value.
+</p>
+
+<h5>Arguments:</h5>
+
+<p>
+The only argument is the value to be counted. The argument may be of any
+integer type. The result is always an <tt>i32</tt>, regardless of the argument's bit width.
+</p>
+
+<h5>Semantics:</h5>
+
+<p>
+The '<tt>llvm.ctpop</tt>' intrinsic counts the 1's in a variable.
+</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_ctlz">'<tt>llvm.ctlz.*</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<p>This is an overloaded intrinsic. You can use <tt>llvm.ctlz</tt> on any
+integer bit width. Not all targets support all bit widths however.</p>
+<pre>
+ declare i32 @llvm.ctlz.i8 (i8 &lt;src&gt;)
+ declare i32 @llvm.ctlz.i16(i16 &lt;src&gt;)
+ declare i32 @llvm.ctlz.i32(i32 &lt;src&gt;)
+ declare i32 @llvm.ctlz.i64(i64 &lt;src&gt;)
+ declare i32 @llvm.ctlz.i256(i256 &lt;src&gt;)
+</pre>
+
+<h5>Overview:</h5>
+
+<p>
+The '<tt>llvm.ctlz</tt>' family of intrinsic functions counts the number of
+leading zeros in a variable.
+</p>
+
+<h5>Arguments:</h5>
+
+<p>
+The only argument is the value to be counted. The argument may be of any
+integer type. The result is always an <tt>i32</tt>, regardless of the argument's bit width.
+</p>
+
+<h5>Semantics:</h5>
+
+<p>
+The '<tt>llvm.ctlz</tt>' intrinsic counts the leading (most significant) zeros
+in a variable. If the src == 0 then the result is the size in bits of the type
+of src. For example, <tt>llvm.ctlz(i32 2) = 30</tt>.
+</p>
+</div>
+
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_cttz">'<tt>llvm.cttz.*</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<p>This is an overloaded intrinsic. You can use <tt>llvm.cttz</tt> on any
+integer bit width. Not all targets support all bit widths however.</p>
+<pre>
+ declare i32 @llvm.cttz.i8 (i8 &lt;src&gt;)
+ declare i32 @llvm.cttz.i16(i16 &lt;src&gt;)
+ declare i32 @llvm.cttz.i32(i32 &lt;src&gt;)
+ declare i32 @llvm.cttz.i64(i64 &lt;src&gt;)
+ declare i32 @llvm.cttz.i256(i256 &lt;src&gt;)
+</pre>
+
+<h5>Overview:</h5>
+
+<p>
+The '<tt>llvm.cttz</tt>' family of intrinsic functions counts the number of
+trailing zeros.
+</p>
+
+<h5>Arguments:</h5>
+
+<p>
+The only argument is the value to be counted. The argument may be of any
+integer type. The result is always an <tt>i32</tt>, regardless of the argument's bit width.
+</p>
+
+<h5>Semantics:</h5>
+
+<p>
+The '<tt>llvm.cttz</tt>' intrinsic counts the trailing (least significant) zeros
+in a variable. If the src == 0 then the result is the size in bits of the type
+of src. For example, <tt>llvm.cttz(2) = 1</tt>.
+</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_part_select">'<tt>llvm.part.select.*</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<p>This is an overloaded intrinsic. You can use <tt>llvm.part.select</tt>
+on any integer bit width.
+<pre>
+ declare i17 @llvm.part.select.i17.i17 (i17 %val, i32 %loBit, i32 %hiBit)
+ declare i29 @llvm.part.select.i29.i29 (i29 %val, i32 %loBit, i32 %hiBit)
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>llvm.part.select</tt>' family of intrinsic functions selects a
+range of bits from an integer value and returns them in the same bit width as
+the original value.</p>
+
+<h5>Arguments:</h5>
+<p>The first argument, <tt>%val</tt> and the result may be integer types of
+any bit width but they must have the same bit width. The second and third
+arguments must be <tt>i32</tt> type since they specify only a bit index.</p>
+
+<h5>Semantics:</h5>
+<p>The '<tt>llvm.part.select</tt>' intrinsic has two modes
+of operation: forward and reverse. If <tt>%loBit</tt> is greater than
+<tt>%hiBit</tt> then the intrinsic operates in reverse mode. Otherwise it
+operates in forward mode.</p>
+<p>In forward mode, this intrinsic is the equivalent of shifting <tt>%val</tt>
+right by <tt>%loBit</tt> bits and then ANDing it with a mask with
+only the <tt>%hiBit - %loBit</tt> bits set, as follows:</p>
+<ol>
+ <li>The <tt>%val</tt> is shifted right (LSHR) by the number of bits specified
+ by <tt>%loBit</tt>. This normalizes the value to the low order bits.</li>
+ <li>The <tt>%loBit</tt> value is subtracted from the <tt>%hiBit</tt> value
+ to determine the number of bits to retain.</li>
+ <li>A mask of the retained bits is created by shifting a -1 value.</li>
+ <li>The mask is ANDed with the shifted <tt>%val</tt> to produce the result.</li>
+</ol>
+<p>In reverse mode, a similar computation is made except that the bits are
+returned in the reverse order. So, for example, if <tt>X</tt> has the value
+<tt>i16 0x0ACF (101011001111)</tt> and we apply
+<tt>part.select(i16 X, 8, 3)</tt> to it, we get back the value
+<tt>i16 0x0026 (000000100110)</tt>.</p>
+</div>
+
+<div class="doc_subsubsection">
+ <a name="int_part_set">'<tt>llvm.part.set.*</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<p>This is an overloaded intrinsic. You can use <tt>llvm.part.set</tt>
+on any integer bit width.
+<pre>
+ declare i17 @llvm.part.set.i17.i17.i9 (i17 %val, i9 %repl, i32 %lo, i32 %hi)
+ declare i29 @llvm.part.set.i29.i29.i9 (i29 %val, i9 %repl, i32 %lo, i32 %hi)
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>llvm.part.set</tt>' family of intrinsic functions replaces a range
+of bits in an integer value with another integer value. It returns the integer
+with the replaced bits.</p>
+
+<h5>Arguments:</h5>
+<p>The first argument, <tt>%val</tt> and the result may be integer types of
+any bit width but they must have the same bit width. <tt>%val</tt> is the value
+whose bits will be replaced. The second argument, <tt>%repl</tt> may be an
+integer of any bit width. The third and fourth arguments must be <tt>i32</tt>
+type since they specify only a bit index.</p>
+
+<h5>Semantics:</h5>
+<p>The operation of the '<tt>llvm.part.set</tt>' intrinsic has two modes
+of operation: forwards and reverse. If <tt>%lo</tt> is greater than
+<tt>%hi</tt> then the intrinsic operates in reverse mode. Otherwise it
+operates in forward mode.</p>
+<p>For both modes, the <tt>%repl</tt> value is prepared for use by either
+truncating it down to the size of the replacement area or zero extending it
+up to that size.</p>
+<p>In forward mode, the bits between <tt>%lo</tt> and <tt>%hi</tt> (inclusive)
+are replaced with corresponding bits from <tt>%repl</tt>. That is, the 0th bit
+in <tt>%repl</tt> replaces the <tt>%lo</tt>th bit in <tt>%val</tt>, and so on
+up to the <tt>%hi</tt>th bit.</p>
+<p>In reverse mode, a similar computation is made except that the bits are
+reversed. That is, the <tt>0</tt>th bit in <tt>%repl</tt> replaces the
+<tt>%hi</tt>th bit in <tt>%val</tt>, and so on down to the <tt>%lo</tt>th bit.</p>
+<h5>Examples:</h5>
+<pre>
+ llvm.part.set(0xFFFF, 0, 4, 7) -&gt; 0xFF0F
+ llvm.part.set(0xFFFF, 0, 7, 4) -&gt; 0xFF0F
+ llvm.part.set(0xFFFF, 1, 7, 4) -&gt; 0xFF8F
+ llvm.part.set(0xFFFF, F, 8, 3) -&gt; 0xFFE7
+ llvm.part.set(0xFFFF, 0, 3, 8) -&gt; 0xFE07
+</pre>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="int_debugger">Debugger Intrinsics</a>
+</div>
+
+<div class="doc_text">
+<p>
+The LLVM debugger intrinsics (which all start with <tt>llvm.dbg.</tt> prefix),
+are described in the <a
+href="SourceLevelDebugging.html#format_common_intrinsics">LLVM Source Level
+Debugging</a> document.
+</p>
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="int_eh">Exception Handling Intrinsics</a>
+</div>
+
+<div class="doc_text">
+<p> The LLVM exception handling intrinsics (which all start with
+<tt>llvm.eh.</tt> prefix), are described in the <a
+href="ExceptionHandling.html#format_common_intrinsics">LLVM Exception
+Handling</a> document. </p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="int_general">General Intrinsics</a>
+</div>
+
+<div class="doc_text">
+<p> This class of intrinsics is designed to be generic and has
+no specific purpose. </p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_var_annotation">'<tt>llvm.var.annotation</tt>' Intrinsic</a>
+</div>
+
+<div class="doc_text">
+
+<h5>Syntax:</h5>
+<pre>
+ declare void @llvm.var.annotation(i8* &lt;val&gt;, i8* &lt;str&gt;, i8* &lt;str&gt;, i32 &lt;int&gt; )
+</pre>
+
+<h5>Overview:</h5>
+
+<p>
+The '<tt>llvm.var.annotation</tt>' intrinsic annotates a local variable with an arbitrary string.
+</p>
+
+<h5>Arguments:</h5>
+
+<p>
+The first argument is a pointer to a value, the second is a pointer to a
+global string, the third is a pointer to a global string which is the source
+file name, and the last argument is the line number.
+</p>
+
+<h5>Semantics:</h5>
+
+<p>
+This intrinsic allows annotation of local variables with arbitrary strings.
+This can be useful for special purpose optimizations that want to look for these
+ annotations. These have no other defined use; they are ignored by code
+ generation and optimization.</p>
+</div>
+
+
+<!-- *********************************************************************** -->
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+ <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
+ <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+</body>
+</html>
diff --git a/docs/Lexicon.html b/docs/Lexicon.html
new file mode 100644
index 0000000..dac1ead
--- /dev/null
+++ b/docs/Lexicon.html
@@ -0,0 +1,181 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+ <title>The LLVM Lexicon</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+ <meta name="author" content="Various">
+ <meta name="description"
+ content="A glossary of terms used with the LLVM project.">
+</head>
+<body>
+<div class="doc_title">The LLVM Lexicon</div>
+<p class="doc_warning">NOTE: This document is a work in progress!</p>
+<!-- *********************************************************************** -->
+<div class="doc_section">Table Of Contents</div>
+<!-- *********************************************************************** -->
+<div class="doc_text">
+ <table>
+ <tr><th colspan="8"><b>- <a href="#A">A</a> -</b></th></tr>
+ <tr>
+ <td><a href="#ADCE">ADCE</a></td>
+ </tr>
+ <tr><th colspan="8"><b>- <a href="#B">B</a> -</b></th></tr>
+ <tr>
+ <td><a href="#BURS">BURS</a></td>
+ </tr>
+ <tr><th colspan="8"><b>- <a href="#C">C</a> -</b></th></tr>
+ <tr>
+ <td><a href="#CSE">CSE</a></td>
+ </tr>
+ <tr><th colspan="8"><b>- <a href="#D">D</a> -</b></th></tr>
+ <tr>
+ <td><a href="#DSA">DSA</a></td>
+ <td><a href="#DSE">DSE</a></td>
+ </tr>
+ <tr><th colspan="8"><b>- <a href="#I">I</a> -</b></th></tr>
+ <tr>
+ <td><a href="#IPA">IPA</a></td>
+ <td><a href="#IPO">IPO</a></td>
+ </tr>
+ <tr><th colspan="8"><b>- <a href="#L">L</a> -</b></th></tr>
+ <tr>
+ <td><a href="#LCSSA">LCSSA</a></td>
+ <td><a href="#LICM">LICM</a></td>
+ <td><a href="#Load-VN">Load-VN</a></td>
+ </tr>
+ <tr><th colspan="8"><b>- <a href="#P">P</a> -</b></th></tr>
+ <tr>
+ <td><a href="#PRE">PRE</a></td>
+ </tr>
+ <tr><th colspan="8"><b>- <a href="#R">R</a> -</b></th></tr>
+ <tr>
+ <td><a href="#Reassociation">Reassociation</a></td>
+ </tr>
+ <tr><th colspan="8"><b>- <a href="#S">S</a> -</b></th></tr>
+ <tr>
+ <td><a href="#SCC">SCC</a></td>
+ <td><a href="#SCCP">SCCP</a></td>
+ <td><a href="#SRoA">SRoA</a></td>
+ <td><a href="#SSA">SSA</a></td>
+ </tr>
+ </table>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">Definitions</div>
+<!-- *********************************************************************** -->
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="A">- A -</a></div>
+<div class="doc_text">
+ <dl>
+ <dt><a name="ADCE"><b>ADCE</b></a></dt>
+ <dd>Aggressive Dead Code Elimination</dd>
+ </dl>
+</div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="B">- B -</a></div>
+<div class="doc_text">
+ <dl>
+ <dt><a name="BURS"><b>BURS</b></a></dt>
+ <dd>Bottom Up Rewriting System - A method of instruction selection for
+ code generation. An example is the <a
+href="http://www.program-transformation.org/Transform/BURG">BURG</a> tool.</dd>
+ </dl>
+</div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="C">- C -</a></div>
+<div class="doc_text">
+ <dl>
+ <dt><a name="CSE"><b>CSE</b></a></dt>
+ <dd>Common Subexpression Elimination. An optimization that removes common
+ subexpression computation. For example <tt>(a+b)*(a+b)</tt> has two
+ subexpressions that are the same: <tt>(a+b)</tt>. This optimization would
+ perform the addition only once and then perform the multiply (but only if
+ it's computationally correct/safe).</dd>
+ </dl>
+</div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="D">- D -</a></div>
+<div class="doc_text">
+ <dl>
+ <dt><a name="DSA"><b>DSA</b></a></dt>
+ <dd>Data Structure Analysis</dd>
+ <dt><a name="DSE"><b>DSE</b></a></dt>
+ <dd>Dead Store Elimination</dd>
+ </dl>
+</div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="I">- I -</a></div>
+<div class="doc_text">
+ <dl>
+ <dt><a name="IPA"><b>IPA</b></a></dt>
+ <dd>Inter-Procedural Analysis. Refers to any variety of code analysis that
+ occurs between procedures, functions or compilation units (modules).</dd>
+ <dt><a name="IPO"><b>IPO</b></a></dt>
+ <dd>Inter-Procedural Optimization. Refers to any variety of code
+ optimization that occurs between procedures, functions or compilation units
+ (modules).</dd>
+ </dl>
+</div>
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="L">- L -</a></div>
+<div class="doc_text">
+ <dl>
+ <dt><a name="LCSSA"><b>LCSSA</b></a></dt>
+ <dd>Loop-Closed Static Single Assignment Form</dd>
+ <dt><a name="LICM"><b>LICM</b></a></dt>
+ <dd>Loop Invariant Code Motion</dd>
+ <dt><a name="Load-VN"><b>Load-VN</b></a></dt>
+ <dd>Load Value Numbering</dd>
+ </dl>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="P">- P -</a></div>
+<div class="doc_text">
+ <dl>
+ <dt><a name="PRE"><b>PRE</b></a></dt>
+ <dd>Partial Redundancy Elimination</dd>
+ </dl>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="R">- R -</a></div>
+<div class="doc_text">
+ <dl>
+ <dt><a name="Reassociation"><b>Reassociation</b></a></dt> <dd>Rearranging
+ associative expressions to promote better redundancy elimination and other
+ optimization. For example, changing (A+B-A) into (B+A-A), permitting it to
+ be optimized into (B+0) then (B).</dd>
+ </dl>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="S">- S -</a></div>
+<div class="doc_text">
+ <dl>
+ <dt><a name="SCC"><b>SCC</b></a></dt>
+ <dd>Strongly Connected Component</dd>
+ <dt><a name="SCCP"><b>SCCP</b></a></dt>
+ <dd>Sparse Conditional Constant Propagation</dd>
+ <dt><a name="SRoA"><b>SRoA</b></a></dt>
+ <dd>Scalar Replacement of Aggregates</dd>
+ <dt><a name="SSA"><b>SSA</b></a></dt>
+ <dd>Static Single Assignment</dd>
+ </dl>
+</div>
+<!-- *********************************************************************** -->
+<hr>
+<address> <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a><a
+ href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a><a
+ href="http://llvm.org/">The LLVM Team</a><br>
+<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
+Last modified: $Date$
+</address>
+<!-- vim: sw=2
+-->
+</body>
+</html>
diff --git a/docs/LinkTimeOptimization.html b/docs/LinkTimeOptimization.html
new file mode 100644
index 0000000..fdae78a
--- /dev/null
+++ b/docs/LinkTimeOptimization.html
@@ -0,0 +1,392 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>LLVM Link Time Optimization: Design and Implementation</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+<div class="doc_title">
+ LLVM Link Time Optimization: Design and Implementation
+</div>
+
+<ul>
+ <li><a href="#desc">Description</a></li>
+ <li><a href="#design">Design Philosophy</a>
+ <ul>
+ <li><a href="#example1">Example of link time optimization</a></li>
+ <li><a href="#alternative_approaches">Alternative Approaches</a></li>
+ </ul></li>
+ <li><a href="#multiphase">Multi-phase communication between LLVM and linker</a>
+ <ul>
+ <li><a href="#phase1">Phase 1 : Read LLVM Bitcode Files</a></li>
+ <li><a href="#phase2">Phase 2 : Symbol Resolution</a></li>
+ <li><a href="#phase3">Phase 3 : Optimize Bitcode Files</a></li>
+ <li><a href="#phase4">Phase 4 : Symbol Resolution after optimization</a></li>
+ </ul></li>
+ <li><a href="#lto">LLVMlto</a>
+ <ul>
+ <li><a href="#llvmsymbol">LLVMSymbol</a></li>
+ <li><a href="#readllvmobjectfile">readLLVMObjectFile()</a></li>
+ <li><a href="#optimizemodules">optimizeModules()</a></li>
+ <li><a href="#gettargettriple">getTargetTriple()</a></li>
+ <li><a href="#removemodule">removeModule()</a></li>
+ <li><a href="#getalignment">getAlignment()</a></li>
+ </ul></li>
+ <li><a href="#debug">Debugging Information</a></li>
+</ul>
+
+<div class="doc_author">
+<p>Written by Devang Patel</p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+<a name="desc">Description</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+<p>
+LLVM features powerful intermodular optimizations which can be used at link
+time. Link Time Optimization is another name for intermodular optimization
+when performed during the link stage. This document describes the interface
+and design between the LLVM intermodular optimizer and the linker.</p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+<a name="design">Design Philosophy</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+<p>
+The LLVM Link Time Optimizer provides complete transparency, while doing
+intermodular optimization, in the compiler tool chain. Its main goal is to let
+the developer take advantage of intermodular optimizations without making any
+significant changes to the developer's makefiles or build system. This is
+achieved through tight integration with the linker. In this model, the linker
+treats LLVM bitcode files like native object files and allows mixing and
+matching among them. The linker uses <a href="#lto">LLVMlto</a>, a dynamically
+loaded library, to handle LLVM bitcode files. This tight integration between
+the linker and LLVM optimizer helps to do optimizations that are not possible
+in other models. The linker input allows the optimizer to avoid relying on
+conservative escape analysis.
+</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="example1">Example of link time optimization</a>
+</div>
+
+<div class="doc_text">
+ <p>The following example illustrates the advantages of LTO's integrated
+ approach and clean interface. This example requires a system linker which
+ supports LTO through the interface described in this document. Here,
+ llvm-gcc4 transparently invokes the system linker. </p>
+ <ul>
+ <li> Input source file <tt>a.c</tt> is compiled into LLVM bitcode form.
+ <li> Input source file <tt>main.c</tt> is compiled into native object code.
+ </ul>
+<div class="doc_code"><pre>
+--- a.h ---
+extern int foo1(void);
+extern void foo2(void);
+extern void foo4(void);
+--- a.c ---
+#include "a.h"
+
+static signed int i = 0;
+
+void foo2(void) {
+ i = -1;
+}
+
+static int foo3() {
+foo4();
+return 10;
+}
+
+int foo1(void) {
+int data = 0;
+
+if (i &lt; 0) { data = foo3(); }
+
+data = data + 42;
+return data;
+}
+
+--- main.c ---
+#include &lt;stdio.h&gt;
+#include "a.h"
+
+void foo4(void) {
+ printf ("Hi\n");
+}
+
+int main() {
+ return foo1();
+}
+
+--- command lines ---
+$ llvm-gcc4 --emit-llvm -c a.c -o a.o # &lt;-- a.o is LLVM bitcode file
+$ llvm-gcc4 -c main.c -o main.o # &lt;-- main.o is native object file
+$ llvm-gcc4 a.o main.o -o main # &lt;-- standard link command without any modifications
+</pre></div>
+ <p>In this example, the linker recognizes that <tt>foo2()</tt> is an
+ externally visible symbol defined in an LLVM bitcode file. This information
+ is collected using <a href="#readllvmobjectfile"> readLLVMObjectFile()</a>.
+ Based on this information, the linker completes its usual symbol resolution
+ pass and finds that <tt>foo2()</tt> is not used anywhere. This information
+ is used by the LLVM optimizer and it removes <tt>foo2()</tt>. As soon as
+ <tt>foo2()</tt> is removed, the optimizer recognizes that condition
+ <tt>i &lt; 0</tt> is always false, which means <tt>foo3()</tt> is never
+ used. Hence, the optimizer removes <tt>foo3()</tt>, also. This, in turn,
+ enables the linker to remove <tt>foo4()</tt>. This example illustrates the
+ advantage of tight integration with the linker. Here, the optimizer can not
+ remove <tt>foo3()</tt> without the linker's input.
+ </p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="alternative_approaches">Alternative Approaches</a>
+</div>
+
+<div class="doc_text">
+ <dl>
+ <dt><b>Compiler driver invokes link time optimizer separately.</b></dt>
+ <dd>In this model the link time optimizer is not able to take advantage of
+ information collected during the linker's normal symbol resolution phase.
+ In the above example, the optimizer can not remove <tt>foo2()</tt> without
+ the linker's input because it is externally visible. This in turn prohibits
+ the optimizer from removing <tt>foo3()</tt>.</dd>
+ <dt><b>Use separate tool to collect symbol information from all object
+ files.</b></dt>
+ <dd>In this model, a new, separate, tool or library replicates the linker's
+ capability to collect information for link time optimization. Not only is
+ this code duplication difficult to justify, but it also has several other
+ disadvantages. For example, the linking semantics and the features
+ provided by the linker on various platforms are not uniform. This means
+ this new tool needs to support all such features and platforms in one
+ super tool or a separate tool per platform is required. This increases
+ the maintenance cost of the link time optimizer significantly, which is not
+ necessary. This approach also requires staying synchronized with linker
+ developments on various platforms, which is not the main focus of the link
+ time optimizer. Finally, this approach increases the end user's build time due
+ to the duplication of work done by this separate tool and the linker itself.
+ </dd>
+ </dl>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="multiphase">Multi-phase communication between LLVM and linker</a>
+</div>
+
+<div class="doc_text">
+ <p>The linker collects information about symbol definitions and uses in
+ various link objects which is more accurate than any information collected
+ by other tools during typical build cycles. The linker collects this
+ information by looking at the definitions and uses of symbols in native .o
+ files and using symbol visibility information. The linker also uses
+ user-supplied information, such as a list of exported symbols. LLVM
+ optimizer collects control flow information, data flow information and knows
+ much more about program structure from the optimizer's point of view.
+ Our goal is to take advantage of tight integration between the linker and
+ the optimizer by sharing this information during various linking phases.
+</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="phase1">Phase 1 : Read LLVM Bitcode Files</a>
+</div>
+
+<div class="doc_text">
+ <p>The linker first reads all object files in natural order and collects
+ symbol information. This includes native object files as well as LLVM bitcode
+ files. In this phase, the linker uses
+ <a href="#readllvmobjectfile"> readLLVMObjectFile() </a> to collect symbol
+ information from each LLVM bitcode file and updates its internal global
+ symbol table accordingly. The intent of this interface is to avoid overhead
+ in the non LLVM case, where all input object files are native object files,
+ by putting this code in the error path of the linker. When the linker sees
+ the first llvm .o file, it <tt>dlopen()</tt>s the dynamic library. This is
+ to allow changes to the LLVM LTO code without relinking the linker.
+</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="phase2">Phase 2 : Symbol Resolution</a>
+</div>
+
+<div class="doc_text">
+ <p>In this stage, the linker resolves symbols using global symbol table
+ information to report undefined symbol errors, read archive members, resolve
+ weak symbols, etc. The linker is able to do this seamlessly even though it
+ does not know the exact content of input LLVM bitcode files because it uses
+ symbol information provided by
+ <a href="#readllvmobjectfile">readLLVMObjectFile()</a>. If dead code
+ stripping is enabled then the linker collects the list of live symbols.
+ </p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="phase3">Phase 3 : Optimize Bitcode Files</a>
+</div>
+<div class="doc_text">
+ <p>After symbol resolution, the linker updates symbol information supplied
+ by LLVM bitcode files appropriately. For example, whether certain LLVM
+ bitcode supplied symbols are used or not. In the example above, the linker
+ reports that <tt>foo2()</tt> is not used anywhere in the program, including
+ native <tt>.o</tt> files. This information is used by the LLVM interprocedural
+ optimizer. The linker uses <a href="#optimizemodules">optimizeModules()</a>
+ and requests an optimized native object file of the LLVM portion of the
+ program.
+</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="phase4">Phase 4 : Symbol Resolution after optimization</a>
+</div>
+
+<div class="doc_text">
+ <p>In this phase, the linker reads the optimized native object file and
+ updates the internal global symbol table to reflect any changes. The linker
+ also collects information about any changes in use of external symbols by
+ LLVM bitcode files. In the example above, the linker notes that
+ <tt>foo4()</tt> is not used any more. If dead code stripping is enabled then
+ the linker refreshes the live symbol information appropriately and performs
+ dead code stripping.</p>
+ <p>After this phase, the linker continues linking as if it never saw LLVM
+ bitcode files.</p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+<a name="lto">LLVMlto</a>
+</div>
+
+<div class="doc_text">
+ <p><tt>LLVMlto</tt> is a dynamic library that is part of the LLVM tools, and
+ is intended for use by a linker. <tt>LLVMlto</tt> provides an abstract C++
+ interface to use the LLVM interprocedural optimizer without exposing details
+ of LLVM's internals. The intention is to keep the interface as stable as
+ possible even when the LLVM optimizer continues to evolve.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="llvmsymbol">LLVMSymbol</a>
+</div>
+
+<div class="doc_text">
+ <p>The <tt>LLVMSymbol</tt> class is used to describe the externally visible
+ functions and global variables, defined in LLVM bitcode files, to the linker.
+ This includes symbol visibility information. This information is used by
+ the linker to do symbol resolution. For example: function <tt>foo2()</tt> is
+ defined inside an LLVM bitcode module and it is an externally visible symbol.
+ This helps the linker connect the use of <tt>foo2()</tt> in native object
+ files with a future definition of the symbol <tt>foo2()</tt>. The linker
+ will see the actual definition of <tt>foo2()</tt> when it receives the
+ optimized native object file in
+ <a href="#phase4">Symbol Resolution after optimization</a> phase. If the
+ linker does not find any uses of <tt>foo2()</tt>, it updates LLVMSymbol
+ visibility information to notify LLVM intermodular optimizer that it is dead.
+ The LLVM intermodular optimizer takes advantage of such information to
+ generate better code.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="readllvmobjectfile">readLLVMObjectFile()</a>
+</div>
+
+<div class="doc_text">
+ <p>The <tt>readLLVMObjectFile()</tt> function is used by the linker to read
+ LLVM bitcode files and collect LLVMSymbol information. This routine also
+ supplies a list of externally defined symbols that are used by LLVM bitcode
+ files. The linker uses this symbol information to do symbol resolution.
+ Internally, <a href="#lto">LLVMlto</a> maintains LLVM bitcode modules in
+ memory. This function also provides a list of external references used by
+ bitcode files.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="optimizemodules">optimizeModules()</a>
+</div>
+
+<div class="doc_text">
+ <p>The linker invokes <tt>optimizeModules</tt> to optimize already read
+ LLVM bitcode files by applying LLVM intermodular optimization techniques.
+ This function runs the LLVM intermodular optimizer and generates native
+ object code as <tt>.o</tt> files at the name and location provided by the
+ linker.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="gettargettriple">getTargetTriple()</a>
+</div>
+
+<div class="doc_text">
+ <p>The linker may use <tt>getTargetTriple()</tt> to query target architecture
+ while validating an LLVM bitcode file.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="removemodule">removeModule()</a>
+</div>
+
+<div class="doc_text">
+ <p>Internally, <a href="#lto">LLVMlto</a> maintains LLVM bitcode modules in
+ memory. The linker may use <tt>removeModule()</tt> method to remove desired
+ modules from memory. </p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="getalignment">getAlignment()</a>
+</div>
+
+<div class="doc_text">
+ <p>The linker may use <a href="#llvmsymbol">LLVMSymbol</a> method
+ <tt>getAlignment()</tt> to query symbol alignment information.</p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="debug">Debugging Information</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p><tt> ... To be completed ... </tt></p>
+
+</div>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+ Devang Patel<br>
+ <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+
+</body>
+</html>
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..58175bd
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,91 @@
+##===- docs/Makefile ---------------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file was developed by the LLVM research group and is distributed under
+# the University of Illinois Open Source License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL := ..
+DIRS := CommandGuide
+
+ifdef BUILD_FOR_WEBSITE
+PROJ_OBJ_DIR = .
+DOXYGEN = doxygen
+# Extract version number from the AC_INIT line in configure.ac
+# AC_INIT([[llvm]],[[2.1svn]],[llvmbugs@cs.uiuc.edu])
+PACKAGE_VERSION = $(shell grep AC_INIT ../autoconf/configure.ac | sed -e 's/^[^0-9]*\([0-9_.a-zA-Z-]*\).*/\1/' )
+
+$(PROJ_OBJ_DIR)/doxygen.cfg: doxygen.cfg.in
+ cat $< | sed \
+ -e 's/@abs_top_srcdir@/../g' \
+ -e 's/@DOT@/dot/g' \
+ -e 's/@PACKAGE_VERSION@/$(PACKAGE_VERSION)/' \
+ -e 's/@abs_top_builddir@/../g' > $@
+endif
+
+include $(LEVEL)/Makefile.common
+
+HTML := $(wildcard $(PROJ_SRC_DIR)/*.html) \
+ $(wildcard $(PROJ_SRC_DIR)/*.css)
+IMAGES := $(wildcard $(PROJ_SRC_DIR)/img/*.*)
+DOXYFILES := doxygen.cfg.in doxygen.css doxygen.footer doxygen.header \
+ doxygen.intro
+EXTRA_DIST := $(HTML) $(DOXYFILES) llvm.css CommandGuide img
+
+.PHONY: install-html install-doxygen doxygen
+
+ifeq ($(ENABLE_DOXYGEN),1)
+install-local:: install-html install-doxygen
+else
+install-local:: install-html
+endif
+
+install-html: $(PROJ_OBJ_DIR)/html.tar.gz
+ $(Echo) Installing HTML documentation
+ $(Verb) $(MKDIR) $(PROJ_docsdir)/html
+ $(Verb) $(MKDIR) $(PROJ_docsdir)/html/img
+ $(Verb) $(DataInstall) $(HTML) $(PROJ_docsdir)/html
+ $(Verb) $(DataInstall) $(IMAGES) $(PROJ_docsdir)/html/img
+ $(Verb) $(DataInstall) $(PROJ_OBJ_DIR)/html.tar.gz $(PROJ_docsdir)
+
+$(PROJ_OBJ_DIR)/html.tar.gz: $(HTML)
+ $(Echo) Packaging HTML documentation
+ $(Verb) $(RM) -rf $@ $(PROJ_OBJ_DIR)/html.tar
+ $(Verb) cd $(PROJ_SRC_DIR) && \
+ $(TAR) cf $(PROJ_OBJ_DIR)/html.tar *.html
+ $(Verb) $(GZIP) $(PROJ_OBJ_DIR)/html.tar
+
+install-doxygen: doxygen
+ $(Echo) Installing doxygen documentation
+ $(Echo) Installing doxygen documentation
+ $(Verb) $(MKDIR) $(PROJ_docsdir)/html/doxygen
+ $(Verb) $(DataInstall) $(PROJ_OBJ_DIR)/doxygen.tar.gz $(PROJ_docsdir)
+ $(Verb) cd $(PROJ_OBJ_DIR)/doxygen && \
+ $(FIND) . -type f -exec \
+ $(DataInstall) {} $(PROJ_docsdir)/html/doxygen \;
+
+doxygen: regendoc $(PROJ_OBJ_DIR)/doxygen.tar.gz
+
+regendoc:
+ $(Echo) Building doxygen documentation
+ $(Verb) if test -e $(PROJ_OBJ_DIR)/doxygen ; then \
+ $(RM) -rf $(PROJ_OBJ_DIR)/doxygen ; \
+ fi
+ $(Verb) $(DOXYGEN) $(PROJ_OBJ_DIR)/doxygen.cfg
+
+$(PROJ_OBJ_DIR)/doxygen.tar.gz: $(DOXYFILES) $(PROJ_OBJ_DIR)/doxygen.cfg
+ $(Echo) Packaging doxygen documentation
+ $(Verb) $(RM) -rf $@ $(PROJ_OBJ_DIR)/doxygen.tar
+ $(Verb) $(TAR) cf $(PROJ_OBJ_DIR)/doxygen.tar doxygen
+ $(Verb) $(GZIP) $(PROJ_OBJ_DIR)/doxygen.tar
+ $(Verb) $(CP) $(PROJ_OBJ_DIR)/doxygen.tar.gz $(PROJ_OBJ_DIR)/doxygen/html/
+
+userloc: $(LLVM_SRC_ROOT)/docs/userloc.html
+
+$(LLVM_SRC_ROOT)/docs/userloc.html:
+ $(Echo) Making User LOC Table
+ $(Verb) cd $(LLVM_SRC_ROOT) ; ./utils/userloc.pl -details -recurse \
+ -html lib include tools runtime utils examples autoconf test > docs/userloc.html
+
diff --git a/docs/MakefileGuide.html b/docs/MakefileGuide.html
new file mode 100644
index 0000000..9cb5b15
--- /dev/null
+++ b/docs/MakefileGuide.html
@@ -0,0 +1,1046 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+ <title>LLVM Makefile Guide</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<div class="doc_title">LLVM Makefile Guide</div>
+
+<ol>
+ <li><a href="#introduction">Introduction</a></li>
+ <li><a href="#general">General Concepts</a>
+ <ol>
+ <li><a href="#projects">Projects</a></li>
+ <li><a href="#varvalues">Variable Values</a></li>
+ <li><a href="#including">Including Makefiles</a>
+ <ol>
+ <li><a href="#Makefile">Makefile</a></li>
+ <li><a href="#Makefile.common">Makefile.common</a></li>
+ <li><a href="#Makefile.config">Makefile.config</a></li>
+ <li><a href="#Makefile.rules">Makefile.rules</a></li>
+ </ol>
+ </li>
+ <li><a href="#Comments">Comments</a></li>
+ </ol>
+ </li>
+ <li><a href="#tutorial">Tutorial</a>
+ <ol>
+ <li><a href="#libraries">Libraries</a>
+ <ol>
+ <li><a href="#BCModules">Bitcode Modules</a></li>
+ <li><a href="#LoadableModules">Loadable Modules</a></li>
+ </ol>
+ </li>
+ <li><a href="#tools">Tools</a>
+ <ol>
+ <li><a href="#JIT">JIT Tools</a></li>
+ </ol>
+ </li>
+ <li><a href="#projects">Projects</a></li>
+ </ol>
+ </li>
+ <li><a href="#targets">Targets Supported</a>
+ <ol>
+ <li><a href="#all">all</a></li>
+ <li><a href="#all-local">all-local</a></li>
+ <li><a href="#check">check</a></li>
+ <li><a href="#check-local">check-local</a></li>
+ <li><a href="#clean">clean</a></li>
+ <li><a href="#clean-local">clean-local</a></li>
+ <li><a href="#dist">dist</a></li>
+ <li><a href="#dist-check">dist-check</a></li>
+ <li><a href="#dist-clean">dist-clean</a></li>
+ <li><a href="#install">install</a></li>
+ <li><a href="#preconditions">preconditions</a></li>
+ <li><a href="#printvars">printvars</a></li>
+ <li><a href="#reconfigure">reconfigure</a></li>
+ <li><a href="#spotless">spotless</a></li>
+ <li><a href="#tags">tags</a></li>
+ <li><a href="#uninstall">uninstall</a></li>
+ </ol>
+ </li>
+ <li><a href="#variables">Using Variables</a>
+ <ol>
+ <li><a href="#setvars">Control Variables</a></li>
+ <li><a href="#overvars">Override Variables</a></li>
+ <li><a href="#getvars">Readable Variables</a></li>
+ <li><a href="#intvars">Internal Variables</a></li>
+ </ol>
+ </li>
+</ol>
+
+<div class="doc_author">
+ <p>Written by <a href="mailto:reid@x10sys.com">Reid Spencer</a></p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="introduction">Introduction </a></div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+ <p>This document provides <em>usage</em> information about the LLVM makefile
+ system. While loosely patterned after the BSD makefile system, LLVM has taken
+ a departure from BSD in order to implement additional features needed by LLVM.
+ Although makefile systems such as automake were attempted at one point, it
+ has become clear that the gap between the features needed by LLVM and the
+ Makefile norm is too great to use a more limited tool. Consequently, LLVM
+ requires simply GNU
+ Make 3.79, a widely portable makefile processor. LLVM unabashedly makes heavy
+ use of the features of GNU Make so the dependency on GNU Make is firm. If
+ you're not familiar with <tt>make</tt>, it is recommended that you read the
+ <a href="http://www.gnu.org/software/make/manual/make.html">GNU Makefile
+ Manual</a>.</p>
+ <p>While this document is rightly part of the
+ <a href="ProgrammersManual.html">LLVM Programmer's Manual</a>, it is treated
+ separately here because of the volume of content and because it is often an
+ early source of bewilderment for new developers.</p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="general">General Concepts</a></div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+ <p>The LLVM Makefile System is the component of LLVM that is responsible for
+ building the software, testing it, generating distributions, checking those
+ distributions, installing and uninstalling, etc. It consists of several
+ files throughout the source tree. These files and other general concepts are
+ described in this section.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="projects">Projects</a></div>
+<div class="doc_text">
+ <p>The LLVM Makefile System is quite generous. It not only builds its own
+ software, but it can build yours too. Built into the system is knowledge of
+ the <tt>llvm/projects</tt> directory. Any directory under <tt>projects</tt>
+ that has both a <tt>configure</tt> script and a <tt>Makefile</tt> is assumed
+ to be a project that uses the LLVM Makefile system. Building software that
+ uses LLVM does not require the LLVM Makefile System nor even placement in the
+ <tt>llvm/projects</tt> directory. However, doing so will allow your project
+ to get up and running quickly by utilizing the built-in features that are used
+ to compile LLVM. LLVM compiles itself using the same features of the makefile
+ system as used for projects.</p>
+ <p>For complete details on setting up your project's configuration, simply
+ mimic the <tt>llvm/projects/sample</tt> project or for further details,
+ consult the <a href="Projects.html">Projects.html</a> page.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="varvalues">Variable Values</a></div>
+<div class="doc_text">
+ <p>To use the makefile system, you simply create a file named
+ <tt>Makefile</tt> in your directory and declare values for certain variables.
+ The variables and values that you select determine what the makefile system
+ will do. These variables enable rules and processing in the makefile system
+ that automatically Do The Right Thing&trade;.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="including">Including Makefiles</a></div>
+<div class="doc_text">
+ <p>Setting variables alone is not enough. You must include into your Makefile
+ additional files that provide the rules of the LLVM Makefile system. The
+ various files involved are described in the sections that follow.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection"><a name="Makefile">Makefile</a></div>
+<div class="doc_text">
+ <p>Each directory to participate in the build needs to have a file named
+ <tt>Makefile</tt>. This is the file first read by <tt>make</tt>. It has three
+ sections:</p>
+ <ol>
+ <li><a href="#setvars">Settable Variables</a> - Required variables that must
+ be set first.</li>
+ <li><a href="#Makefile.common">include <tt>$(LEVEL)/Makefile.common</tt></a>
+ - include the LLVM Makefile system.</li>
+ <li><a href="#overvars">Override Variables</a> - Override variables set by
+ the LLVM Makefile system.</li>
+ </ol>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection"><a name="Makefile.common">Makefile.common</a>
+</div>
+<div class="doc_text">
+ <p>Every project must have a <tt>Makefile.common</tt> file at its top source
+ directory. This file serves three purposes:</p>
+ <ol>
+ <li>It includes the project's configuration makefile to obtain values
+ determined by the <tt>configure</tt> script. This is done by including the
+ <a href="#Makefile.config"><tt>$(LEVEL)/Makefile.config</tt></a> file.</li>
+ <li>It specifies any other (static) values that are needed throughout the
+ project. Only values that are used in all or a large proportion of the
+ project's directories should be placed here.</li>
+ <li>It includes the standard rules for the LLVM Makefile system,
+ <a href="#Makefile.rules"><tt>$(LLVM_SRC_ROOT)/Makefile.rules</tt></a>.
+ This file is the "guts" of the LLVM Makefile system.</li>
+ </ol>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection"><a name="Makefile.config">Makefile.config</a>
+</div>
+<div class="doc_text">
+ <p>Every project must have a <tt>Makefile.config</tt> at the top of its
+ <em>build</em> directory. This file is <b>generated</b> by the
+ <tt>configure</tt> script from the pattern provided by the
+ <tt>Makefile.config.in</tt> file located at the top of the project's
+ <em>source</em> directory. The contents of this file depend largely on what
+ configuration items the project uses, however most projects can get what they
+ need by just relying on LLVM's configuration found in
+ <tt>$(LLVM_OBJ_ROOT)/Makefile.config</tt>.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection"><a name="Makefile.rules">Makefile.rules</a></div>
+<div class="doc_text">
+ <p>This file, located at <tt>$(LLVM_SRC_ROOT)/Makefile.rules</tt> is the heart
+ of the LLVM Makefile System. It provides all the logic, dependencies, and
+ rules for building the targets supported by the system. What it does largely
+ depends on the values of <tt>make</tt> <a href="#variables">variables</a> that
+ have been set <em>before</em> <tt>Makefile.rules</tt> is included.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="Comments">Comments</a></div>
+<div class="doc_text">
+ <p>User Makefiles need not have comments in them unless the construction is
+ unusual or it does not strictly follow the rules and patterns of the LLVM
+ makefile system. Makefile comments are invoked with the pound (#) character.
+ The # character and any text following it, to the end of the line, are ignored
+ by <tt>make</tt>.</p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="tutorial">Tutorial</a></div>
+<!-- *********************************************************************** -->
+<div class="doc_text">
+ <p>This section provides some examples of the different kinds of modules you
+ can build with the LLVM makefile system. In general, each directory you
+ provide will build a single object although that object may be composed of
+ additionally compiled components.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="libraries">Libraries</a></div>
+<div class="doc_text">
+ <p>Only a few variable definitions are needed to build a regular library.
+ Normally, the makefile system will build all the software into a single
+ <tt>libname.o</tt> (pre-linked) object. This means the library is not
+ searchable and that the distinction between compilation units has been
+ dissolved. Optionally, you can ask for a shared library (.so), archive library
+ (.a) or to not have the default (relinked) library built. For example:</p>
+ <pre><tt>
+ LIBRARYNAME = mylib
+ SHARED_LIBRARY = 1
+ ARCHIVE_LIBRARY = 1
+ DONT_BUILD_RELINKED = 1
+ </tt></pre>
+ <p>says to build a library named "mylib" with both a shared library
+ (<tt>mylib.so</tt>) and an archive library (<tt>mylib.a</tt>) version but
+ not to build the relinked object (<tt>mylib.o</tt>). The contents of all the
+ libraries produced will be the same, they are just constructed differently.
+ Note that you normally do not need to specify the sources involved. The LLVM
+ Makefile system will infer the source files from the contents of the source
+ directory.</p>
+ <p>The <tt>LOADABLE_MODULE=1</tt> directive can be used in conjunction with
+ <tt>SHARED_LIBRARY=1</tt> to indicate that the resulting shared library should
+ be openable with the <tt>dlopen</tt> function and searchable with the
+ <tt>dlsym</tt> function (or your operating system's equivalents). While this
+ isn't strictly necessary on Linux and a few other platforms, it is required
+ on systems like HP-UX and Darwin. You should use <tt>LOADABLE_MODULE</tt> for
+ any shared library that you intend to be loaded into a tool via the
+ <tt>-load</tt> option. See the
+ <a href="WritingAnLLVMPass.html#makefile">WritingAnLLVMPass.html</a> document
+ for an example of why you might want to do this.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection"><a name="BCModules">Bitcode Modules</a></div>
+<div class="doc_text">
+ <p>In some situations, it is desirable to build a single bitcode module from
+ a variety of sources, instead of an archive, shared library, or bitcode
+ library. Bitcode modules can be specified in addition to any of the other
+ types of libraries by defining the <a href="#MODULE_NAME">MODULE_NAME</a>
+ variable. For example:</p>
+ <pre><tt>
+ LIBRARYNAME = mylib
+ BYTECODE_LIBRARY = 1
+ MODULE_NAME = mymod
+ </tt></pre>
+ <p>will build a module named <tt>mymod.bc</tt> from the sources in the
+ directory. This module will be an aggregation of all the bitcode modules
+ derived from the sources. The example will also build a bitcode archive
+ containing a bitcode module for each compiled source file. The difference is
+ subtle, but important depending on how the module or library is to be linked.
+ </p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="LoadableModules">Loadable Modules</a>
+</div>
+<div class="doc_text">
+ <p>In some situations, you need to create a loadable module. Loadable modules
+ can be loaded into programs like <tt>opt</tt> or <tt>llc</tt> to specify
+ additional passes to run or targets to support. Loadable modules are also
+ useful for debugging a pass or providing a pass with another package if that
+ pass can't be included in LLVM.</p>
+ <p>LLVM provides complete support for building such a module. All you need to
+ do is use the LOADABLE_MODULE variable in your Makefile. For example, to
+ build a loadable module named <tt>MyMod</tt> that uses the LLVM libraries
+ <tt>LLVMSupport.a</tt> and <tt>LLVMSystem.a</tt>, you would specify:</p>
+ <pre><tt>
+ LIBRARYNAME := MyMod
+ LOADABLE_MODULE := 1
+ LINK_COMPONENTS := support system
+ </tt></pre>
+ <p>Use of the <tt>LOADABLE_MODULE</tt> facility implies several things:</p>
+ <ol>
+ <li>There will be no "lib" prefix on the module. This differentiates it from
+ a standard shared library of the same name.</li>
+ <li>The <a href="#SHARED_LIBRARY">SHARED_LIBRARY</a> variable is turned
+ on.</li>
+ <li>The <a href="#LINK_LIBS_IN_SHARED">LINK_LIBS_IN_SHARED</a> variable
+ is turned on.</li>
+ <li>The <a href="#DONT_BUILD_RELINKED">DONT_BUILD_RELINKED</a> variable
+ is turned on.</li>
+ </ol>
+ <p>A loadable module is loaded by LLVM via the facilities of libtool's libltdl
+ library which is part of the <tt>lib/System</tt> implementation.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="tools">Tools</a></div>
+<div class="doc_text">
+ <p>For building executable programs (tools), you must provide the name of the
+ tool and the names of the libraries you wish to link with the tool. For
+ example:</p>
+ <pre><tt>
+ TOOLNAME = mytool
+ USEDLIBS = mylib
+ LINK_COMPONENTS = support system
+ </tt></pre>
+ <p>says that we are to build a tool named <tt>mytool</tt> and that it requires
+ three libraries: <tt>mylib</tt>, <tt>LLVMSupport.a</tt> and
+ <tt>LLVMSystem.a</tt>.</p>
+ <p>Note that two different variables are used to indicate which libraries are
+ linked: <tt>USEDLIBS</tt> and <tt>LLVMLIBS</tt>. This distinction is necessary
+ to support projects. <tt>LLVMLIBS</tt> refers to the LLVM libraries found in
+ the LLVM object directory. <tt>USEDLIBS</tt> refers to the libraries built by
+ your project. In the case of building LLVM tools, <tt>USEDLIBS</tt> and
+ <tt>LLVMLIBS</tt> can be used interchangeably since the "project" is LLVM
+ itself and <tt>USEDLIBS</tt> refers to the same place as <tt>LLVMLIBS</tt>.
+ </p>
+ <p>Also note that there are two different ways of specifying a library: with a
+ <tt>.a</tt> suffix and without. Without the suffix, the entry refers to the
+ re-linked (.o) file which will include <em>all</em> symbols of the library.
+ This is useful, for example, to include all passes from a library of passes.
+ If the <tt>.a</tt> suffix is used then the library is linked as a searchable
+ library (with the <tt>-l</tt> option). In this case, only the symbols that are
+ unresolved <em>at that point</em> will be resolved from the library, if they
+ exist. Other (unreferenced) symbols will not be included when the <tt>.a</tt>
+ syntax is used. Note that in order to use the <tt>.a</tt> suffix, the library
+ in question must have been built with the <tt>ARCHIVE_LIBRARY</tt> option set.
+ </p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection"><a name="JIT">JIT Tools</a></div>
+<div class="doc_text">
+ <p>Many tools will want to use the JIT features of LLVM. To do this, you
+ simply specify that you want an execution 'engine', and the makefiles will
+ automatically link in the appropriate JIT for the host or an interpreter
+ if none is available:</p>
+ <pre><tt>
+ TOOLNAME = my_jit_tool
+ USEDLIBS = mylib
+ LINK_COMPONENTS = engine
+ </tt></pre>
+ <p>Of course, any additional libraries may be listed as other components. To
+ get a full understanding of how this changes the linker command, it is
+ recommended that you:</p>
+ <pre><tt>
+ cd examples/Fibonacci
+ make VERBOSE=1
+ </tt></pre>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="targets">Targets Supported</a></div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+ <p>This section describes each of the targets that can be built using the LLVM
+ Makefile system. Any target can be invoked from any directory but not all are
+ applicable to a given directory (e.g. "check", "dist" and "install" will
+ always operate as if invoked from the top level directory).</p>
+
+ <table style="text-align:left">
+ <tr>
+ <th>Target Name</th><th>Implied Targets</th><th>Target Description</th>
+ </tr>
+ <tr><td><a href="#all"><tt>all</tt></a></td><td></td>
+ <td>Compile the software recursively. Default target.
+ </td></tr>
+ <tr><td><a href="#all-local"><tt>all-local</tt></a></td><td></td>
+ <td>Compile the software in the local directory only.
+ </td></tr>
+ <tr><td><a href="#check"><tt>check</tt></a></td><td></td>
+ <td>Change to the <tt>test</tt> directory in a project and run the
+ test suite there.
+ </td></tr>
+ <tr><td><a href="#check-local"><tt>check-local</tt></a></td><td></td>
+ <td>Run a local test suite. Generally this is only defined in the
+ <tt>Makefile</tt> of the project's <tt>test</tt> directory.
+ </td></tr>
+ <tr><td><a href="#clean"><tt>clean</tt></a></td><td></td>
+ <td>Remove built objects recursively.
+ </td></tr>
+ <tr><td><a href="#clean-local"><tt>clean-local</tt></a></td><td></td>
+ <td>Remove built objects from the local directory only.
+ </td></tr>
+ <tr><td><a href="#dist"><tt>dist</tt></a></td><td>all</td>
+ <td>Prepare a source distribution tarball.
+ </td></tr>
+ <tr><td><a href="#dist-check"><tt>dist-check</tt></a></td><td>all</td>
+ <td>Prepare a source distribution tarball and check that it builds.
+ </td></tr>
+ <tr><td><a href="#dist-clean"><tt>dist-clean</tt></a></td><td>clean</td>
+ <td>Clean source distribution tarball temporary files.
+ </td></tr>
+ <tr><td><a href="#install"><tt>install</tt></a></td><td>all</td>
+ <td>Copy built objects to installation directory.
+ </td></tr>
+ <tr><td><a href="#preconditions"><tt>preconditions</tt></a></td><td>all</td>
+ <td>Check to make sure configuration and makefiles are up to date.
+ </td></tr>
+ <tr><td><a href="#printvars"><tt>printvars</tt></a></td><td>all</td>
+ <td>Prints variables defined by the makefile system (for debugging).
+ </td></tr>
+ <tr><td><a href="#tags"><tt>tags</tt></a></td><td></td>
+ <td>Make C and C++ tags files for emacs and vi.
+ </td></tr>
+ <tr><td><a href="#uninstall"><tt>uninstall</tt></a></td><td></td>
+ <td>Remove built objects from installation directory.
+ </td></tr>
+ </table>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="all">all (default)</a></div>
+<div class="doc_text">
+ <p>When you invoke <tt>make</tt> with no arguments, you are implicitly
+ instructing it to seek the "all" target (goal). This target is used for
+ building the software recursively and will do different things in different
+ directories. For example, in a <tt>lib</tt> directory, the "all" target will
+ compile source files and generate libraries. But, in a <tt>tools</tt>
+ directory, it will link libraries and generate executables.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="all-local">all-local</a></div>
+<div class="doc_text">
+ <p>This target is the same as <a href="#all">all</a> but it operates only on
+ the current directory instead of recursively.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="check">check</a></div>
+<div class="doc_text">
+ <p>This target can be invoked from anywhere within a project's directories
+ but always invokes the <a href="#check-local"><tt>check-local</tt></a> target
+ in the project's <tt>test</tt> directory, if it exists and has a
+ <tt>Makefile</tt>. A warning is produced otherwise. If
+ <a href="#TESTSUITE"><tt>TESTSUITE</tt></a> is defined on the <tt>make</tt>
+ command line, it will be passed down to the invocation of
+ <tt>make check-local</tt> in the <tt>test</tt> directory. The intended usage
+ for this is to assist in running specific suites of tests. If
+ <tt>TESTSUITE</tt> is not set, the implementation of <tt>check-local</tt>
+ should run all normal tests. It is up to the project to define what
+ different values for <tt>TESTSUITE</tt> will do. See the
+ <a href="TestingGuide.html">TestingGuide</a> for further details.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="check-local">check-local</a></div>
+<div class="doc_text">
+ <p>This target should be implemented by the <tt>Makefile</tt> in the project's
+ <tt>test</tt> directory. It is invoked by the <tt>check</tt> target elsewhere.
+ Each project is free to define the actions of <tt>check-local</tt> as
+ appropriate for that project. The LLVM project itself uses dejagnu to run a
+ suite of feature and regression tests. Other projects may choose to use
+ dejagnu or any other testing mechanism.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="clean">clean</a></div>
+<div class="doc_text">
+ <p>This target cleans the build directory, recursively removing all things
+ that the Makefile builds. The cleaning rules have been made guarded so they
+ shouldn't go awry (via <tt>rm -f $(UNSET_VARIABLE)/*</tt> which will attempt
+ to erase the entire directory structure).</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="clean-local">clean-local</a></div>
+<div class="doc_text">
+ <p>This target does the same thing as <tt>clean</tt> but only for the current
+ (local) directory.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="dist">dist</a></div>
+<div class="doc_text">
+ <p>This target builds a distribution tarball. It first builds the entire
+ project using the <tt>all</tt> target and then tars up the necessary files and
+ compresses it. The generated tarball is sufficient for a casual source
+ distribution, but probably not for a release (see <tt>dist-check</tt>).</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="dist-check">dist-check</a></div>
+<div class="doc_text">
+ <p>This target does the same thing as the <tt>dist</tt> target but also checks
+ the distribution tarball. The check is made by unpacking the tarball to a new
+ directory, configuring it, building it, installing it, and then verifying that
+ the installation results are correct (by comparing to the original build).
+ This target can take a long time to run but should be done before a release
+ goes out to make sure that the distributed tarball can actually be built into
+ a working release.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="dist-clean">dist-clean</a></div>
+<div class="doc_text">
+ <p>This is a special form of the <tt>clean</tt> target. It performs a
+ normal <tt>clean</tt> but also removes things pertaining to building the
+ distribution.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="install">install</a></div>
+<div class="doc_text">
+ <p>This target finalizes shared objects and executables and copies all
+ libraries, headers, executables and documentation to the directory given
+ with the <tt>--prefix</tt> option to <tt>configure</tt>. When completed,
+ the prefix directory will have everything needed to <b>use</b> LLVM. </p>
+ <p>The LLVM makefiles can generate complete <b>internal</b> documentation
+ for all the classes by using <tt>doxygen</tt>. By default, this feature is
+ <b>not</b> enabled because it takes a long time and generates a massive
+ amount of data (&gt;100MB). If you want this feature, you must configure LLVM
+ with the --enable-doxygen switch and ensure that a modern version of doxygen
+ (1.3.7 or later) is available in your <tt>PATH</tt>. You can download
+ doxygen from
+ <a href="http://www.stack.nl/~dimitri/doxygen/download.html#latestsrc">
+ here</a>.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="preconditions">preconditions</a></div>
+<div class="doc_text">
+ <p>This utility target checks to see if the <tt>Makefile</tt> in the object
+ directory is older than the <tt>Makefile</tt> in the source directory and
+ copies it if so. It also reruns the <tt>configure</tt> script if that needs to
+ be done and rebuilds the <tt>Makefile.config</tt> file similarly. Users may
+ overload this target to ensure that sanity checks are run <em>before</em> any
+ building of targets as all the targets depend on <tt>preconditions</tt>.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="printvars">printvars</a></div>
+<div class="doc_text">
+ <p>This utility target just causes the LLVM makefiles to print out some of
+ the makefile variables so that you can double check how things are set. </p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="reconfigure">reconfigure</a></div>
+<div class="doc_text">
+ <p>This utility target will force a reconfigure of LLVM or your project. It
+ simply runs <tt>$(PROJ_OBJ_ROOT)/config.status --recheck</tt> to rerun the
+ configuration tests and rebuild the configured files. This isn't generally
+ useful as the makefiles will reconfigure themselves whenever it's necessary.
+ </p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="spotless">spotless</a></div>
+<div class="doc_text">
+ <p>This utility target, only available when <tt>$(PROJ_OBJ_ROOT)</tt> is not
+ the same as <tt>$(PROJ_SRC_ROOT)</tt>, will completely clean the
+ <tt>$(PROJ_OBJ_ROOT)</tt> directory by removing its content entirely and
+ reconfiguring the directory. This returns the <tt>$(PROJ_OBJ_ROOT)</tt>
+ directory to a completely fresh state. All content in the directory except
+ configured files and top-level makefiles will be lost.</p>
+ <div class="doc_warning"><p>Use with caution.</p></div>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="tags">tags</a></div>
+<div class="doc_text">
+ <p>This target will generate a <tt>TAGS</tt> file in the top-level source
+ directory. It is meant for use with emacs, XEmacs, or ViM. The TAGS file
+ provides an index of symbol definitions so that the editor can jump you to the
+ definition quickly. </p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="uninstall">uninstall</a></div>
+<div class="doc_text">
+ <p>This target is the opposite of the <tt>install</tt> target. It removes the
+ header, library and executable files from the installation directories. Note
+ that the directories themselves are not removed because it is not guaranteed
+ that LLVM is the only thing installing there (e.g. --prefix=/usr).</p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="variables">Variables</a></div>
+<!-- *********************************************************************** -->
+<div class="doc_text">
+ <p>Variables are used to tell the LLVM Makefile System what to do and to
+ obtain information from it. Variables are also used internally by the LLVM
+ Makefile System. Variable names that contain only the upper case alphabetic
+ letters and underscore are intended for use by the end user. All other
+ variables are internal to the LLVM Makefile System and should not be relied
+ upon nor modified. The sections below describe how to use the LLVM Makefile
+ variables.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="setvars">Control Variables</a></div>
+<div class="doc_text">
+ <p>Variables listed in the table below should be set <em>before</em> the
+ inclusion of <a href="#Makefile.common"><tt>$(LEVEL)/Makefile.common</tt></a>.
+ These variables provide input to the LLVM make system that tell it what to do
+ for the current directory.</p>
+ <dl>
+ <dt><a name="BUILD_ARCHIVE"><tt>BUILD_ARCHIVE</tt></a></dt>
+ <dd>If set to any value, causes an archive (.a) library to be built.</dd>
+ <dt><a name="BUILT_SOURCES"><tt>BUILT_SOURCES</tt></a></dt>
+ <dd>Specifies a set of source files that are generated from other source
+ files. These sources will be built before any other target processing to
+ ensure they are present.</dd>
+ <dt><a name="BYTECODE_LIBRARY"><tt>BYTECODE_LIBRARY</tt></a></dt>
+ <dd>If set to any value, causes a bitcode library (.bc) to be built.</dd>
+ <dt><a name="CONFIG_FILES"><tt>CONFIG_FILES</tt></a></dt>
+ <dd>Specifies a set of configuration files to be installed.</dd>
+ <dt><a name="DIRS"><tt>DIRS</tt></a></dt>
+ <dd>Specifies a set of directories, usually children of the current
+ directory, that should also be made using the same goal. These directories
+ will be built serially.</dd>
+ <dt><a name="DISABLE_AUTO_DEPENDENCIES"><tt>DISABLE_AUTO_DEPENDENCIES</tt></a></dt>
+ <dd>If set to any value, causes the makefiles to <b>not</b> automatically
+ generate dependencies when running the compiler. Use of this feature is
+ discouraged and it may be removed at a later date.</dd>
+ <dt><a name="DONT_BUILD_RELINKED"><tt>DONT_BUILD_RELINKED</tt></a></dt>
+ <dd>If set to any value, causes a relinked library (.o) not to be built. By
+ default, libraries are built as re-linked since most LLVM libraries are
+ needed in their entirety and re-linked libraries will be linked more quickly
+ than equivalent archive libraries.</dd>
+ <dt><a name="ENABLE_OPTIMIZED"><tt>ENABLE_OPTIMIZED</tt></a></dt>
+ <dd>If set to any value, causes the build to generate optimized objects,
+ libraries and executables. This alters the flags specified to the compilers
+ and linkers. Generally debugging won't be a fun experience with an optimized
+ build.</dd>
+ <dt><a name="ENABLE_PROFILING"><tt>ENABLE_PROFILING</tt></a></dt>
+ <dd>If set to any value, causes the build to generate both optimized and
+ profiled objects, libraries and executables. This alters the flags specified
+ to the compilers and linkers to ensure that profile data can be collected
+ from the tools built. Use the <tt>gprof</tt> tool to analyze the output from
+ the profiled tools (<tt>gmon.out</tt>).</dd>
+ <dt><a name="DISABLE_ASSERTIONS"><tt>DISABLE_ASSERTIONS</tt></a></dt>
+ <dd>If set to any value, causes the build to disable assertions, even if
+ building a release or profile build. This will exclude all assertion check
+ code from the build. LLVM will execute faster, but with little help when
+ things go wrong.</dd>
+ <dt><a name="EXPERIMENTAL_DIRS"><tt>EXPERIMENTAL_DIRS</tt></a></dt>
+ <dd>Specify a set of directories that should be built, but if they fail, it
+ should not cause the build to fail. Note that this should only be used
+ temporarily while code is being written.</dd>
+ <dt><a name="EXPORTED_SYMBOL_FILE"><tt>EXPORTED_SYMBOL_FILE</tt></a></dt>
+ <dd>Specifies the name of a single file that contains a list of the
+ symbols to be exported by the linker. One symbol per line.</dd>
+ <dt><a name="EXPORTED_SYMBOL_LIST"><tt>EXPORTED_SYMBOL_LIST</tt></a></dt>
+ <dd>Specifies a set of symbols to be exported by the linker.</dd>
+ <dt><a name="EXTRA_DIST"><tt>EXTRA_DIST</tt></a></dt>
+ <dd>Specifies additional files that should be distributed with LLVM. All
+ source files, all built sources, all Makefiles, and most documentation files
+ will be automatically distributed. Use this variable to distribute any
+ files that are not automatically distributed.</dd>
+ <dt><a name="KEEP_SYMBOLS"><tt>KEEP_SYMBOLS</tt></a></dt>
+ <dd>If set to any value, specifies that when linking executables the
+ makefiles should retain debug symbols in the executable. Normally, symbols
+ are stripped from the executable.</dd>
+ <dt><a name="LEVEL"><tt>LEVEL</tt></a><small>(required)</small></dt>
+ <dd>Specify the level of nesting from the top level. This variable must be
+ set in each makefile as it is used to find the top level and thus the other
+ makefiles.</dd>
+ <dt><a name="LIBRARYNAME"><tt>LIBRARYNAME</tt></a></dt>
+ <dd>Specify the name of the library to be built. (Required For
+ Libraries)</dd>
+ <dt><a name="LINK_COMPONENTS"><tt>LINK_COMPONENTS</tt></a></dt>
+ <dd>When specified for building a tool, the value of this variable will be
+ passed to the <tt>llvm-config</tt> tool to generate a link line for the
+ tool. Unlike <tt>USEDLIBS</tt> and <tt>LLVMLIBS</tt>, not all libraries need
+ to be specified. The <tt>llvm-config</tt> tool will figure out the library
+ dependencies and add any libraries that are needed. The <tt>USEDLIBS</tt>
+ variable can still be used in conjunction with <tt>LINK_COMPONENTS</tt> so
+ that additional project-specific libraries can be linked with the LLVM
+ libraries specified by <tt>LINK_COMPONENTS</tt>.</dd>
+ <dt><a name="LINK_LIBS_IN_SHARED"><tt>LINK_LIBS_IN_SHARED</tt></a></dt>
+ <dd>By default, shared library linking will ignore any libraries specified
+ with the <a href="#LLVMLIBS">LLVMLIBS</a> or <a href="#USEDLIBS">USEDLIBS</a>.
+ This prevents shared libs from including things that will be in the LLVM
+ tool the shared library will be loaded into. However, sometimes it is useful
+ to link certain libraries into your shared library and this option enables
+ that feature.</dd>
+ <dt><a name="LLVMLIBS"><tt>LLVMLIBS</tt></a></dt>
+ <dd>Specifies the set of libraries from the LLVM $(ObjDir) that will be
+ linked into the tool or library.</dd>
+ <dt><a name="LOADABLE_MODULE"><tt>LOADABLE_MODULE</tt></a></dt>
+ <dd>If set to any value, causes the shared library being built to also be
+ a loadable module. Loadable modules can be opened with the dlopen() function
+ and searched with dlsym (or the operating system's equivalent). Note that
+ setting this variable without also setting <tt>SHARED_LIBRARY</tt> will have
+ no effect.</dd>
+ <dt><a name="MODULE_NAME"><tt>MODULE_NAME</tt></a></dt>
+ <dd>Specifies the name of a bitcode module to be created. A bitcode
+ module can be specified in conjunction with other kinds of library builds
+ or by itself. It constructs from the sources a single linked bitcode
+ file.</dd>
+ <dt><a name="NO_INSTALL"><tt>NO_INSTALL</tt></a></dt>
+ <dd>Specifies that the build products of the directory should not be
+ installed but should be built even if the <tt>install</tt> target is given.
+ This is handy for directories that build libraries or tools that are only
+ used as part of the build process, such as code generators (e.g.
+ <tt>tblgen</tt>).</dd>
+ <dt><a name="OPTIONAL_DIRS"><tt>OPTIONAL_DIRS</tt></a></dt>
+ <dd>Specify a set of directories that may be built, if they exist, but it's
+ not an error for them not to exist.</dd>
+ <dt><a name="PARALLEL_DIRS"><tt>PARALLEL_DIRS</tt></a></dt>
+ <dd>Specify a set of directories to build recursively and in parallel if
+ the -j option was used with <tt>make</tt>.</dd>
+ <dt><a name="SHARED_LIBRARY"><tt>SHARED_LIBRARY</tt></a></dt>
+ <dd>If set to any value, causes a shared library (.so) to be built in
+ addition to any other kinds of libraries. Note that this option will cause
+ all source files to be built twice: once with options for position
+ independent code and once without. Use it only where you really need a
+ shared library.</dd>
+ <dt><a name="SOURCES"><tt>SOURCES</tt><small>(optional)</small></a></dt>
+ <dd>Specifies the list of source files in the current directory to be
+ built. Source files of any type may be specified (programs, documentation,
+ config files, etc.). If not specified, the makefile system will infer the
+ set of source files from the files present in the current directory.</dd>
+ <dt><a name="SUFFIXES"><tt>SUFFIXES</tt></a></dt>
+ <dd>Specifies a set of filename suffixes that occur in suffix match rules.
+ Only set this if your local <tt>Makefile</tt> specifies additional suffix
+ match rules.</dd>
+ <dt><a name="TARGET"><tt>TARGET</tt></a></dt>
+ <dd>Specifies the name of the LLVM code generation target that the
+ current directory builds. Setting this variable enables additional rules to
+ build <tt>.inc</tt> files from <tt>.td</tt> files. </dd>
+ <dt><a name="TESTSUITE"><tt>TESTSUITE</tt></a></dt>
+ <dd>Specifies the directory of tests to run in <tt>llvm/test</tt>.</dd>
+ <dt><a name="TOOLNAME"><tt>TOOLNAME</tt></a></dt>
+ <dd>Specifies the name of the tool that the current directory should
+ build.</dd>
+ <dt><a name="TOOL_VERBOSE"><tt>TOOL_VERBOSE</tt></a></dt>
+ <dd>Implies VERBOSE and also tells each tool invoked to be verbose. This is
+ handy when you're trying to see the sub-tools invoked by each tool invoked
+ by the makefile. For example, this will pass <tt>-v</tt> to the GCC
+ compilers, which causes them to print out the command lines they use to invoke
+ sub-tools (compiler, assembler, linker).</dd>
+ <dt><a name="USEDLIBS"><tt>USEDLIBS</tt></a></dt>
+ <dd>Specifies the list of project libraries that will be linked into the
+ tool or library.</dd>
+ <dt><a name="VERBOSE"><tt>VERBOSE</tt></a></dt>
+ <dd>Tells the Makefile system to produce detailed output of what it is doing
+ instead of just summary comments. This will generate a LOT of output.</dd>
+ </dl>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="overvars">Override Variables</a></div>
+<div class="doc_text">
+ <p>Override variables can be used to override the default
+ values provided by the LLVM makefile system. These variables can be set in
+ several ways:</p>
+ <ul>
+ <li>In the environment (e.g. setenv, export) -- not recommended.</li>
+ <li>On the <tt>make</tt> command line -- recommended.</li>
+ <li>On the <tt>configure</tt> command line</li>
+ <li>In the Makefile (only <em>after</em> the inclusion of <a
+ href="#Makefile.common"><tt>$(LEVEL)/Makefile.common</tt></a>).</li>
+ </ul>
+ <p>The override variables are given below:</p>
+ <dl>
+ <dt><a name="AR"><tt>AR</tt></a> <small>(defaulted)</small></dt>
+ <dd>Specifies the path to the <tt>ar</tt> tool.</dd>
+ <dt><a name="BISON"><tt>BISON</tt></a><small>(configured)</small></dt>
+ <dd>Specifies the path to the <tt>bison</tt> tool.</dd>
+ <dt><a name="PROJ_OBJ_DIR"><tt>PROJ_OBJ_DIR</tt></a></dt>
+ <dd>The directory into which the products of build rules will be placed.
+ This might be the same as
+ <a href="#PROJ_SRC_DIR"><tt>PROJ_SRC_DIR</tt></a> but typically is
+ not.</dd>
+ <dt><a name="PROJ_SRC_DIR"><tt>PROJ_SRC_DIR</tt></a></dt>
+ <dd>The directory which contains the source files to be built.</dd>
+ <dt><a name="BZIP2"><tt>BZIP2</tt></a><small>(configured)</small></dt>
+ <dd>The path to the <tt>bzip2</tt> tool.</dd>
+ <dt><a name="CC"><tt>CC</tt></a><small>(configured)</small></dt>
+ <dd>The path to the 'C' compiler.</dd>
+ <dt><a name="CFLAGS"><tt>CFLAGS</tt></a></dt>
+ <dd>Additional flags to be passed to the 'C' compiler.</dd>
+ <dt><a name="CXX"><tt>CXX</tt></a></dt>
+ <dd>Specifies the path to the C++ compiler.</dd>
+ <dt><a name="CXXFLAGS"><tt>CXXFLAGS</tt></a></dt>
+ <dd>Additional flags to be passed to the C++ compiler.</dd>
+ <dt><a name="DATE"><tt>DATE</tt></a><small>(configured)</small></dt>
+ <dd>Specifies the path to the <tt>date</tt> program or any program that can
+ generate the current date and time on its standard output</dd>
+ <dt><a name="DOT"><tt>DOT</tt></a><small>(configured)</small></dt>
+ <dd>Specifies the path to the <tt>dot</tt> tool or <tt>false</tt> if there
+ isn't one.</dd>
+ <dt><a name="ECHO"><tt>ECHO</tt></a><small>(configured)</small></dt>
+ <dd>Specifies the path to the <tt>echo</tt> tool for printing output.</dd>
+ <dt><a name="ETAGS"><tt>ETAGS</tt></a><small>(configured)</small></dt>
+ <dd>Specifies the path to the <tt>etags</tt> tool.</dd>
+ <dt><a name="ETAGSFLAGS"><tt>ETAGSFLAGS</tt></a><small>(configured)</small>
+ </dt>
+ <dd>Provides flags to be passed to the <tt>etags</tt> tool.</dd>
+ <dt><a name="EXEEXT"><tt>EXEEXT</tt></a><small>(configured)</small></dt>
+ <dd>Provides the extension to be used on executables built by the makefiles.
+ The value may be empty on platforms that do not use file extensions for
+ executables (e.g. Unix).</dd>
+ <dt><a name="FLEX"><tt>FLEX</tt></a><small>(configured)</small></dt>
+ <dd>Specifies the path to the <tt>flex</tt> tool.</dd>
+ <dt><a name="INSTALL"><tt>INSTALL</tt></a><small>(configured)</small></dt>
+ <dd>Specifies the path to the <tt>install</tt> tool.</dd>
+ <dt><a name="LDFLAGS"><tt>LDFLAGS</tt></a><small>(configured)</small></dt>
+ <dd>Allows users to specify additional flags to pass to the linker.</dd>
+ <dt><a name="LIBS"><tt>LIBS</tt></a><small>(configured)</small></dt>
+ <dd>The list of libraries that should be linked with each tool.</dd>
+ <dt><a name="LIBTOOL"><tt>LIBTOOL</tt></a><small>(configured)</small></dt>
+ <dd>Specifies the path to the <tt>libtool</tt> tool. This tool is renamed
+ <tt>mklib</tt> by the <tt>configure</tt> script and always located in the
+ top-level object directory (<tt>LLVM_OBJ_ROOT</tt>).</dd>
+ <dt><a name="LLVMAS"><tt>LLVMAS</tt></a><small>(defaulted)</small></dt>
+ <dd>Specifies the path to the <tt>llvm-as</tt> tool.</dd>
+ <dt><a name="LLVMGCC"><tt>LLVMGCC</tt></a><small>(defaulted)</small></dt>
+ <dd>Specifies the path to the LLVM version of the GCC 'C' Compiler</dd>
+ <dt><a name="LLVMGXX"><tt>LLVMGXX</tt></a><small>(defaulted)</small></dt>
+ <dd>Specifies the path to the LLVM version of the GCC C++ Compiler</dd>
+ <dt><a name="LLVMLD"><tt>LLVMLD</tt></a><small>(defaulted)</small></dt>
+ <dd>Specifies the path to the LLVM bitcode linker tool</dd>
+ <dt><a name="LLVM_OBJ_ROOT"><tt>LLVM_OBJ_ROOT</tt></a><small>(configured)
+ </small></dt>
+ <dd>Specifies the top directory into which the output of the build is
+ placed.</dd>
+ <dt><a name="LLVM_SRC_ROOT"><tt>LLVM_SRC_ROOT</tt></a><small>(configured)
+ </small></dt>
+ <dd>Specifies the top directory in which the sources are found.</dd>
+ <dt><a name="LLVM_TARBALL_NAME"><tt>LLVM_TARBALL_NAME</tt></a>
+ <small>(configured)</small></dt>
+ <dd>Specifies the name of the distribution tarball to create. This is
+ configured from the name of the project and its version number.</dd>
+ <dt><a name="MKDIR"><tt>MKDIR</tt></a><small>(defaulted)</small></dt>
+ <dd>Specifies the path to the <tt>mkdir</tt> tool that creates
+ directories.</dd>
+ <dt><a name="PLATFORMSTRIPOPTS"><tt>PLATFORMSTRIPOPTS</tt></a></dt>
+ <dd>The options to provide to the linker to specify that a stripped (no
+ symbols) executable should be built.</dd>
+ <dt><a name="RANLIB"><tt>RANLIB</tt></a><small>(defaulted)</small></dt>
+ <dd>Specifies the path to the <tt>ranlib</tt> tool.</dd>
+ <dt><a name="RM"><tt>RM</tt></a><small>(defaulted)</small></dt>
+ <dd>Specifies the path to the <tt>rm</tt> tool.</dd>
+ <dt><a name="SED"><tt>SED</tt></a><small>(defaulted)</small></dt>
+ <dd>Specifies the path to the <tt>sed</tt> tool.</dd>
+ <dt><a name="SHLIBEXT"><tt>SHLIBEXT</tt></a><small>(configured)</small></dt>
+ <dd>Provides the filename extension to use for shared libraries.</dd>
+ <dt><a name="TBLGEN"><tt>TBLGEN</tt></a><small>(defaulted)</small></dt>
+ <dd>Specifies the path to the <tt>tblgen</tt> tool.</dd>
+ <dt><a name="TAR"><tt>TAR</tt></a><small>(defaulted)</small></dt>
+ <dd>Specifies the path to the <tt>tar</tt> tool.</dd>
+ <dt><a name="ZIP"><tt>ZIP</tt></a><small>(defaulted)</small></dt>
+ <dd>Specifies the path to the <tt>zip</tt> tool.</dd>
+ </dl>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="getvars">Readable Variables</a></div>
+<div class="doc_text">
+ <p>Variables listed in the table below can be used by the user's Makefile but
+ should not be changed. Changing the value will generally cause the build to go
+ wrong, so don't do it.</p>
+ <dl>
+ <dt><a name="bindir"><tt>bindir</tt></a></dt>
+ <dd>The directory into which executables will ultimately be installed. This
+ value is derived from the <tt>--prefix</tt> option given to
+ <tt>configure</tt>.</dd>
+ <dt><a name="BuildMode"><tt>BuildMode</tt></a></dt>
+ <dd>The name of the type of build being performed: Debug, Release, or
+ Profile</dd>
+ <dt><a name="bitcode_libdir"><tt>bytecode_libdir</tt></a></dt>
+ <dd>The directory into which bitcode libraries will ultimately be
+ installed. This value is derived from the <tt>--prefix</tt> option given to
+ <tt>configure</tt>.</dd>
+ <dt><a name="ConfigureScriptFLAGS"><tt>ConfigureScriptFLAGS</tt></a></dt>
+ <dd>Additional flags given to the <tt>configure</tt> script when
+ reconfiguring.</dd>
+ <dt><a name="DistDir"><tt>DistDir</tt></a></dt>
+ <dd>The <em>current</em> directory for which a distribution copy is being
+ made.</dd>
+ <dt><a name="Echo"><tt>Echo</tt></a></dt>
+ <dd>The LLVM Makefile System output command. This provides the
+ <tt>llvm[n]</tt> prefix and starts with @ so the command itself is not
+ printed by <tt>make</tt>.</dd>
+ <dt><a name="EchoCmd"><tt>EchoCmd</tt></a></dt>
+ <dd> Same as <a href="#Echo"><tt>Echo</tt></a> but without the leading @.
+ </dd>
+ <dt><a name="includedir"><tt>includedir</tt></a></dt>
+ <dd>The directory into which include files will ultimately be installed.
+ This value is derived from the <tt>--prefix</tt> option given to
+ <tt>configure</tt>.</dd>
+ <dt><a name="libdir"><tt>libdir</tt></a></dt>
+ <dd>The directory into which native libraries will ultimately be installed.
+ This value is derived from the <tt>--prefix</tt> option given to
+ <tt>configure</tt>.</dd>
+ <dt><a name="LibDir"><tt>LibDir</tt></a></dt>
+ <dd>The configuration specific directory into which libraries are placed
+ before installation.</dd>
+ <dt><a name="MakefileConfig"><tt>MakefileConfig</tt></a></dt>
+ <dd>Full path of the <tt>Makefile.config</tt> file.</dd>
+ <dt><a name="MakefileConfigIn"><tt>MakefileConfigIn</tt></a></dt>
+ <dd>Full path of the <tt>Makefile.config.in</tt> file.</dd>
+ <dt><a name="ObjDir"><tt>ObjDir</tt></a></dt>
+ <dd>The configuration and directory specific directory where build objects
+ (compilation results) are placed.</dd>
+ <dt><a name="SubDirs"><tt>SubDirs</tt></a></dt>
+ <dd>The complete list of sub-directories of the current directory as
+ specified by other variables.</dd>
+ <dt><a name="Sources"><tt>Sources</tt></a></dt>
+ <dd>The complete list of source files.</dd>
+ <dt><a name="sysconfdir"><tt>sysconfdir</tt></a></dt>
+ <dd>The directory into which configuration files will ultimately be
+ installed. This value is derived from the <tt>--prefix</tt> option given to
+ <tt>configure</tt>.</dd>
+ <dt><a name="ToolDir"><tt>ToolDir</tt></a></dt>
+ <dd>The configuration specific directory into which executables are placed
+ before they are installed.</dd>
+ <dt><a name="TopDistDir"><tt>TopDistDir</tt></a></dt>
+ <dd>The top most directory into which the distribution files are copied.
+ </dd>
+ <dt><a name="Verb"><tt>Verb</tt></a></dt>
+ <dd>Use this as the first thing on your build script lines to enable or
+ disable verbose mode. It expands to either an @ (quiet mode) or nothing
+ (verbose mode). </dd>
+ </dl>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="intvars">Internal Variables</a></div>
+<div class="doc_text">
+ <p>Variables listed below are used by the LLVM Makefile System
+ and considered internal. You should not use these variables under any
+ circumstances.</p>
+ <p><tt>
+ Archive
+ AR.Flags
+ BaseNameSources
+ BCCompile.C
+ BCCompile.CXX
+ BCLinkLib
+ C.Flags
+ Compile.C
+ CompileCommonOpts
+ Compile.CXX
+ ConfigStatusScript
+ ConfigureScript
+ CPP.Flags
+ CPP.Flags
+ CXX.Flags
+ DependFiles
+ DestArchiveLib
+ DestBitcodeLib
+ DestModule
+ DestRelinkedLib
+ DestSharedLib
+ DestTool
+ DistAlways
+ DistCheckDir
+ DistCheckTop
+ DistFiles
+ DistName
+ DistOther
+ DistSources
+ DistSubDirs
+ DistTarBZ2
+ DistTarGZip
+ DistZip
+ ExtraLibs
+ FakeSources
+ INCFiles
+ InternalTargets
+ LD.Flags
+ LexFiles
+ LexOutput
+ LibName.A
+ LibName.BC
+ LibName.LA
+ LibName.O
+ LibTool.Flags
+ Link
+ LinkModule
+ LLVMLibDir
+ LLVMLibsOptions
+ LLVMLibsPaths
+ LLVMToolDir
+ LLVMUsedLibs
+ LocalTargets
+ LTCompile.C
+ LTCompile.CXX
+ LTInstall
+ Module
+ ObjectsBC
+ ObjectsLO
+ ObjectsO
+ ObjMakefiles
+ ParallelTargets
+ PreConditions
+ ProjLibsOptions
+ ProjLibsPaths
+ ProjUsedLibs
+ Ranlib
+ RecursiveTargets
+ Relink
+ SrcMakefiles
+ Strip
+ StripWarnMsg
+ TableGen
+ TDFiles
+ ToolBuildPath
+ TopLevelTargets
+ UserTargets
+ YaccFiles
+ YaccOutput
+ </tt></p>
+</div>
+
+<!-- *********************************************************************** -->
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+ <a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
+ <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+</body>
+</html>
diff --git a/docs/Passes.html b/docs/Passes.html
new file mode 100644
index 0000000..3edb121
--- /dev/null
+++ b/docs/Passes.html
@@ -0,0 +1,1026 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>LLVM's Analysis and Transform Passes</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<div class="doc_title">LLVM's Analysis and Transform Passes</div>
+
+<ol>
+ <li><a href="#intro">Introduction</a></li>
+ <li><a href="#analyses">Analysis Passes</a></li>
+ <li><a href="#transforms">Transform Passes</a></li>
+ <li><a href="#utilities">Utility Passes</a></li>
+</ol>
+
+<div class="doc_author">
+ <p>Written by <a href="mailto:rspencer@x10sys.com">Reid Spencer</a></p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_section"> <a name="intro">Introduction</a> </div>
+<div class="doc_text">
+ <p>This document serves as a high level summary of the optimization features
+ that LLVM provides. Optimizations are implemented as Passes that traverse some
+ portion of a program to either collect information or transform the program.
+ The table below divides the passes that LLVM provides into three categories.
+ Analysis passes compute information that other passes can use or for debugging
+ or program visualization purposes. Transform passes can use (or invalidate)
+ the analysis passes. Transform passes all mutate the program in some way.
+ Utility passes provide some utility but don't otherwise fit categorization.
+ For example, passes to extract functions to bitcode or write a module to
+ bitcode are neither analysis nor transform passes.</p>
+ <p>The table below provides a quick summary of each pass and links to the more
+ complete pass description later in the document.</p>
+</div>
+<div class="doc_text" >
+<table>
+<tr><th colspan="2"><b>ANALYSIS PASSES</b></th></tr>
+<tr><th>Option</th><th>Name</th></tr>
+<tr><td><a href="#aa-eval">-aa-eval</a></td><td>Exhaustive Alias Analysis Precision Evaluator</td></tr>
+<tr><td><a href="#anders-aa">-anders-aa</a></td><td>Andersen's Interprocedural Alias Analysis</td></tr>
+<tr><td><a href="#basicaa">-basicaa</a></td><td>Basic Alias Analysis (default AA impl)</td></tr>
+<tr><td><a href="#basiccg">-basiccg</a></td><td>Basic CallGraph Construction</td></tr>
+<tr><td><a href="#basicvn">-basicvn</a></td><td>Basic Value Numbering (default GVN impl)</td></tr>
+<tr><td><a href="#callgraph">-callgraph</a></td><td>Print a call graph</td></tr>
+<tr><td><a href="#callscc">-callscc</a></td><td>Print SCCs of the Call Graph</td></tr>
+<tr><td><a href="#cfgscc">-cfgscc</a></td><td>Print SCCs of each function CFG</td></tr>
+<tr><td><a href="#count-aa">-count-aa</a></td><td>Count Alias Analysis Query Responses</td></tr>
+<tr><td><a href="#debug-aa">-debug-aa</a></td><td>AA use debugger</td></tr>
+<tr><td><a href="#domfrontier">-domfrontier</a></td><td>Dominance Frontier Construction</td></tr>
+<tr><td><a href="#domset">-domset</a></td><td>Dominator Set Construction</td></tr>
+<tr><td><a href="#domtree">-domtree</a></td><td>Dominator Tree Construction</td></tr>
+<tr><td><a href="#etforest">-etforest</a></td><td>ET Forest Construction</td></tr>
+<tr><td><a href="#externalfnconstants">-externalfnconstants</a></td><td>Print external fn callsites passed constants</td></tr>
+<tr><td><a href="#globalsmodref-aa">-globalsmodref-aa</a></td><td>Simple mod/ref analysis for globals</td></tr>
+<tr><td><a href="#idom">-idom</a></td><td>Immediate Dominators Construction</td></tr>
+<tr><td><a href="#instcount">-instcount</a></td><td>Counts the various types of Instructions</td></tr>
+<tr><td><a href="#intervals">-intervals</a></td><td>Interval Partition Construction</td></tr>
+<tr><td><a href="#load-vn">-load-vn</a></td><td>Load Value Numbering</td></tr>
+<tr><td><a href="#loops">-loops</a></td><td>Natural Loop Construction</td></tr>
+<tr><td><a href="#no-aa">-no-aa</a></td><td>No Alias Analysis (always returns 'may' alias)</td></tr>
+<tr><td><a href="#no-profile">-no-profile</a></td><td>No Profile Information</td></tr>
+<tr><td><a href="#postdomfrontier">-postdomfrontier</a></td><td>Post-Dominance Frontier Construction</td></tr>
+<tr><td><a href="#postdomset">-postdomset</a></td><td>Post-Dominator Set Construction</td></tr>
+<tr><td><a href="#postdomtree">-postdomtree</a></td><td>Post-Dominator Tree Construction</td></tr>
+<tr><td><a href="#postetforest">-postetforest</a></td><td>Post-ET-Forest Construction</td></tr>
+<tr><td><a href="#postidom">-postidom</a></td><td>Immediate Post-Dominators Construction</td></tr>
+<tr><td><a href="#print">-print</a></td><td>Print function to stderr</td></tr>
+<tr><td><a href="#print-alias-sets">-print-alias-sets</a></td><td>Alias Set Printer</td></tr>
+<tr><td><a href="#print-callgraph">-print-callgraph</a></td><td>Print Call Graph to 'dot' file</td></tr>
+<tr><td><a href="#print-cfg">-print-cfg</a></td><td>Print CFG of function to 'dot' file</td></tr>
+<tr><td><a href="#print-cfg-only">-print-cfg-only</a></td><td>Print CFG of function to 'dot' file (with no function bodies)</td></tr>
+<tr><td><a href="#printm">-printm</a></td><td>Print module to stderr</td></tr>
+<tr><td><a href="#printusedtypes">-printusedtypes</a></td><td>Find Used Types</td></tr>
+<tr><td><a href="#profile-loader">-profile-loader</a></td><td>Load profile information from llvmprof.out</td></tr>
+<tr><td><a href="#scalar-evolution">-scalar-evolution</a></td><td>Scalar Evolution Analysis</td></tr>
+<tr><td><a href="#targetdata">-targetdata</a></td><td>Target Data Layout</td></tr>
+
+
+<tr><th colspan="2"><b>TRANSFORM PASSES</b></th></tr>
+<tr><th>Option</th><th>Name</th></tr>
+<tr><td><a href="#adce">-adce</a></td><td>Aggressive Dead Code Elimination</td></tr>
+<tr><td><a href="#argpromotion">-argpromotion</a></td><td>Promote 'by reference' arguments to scalars</td></tr>
+<tr><td><a href="#block-placement">-block-placement</a></td><td>Profile Guided Basic Block Placement</td></tr>
+<tr><td><a href="#break-crit-edges">-break-crit-edges</a></td><td>Break Critical Edges in CFG</td></tr>
+<tr><td><a href="#cee">-cee</a></td><td>Correlated Expression Elimination</td></tr>
+<tr><td><a href="#condprop">-condprop</a></td><td>Conditional Propagation</td></tr>
+<tr><td><a href="#constmerge">-constmerge</a></td><td>Merge Duplicate Global Constants</td></tr>
+<tr><td><a href="#constprop">-constprop</a></td><td>Simple constant propagation</td></tr>
+<tr><td><a href="#dce">-dce</a></td><td>Dead Code Elimination</td></tr>
+<tr><td><a href="#deadargelim">-deadargelim</a></td><td>Dead Argument Elimination</td></tr>
+<tr><td><a href="#deadtypeelim">-deadtypeelim</a></td><td>Dead Type Elimination</td></tr>
+<tr><td><a href="#die">-die</a></td><td>Dead Instruction Elimination</td></tr>
+<tr><td><a href="#dse">-dse</a></td><td>Dead Store Elimination</td></tr>
+<tr><td><a href="#gcse">-gcse</a></td><td>Global Common Subexpression Elimination</td></tr>
+<tr><td><a href="#globaldce">-globaldce</a></td><td>Dead Global Elimination</td></tr>
+<tr><td><a href="#globalopt">-globalopt</a></td><td>Global Variable Optimizer</td></tr>
+<tr><td><a href="#indmemrem">-indmemrem</a></td><td>Indirect Malloc and Free Removal</td></tr>
+<tr><td><a href="#indvars">-indvars</a></td><td>Canonicalize Induction Variables</td></tr>
+<tr><td><a href="#inline">-inline</a></td><td>Function Integration/Inlining</td></tr>
+<tr><td><a href="#insert-block-profiling">-insert-block-profiling</a></td><td>Insert instrumentation for block profiling</td></tr>
+<tr><td><a href="#insert-edge-profiling">-insert-edge-profiling</a></td><td>Insert instrumentation for edge profiling</td></tr>
+<tr><td><a href="#insert-function-profiling">-insert-function-profiling</a></td><td>Insert instrumentation for function profiling</td></tr>
+<tr><td><a href="#insert-null-profiling-rs">-insert-null-profiling-rs</a></td><td>Measure profiling framework overhead</td></tr>
+<tr><td><a href="#insert-rs-profiling-framework">-insert-rs-profiling-framework</a></td><td>Insert random sampling instrumentation framework</td></tr>
+<tr><td><a href="#instcombine">-instcombine</a></td><td>Combine redundant instructions</td></tr>
+<tr><td><a href="#internalize">-internalize</a></td><td>Internalize Global Symbols</td></tr>
+<tr><td><a href="#ipconstprop">-ipconstprop</a></td><td>Interprocedural constant propagation</td></tr>
+<tr><td><a href="#ipsccp">-ipsccp</a></td><td>Interprocedural Sparse Conditional Constant Propagation</td></tr>
+<tr><td><a href="#lcssa">-lcssa</a></td><td>Loop-Closed SSA Form Pass</td></tr>
+<tr><td><a href="#licm">-licm</a></td><td>Loop Invariant Code Motion</td></tr>
+<tr><td><a href="#loop-extract">-loop-extract</a></td><td>Extract loops into new functions</td></tr>
+<tr><td><a href="#loop-extract-single">-loop-extract-single</a></td><td>Extract at most one loop into a new function</td></tr>
+<tr><td><a href="#loop-reduce">-loop-reduce</a></td><td>Loop Strength Reduction</td></tr>
+<tr><td><a href="#loop-unroll">-loop-unroll</a></td><td>Unroll Loops</td></tr>
+<tr><td><a href="#loop-unswitch">-loop-unswitch</a></td><td>Unswitch Loops</td></tr>
+<tr><td><a href="#loopsimplify">-loopsimplify</a></td><td>Canonicalize Natural Loops</td></tr>
+<tr><td><a href="#lower-packed">-lower-packed</a></td><td>Lower Packed Operations</td></tr>
+<tr><td><a href="#lowerallocs">-lowerallocs</a></td><td>Lower allocations from instructions to calls</td></tr>
+<tr><td><a href="#lowergc">-lowergc</a></td><td>Lower GC intrinsics, for GCless code generators</td></tr>
+<tr><td><a href="#lowerinvoke">-lowerinvoke</a></td><td>Lower Invoke and Unwind</td></tr>
+<tr><td><a href="#lowerselect">-lowerselect</a></td><td>Lower Selects To Branches</td></tr>
+<tr><td><a href="#lowersetjmp">-lowersetjmp</a></td><td>Lower Set Jump</td></tr>
+<tr><td><a href="#lowerswitch">-lowerswitch</a></td><td>Lower SwitchInst's to branches</td></tr>
+<tr><td><a href="#mem2reg">-mem2reg</a></td><td>Promote Memory to Register</td></tr>
+<tr><td><a href="#mergereturn">-mergereturn</a></td><td>Unify Function Exit Nodes</td></tr>
+<tr><td><a href="#predsimplify">-predsimplify</a></td><td>Predicate Simplifier</td></tr>
+<tr><td><a href="#prune-eh">-prune-eh</a></td><td>Remove unused exception handling info</td></tr>
+<tr><td><a href="#raiseallocs">-raiseallocs</a></td><td>Raise allocations from calls to instructions</td></tr>
+<tr><td><a href="#reassociate">-reassociate</a></td><td>Reassociate Expressions</td></tr>
+<tr><td><a href="#reg2mem">-reg2mem</a></td><td>Demote Values to Memory</td></tr>
+<tr><td><a href="#scalarrepl">-scalarrepl</a></td><td>Scalar Replacement of Aggregates</td></tr>
+<tr><td><a href="#sccp">-sccp</a></td><td>Sparse Conditional Constant Propagation</td></tr>
+<tr><td><a href="#simplify-libcalls">-simplify-libcalls</a></td><td>Simplify well-known library calls</td></tr>
+<tr><td><a href="#simplifycfg">-simplifycfg</a></td><td>Simplify the CFG</td></tr>
+<tr><td><a href="#strip">-strip</a></td><td>Strip all symbols from a module</td></tr>
+<tr><td><a href="#tailcallelim">-tailcallelim</a></td><td>Tail Call Elimination</td></tr>
+<tr><td><a href="#tailduplicate">-tailduplicate</a></td><td>Tail Duplication</td></tr>
+<tr><th colspan="2"><b>UTILITY PASSES</b></th></tr>
+<tr><th>Option</th><th>Name</th></tr>
+<tr><td><a href="#deadarghaX0r">-deadarghaX0r</a></td><td>Dead Argument Hacking (BUGPOINT ONLY)</td></tr>
+<tr><td><a href="#extract-blocks">-extract-blocks</a></td><td>Extract Basic Blocks From Module (BUGPOINT ONLY)</td></tr>
+<tr><td><a href="#emitbitcode">-emitbitcode</a></td><td>Bitcode Writer</td></tr>
+<tr><td><a href="#verify">-verify</a></td><td>Module Verifier</td></tr>
+</table>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_section"> <a name="analyses">Analysis Passes</a></div>
+<div class="doc_text">
+ <p>This section describes the LLVM Analysis Passes.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="aa-eval">Exhaustive Alias Analysis Precision Evaluator</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="anders-aa">Andersen's Interprocedural Alias Analysis</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="basicaa">Basic Alias Analysis</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="basiccg">Basic Call Graph Construction</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="basicvn">Basic Value Numbering</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="callgraph">Print Call Graph</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="callscc">Print SCCs of the Call Graph</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="cfgscc">Print SCCs of each Function Control Flow Graph</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="count-aa">Count Alias Analysis Query Responses</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="debug-aa">Alias Analysis Usage Debugger</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="domfrontier">Dominance Frontier Construction</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="domset">Dominator Set Construction</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="domtree">Dominator Tree Construction</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="etforest">ET Forest Construction</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="basicvn">Basic Value Numbering</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="externalfnconstants">Print external fn callsites passed constants</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="globalsmodref-aa">Simple mod/ref analysis for globals</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="idom">Immediate Dominators Construction</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="instcount">Counts the various types of Instructions</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="intervals">Interval Partition Construction</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="load-vn">Load Value Numbering</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="loops">Natural Loop Construction</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="no-aa">No Alias Analysis (always returns 'may' alias)</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="no-profile">No Profile Information</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="postdomfrontier">Post-Dominance Frontier Construction</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="postdomset">Post-Dominator Set Construction</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="postdomtree">Post-Dominator Tree Construction</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="postetforest">Post-ET-Forest Construction</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="postidom">Immediate Post-Dominators Construction</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="print">Print function to stderr</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="print-alias-sets">Alias Set Printer</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="print-callgraph">Print Call Graph to 'dot' file</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="print-cfg">Print CFG of function to 'dot' file</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="print-cfg-only">Print CFG of function to 'dot' file (with no function bodies)</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="printm">Print module to stderr</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="printusedtypes">Find Used Types</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="profile-loader">Load profile information from llvmprof.out</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="scalar-evolution">Scalar Evolution Analysis</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="targetdata">Target Data Layout</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_section"> <a name="transforms">Transform Passes</a></div>
+<div class="doc_text">
+ <p>This section describes the LLVM Transform Passes.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="adce">Aggressive Dead Code Elimination</a>
+</div>
+<div class="doc_text">
+ <p>ADCE aggressively tries to eliminate code. This pass is similar to
+ <a href="#dce">DCE</a> but it assumes that values are dead until proven
+ otherwise. This is similar to <a href="#sccp">SCCP</a>, except applied to
+ the liveness of values.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="argpromotion">Promote 'by reference' arguments to scalars</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="block-placement">Profile Guided Basic Block Placement</a>
+</div>
+<div class="doc_text">
+ <p>This pass implements a very simple profile guided basic block placement
+ algorithm. The idea is to put frequently executed blocks together at the
+ start of the function, and hopefully increase the number of fall-through
+ conditional branches. If there is no profile information for a particular
+ function, this pass basically orders blocks in depth-first order.</p>
+ <p>The algorithm implemented here is basically "Algo1" from "Profile Guided
+ Code Positioning" by Pettis and Hansen, except that it uses basic block
+ counts instead of edge counts. This could be improved in many ways, but is
+ very simple for now.</p>
+ <p>Basically we "place" the entry block, then loop over all successors in a
+ DFO, placing the most frequently executed successor until we run out of
+ blocks. Did we mention that this was <b>extremely</b> simplistic? This is
+ also much slower than it could be. When it becomes important, this pass
+ will be rewritten to use a better algorithm, and then we can worry about
+ efficiency.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="break-crit-edges">Break Critical Edges in CFG</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="cee">Correlated Expression Elimination</a>
+</div>
+<div class="doc_text">
+ <p>Correlated Expression Elimination propagates information from conditional
+ branches to blocks dominated by destinations of the branch. It propagates
+ information from the condition check itself into the body of the branch,
+ allowing transformations like these for example:</p>
+ <pre>
+ if (i == 7)
+ ... 4*i; // constant propagation
+
+ M = i+1; N = j+1;
+ if (i == j)
+ X = M-N; // = M-M == 0;
+ </pre>
+
+ <p>This is called Correlated Expression Elimination because we eliminate or
+ simplify expressions that are correlated with the direction of a branch. In
+ this way we use static information to give us some information about the
+ dynamic value of a variable.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="condprop">Conditional Propagation</a>
+</div>
+<div class="doc_text">
+ <p>This pass propagates information about conditional expressions through the
+ program, allowing it to eliminate conditional branches in some cases.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="constmerge">Merge Duplicate Global Constants</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="constprop">Constant Propagation</a>
+</div>
+<div class="doc_text">
+ <p>This pass implements constant propagation and merging. It looks for
+ instructions involving only constant operands and replaces them with a
+ constant value instead of an instruction. For example:</p>
+ <pre>add i32 1, 2</pre>
+ <p>becomes</p>
+ <pre>i32 3</pre>
+ <p>NOTE: this pass has a habit of making definitions be dead. It is a good
+ idea to run a <a href="#die">DIE</a> (Dead Instruction Elimination) pass
+ sometime after running this pass.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="dce">Dead Code Elimination</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="deadargelim">Dead Argument Elimination</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="deadtypeelim">Dead Type Elimination</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="die">Dead Instruction Elimination</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="dse">Dead Store Elimination</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="gcse">Global Common Subexpression Elimination</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="globaldce">Dead Global Elimination</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="globalopt">Global Variable Optimizer</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="indmemrem">Indirect Malloc and Free Removal</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="indvars">Canonicalize Induction Variables</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="inline">Function Integration/Inlining</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="insert-block-profiling">Insert instrumentation for block profiling</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="insert-edge-profiling">Insert instrumentation for edge profiling</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="insert-function-profiling">Insert instrumentation for function profiling</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="insert-null-profiling-rs">Measure profiling framework overhead</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="insert-rs-profiling-framework">Insert random sampling instrumentation framework</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="instcombine">Combine redundant instructions</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="internalize">Internalize Global Symbols</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="ipconstprop">Interprocedural constant propagation</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="ipsccp">Interprocedural Sparse Conditional Constant Propagation</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="lcssa">Loop-Closed SSA Form Pass</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="licm">Loop Invariant Code Motion</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="loop-extract">Extract loops into new functions</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="loop-extract-single">Extract at most one loop into a new function</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="loop-reduce">Loop Strength Reduction</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="loop-unroll">Unroll Loops</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="loop-unswitch">Unswitch Loops</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="loopsimplify">Canonicalize Natural Loops</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="lower-packed">Lower Packed Operations</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="lowerallocs">Lower allocations from instructions to calls</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="lowergc">Lower GC intrinsics, for GCless code generators</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="lowerinvoke">Lower Invoke and Unwind</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="lowerselect">Lower Selects To Branches</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="lowersetjmp">Lower Set Jump</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="lowerswitch">Lower SwitchInst's to branches</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="mem2reg">Promote Memory to Register</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="mergereturn">Unify Function Exit Nodes</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="predsimplify">Predicate Simplifier</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="prune-eh">Remove unused exception handling info</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="raiseallocs">Raise allocations from calls to instructions</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="reassociate">Reassociate Expressions</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="reg2mem">Demote Values to Memory</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="scalarrepl">Scalar Replacement of Aggregates</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="sccp">Sparse Conditional Constant Propagation</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="simplify-libcalls">Simplify well-known library calls</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="simplifycfg">Simplify the CFG</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="strip">Strip all symbols from a module</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="tailcallelim">Tail Call Elimination</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="tailduplicate">Tail Duplication</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_section"> <a name="transform">Utility Passes</a></div>
+<div class="doc_text">
+ <p>This section describes the LLVM Utility Passes.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="deadarghaX0r">Dead Argument Hacking (BUGPOINT ONLY)</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="extract-blocks">Extract Basic Blocks From Module (BUGPOINT ONLY)</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="emitbitcode">Bitcode Writer</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
+ <a name="verify">Module Verifier</a>
+</div>
+<div class="doc_text">
+ <p>Yet to be written.</p>
+</div>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+ <a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
+ <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+
+</body>
+</html>
diff --git a/docs/ProgrammersManual.html b/docs/ProgrammersManual.html
new file mode 100644
index 0000000..ff18d1c
--- /dev/null
+++ b/docs/ProgrammersManual.html
@@ -0,0 +1,3090 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>LLVM Programmer's Manual</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<div class="doc_title">
+ LLVM Programmer's Manual
+</div>
+
+<ol>
+ <li><a href="#introduction">Introduction</a></li>
+ <li><a href="#general">General Information</a>
+ <ul>
+ <li><a href="#stl">The C++ Standard Template Library</a></li>
+<!--
+ <li>The <tt>-time-passes</tt> option</li>
+ <li>How to use the LLVM Makefile system</li>
+ <li>How to write a regression test</li>
+
+-->
+ </ul>
+ </li>
+ <li><a href="#apis">Important and useful LLVM APIs</a>
+ <ul>
+ <li><a href="#isa">The <tt>isa&lt;&gt;</tt>, <tt>cast&lt;&gt;</tt>
+and <tt>dyn_cast&lt;&gt;</tt> templates</a> </li>
+ <li><a href="#DEBUG">The <tt>DEBUG()</tt> macro and <tt>-debug</tt>
+option</a>
+ <ul>
+ <li><a href="#DEBUG_TYPE">Fine grained debug info with <tt>DEBUG_TYPE</tt>
+and the <tt>-debug-only</tt> option</a> </li>
+ </ul>
+ </li>
+ <li><a href="#Statistic">The <tt>Statistic</tt> class &amp; <tt>-stats</tt>
+option</a></li>
+<!--
+ <li>The <tt>InstVisitor</tt> template
+ <li>The general graph API
+-->
+ <li><a href="#ViewGraph">Viewing graphs while debugging code</a></li>
+ </ul>
+ </li>
+ <li><a href="#datastructure">Picking the Right Data Structure for a Task</a>
+ <ul>
+ <li><a href="#ds_sequential">Sequential Containers (std::vector, std::list, etc)</a>
+ <ul>
+ <li><a href="#dss_fixedarrays">Fixed Size Arrays</a></li>
+ <li><a href="#dss_heaparrays">Heap Allocated Arrays</a></li>
+ <li><a href="#dss_smallvector">"llvm/ADT/SmallVector.h"</a></li>
+ <li><a href="#dss_vector">&lt;vector&gt;</a></li>
+ <li><a href="#dss_deque">&lt;deque&gt;</a></li>
+ <li><a href="#dss_list">&lt;list&gt;</a></li>
+ <li><a href="#dss_ilist">llvm/ADT/ilist</a></li>
+ <li><a href="#dss_other">Other Sequential Container Options</a></li>
+ </ul></li>
+ <li><a href="#ds_set">Set-Like Containers (std::set, SmallSet, SetVector, etc)</a>
+ <ul>
+ <li><a href="#dss_sortedvectorset">A sorted 'vector'</a></li>
+ <li><a href="#dss_smallset">"llvm/ADT/SmallSet.h"</a></li>
+ <li><a href="#dss_smallptrset">"llvm/ADT/SmallPtrSet.h"</a></li>
+ <li><a href="#dss_FoldingSet">"llvm/ADT/FoldingSet.h"</a></li>
+ <li><a href="#dss_set">&lt;set&gt;</a></li>
+ <li><a href="#dss_setvector">"llvm/ADT/SetVector.h"</a></li>
+ <li><a href="#dss_uniquevector">"llvm/ADT/UniqueVector.h"</a></li>
+ <li><a href="#dss_otherset">Other Set-Like Container Options</a></li>
+ </ul></li>
+ <li><a href="#ds_map">Map-Like Containers (std::map, DenseMap, etc)</a>
+ <ul>
+ <li><a href="#dss_sortedvectormap">A sorted 'vector'</a></li>
+ <li><a href="#dss_stringmap">"llvm/ADT/StringMap.h"</a></li>
+ <li><a href="#dss_indexedmap">"llvm/ADT/IndexedMap.h"</a></li>
+ <li><a href="#dss_densemap">"llvm/ADT/DenseMap.h"</a></li>
+ <li><a href="#dss_map">&lt;map&gt;</a></li>
+ <li><a href="#dss_othermap">Other Map-Like Container Options</a></li>
+ </ul></li>
+ </ul>
+ </li>
+ <li><a href="#common">Helpful Hints for Common Operations</a>
+ <ul>
+ <li><a href="#inspection">Basic Inspection and Traversal Routines</a>
+ <ul>
+ <li><a href="#iterate_function">Iterating over the <tt>BasicBlock</tt>s
+in a <tt>Function</tt></a> </li>
+ <li><a href="#iterate_basicblock">Iterating over the <tt>Instruction</tt>s
+in a <tt>BasicBlock</tt></a> </li>
+ <li><a href="#iterate_institer">Iterating over the <tt>Instruction</tt>s
+in a <tt>Function</tt></a> </li>
+ <li><a href="#iterate_convert">Turning an iterator into a
+class pointer</a> </li>
+ <li><a href="#iterate_complex">Finding call sites: a more
+complex example</a> </li>
+ <li><a href="#calls_and_invokes">Treating calls and invokes
+the same way</a> </li>
+ <li><a href="#iterate_chains">Iterating over def-use &amp;
+use-def chains</a> </li>
+ </ul>
+ </li>
+ <li><a href="#simplechanges">Making simple changes</a>
+ <ul>
+ <li><a href="#schanges_creating">Creating and inserting new
+ <tt>Instruction</tt>s</a> </li>
+ <li><a href="#schanges_deleting">Deleting <tt>Instruction</tt>s</a> </li>
+ <li><a href="#schanges_replacing">Replacing an <tt>Instruction</tt>
+with another <tt>Value</tt></a> </li>
+ <li><a href="#schanges_deletingGV">Deleting <tt>GlobalVariable</tt>s</a> </li>
+ </ul>
+ </li>
+<!--
+ <li>Working with the Control Flow Graph
+ <ul>
+ <li>Accessing predecessors and successors of a <tt>BasicBlock</tt>
+ <li>
+ <li>
+ </ul>
+-->
+ </ul>
+ </li>
+
+ <li><a href="#advanced">Advanced Topics</a>
+ <ul>
+ <li><a href="#TypeResolve">LLVM Type Resolution</a>
+ <ul>
+ <li><a href="#BuildRecType">Basic Recursive Type Construction</a></li>
+ <li><a href="#refineAbstractTypeTo">The <tt>refineAbstractTypeTo</tt> method</a></li>
+ <li><a href="#PATypeHolder">The PATypeHolder Class</a></li>
+ <li><a href="#AbstractTypeUser">The AbstractTypeUser Class</a></li>
+ </ul></li>
+
+ <li><a href="#SymbolTable">The <tt>ValueSymbolTable</tt> and <tt>TypeSymbolTable</tt> classes </a></li>
+ </ul></li>
+
+ <li><a href="#coreclasses">The Core LLVM Class Hierarchy Reference</a>
+ <ul>
+ <li><a href="#Type">The <tt>Type</tt> class</a> </li>
+ <li><a href="#Module">The <tt>Module</tt> class</a></li>
+ <li><a href="#Value">The <tt>Value</tt> class</a>
+ <ul>
+ <li><a href="#User">The <tt>User</tt> class</a>
+ <ul>
+ <li><a href="#Instruction">The <tt>Instruction</tt> class</a></li>
+ <li><a href="#Constant">The <tt>Constant</tt> class</a>
+ <ul>
+ <li><a href="#GlobalValue">The <tt>GlobalValue</tt> class</a>
+ <ul>
+ <li><a href="#Function">The <tt>Function</tt> class</a></li>
+ <li><a href="#GlobalVariable">The <tt>GlobalVariable</tt> class</a></li>
+ </ul>
+ </li>
+ </ul>
+ </li>
+ </ul>
+ </li>
+ <li><a href="#BasicBlock">The <tt>BasicBlock</tt> class</a></li>
+ <li><a href="#Argument">The <tt>Argument</tt> class</a></li>
+ </ul>
+ </li>
+ </ul>
+ </li>
+</ol>
+
+<div class="doc_author">
+ <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>,
+ <a href="mailto:dhurjati@cs.uiuc.edu">Dinakar Dhurjati</a>,
+ <a href="mailto:jstanley@cs.uiuc.edu">Joel Stanley</a>, and
+ <a href="mailto:rspencer@x10sys.com">Reid Spencer</a></p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="introduction">Introduction </a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>This document is meant to highlight some of the important classes and
+interfaces available in the LLVM source-base. This manual is not
+intended to explain what LLVM is, how it works, and what LLVM code looks
+like. It assumes that you know the basics of LLVM and are interested
+in writing transformations or otherwise analyzing or manipulating the
+code.</p>
+
+<p>This document should get you oriented so that you can find your
+way in the continuously growing source code that makes up the LLVM
+infrastructure. Note that this manual is not intended to serve as a
+replacement for reading the source code, so if you think there should be
+a method in one of these classes to do something, but it's not listed,
+check the source. Links to the <a href="/doxygen/">doxygen</a> sources
+are provided to make this as easy as possible.</p>
+
+<p>The first section of this document describes general information that is
+useful to know when working in the LLVM infrastructure, and the second describes
+the Core LLVM classes. In the future this manual will be extended with
+information describing how to use extension libraries, such as dominator
+information, CFG traversal routines, and useful utilities like the <tt><a
+href="/doxygen/InstVisitor_8h-source.html">InstVisitor</a></tt> template.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="general">General Information</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>This section contains general information that is useful if you are working
+in the LLVM source-base, but that isn't specific to any particular API.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="stl">The C++ Standard Template Library</a>
+</div>
+
+<div class="doc_text">
+
+<p>LLVM makes heavy use of the C++ Standard Template Library (STL),
+perhaps much more than you are used to, or have seen before. Because of
+this, you might want to do a little background reading in the
+techniques used and capabilities of the library. There are many good
+pages that discuss the STL, and several books on the subject that you
+can get, so it will not be discussed in this document.</p>
+
+<p>Here are some useful links:</p>
+
+<ol>
+
+<li><a href="http://www.dinkumware.com/refxcpp.html">Dinkumware C++ Library
+reference</a> - an excellent reference for the STL and other parts of the
+standard C++ library.</li>
+
+<li><a href="http://www.tempest-sw.com/cpp/">C++ In a Nutshell</a> - This is an
+O'Reilly book. It has a decent Standard Library Reference that rivals
+Dinkumware's, but it is unfortunately no longer free since the book has been
+published.</li>
+
+<li><a href="http://www.parashift.com/c++-faq-lite/">C++ Frequently Asked
+Questions</a></li>
+
+<li><a href="http://www.sgi.com/tech/stl/">SGI's STL Programmer's Guide</a> -
+Contains a useful <a
+href="http://www.sgi.com/tech/stl/stl_introduction.html">Introduction to the
+STL</a>.</li>
+
+<li><a href="http://www.research.att.com/%7Ebs/C++.html">Bjarne Stroustrup's C++
+Page</a></li>
+
+<li><a href="http://64.78.49.204/">
+Bruce Eckel's Thinking in C++, 2nd ed. Volume 2 Revision 4.0 (even better, get
+the book).</a></li>
+
+</ol>
+
+<p>You are also encouraged to take a look at the <a
+href="CodingStandards.html">LLVM Coding Standards</a> guide which focuses on how
+to write maintainable code more than where to put your curly braces.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="otheruseful">Other useful references</a>
+</div>
+
+<div class="doc_text">
+
+<ol>
+<li><a href="http://www.psc.edu/%7Esemke/cvs_branches.html">CVS
+Branch and Tag Primer</a></li>
+<li><a href="http://www.fortran-2000.com/ArnaudRecipes/sharedlib.html">Using
+static and shared libraries across platforms</a></li>
+</ol>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="apis">Important and useful LLVM APIs</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>Here we highlight some LLVM APIs that are generally useful and good to
+know about when writing transformations.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="isa">The <tt>isa&lt;&gt;</tt>, <tt>cast&lt;&gt;</tt> and
+ <tt>dyn_cast&lt;&gt;</tt> templates</a>
+</div>
+
+<div class="doc_text">
+
+<p>The LLVM source-base makes extensive use of a custom form of RTTI.
+These templates have many similarities to the C++ <tt>dynamic_cast&lt;&gt;</tt>
+operator, but they don't have some drawbacks (primarily stemming from
+the fact that <tt>dynamic_cast&lt;&gt;</tt> only works on classes that
+have a v-table). Because they are used so often, you must know what they
+do and how they work. All of these templates are defined in the <a
+ href="/doxygen/Casting_8h-source.html"><tt>llvm/Support/Casting.h</tt></a>
+file (note that you very rarely have to include this file directly).</p>
+
+<dl>
+ <dt><tt>isa&lt;&gt;</tt>: </dt>
+
+ <dd><p>The <tt>isa&lt;&gt;</tt> operator works exactly like the Java
+ "<tt>instanceof</tt>" operator. It returns true or false depending on whether
+ a reference or pointer points to an instance of the specified class. This can
+ be very useful for constraint checking of various sorts (example below).</p>
+ </dd>
+
+ <dt><tt>cast&lt;&gt;</tt>: </dt>
+
+ <dd><p>The <tt>cast&lt;&gt;</tt> operator is a "checked cast" operation. It
+ converts a pointer or reference from a base class to a derived class, causing
+ an assertion failure if it is not really an instance of the right type. This
+ should be used in cases where you have some information that makes you believe
+ that something is of the right type. An example of the <tt>isa&lt;&gt;</tt>
+ and <tt>cast&lt;&gt;</tt> template is:</p>
+
+<div class="doc_code">
+<pre>
+static bool isLoopInvariant(const <a href="#Value">Value</a> *V, const Loop *L) {
+ if (isa&lt;<a href="#Constant">Constant</a>&gt;(V) || isa&lt;<a href="#Argument">Argument</a>&gt;(V) || isa&lt;<a href="#GlobalValue">GlobalValue</a>&gt;(V))
+ return true;
+
+ // <i>Otherwise, it must be an instruction...</i>
+ return !L-&gt;contains(cast&lt;<a href="#Instruction">Instruction</a>&gt;(V)-&gt;getParent());
+}
+</pre>
+</div>
+
+ <p>Note that you should <b>not</b> use an <tt>isa&lt;&gt;</tt> test followed
+ by a <tt>cast&lt;&gt;</tt>; for that, use the <tt>dyn_cast&lt;&gt;</tt>
+ operator.</p>
+
+ </dd>
+
+ <dt><tt>dyn_cast&lt;&gt;</tt>:</dt>
+
+ <dd><p>The <tt>dyn_cast&lt;&gt;</tt> operator is a "checking cast" operation.
+ It checks to see if the operand is of the specified type, and if so, returns a
+ pointer to it (this operator does not work with references). If the operand is
+ not of the correct type, a null pointer is returned. Thus, this works very
+ much like the <tt>dynamic_cast&lt;&gt;</tt> operator in C++, and should be
+ used in the same circumstances. Typically, the <tt>dyn_cast&lt;&gt;</tt>
+ operator is used in an <tt>if</tt> statement or some other flow control
+ statement like this:</p>
+
+<div class="doc_code">
+<pre>
+if (<a href="#AllocationInst">AllocationInst</a> *AI = dyn_cast&lt;<a href="#AllocationInst">AllocationInst</a>&gt;(Val)) {
+ // <i>...</i>
+}
+</pre>
+</div>
+
+ <p>This form of the <tt>if</tt> statement effectively combines together a call
+ to <tt>isa&lt;&gt;</tt> and a call to <tt>cast&lt;&gt;</tt> into one
+ statement, which is very convenient.</p>
+
+ <p>Note that the <tt>dyn_cast&lt;&gt;</tt> operator, like C++'s
+ <tt>dynamic_cast&lt;&gt;</tt> or Java's <tt>instanceof</tt> operator, can be
+ abused. In particular, you should not use big chained <tt>if/then/else</tt>
+ blocks to check for lots of different variants of classes. If you find
+ yourself wanting to do this, it is much cleaner and more efficient to use the
+ <tt>InstVisitor</tt> class to dispatch over the instruction type directly.</p>
+
+ </dd>
+
+ <dt><tt>cast_or_null&lt;&gt;</tt>: </dt>
+
+ <dd><p>The <tt>cast_or_null&lt;&gt;</tt> operator works just like the
+ <tt>cast&lt;&gt;</tt> operator, except that it allows for a null pointer as an
+ argument (which it then propagates). This can sometimes be useful, allowing
+ you to combine several null checks into one.</p></dd>
+
+ <dt><tt>dyn_cast_or_null&lt;&gt;</tt>: </dt>
+
+ <dd><p>The <tt>dyn_cast_or_null&lt;&gt;</tt> operator works just like the
+ <tt>dyn_cast&lt;&gt;</tt> operator, except that it allows for a null pointer
+ as an argument (which it then propagates). This can sometimes be useful,
+ allowing you to combine several null checks into one.</p></dd>
+
+</dl>
+
+<p>These five templates can be used with any classes, whether they have a
+v-table or not. To add support for these templates, you simply need to add
+<tt>classof</tt> static methods to the class you are interested in casting
+to. Describing this is currently outside the scope of this document, but there
+are lots of examples in the LLVM source base.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="DEBUG">The <tt>DEBUG()</tt> macro and <tt>-debug</tt> option</a>
+</div>
+
+<div class="doc_text">
+
+<p>Often when working on your pass you will put a bunch of debugging printouts
+and other code into your pass. After you get it working, you want to remove
+it, but you may need it again in the future (to work out new bugs that you run
+across).</p>
+
+<p> Naturally, because of this, you don't want to delete the debug printouts,
+but you don't want them to always be noisy. A standard compromise is to comment
+them out, allowing you to enable them if you need them in the future.</p>
+
+<p>The "<tt><a href="/doxygen/Debug_8h-source.html">llvm/Support/Debug.h</a></tt>"
+file provides a macro named <tt>DEBUG()</tt> that is a much nicer solution to
+this problem. Basically, you can put arbitrary code into the argument of the
+<tt>DEBUG</tt> macro, and it is only executed if '<tt>opt</tt>' (or any other
+tool) is run with the '<tt>-debug</tt>' command line argument:</p>
+
+<div class="doc_code">
+<pre>
+DOUT &lt;&lt; "I am here!\n";
+</pre>
+</div>
+
+<p>Then you can run your pass like this:</p>
+
+<div class="doc_code">
+<pre>
+$ opt &lt; a.bc &gt; /dev/null -mypass
+<i>&lt;no output&gt;</i>
+$ opt &lt; a.bc &gt; /dev/null -mypass -debug
+I am here!
+</pre>
+</div>
+
+<p>Using the <tt>DEBUG()</tt> macro instead of a home-brewed solution allows you
+to not have to create "yet another" command line option for the debug output for
+your pass. Note that <tt>DEBUG()</tt> macros are disabled for optimized builds,
+so they do not cause a performance impact at all (for the same reason, they
+should also not contain side-effects!).</p>
+
+<p>One additional nice thing about the <tt>DEBUG()</tt> macro is that you can
+enable or disable it directly in gdb. Just use "<tt>set DebugFlag=0</tt>" or
+"<tt>set DebugFlag=1</tt>" from gdb if the program is running. If the
+program hasn't been started yet, you can always just run it with
+<tt>-debug</tt>.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="DEBUG_TYPE">Fine grained debug info with <tt>DEBUG_TYPE</tt> and
+ the <tt>-debug-only</tt> option</a>
+</div>
+
+<div class="doc_text">
+
+<p>Sometimes you may find yourself in a situation where enabling <tt>-debug</tt>
+just turns on <b>too much</b> information (such as when working on the code
+generator). If you want to enable debug information with more fine-grained
+control, you can define the <tt>DEBUG_TYPE</tt> macro and use the
+<tt>-debug-only</tt> option as follows:</p>
+
+<div class="doc_code">
+<pre>
+DOUT &lt;&lt; "No debug type\n";
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "foo"
+DOUT &lt;&lt; "'foo' debug type\n";
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "bar"
+DOUT &lt;&lt; "'bar' debug type\n";
+#undef DEBUG_TYPE
+#define DEBUG_TYPE ""
+DOUT &lt;&lt; "No debug type (2)\n";
+</pre>
+</div>
+
+<p>Then you can run your pass like this:</p>
+
+<div class="doc_code">
+<pre>
+$ opt &lt; a.bc &gt; /dev/null -mypass
+<i>&lt;no output&gt;</i>
+$ opt &lt; a.bc &gt; /dev/null -mypass -debug
+No debug type
+'foo' debug type
+'bar' debug type
+No debug type (2)
+$ opt &lt; a.bc &gt; /dev/null -mypass -debug-only=foo
+'foo' debug type
+$ opt &lt; a.bc &gt; /dev/null -mypass -debug-only=bar
+'bar' debug type
+</pre>
+</div>
+
+<p>Of course, in practice, you should only set <tt>DEBUG_TYPE</tt> at the top of
+a file, to specify the debug type for the entire module (if you do this before
+you <tt>#include "llvm/Support/Debug.h"</tt>, you don't have to insert the ugly
+<tt>#undef</tt>'s). Also, you should use names more meaningful than "foo" and
+"bar", because there is no system in place to ensure that names do not
+conflict. If two different modules use the same string, they will both be turned
+on when the name is specified. This allows, for example, all debug information
+for instruction scheduling to be enabled with <tt>-debug-only=InstrSched</tt>,
+even if the source lives in multiple files.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="Statistic">The <tt>Statistic</tt> class &amp; <tt>-stats</tt>
+ option</a>
+</div>
+
+<div class="doc_text">
+
+<p>The "<tt><a
+href="/doxygen/Statistic_8h-source.html">llvm/ADT/Statistic.h</a></tt>" file
+provides a class named <tt>Statistic</tt> that is used as a unified way to
+keep track of what the LLVM compiler is doing and how effective various
+optimizations are. It is useful to see what optimizations are contributing to
+making a particular program run faster.</p>
+
+<p>Often you may run your pass on some big program, and you're interested to see
+how many times it makes a certain transformation. Although you can do this with
+hand inspection, or some ad-hoc method, this is a real pain and not very useful
+for big programs. Using the <tt>Statistic</tt> class makes it very easy to
+keep track of this information, and the calculated information is presented in a
+uniform manner with the rest of the passes being executed.</p>
+
+<p>There are many examples of <tt>Statistic</tt> uses, but the basics of using
+it are as follows:</p>
+
+<ol>
+ <li><p>Define your statistic like this:</p>
+
+<div class="doc_code">
+<pre>
+#define <a href="#DEBUG_TYPE">DEBUG_TYPE</a> "mypassname" <i>// This goes before any #includes.</i>
+STATISTIC(NumXForms, "The # of times I did stuff");
+</pre>
+</div>
+
+ <p>The <tt>STATISTIC</tt> macro defines a static variable, whose name is
+ specified by the first argument. The pass name is taken from the DEBUG_TYPE
+ macro, and the description is taken from the second argument. The variable
+ defined ("NumXForms" in this case) acts like an unsigned integer.</p></li>
+
+ <li><p>Whenever you make a transformation, bump the counter:</p>
+
+<div class="doc_code">
+<pre>
+++NumXForms; // <i>I did stuff!</i>
+</pre>
+</div>
+
+ </li>
+ </ol>
+
+ <p>That's all you have to do. To get '<tt>opt</tt>' to print out the
+ statistics gathered, use the '<tt>-stats</tt>' option:</p>
+
+<div class="doc_code">
+<pre>
+$ opt -stats -mypassname &lt; program.bc &gt; /dev/null
+<i>... statistics output ...</i>
+</pre>
+</div>
+
+ <p> When running <tt>opt</tt> on a C file from the SPEC benchmark
+suite, it gives a report that looks like this:</p>
+
+<div class="doc_code">
+<pre>
+ 7646 bitcodewriter - Number of normal instructions
+ 725 bitcodewriter - Number of oversized instructions
+ 129996 bitcodewriter - Number of bitcode bytes written
+ 2817 raise - Number of insts DCEd or constprop'd
+ 3213 raise - Number of cast-of-self removed
+ 5046 raise - Number of expression trees converted
+ 75 raise - Number of other getelementptr's formed
+ 138 raise - Number of load/store peepholes
+ 42 deadtypeelim - Number of unused typenames removed from symtab
+ 392 funcresolve - Number of varargs functions resolved
+ 27 globaldce - Number of global variables removed
+ 2 adce - Number of basic blocks removed
+ 134 cee - Number of branches revectored
+ 49 cee - Number of setcc instruction eliminated
+ 532 gcse - Number of loads removed
+ 2919 gcse - Number of instructions removed
+ 86 indvars - Number of canonical indvars added
+ 87 indvars - Number of aux indvars removed
+ 25 instcombine - Number of dead inst eliminate
+ 434 instcombine - Number of insts combined
+ 248 licm - Number of load insts hoisted
+ 1298 licm - Number of insts hoisted to a loop pre-header
+ 3 licm - Number of insts hoisted to multiple loop preds (bad, no loop pre-header)
+ 75 mem2reg - Number of alloca's promoted
+ 1444 cfgsimplify - Number of blocks simplified
+</pre>
+</div>
+
+<p>Obviously, with so many optimizations, having a unified framework for this
+stuff is very nice. Making your pass fit well into the framework makes it more
+maintainable and useful.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="ViewGraph">Viewing graphs while debugging code</a>
+</div>
+
+<div class="doc_text">
+
+<p>Several of the important data structures in LLVM are graphs: for example
+CFGs made out of LLVM <a href="#BasicBlock">BasicBlock</a>s, CFGs made out of
+LLVM <a href="CodeGenerator.html#machinebasicblock">MachineBasicBlock</a>s, and
+<a href="CodeGenerator.html#selectiondag_intro">Instruction Selection
+DAGs</a>. In many cases, while debugging various parts of the compiler, it is
+nice to instantly visualize these graphs.</p>
+
+<p>LLVM provides several callbacks that are available in a debug build to do
+exactly that. If you call the <tt>Function::viewCFG()</tt> method, for example,
+the current LLVM tool will pop up a window containing the CFG for the function
+where each basic block is a node in the graph, and each node contains the
+instructions in the block. Similarly, there also exists
+<tt>Function::viewCFGOnly()</tt> (does not include the instructions), the
+<tt>MachineFunction::viewCFG()</tt> and <tt>MachineFunction::viewCFGOnly()</tt>,
+and the <tt>SelectionDAG::viewGraph()</tt> methods. Within GDB, for example,
+you can usually use something like <tt>call DAG.viewGraph()</tt> to pop
+up a window. Alternatively, you can sprinkle calls to these functions in your
+code in places you want to debug.</p>
+
+<p>Getting this to work requires a small amount of configuration. On Unix
+systems with X11, install the <a href="http://www.graphviz.org">graphviz</a>
+toolkit, and make sure 'dot' and 'gv' are in your path. If you are running on
+Mac OS/X, download and install the Mac OS/X <a
+href="http://www.pixelglow.com/graphviz/">Graphviz program</a>, and add
+<tt>/Applications/Graphviz.app/Contents/MacOS/</tt> (or wherever you install
+it) to your path. Once your system and path are set up, rerun the LLVM
+configure script and rebuild LLVM to enable this functionality.</p>
+
+<p><tt>SelectionDAG</tt> has been extended to make it easier to locate
+<i>interesting</i> nodes in large complex graphs. From gdb, if you
+<tt>call DAG.setGraphColor(<i>node</i>, "<i>color</i>")</tt>, then the
+next <tt>call DAG.viewGraph()</tt> would highlight the node in the
+specified color (choices of colors can be found at <a
+href="http://www.graphviz.org/doc/info/colors.html">colors</a>.) More
+complex node attributes can be provided with <tt>call
+DAG.setGraphAttrs(<i>node</i>, "<i>attributes</i>")</tt> (choices can be
+found at <a href="http://www.graphviz.org/doc/info/attrs.html">Graph
+Attributes</a>.) If you want to restart and clear all the current graph
+attributes, then you can <tt>call DAG.clearGraphAttrs()</tt>. </p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="datastructure">Picking the Right Data Structure for a Task</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>LLVM has a plethora of data structures in the <tt>llvm/ADT/</tt> directory,
+ and we commonly use STL data structures. This section describes the trade-offs
+ you should consider when you pick one.</p>
+
+<p>
+The first step is a choose your own adventure: do you want a sequential
+container, a set-like container, or a map-like container? The most important
+thing when choosing a container is the algorithmic properties of how you plan to
+access the container. Based on that, you should use:</p>
+
+<ul>
+<li>a <a href="#ds_map">map-like</a> container if you need efficient look-up
+ of a value based on another value. Map-like containers also support
+ efficient queries for containment (whether a key is in the map). Map-like
+ containers generally do not support efficient reverse mapping (values to
+ keys). If you need that, use two maps. Some map-like containers also
+ support efficient iteration through the keys in sorted order. Map-like
+ containers are the most expensive sort; only use them if you need one of
+ these capabilities.</li>
+
+<li>a <a href="#ds_set">set-like</a> container if you need to put a bunch of
+ stuff into a container that automatically eliminates duplicates. Some
+ set-like containers support efficient iteration through the elements in
+ sorted order. Set-like containers are more expensive than sequential
+ containers.
+</li>
+
+<li>a <a href="#ds_sequential">sequential</a> container provides
+ the most efficient way to add elements and keeps track of the order they are
+ added to the collection. They permit duplicates and support efficient
+ iteration, but do not support efficient look-up based on a key.
+</li>
+
+</ul>
+
+<p>
+Once the proper category of container is determined, you can fine tune the
+memory use, constant factors, and cache behaviors of access by intelligently
+picking a member of the category. Note that constant factors and cache behavior
+can be a big deal. If you have a vector that usually only contains a few
+elements (but could contain many), for example, it's much better to use
+<a href="#dss_smallvector">SmallVector</a> than <a href="#dss_vector">vector</a>.
+Doing so avoids (relatively) expensive malloc/free calls, which dwarf the
+cost of adding the elements to the container. </p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="ds_sequential">Sequential Containers (std::vector, std::list, etc)</a>
+</div>
+
+<div class="doc_text">
+There are a variety of sequential containers available for you, based on your
+needs. Pick the first in this section that will do what you want.
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="dss_fixedarrays">Fixed Size Arrays</a>
+</div>
+
+<div class="doc_text">
+<p>Fixed size arrays are very simple and very fast. They are good if you know
+exactly how many elements you have, or you have a (low) upper bound on how many
+you have.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="dss_heaparrays">Heap Allocated Arrays</a>
+</div>
+
+<div class="doc_text">
+<p>Heap allocated arrays (new[] + delete[]) are also simple. They are good if
+the number of elements is variable, if you know how many elements you will need
+before the array is allocated, and if the array is usually large (if not,
+consider a <a href="#dss_smallvector">SmallVector</a>). The cost of a heap
+allocated array is the cost of the new/delete (aka malloc/free). Also note that
+if you are allocating an array of a type with a constructor, the constructor and
+destructor will be run for every element in the array (re-sizable vectors only
+construct those elements actually used).</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="dss_smallvector">"llvm/ADT/SmallVector.h"</a>
+</div>
+
+<div class="doc_text">
+<p><tt>SmallVector&lt;Type, N&gt;</tt> is a simple class that looks and smells
+just like <tt>vector&lt;Type&gt;</tt>:
+it supports efficient iteration, lays out elements in memory order (so you can
+do pointer arithmetic between elements), supports efficient push_back/pop_back
+operations, supports efficient random access to its elements, etc.</p>
+
+<p>The advantage of SmallVector is that it allocates space for
+some number of elements (N) <b>in the object itself</b>. Because of this, if
+the SmallVector is dynamically smaller than N, no malloc is performed. This can
+be a big win in cases where the malloc/free call is far more expensive than the
+code that fiddles around with the elements.</p>
+
+<p>This is good for vectors that are "usually small" (e.g. the number of
+predecessors/successors of a block is usually less than 8). On the other hand,
+this makes the size of the SmallVector itself large, so you don't want to
+allocate lots of them (doing so will waste a lot of space). As such,
+SmallVectors are most useful when on the stack.</p>
+
+<p>SmallVector also provides a nice portable and efficient replacement for
+<tt>alloca</tt>.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="dss_vector">&lt;vector&gt;</a>
+</div>
+
+<div class="doc_text">
+<p>
+std::vector is well loved and respected. It is useful when SmallVector isn't:
+when the size of the vector is often large (thus the small optimization will
+rarely be a benefit) or if you will be allocating many instances of the vector
+itself (which would waste space for elements that aren't in the container).
+vector is also useful when interfacing with code that expects vectors :).
+</p>
+
+<p>One worthwhile note about std::vector: avoid code like this:</p>
+
+<div class="doc_code">
+<pre>
+for ( ... ) {
+ std::vector&lt;foo&gt; V;
+ use V;
+}
+</pre>
+</div>
+
+<p>Instead, write this as:</p>
+
+<div class="doc_code">
+<pre>
+std::vector&lt;foo&gt; V;
+for ( ... ) {
+ use V;
+ V.clear();
+}
+</pre>
+</div>
+
+<p>Doing so will save (at least) one heap allocation and free per iteration of
+the loop.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="dss_deque">&lt;deque&gt;</a>
+</div>
+
+<div class="doc_text">
+<p>std::deque is, in some senses, a generalized version of std::vector. Like
+std::vector, it provides constant time random access and other similar
+properties, but it also provides efficient access to the front of the list. It
+does not guarantee contiguity of elements within memory.</p>
+
+<p>In exchange for this extra flexibility, std::deque has significantly higher
+constant factor costs than std::vector. If possible, use std::vector or
+something cheaper.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="dss_list">&lt;list&gt;</a>
+</div>
+
+<div class="doc_text">
+<p>std::list is an extremely inefficient class that is rarely useful.
+It performs a heap allocation for every element inserted into it, thus having an
+extremely high constant factor, particularly for small data types. std::list
+also only supports bidirectional iteration, not random access iteration.</p>
+
+<p>In exchange for this high cost, std::list supports efficient access to both
+ends of the list (like std::deque, but unlike std::vector or SmallVector). In
+addition, the iterator invalidation characteristics of std::list are stronger
+than those of a vector class: inserting or removing an element does
+not invalidate iterators or pointers to other elements in the list.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="dss_ilist">llvm/ADT/ilist</a>
+</div>
+
+<div class="doc_text">
+<p><tt>ilist&lt;T&gt;</tt> implements an 'intrusive' doubly-linked list. It is
+intrusive, because it requires the element to store and provide access to the
+prev/next pointers for the list.</p>
+
+<p>ilist has the same drawbacks as std::list, and additionally requires an
+ilist_traits implementation for the element type, but it provides some novel
+characteristics. In particular, it can efficiently store polymorphic objects,
+the traits class is informed when an element is inserted or removed from the
+list, and ilists are guaranteed to support a constant-time splice operation.
+</p>
+
+<p>These properties are exactly what we want for things like Instructions and
+basic blocks, which is why these are implemented with ilists.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="dss_other">Other Sequential Container options</a>
+</div>
+
+<div class="doc_text">
+<p>Other STL containers are available, such as std::string.</p>
+
+<p>There are also various STL adapter classes such as std::queue,
+std::priority_queue, std::stack, etc. These provide simplified access to an
+underlying container but don't affect the cost of the container itself.</p>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="ds_set">Set-Like Containers (std::set, SmallSet, SetVector, etc)</a>
+</div>
+
+<div class="doc_text">
+
+<p>Set-like containers are useful when you need to canonicalize multiple values
+into a single representation. There are several different choices for how to do
+this, providing various trade-offs.</p>
+
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="dss_sortedvectorset">A sorted 'vector'</a>
+</div>
+
+<div class="doc_text">
+
+<p>If you intend to insert a lot of elements, then do a lot of queries, a
+great approach is to use a vector (or other sequential container) with
+std::sort+std::unique to remove duplicates. This approach works really well if
+your usage pattern has these two distinct phases (insert then query), and can be
+coupled with a good choice of <a href="#ds_sequential">sequential container</a>.
+</p>
+
+<p>
+This combination provides several nice properties: the result data is
+contiguous in memory (good for cache locality), has few allocations, is easy to
+address (iterators in the final vector are just indices or pointers), and can be
+efficiently queried with a standard binary or radix search.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="dss_smallset">"llvm/ADT/SmallSet.h"</a>
+</div>
+
+<div class="doc_text">
+
+<p>If you have a set-like data structure that is usually small and whose elements
+are reasonably small, a <tt>SmallSet&lt;Type, N&gt;</tt> is a good choice. This set
+has space for N elements in place (thus, if the set is dynamically smaller than
+N, no malloc traffic is required) and accesses them with a simple linear search.
+When the set grows beyond 'N' elements, it allocates a more expensive representation that
+guarantees efficient access (for most types, it falls back to std::set, but for
+pointers it uses something far better, <a
+href="#dss_smallptrset">SmallPtrSet</a>).</p>
+
+<p>The magic of this class is that it handles small sets extremely efficiently,
+but gracefully handles extremely large sets without loss of efficiency. The
+drawback is that the interface is quite small: it supports insertion, queries
+and erasing, but does not support iteration.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="dss_smallptrset">"llvm/ADT/SmallPtrSet.h"</a>
+</div>
+
+<div class="doc_text">
+
+<p>SmallPtrSet has all the advantages of SmallSet (and a SmallSet of pointers is
+transparently implemented with a SmallPtrSet), but also supports iterators. If
+more than 'N' insertions are performed, a single quadratically
+probed hash table is allocated and grows as needed, providing extremely
+efficient access (constant time insertion/deleting/queries with low constant
+factors) and is very stingy with malloc traffic.</p>
+
+<p>Note that, unlike std::set, the iterators of SmallPtrSet are invalidated
+whenever an insertion occurs. Also, the values visited by the iterators are not
+visited in sorted order.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="dss_FoldingSet">"llvm/ADT/FoldingSet.h"</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+FoldingSet is an aggregate class that is really good at uniquing
+expensive-to-create or polymorphic objects. It is a combination of a chained
+hash table with intrusive links (uniqued objects are required to inherit from
+FoldingSetNode) that uses <a href="#dss_smallvector">SmallVector</a> as part of
+its ID process.</p>
+
+<p>Consider a case where you want to implement a "getOrCreateFoo" method for
+a complex object (for example, a node in the code generator). The client has a
+description of *what* it wants to generate (it knows the opcode and all the
+operands), but we don't want to 'new' a node, then try inserting it into a set
+only to find out it already exists, at which point we would have to delete it
+and return the node that already exists.
+</p>
+
+<p>To support this style of client, FoldingSet performs a query with a
+FoldingSetNodeID (which wraps SmallVector) that can be used to describe the
+element that we want to query for. The query either returns the element
+matching the ID or it returns an opaque ID that indicates where insertion should
+take place. Construction of the ID usually does not require heap traffic.</p>
+
+<p>Because FoldingSet uses intrusive links, it can support polymorphic objects
+in the set (for example, you can have SDNode instances mixed with LoadSDNodes).
+Because the elements are individually allocated, pointers to the elements are
+stable: inserting or removing elements does not invalidate any pointers to other
+elements.
+</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="dss_set">&lt;set&gt;</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>std::set</tt> is a reasonable all-around set class, which is decent at
+many things but great at nothing. std::set allocates memory for each element
+inserted (thus it is very malloc intensive) and typically stores three pointers
+per element in the set (thus adding a large amount of per-element space
+overhead). It offers guaranteed log(n) performance, which is not particularly
+fast from a complexity standpoint (particularly if the elements of the set are
+expensive to compare, like strings), and has extremely high constant factors for
+lookup, insertion and removal.</p>
+
+<p>The advantages of std::set are that its iterators are stable (deleting or
+inserting an element from the set does not affect iterators or pointers to other
+elements) and that iteration over the set is guaranteed to be in sorted order.
+If the elements in the set are large, then the relative overhead of the pointers
+and malloc traffic is not a big deal, but if the elements of the set are small,
+std::set is almost never a good choice.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="dss_setvector">"llvm/ADT/SetVector.h"</a>
+</div>
+
+<div class="doc_text">
+<p>LLVM's SetVector&lt;Type&gt; is an adapter class that combines your choice of
+a set-like container along with a <a href="#ds_sequential">Sequential
+Container</a>. The important property
+that this provides is efficient insertion with uniquing (duplicate elements are
+ignored) with iteration support. It implements this by inserting elements into
+both a set-like container and the sequential container, using the set-like
+container for uniquing and the sequential container for iteration.
+</p>
+
+<p>The difference between SetVector and other sets is that the order of
+iteration is guaranteed to match the order of insertion into the SetVector.
+This property is really important for things like sets of pointers. Because
+pointer values are non-deterministic (e.g. vary across runs of the program on
+different machines), iterating over the pointers in an ordinary set will
+not be in a well-defined order.</p>
+
+<p>
+The drawback of SetVector is that it requires twice as much space as a normal
+set and has the sum of constant factors from the set-like container and the
+sequential container that it uses. Use it *only* if you need to iterate over
+the elements in a deterministic order. SetVector is also expensive to delete
+elements out of (linear time), unless you use its "pop_back" method, which is
+faster.
+</p>
+
+<p>SetVector is an adapter class that defaults to using std::vector and std::set
+for the underlying containers, so it is quite expensive. However,
+<tt>"llvm/ADT/SetVector.h"</tt> also provides a SmallSetVector class, which
+defaults to using a SmallVector and SmallSet of a specified size. If you use
+this, and if your sets are dynamically smaller than N, you will save a lot of
+heap traffic.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="dss_uniquevector">"llvm/ADT/UniqueVector.h"</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+UniqueVector is similar to <a href="#dss_setvector">SetVector</a>, but it
+retains a unique ID for each element inserted into the set. It internally
+contains a map and a vector, and it assigns a unique ID for each value inserted
+into the set.</p>
+
+<p>UniqueVector is very expensive: its cost is the sum of the cost of
+maintaining both the map and vector, it has high complexity, high constant
+factors, and produces a lot of malloc traffic. It should be avoided.</p>
+
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="dss_otherset">Other Set-Like Container Options</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+The STL provides several other options, such as std::multiset and the various
+"hash_set" like containers (whether from C++ TR1 or from the SGI library).</p>
+
+<p>std::multiset is useful if you're not interested in elimination of
+duplicates, but has all the drawbacks of std::set. A sorted vector (where you
+don't delete duplicate entries) or some other approach is almost always
+better.</p>
+
+<p>The various hash_set implementations (exposed portably by
+"llvm/ADT/hash_set") are simple chained hash tables. This algorithm is as malloc
+intensive as std::set (performing an allocation for each element inserted,
+thus having really high constant factors) but (usually) provides O(1)
+insertion/deletion of elements. This can be useful if your elements are large
+(thus making the constant-factor cost relatively low) or if comparisons are
+expensive. Element iteration does not visit elements in a useful order.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="ds_map">Map-Like Containers (std::map, DenseMap, etc)</a>
+</div>
+
+<div class="doc_text">
+Map-like containers are useful when you want to associate data to a key. As
+usual, there are a lot of different ways to do this. :)
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="dss_sortedvectormap">A sorted 'vector'</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+If your usage pattern follows a strict insert-then-query approach, you can
+trivially use the same approach as <a href="#dss_sortedvectorset">sorted vectors
+for set-like containers</a>. The only difference is that your query function
+(which uses std::lower_bound to get efficient log(n) lookup) should only compare
+the key, not both the key and value. This yields the same advantages as sorted
+vectors for sets.
+</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="dss_stringmap">"llvm/ADT/StringMap.h"</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+Strings are commonly used as keys in maps, and they are difficult to support
+efficiently: they are variable length, inefficient to hash and compare when
+long, expensive to copy, etc. StringMap is a specialized container designed to
+cope with these issues. It supports mapping an arbitrary range of bytes to an
+arbitrary other object.</p>
+
+<p>The StringMap implementation uses a quadratically-probed hash table, where
+the buckets store a pointer to the heap allocated entries (and some other
+stuff). The entries in the map must be heap allocated because the strings are
+variable length. The string data (key) and the element object (value) are
+stored in the same allocation with the string data immediately after the element
+object. This container guarantees that "<tt>(char*)(&amp;Value+1)</tt>" points
+to the key string for a value.</p>
+
+<p>The StringMap is very fast for several reasons: quadratic probing is very
+cache efficient for lookups, the hash value of strings in buckets is not
+recomputed when looking up an element, StringMap rarely has to touch the
+memory for unrelated objects when looking up a value (even when hash collisions
+happen), hash table growth does not recompute the hash values for strings
+already in the table, and each pair in the map is stored in a single allocation
+(the string data is stored in the same allocation as the Value of a pair).</p>
+
+<p>StringMap also provides query methods that take byte ranges, so it only ever
+copies a string if a value is inserted into the table.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="dss_indexedmap">"llvm/ADT/IndexedMap.h"</a>
+</div>
+
+<div class="doc_text">
+<p>
+IndexedMap is a specialized container for mapping small dense integers (or
+values that can be mapped to small dense integers) to some other type. It is
+internally implemented as a vector with a mapping function that maps the keys to
+the dense integer range.
+</p>
+
+<p>
+This is useful for cases like virtual registers in the LLVM code generator: they
+have a dense mapping that is offset by a compile-time constant (the first
+virtual register ID).</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="dss_densemap">"llvm/ADT/DenseMap.h"</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+DenseMap is a simple quadratically probed hash table. It excels at supporting
+small keys and values: it uses a single allocation to hold all of the pairs that
+are currently inserted in the map. DenseMap is a great way to map pointers to
+pointers, or map other small types to each other.
+</p>
+
+<p>
+There are several aspects of DenseMap that you should be aware of, however. The
+iterators in a densemap are invalidated whenever an insertion occurs, unlike
+map. Also, because DenseMap allocates space for a large number of key/value
+pairs (it starts with 64 by default), it will waste a lot of space if your keys
+or values are large. Finally, you must implement a partial specialization of
+DenseMapKeyInfo for the key that you want, if it isn't already supported. This
+is required to tell DenseMap about two special marker values (which can never be
+inserted into the map) that it needs internally.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="dss_map">&lt;map&gt;</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+std::map has similar characteristics to <a href="#dss_set">std::set</a>: it uses
+a single allocation per pair inserted into the map, it offers log(n) lookup with
+an extremely large constant factor, imposes a space penalty of 3 pointers per
+pair in the map, etc.</p>
+
+<p>std::map is most useful when your keys or values are very large, if you need
+to iterate over the collection in sorted order, or if you need stable iterators
+into the map (i.e. they don't get invalidated if an insertion or deletion of
+another element takes place).</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="dss_othermap">Other Map-Like Container Options</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+The STL provides several other options, such as std::multimap and the various
+"hash_map" like containers (whether from C++ TR1 or from the SGI library).</p>
+
+<p>std::multimap is useful if you want to map a key to multiple values, but has
+all the drawbacks of std::map. A sorted vector or some other approach is almost
+always better.</p>
+
+<p>The various hash_map implementations (exposed portably by
+"llvm/ADT/hash_map") are simple chained hash tables. This algorithm is as
+malloc intensive as std::map (performing an allocation for each element
+inserted, thus having really high constant factors) but (usually) provides O(1)
+insertion/deletion of elements. This can be useful if your elements are large
+(thus making the constant-factor cost relatively low) or if comparisons are
+expensive. Element iteration does not visit elements in a useful order.</p>
+
+</div>
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="common">Helpful Hints for Common Operations</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>This section describes how to perform some very simple transformations of
+LLVM code. This is meant to give examples of common idioms used, showing the
+practical side of LLVM transformations.</p>
+
+<p>Because this is a "how-to" section,
+you should also read about the main classes that you will be working with. The
+<a href="#coreclasses">Core LLVM Class Hierarchy Reference</a> contains details
+and descriptions of the main classes that you should know about.</p>
+
+</div>
+
+<!-- NOTE: this section should be heavy on example code -->
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="inspection">Basic Inspection and Traversal Routines</a>
+</div>
+
+<div class="doc_text">
+
+<p>The LLVM compiler infrastructure has many different data structures that may
+be traversed. Following the example of the C++ standard template library, the
+techniques used to traverse these various data structures are all basically the
+same. For an enumerable sequence of values, the <tt>XXXbegin()</tt> function (or
+method) returns an iterator to the start of the sequence, the <tt>XXXend()</tt>
+function returns an iterator pointing to one past the last valid element of the
+sequence, and there is some <tt>XXXiterator</tt> data type that is common
+between the two operations.</p>
+
+<p>Because the pattern for iteration is common across many different aspects of
+the program representation, the standard template library algorithms may be used
+on them, and it is easier to remember how to iterate. First we show a few common
+examples of the data structures that need to be traversed. Other data
+structures are traversed in very similar ways.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="iterate_function">Iterating over the </a><a
+ href="#BasicBlock"><tt>BasicBlock</tt></a>s in a <a
+ href="#Function"><tt>Function</tt></a>
+</div>
+
+<div class="doc_text">
+
+<p>It's quite common to have a <tt>Function</tt> instance that you'd like to
+transform in some way; in particular, you'd like to manipulate its
+<tt>BasicBlock</tt>s. To facilitate this, you'll need to iterate over all of
+the <tt>BasicBlock</tt>s that constitute the <tt>Function</tt>. The following is
+an example that prints the name of a <tt>BasicBlock</tt> and the number of
+<tt>Instruction</tt>s it contains:</p>
+
+<div class="doc_code">
+<pre>
+// <i>func is a pointer to a Function instance</i>
+for (Function::iterator i = func-&gt;begin(), e = func-&gt;end(); i != e; ++i)
+ // <i>Print out the name of the basic block if it has one, and then the</i>
+ // <i>number of instructions that it contains</i>
+ llvm::cerr &lt;&lt; "Basic block (name=" &lt;&lt; i-&gt;getName() &lt;&lt; ") has "
+ &lt;&lt; i-&gt;size() &lt;&lt; " instructions.\n";
+</pre>
+</div>
+
+<p>Note that <tt>i</tt> can be used as if it were a pointer for the purposes of
+invoking member functions of the <tt>BasicBlock</tt> class. This is
+because the indirection operator is overloaded for the iterator
+classes. In the above code, the expression <tt>i-&gt;size()</tt> is
+exactly equivalent to <tt>(*i).size()</tt> just like you'd expect.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="iterate_basicblock">Iterating over the </a><a
+ href="#Instruction"><tt>Instruction</tt></a>s in a <a
+ href="#BasicBlock"><tt>BasicBlock</tt></a>
+</div>
+
+<div class="doc_text">
+
+<p>Just like when dealing with <tt>BasicBlock</tt>s in <tt>Function</tt>s, it's
+easy to iterate over the individual instructions that make up
+<tt>BasicBlock</tt>s. Here's a code snippet that prints out each instruction in
+a <tt>BasicBlock</tt>:</p>
+
+<div class="doc_code">
+<pre>
+// <i>blk is a pointer to a BasicBlock instance</i>
+for (BasicBlock::iterator i = blk-&gt;begin(), e = blk-&gt;end(); i != e; ++i)
+ // <i>The next statement works since operator&lt;&lt;(ostream&amp;,...)</i>
+ // <i>is overloaded for Instruction&amp;</i>
+ llvm::cerr &lt;&lt; *i &lt;&lt; "\n";
+</pre>
+</div>
+
+<p>However, this isn't really the best way to print out the contents of a
+<tt>BasicBlock</tt>! Since the ostream operators are overloaded for virtually
+anything you'll care about, you could have just invoked the print routine on the
+basic block itself: <tt>llvm::cerr &lt;&lt; *blk &lt;&lt; "\n";</tt>.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="iterate_institer">Iterating over the </a><a
+ href="#Instruction"><tt>Instruction</tt></a>s in a <a
+ href="#Function"><tt>Function</tt></a>
+</div>
+
+<div class="doc_text">
+
+<p>If you're finding that you commonly iterate over a <tt>Function</tt>'s
+<tt>BasicBlock</tt>s and then that <tt>BasicBlock</tt>'s <tt>Instruction</tt>s,
+<tt>InstIterator</tt> should be used instead. You'll need to include <a
+href="/doxygen/InstIterator_8h-source.html"><tt>llvm/Support/InstIterator.h</tt></a>,
+and then instantiate <tt>InstIterator</tt>s explicitly in your code. Here's a
+small example that shows how to dump all instructions in a function to the standard error stream:</p>
+
+<div class="doc_code">
+<pre>
+#include "<a href="/doxygen/InstIterator_8h-source.html">llvm/Support/InstIterator.h</a>"
+
+// <i>F is a pointer to a Function instance</i>
+for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i)
+ llvm::cerr &lt;&lt; *i &lt;&lt; "\n";
+</pre>
+</div>
+
+<p>Easy, isn't it? You can also use <tt>InstIterator</tt>s to fill a
+work list with its initial contents. For example, if you wanted to
+initialize a work list to contain all instructions in a <tt>Function</tt>
+F, all you would need to do is something like:</p>
+
+<div class="doc_code">
+<pre>
+std::set&lt;Instruction*&gt; worklist;
+worklist.insert(inst_begin(F), inst_end(F));
+</pre>
+</div>
+
+<p>The STL set <tt>worklist</tt> would now contain all instructions in the
+<tt>Function</tt> pointed to by F.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="iterate_convert">Turning an iterator into a class pointer (and
+ vice-versa)</a>
+</div>
+
+<div class="doc_text">
+
+<p>Sometimes, it'll be useful to grab a reference (or pointer) to a class
+instance when all you've got at hand is an iterator. Well, extracting
+a reference or a pointer from an iterator is very straight-forward.
+Assuming that <tt>i</tt> is a <tt>BasicBlock::iterator</tt> and <tt>j</tt>
+is a <tt>BasicBlock::const_iterator</tt>:</p>
+
+<div class="doc_code">
+<pre>
+Instruction&amp; inst = *i; // <i>Grab reference to instruction reference</i>
+Instruction* pinst = &amp;*i; // <i>Grab pointer to instruction reference</i>
+const Instruction&amp; inst = *j;
+</pre>
+</div>
+
+<p>However, the iterators you'll be working with in the LLVM framework are
+special: they will automatically convert to a ptr-to-instance type whenever they
+need to. Instead of dereferencing the iterator and then taking the address of
+the result, you can simply assign the iterator to the proper pointer type and
+you get the dereference and address-of operation as a result of the assignment
+(behind the scenes, this is a result of overloading casting mechanisms). Thus
+the second line of the last example,</p>
+
+<div class="doc_code">
+<pre>
+Instruction* pinst = &amp;*i;
+</pre>
+</div>
+
+<p>is semantically equivalent to</p>
+
+<div class="doc_code">
+<pre>
+Instruction* pinst = i;
+</pre>
+</div>
+
+<p>It's also possible to turn a class pointer into the corresponding iterator,
+and this is a constant time operation (very efficient). The following code
+snippet illustrates use of the conversion constructors provided by LLVM
+iterators. By using these, you can explicitly grab the iterator of something
+without actually obtaining it via iteration over some structure:</p>
+
+<div class="doc_code">
+<pre>
+void printNextInstruction(Instruction* inst) {
+ BasicBlock::iterator it(inst);
+ ++it; // <i>After this line, it refers to the instruction after *inst</i>
+ if (it != inst-&gt;getParent()-&gt;end()) llvm::cerr &lt;&lt; *it &lt;&lt; "\n";
+}
+</pre>
+</div>
+
+</div>
+
+<!--_______________________________________________________________________-->
+<div class="doc_subsubsection">
+ <a name="iterate_complex">Finding call sites: a slightly more complex
+ example</a>
+</div>
+
+<div class="doc_text">
+
+<p>Say that you're writing a FunctionPass and would like to count all the
+locations in the entire module (that is, across every <tt>Function</tt>) where a
+certain function (i.e., some <tt>Function</tt>*) is already in scope. As you'll
+learn later, you may want to use an <tt>InstVisitor</tt> to accomplish this in a
+much more straight-forward manner, but this example will allow us to explore how
+you'd do it if you didn't have <tt>InstVisitor</tt> around. In pseudo-code, this
+is what we want to do:</p>
+
+<div class="doc_code">
+<pre>
+initialize callCounter to zero
+for each Function f in the Module
+ for each BasicBlock b in f
+ for each Instruction i in b
+ if (i is a CallInst and calls the given function)
+ increment callCounter
+</pre>
+</div>
+
+<p>And the actual code is (remember, because we're writing a
+<tt>FunctionPass</tt>, our <tt>FunctionPass</tt>-derived class simply has to
+override the <tt>runOnFunction</tt> method):</p>
+
+<div class="doc_code">
+<pre>
+Function* targetFunc = ...;
+
+class OurFunctionPass : public FunctionPass {
+ public:
+ OurFunctionPass(): callCounter(0) { }
+
+    virtual bool runOnFunction(Function&amp; F) {
+      for (Function::iterator b = F.begin(), be = F.end(); b != be; ++b) {
+        for (BasicBlock::iterator i = b-&gt;begin(), ie = b-&gt;end(); i != ie; ++i) {
+          if (<a href="#CallInst">CallInst</a>* callInst = <a href="#isa">dyn_cast</a>&lt;<a
+ href="#CallInst">CallInst</a>&gt;(&amp;*i)) {
+            // <i>We know we've encountered a call instruction, so we</i>
+            // <i>need to determine if it's a call to the</i>
+            // <i>function pointed to by targetFunc or not</i>
+
+            if (callInst-&gt;getCalledFunction() == targetFunc)
+              ++callCounter;
+          }
+        }
+      }
+      // <i>This pass only counts call sites; the function is not modified</i>
+      return false;
+    }
+
+ private:
+ unsigned callCounter;
+};
+</pre>
+</div>
+
+</div>
+
+<!--_______________________________________________________________________-->
+<div class="doc_subsubsection">
+ <a name="calls_and_invokes">Treating calls and invokes the same way</a>
+</div>
+
+<div class="doc_text">
+
+<p>You may have noticed that the previous example was a bit oversimplified in
+that it did not deal with call sites generated by 'invoke' instructions. In
+this, and in other situations, you may find that you want to treat
+<tt>CallInst</tt>s and <tt>InvokeInst</tt>s the same way, even though their
+most-specific common base class is <tt>Instruction</tt>, which includes lots of
+less closely-related things. For these cases, LLVM provides a handy wrapper
+class called <a
+href="http://llvm.org/doxygen/classllvm_1_1CallSite.html"><tt>CallSite</tt></a>.
+It is essentially a wrapper around an <tt>Instruction</tt> pointer, with some
+methods that provide functionality common to <tt>CallInst</tt>s and
+<tt>InvokeInst</tt>s.</p>
+
+<p>This class has "value semantics": it should be passed by value, not by
+reference and it should not be dynamically allocated or deallocated using
+<tt>operator new</tt> or <tt>operator delete</tt>. It is efficiently copyable,
+assignable and constructable, with costs equivalent to those of a bare pointer.
+If you look at its definition, it has only a single pointer member.</p>
+
+</div>
+
+<!--_______________________________________________________________________-->
+<div class="doc_subsubsection">
+ <a name="iterate_chains">Iterating over def-use &amp; use-def chains</a>
+</div>
+
+<div class="doc_text">
+
+<p>Frequently, we might have an instance of the <a
+href="/doxygen/classllvm_1_1Value.html">Value Class</a> and we want to
+determine which <tt>User</tt>s use the <tt>Value</tt>. The list of all
+<tt>User</tt>s of a particular <tt>Value</tt> is called a <i>def-use</i> chain.
+For example, let's say we have a <tt>Function*</tt> named <tt>F</tt> that points to a
+particular function <tt>foo</tt>. Finding all of the instructions that
+<i>use</i> <tt>foo</tt> is as simple as iterating over the <i>def-use</i> chain
+of <tt>F</tt>:</p>
+
+<div class="doc_code">
+<pre>
+Function* F = ...;
+
+for (Value::use_iterator i = F-&gt;use_begin(), e = F-&gt;use_end(); i != e; ++i)
+ if (Instruction *Inst = dyn_cast&lt;Instruction&gt;(*i)) {
+ llvm::cerr &lt;&lt; "F is used in instruction:\n";
+ llvm::cerr &lt;&lt; *Inst &lt;&lt; "\n";
+ }
+</pre>
+</div>
+
+<p>Alternately, it's common to have an instance of the <a
+href="/doxygen/classllvm_1_1User.html">User Class</a> and need to know what
+<tt>Value</tt>s are used by it. The list of all <tt>Value</tt>s used by a
+<tt>User</tt> is known as a <i>use-def</i> chain. Instances of class
+<tt>Instruction</tt> are common <tt>User</tt>s, so we might want to iterate over
+all of the values that a particular instruction uses (that is, the operands of
+the particular <tt>Instruction</tt>):</p>
+
+<div class="doc_code">
+<pre>
+Instruction* pi = ...;
+
+for (User::op_iterator i = pi-&gt;op_begin(), e = pi-&gt;op_end(); i != e; ++i) {
+ Value* v = *i;
+ // <i>...</i>
+}
+</pre>
+</div>
+
+<!--
+ def-use chains ("finding all users of"): Value::use_begin/use_end
+ use-def chains ("finding all values used"): User::op_begin/op_end [op=operand]
+-->
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="simplechanges">Making simple changes</a>
+</div>
+
+<div class="doc_text">
+
+<p>There are some primitive transformation operations present in the LLVM
+infrastructure that are worth knowing about. When performing
+transformations, it's fairly common to manipulate the contents of basic
+blocks. This section describes some of the common methods for doing so
+and gives example code.</p>
+
+</div>
+
+<!--_______________________________________________________________________-->
+<div class="doc_subsubsection">
+ <a name="schanges_creating">Creating and inserting new
+ <tt>Instruction</tt>s</a>
+</div>
+
+<div class="doc_text">
+
+<p><i>Instantiating Instructions</i></p>
+
+<p>Creation of <tt>Instruction</tt>s is straight-forward: simply call the
+constructor for the kind of instruction to instantiate and provide the necessary
+parameters. For example, an <tt>AllocaInst</tt> only <i>requires</i> a
+(const-ptr-to) <tt>Type</tt>. Thus:</p>
+
+<div class="doc_code">
+<pre>
+AllocaInst* ai = new AllocaInst(Type::IntTy);
+</pre>
+</div>
+
+<p>will create an <tt>AllocaInst</tt> instance that represents the allocation of
+one integer in the current stack frame, at run time. Each <tt>Instruction</tt>
+subclass is likely to have varying default parameters which change the semantics
+of the instruction, so refer to the <a
+href="/doxygen/classllvm_1_1Instruction.html">doxygen documentation for the subclass of
+Instruction</a> that you're interested in instantiating.</p>
+
+<p><i>Naming values</i></p>
+
+<p>It is very useful to name the values of instructions when you're able to, as
+this facilitates the debugging of your transformations. If you end up looking
+at generated LLVM machine code, you definitely want to have logical names
+associated with the results of instructions! By supplying a value for the
+<tt>Name</tt> (default) parameter of the <tt>Instruction</tt> constructor, you
+associate a logical name with the result of the instruction's execution at
+run time. For example, say that I'm writing a transformation that dynamically
+allocates space for an integer on the stack, and that integer is going to be
+used as some kind of index by some other code. To accomplish this, I place an
+<tt>AllocaInst</tt> at the first point in the first <tt>BasicBlock</tt> of some
+<tt>Function</tt>, and I'm intending to use it within the same
+<tt>Function</tt>. I might do:</p>
+
+<div class="doc_code">
+<pre>
+AllocaInst* pa = new AllocaInst(Type::IntTy, 0, "indexLoc");
+</pre>
+</div>
+
+<p>where <tt>indexLoc</tt> is now the logical name of the instruction's
+execution value, which is a pointer to an integer on the run time stack.</p>
+
+<p><i>Inserting instructions</i></p>
+
+<p>There are essentially two ways to insert an <tt>Instruction</tt>
+into an existing sequence of instructions that form a <tt>BasicBlock</tt>:</p>
+
+<ul>
+ <li>Insertion into an explicit instruction list
+
+ <p>Given a <tt>BasicBlock* pb</tt>, an <tt>Instruction* pi</tt> within that
+ <tt>BasicBlock</tt>, and a newly-created instruction we wish to insert
+ before <tt>*pi</tt>, we do the following: </p>
+
+<div class="doc_code">
+<pre>
+BasicBlock *pb = ...;
+Instruction *pi = ...;
+Instruction *newInst = new Instruction(...);
+
+pb-&gt;getInstList().insert(pi, newInst); // <i>Inserts newInst before pi in pb</i>
+</pre>
+</div>
+
+ <p>Appending to the end of a <tt>BasicBlock</tt> is so common that
+ the <tt>Instruction</tt> class and <tt>Instruction</tt>-derived
+ classes provide constructors which take a pointer to a
+  <tt>BasicBlock</tt> to be appended to. For example, code that
+ looked like: </p>
+
+<div class="doc_code">
+<pre>
+BasicBlock *pb = ...;
+Instruction *newInst = new Instruction(...);
+
+pb-&gt;getInstList().push_back(newInst); // <i>Appends newInst to pb</i>
+</pre>
+</div>
+
+ <p>becomes: </p>
+
+<div class="doc_code">
+<pre>
+BasicBlock *pb = ...;
+Instruction *newInst = new Instruction(..., pb);
+</pre>
+</div>
+
+ <p>which is much cleaner, especially if you are creating
+ long instruction streams.</p></li>
+
+ <li>Insertion into an implicit instruction list
+
+ <p><tt>Instruction</tt> instances that are already in <tt>BasicBlock</tt>s
+ are implicitly associated with an existing instruction list: the instruction
+ list of the enclosing basic block. Thus, we could have accomplished the same
+ thing as the above code without being given a <tt>BasicBlock</tt> by doing:
+ </p>
+
+<div class="doc_code">
+<pre>
+Instruction *pi = ...;
+Instruction *newInst = new Instruction(...);
+
+pi-&gt;getParent()-&gt;getInstList().insert(pi, newInst);
+</pre>
+</div>
+
+ <p>In fact, this sequence of steps occurs so frequently that the
+ <tt>Instruction</tt> class and <tt>Instruction</tt>-derived classes provide
+ constructors which take (as a default parameter) a pointer to an
+ <tt>Instruction</tt> which the newly-created <tt>Instruction</tt> should
+ precede. That is, <tt>Instruction</tt> constructors are capable of
+ inserting the newly-created instance into the <tt>BasicBlock</tt> of a
+ provided instruction, immediately before that instruction. Using an
+  <tt>Instruction</tt> constructor with an <tt>insertBefore</tt> (default)
+ parameter, the above code becomes:</p>
+
+<div class="doc_code">
+<pre>
+Instruction* pi = ...;
+Instruction* newInst = new Instruction(..., pi);
+</pre>
+</div>
+
+ <p>which is much cleaner, especially if you're creating a lot of
+ instructions and adding them to <tt>BasicBlock</tt>s.</p></li>
+</ul>
+
+</div>
+
+<!--_______________________________________________________________________-->
+<div class="doc_subsubsection">
+ <a name="schanges_deleting">Deleting <tt>Instruction</tt>s</a>
+</div>
+
+<div class="doc_text">
+
+<p>Deleting an instruction from an existing sequence of instructions that form a
+<a href="#BasicBlock"><tt>BasicBlock</tt></a> is very straight-forward. First,
+you must have a pointer to the instruction that you wish to delete. Second, you
+need to obtain the pointer to that instruction's basic block. You use the
+pointer to the basic block to get its list of instructions and then use the
+erase function to remove your instruction. For example:</p>
+
+<div class="doc_code">
+<pre>
+<a href="#Instruction">Instruction</a> *I = .. ;
+<a href="#BasicBlock">BasicBlock</a> *BB = I-&gt;getParent();
+
+BB-&gt;getInstList().erase(I);
+</pre>
+</div>
+
+</div>
+
+<!--_______________________________________________________________________-->
+<div class="doc_subsubsection">
+ <a name="schanges_replacing">Replacing an <tt>Instruction</tt> with another
+ <tt>Value</tt></a>
+</div>
+
+<div class="doc_text">
+
+<p><i>Replacing individual instructions</i></p>
+
+<p>Including "<a href="/doxygen/BasicBlockUtils_8h-source.html">llvm/Transforms/Utils/BasicBlockUtils.h</a>"
+permits use of two very useful replace functions: <tt>ReplaceInstWithValue</tt>
+and <tt>ReplaceInstWithInst</tt>.</p>
+
+<h4>The <tt>ReplaceInstWithValue</tt> and <tt>ReplaceInstWithInst</tt> functions</h4>
+
+<ul>
+ <li><tt>ReplaceInstWithValue</tt>
+
+ <p>This function replaces all uses (within a basic block) of a given
+ instruction with a value, and then removes the original instruction. The
+ following example illustrates the replacement of the result of a particular
+ <tt>AllocaInst</tt> that allocates memory for a single integer with a null
+ pointer to an integer.</p>
+
+<div class="doc_code">
+<pre>
+AllocaInst* instToReplace = ...;
+BasicBlock::iterator ii(instToReplace);
+
+ReplaceInstWithValue(instToReplace-&gt;getParent()-&gt;getInstList(), ii,
+ Constant::getNullValue(PointerType::get(Type::IntTy)));
+</pre></div></li>
+
+ <li><tt>ReplaceInstWithInst</tt>
+
+ <p>This function replaces a particular instruction with another
+ instruction. The following example illustrates the replacement of one
+ <tt>AllocaInst</tt> with another.</p>
+
+<div class="doc_code">
+<pre>
+AllocaInst* instToReplace = ...;
+BasicBlock::iterator ii(instToReplace);
+
+ReplaceInstWithInst(instToReplace-&gt;getParent()-&gt;getInstList(), ii,
+ new AllocaInst(Type::IntTy, 0, "ptrToReplacedInt"));
+</pre></div></li>
+</ul>
+
+<p><i>Replacing multiple uses of <tt>User</tt>s and <tt>Value</tt>s</i></p>
+
+<p>You can use <tt>Value::replaceAllUsesWith</tt> and
+<tt>User::replaceUsesOfWith</tt> to change more than one use at a time. See the
+doxygen documentation for the <a href="/doxygen/classllvm_1_1Value.html">Value Class</a>
+and <a href="/doxygen/classllvm_1_1User.html">User Class</a>, respectively, for more
+information.</p>
+
+<!-- Value::replaceAllUsesWith User::replaceUsesOfWith Point out:
+include/llvm/Transforms/Utils/ especially BasicBlockUtils.h with:
+ReplaceInstWithValue, ReplaceInstWithInst -->
+
+</div>
+
+<!--_______________________________________________________________________-->
+<div class="doc_subsubsection">
+ <a name="schanges_deletingGV">Deleting <tt>GlobalVariable</tt>s</a>
+</div>
+
+<div class="doc_text">
+
+<p>Deleting a global variable from a module is just as easy as deleting an
+Instruction. First, you must have a pointer to the global variable that you wish
+ to delete. You use this pointer to erase it from its parent, the module.
+ For example:</p>
+
+<div class="doc_code">
+<pre>
+<a href="#GlobalVariable">GlobalVariable</a> *GV = .. ;
+
+GV-&gt;eraseFromParent();
+</pre>
+</div>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="advanced">Advanced Topics</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+<p>
+This section describes some of the advanced or obscure APIs that most clients
+do not need to be aware of. These APIs tend to manage the inner workings of the
+LLVM system, and only need to be accessed in unusual circumstances.
+</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="TypeResolve">LLVM Type Resolution</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+The LLVM type system has a very simple goal: allow clients to compare types for
+structural equality with a simple pointer comparison (aka a shallow compare).
+This goal makes clients much simpler and faster, and is used throughout the LLVM
+system.
+</p>
+
+<p>
+Unfortunately, achieving this goal is not a simple matter. In particular,
+recursive types and late resolution of opaque types make the situation very
+difficult to handle. Fortunately, for the most part, our implementation makes
+most clients able to be completely unaware of the nasty internal details. The
+primary case where clients are exposed to the inner workings of it is when
+building a recursive type. In addition to this case, the LLVM bitcode reader,
+assembly parser, and linker also have to be aware of the inner workings of this
+system.
+</p>
+
+<p>
+For our purposes below, we need three concepts. First, an "Opaque Type" is
+exactly as defined in the <a href="LangRef.html#t_opaque">language
+reference</a>. Second, an "Abstract Type" is any type which includes an
+opaque type as part of its type graph (for example "<tt>{ opaque, i32 }</tt>").
+Third, a concrete type is a type that is not an abstract type (e.g. "<tt>{ i32,
+float }</tt>").
+</p>
+
+</div>
+
+<!-- ______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="BuildRecType">Basic Recursive Type Construction</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+Because the most common question is "how do I build a recursive type with LLVM",
+we answer it now and explain it as we go. Here we include enough to cause this
+to be emitted to an output .ll file:
+</p>
+
+<div class="doc_code">
+<pre>
+%mylist = type { %mylist*, i32 }
+</pre>
+</div>
+
+<p>
+To build this, use the following LLVM APIs:
+</p>
+
+<div class="doc_code">
+<pre>
+// <i>Create the initial outer struct</i>
+<a href="#PATypeHolder">PATypeHolder</a> StructTy = OpaqueType::get();
+std::vector&lt;const Type*&gt; Elts;
+Elts.push_back(PointerType::get(StructTy));
+Elts.push_back(Type::IntTy);
+StructType *NewSTy = StructType::get(Elts);
+
+// <i>At this point, NewSTy = "{ opaque*, i32 }". Tell VMCore that</i>
+// <i>the struct and the opaque type are actually the same.</i>
+cast&lt;OpaqueType&gt;(StructTy.get())-&gt;<a href="#refineAbstractTypeTo">refineAbstractTypeTo</a>(NewSTy);
+
+// <i>NewSTy is potentially invalidated, but StructTy (a <a href="#PATypeHolder">PATypeHolder</a>) is</i>
+// <i>kept up-to-date</i>
+NewSTy = cast&lt;StructType&gt;(StructTy.get());
+
+// <i>Add a name for the type to the module symbol table (optional)</i>
+MyModule-&gt;addTypeName("mylist", NewSTy);
+</pre>
+</div>
+
+<p>
+This code shows the basic approach used to build recursive types: build a
+non-recursive type using 'opaque', then use type unification to close the cycle.
+The type unification step is performed by the <tt><a
+href="#refineAbstractTypeTo">refineAbstractTypeTo</a></tt> method, which is
+described next. After that, we describe the <a
+href="#PATypeHolder">PATypeHolder class</a>.
+</p>
+
+</div>
+
+<!-- ______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="refineAbstractTypeTo">The <tt>refineAbstractTypeTo</tt> method</a>
+</div>
+
+<div class="doc_text">
+<p>
+The <tt>refineAbstractTypeTo</tt> method starts the type unification process.
+While this method is actually a member of the DerivedType class, it is most
+often used on OpaqueType instances. Type unification is actually a recursive
+process. After unification, types can become structurally isomorphic to
+existing types, and all duplicates are deleted (to preserve pointer equality).
+</p>
+
+<p>
+In the example above, the OpaqueType object is definitely deleted.
+Additionally, if there is an "{ \2*, i32}" type already created in the system,
+the pointer and struct type created are <b>also</b> deleted. Obviously whenever
+a type is deleted, any "Type*" pointers in the program are invalidated. As
+such, it is safest to avoid having <i>any</i> "Type*" pointers to abstract types
+live across a call to <tt>refineAbstractTypeTo</tt> (note that non-abstract
+types can never move or be deleted). To deal with this, the <a
+href="#PATypeHolder">PATypeHolder</a> class is used to maintain a stable
+reference to a possibly refined type, and the <a
+href="#AbstractTypeUser">AbstractTypeUser</a> class is used to update more
+complex data structures.
+</p>
+
+</div>
+
+<!-- ______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="PATypeHolder">The PATypeHolder Class</a>
+</div>
+
+<div class="doc_text">
+<p>
+PATypeHolder is a form of a "smart pointer" for Type objects. When VMCore
+happily goes about nuking types that become isomorphic to existing types, it
+automatically updates all PATypeHolder objects to point to the new type. In the
+example above, this allows the code to maintain a pointer to the resultant
+resolved recursive type, even though the Type*'s are potentially invalidated.
+</p>
+
+<p>
+PATypeHolder is an extremely light-weight object that uses a lazy union-find
+implementation to update pointers. For example the pointer from a Value to its
+Type is maintained by PATypeHolder objects.
+</p>
+
+</div>
+
+<!-- ______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="AbstractTypeUser">The AbstractTypeUser Class</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+Some data structures need to perform more complex updates when types get
+resolved. To support this, a class can derive from the AbstractTypeUser class.
+This class
+allows it to get callbacks when certain types are resolved. To register to get
+callbacks for a particular type, the DerivedType::{add/remove}AbstractTypeUser
+methods can be called on a type. Note that these methods only work for <i>
+ abstract</i> types. Concrete types (those that do not include any opaque
+objects) can never be refined.
+</p>
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="SymbolTable">The <tt>ValueSymbolTable</tt> and
+ <tt>TypeSymbolTable</tt> classes</a>
+</div>
+
+<div class="doc_text">
+<p>The <tt><a href="http://llvm.org/doxygen/classllvm_1_1ValueSymbolTable.html">
+ValueSymbolTable</a></tt> class provides a symbol table that the <a
+href="#Function"><tt>Function</tt></a> and <a href="#Module">
+<tt>Module</tt></a> classes use for naming value definitions. The symbol table
+can provide a name for any <a href="#Value"><tt>Value</tt></a>.
+The <tt><a href="http://llvm.org/doxygen/classllvm_1_1TypeSymbolTable.html">
+TypeSymbolTable</a></tt> class is used by the <tt>Module</tt> class to store
+names for types.</p>
+
+<p>Note that the <tt>SymbolTable</tt> class should not be directly accessed
+by most clients. It should only be used when iteration over the symbol table
+names themselves is required, which is very special purpose. Note that not
+all LLVM
+<a href="#Value">Value</a>s have names, and those without names (i.e. they have
+an empty name) do not exist in the symbol table.
+</p>
+
+<p>These symbol tables support iteration over the values/types in the symbol
+table with <tt>begin/end/iterator</tt> and support querying to see if a
+specific name is in the symbol table (with <tt>lookup</tt>). The
+<tt>ValueSymbolTable</tt> class exposes no public mutator methods; instead,
+simply call <tt>setName</tt> on a value, which will autoinsert it into the
+appropriate symbol table. For types, use the Module::addTypeName method to
+insert entries into the symbol table.</p>
+
+</div>
+
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="coreclasses">The Core LLVM Class Hierarchy Reference </a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+<p><tt>#include "<a href="/doxygen/Type_8h-source.html">llvm/Type.h</a>"</tt>
+<br>doxygen info: <a href="/doxygen/classllvm_1_1Type.html">Type Class</a></p>
+
+<p>The Core LLVM classes are the primary means of representing the program
+being inspected or transformed. The core LLVM classes are defined in
+header files in the <tt>include/llvm/</tt> directory, and implemented in
+the <tt>lib/VMCore</tt> directory.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="Type">The <tt>Type</tt> class and Derived Types</a>
+</div>
+
+<div class="doc_text">
+
+ <p><tt>Type</tt> is a superclass of all type classes. Every <tt>Value</tt> has
+ a <tt>Type</tt>. <tt>Type</tt> cannot be instantiated directly but only
+ through its subclasses. Certain primitive types (<tt>VoidType</tt>,
+ <tt>LabelType</tt>, <tt>FloatType</tt> and <tt>DoubleType</tt>) have hidden
+ subclasses. They are hidden because they offer no useful functionality beyond
+ what the <tt>Type</tt> class offers except to distinguish themselves from
+ other subclasses of <tt>Type</tt>.</p>
+ <p>All other types are subclasses of <tt>DerivedType</tt>. Types can be
+ named, but this is not a requirement. There exists exactly
+ one instance of a given shape at any one time. This allows type equality to
+ be performed with address equality of the Type Instance. That is, given two
+ <tt>Type*</tt> values, the types are identical if the pointers are identical.
+ </p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+  <a name="m_Type">Important Public Methods</a>
+</div>
+
+<div class="doc_text">
+
+<ul>
+ <li><tt>bool isInteger() const</tt>: Returns true for any integer type.</li>
+
+ <li><tt>bool isFloatingPoint()</tt>: Return true if this is one of the two
+ floating point types.</li>
+
+ <li><tt>bool isAbstract()</tt>: Return true if the type is abstract (contains
+ an OpaqueType anywhere in its definition).</li>
+
+ <li><tt>bool isSized()</tt>: Return true if the type has known size. Things
+ that don't have a size are abstract types, labels and void.</li>
+
+</ul>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+  <a name="derivedtypes">Important Derived Types</a>
+</div>
+<div class="doc_text">
+<dl>
+ <dt><tt>IntegerType</tt></dt>
+ <dd>Subclass of DerivedType that represents integer types of any bit width.
+ Any bit width between <tt>IntegerType::MIN_INT_BITS</tt> (1) and
+ <tt>IntegerType::MAX_INT_BITS</tt> (~8 million) can be represented.
+ <ul>
+ <li><tt>static const IntegerType* get(unsigned NumBits)</tt>: get an integer
+ type of a specific bit width.</li>
+ <li><tt>unsigned getBitWidth() const</tt>: Get the bit width of an integer
+ type.</li>
+ </ul>
+ </dd>
+ <dt><tt>SequentialType</tt></dt>
+ <dd>This is subclassed by ArrayType and PointerType
+ <ul>
+ <li><tt>const Type * getElementType() const</tt>: Returns the type of each
+ of the elements in the sequential type. </li>
+ </ul>
+ </dd>
+ <dt><tt>ArrayType</tt></dt>
+ <dd>This is a subclass of SequentialType and defines the interface for array
+ types.
+ <ul>
+ <li><tt>unsigned getNumElements() const</tt>: Returns the number of
+ elements in the array. </li>
+ </ul>
+ </dd>
+ <dt><tt>PointerType</tt></dt>
+ <dd>Subclass of SequentialType for pointer types.</dd>
+ <dt><tt>VectorType</tt></dt>
+ <dd>Subclass of SequentialType for vector types. A
+ vector type is similar to an ArrayType but is distinguished because it is
+  a first class type whereas ArrayType is not. Vector types are used for
+  vector operations and are usually small vectors of an integer or floating
+ point type.</dd>
+ <dt><tt>StructType</tt></dt>
+ <dd>Subclass of DerivedType for struct types.</dd>
+ <dt><tt><a name="FunctionType">FunctionType</a></tt></dt>
+ <dd>Subclass of DerivedType for function types.
+ <ul>
+ <li><tt>bool isVarArg() const</tt>: Returns true if it's a vararg
+ function.</li>
+ <li><tt> const Type * getReturnType() const</tt>: Returns the
+ return type of the function.</li>
+ <li><tt>const Type * getParamType (unsigned i)</tt>: Returns
+ the type of the ith parameter.</li>
+ <li><tt> const unsigned getNumParams() const</tt>: Returns the
+ number of formal parameters.</li>
+ </ul>
+ </dd>
+ <dt><tt>OpaqueType</tt></dt>
+ <dd>Subclass of DerivedType for abstract types. This class
+ defines no content and is used as a placeholder for some other type. Note
+ that OpaqueType is used (temporarily) during type resolution for forward
+ references of types. Once the referenced type is resolved, the OpaqueType
+ is replaced with the actual type. OpaqueType can also be used for data
+ abstraction. At link time opaque types can be resolved to actual types
+ of the same name.</dd>
+</dl>
+</div>
+
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="Module">The <tt>Module</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>#include "<a
+href="/doxygen/Module_8h-source.html">llvm/Module.h</a>"</tt><br> doxygen info:
+<a href="/doxygen/classllvm_1_1Module.html">Module Class</a></p>
+
+<p>The <tt>Module</tt> class represents the top level structure present in LLVM
+programs. An LLVM module is effectively either a translation unit of the
+original program or a combination of several translation units merged by the
+linker. The <tt>Module</tt> class keeps track of a list of <a
+href="#Function"><tt>Function</tt></a>s, a list of <a
+href="#GlobalVariable"><tt>GlobalVariable</tt></a>s, and a <a
+href="#SymbolTable"><tt>SymbolTable</tt></a>. Additionally, it contains a few
+helpful member functions that try to make common operations easy.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="m_Module">Important Public Members of the <tt>Module</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<ul>
+ <li><tt>Module::Module(std::string name = "")</tt></li>
+</ul>
+
+<p>Constructing a <a href="#Module">Module</a> is easy. You can optionally
+provide a name for it (probably based on the name of the translation unit).</p>
+
+<ul>
+ <li><tt>Module::iterator</tt> - Typedef for function list iterator<br>
+ <tt>Module::const_iterator</tt> - Typedef for const_iterator.<br>
+
+ <tt>begin()</tt>, <tt>end()</tt>
+ <tt>size()</tt>, <tt>empty()</tt>
+
+ <p>These are forwarding methods that make it easy to access the contents of
+ a <tt>Module</tt> object's <a href="#Function"><tt>Function</tt></a>
+ list.</p></li>
+
+ <li><tt>Module::FunctionListType &amp;getFunctionList()</tt>
+
+ <p> Returns the list of <a href="#Function"><tt>Function</tt></a>s. This is
+ necessary to use when you need to update the list or perform a complex
+ action that doesn't have a forwarding method.</p>
+
+ <p><!-- Global Variable --></p></li>
+</ul>
+
+<hr>
+
+<ul>
+ <li><tt>Module::global_iterator</tt> - Typedef for global variable list iterator<br>
+
+ <tt>Module::const_global_iterator</tt> - Typedef for const_iterator.<br>
+
+ <tt>global_begin()</tt>, <tt>global_end()</tt>
+ <tt>global_size()</tt>, <tt>global_empty()</tt>
+
+ <p> These are forwarding methods that make it easy to access the contents of
+ a <tt>Module</tt> object's <a
+ href="#GlobalVariable"><tt>GlobalVariable</tt></a> list.</p></li>
+
+ <li><tt>Module::GlobalListType &amp;getGlobalList()</tt>
+
+ <p>Returns the list of <a
+ href="#GlobalVariable"><tt>GlobalVariable</tt></a>s. This is necessary to
+ use when you need to update the list or perform a complex action that
+ doesn't have a forwarding method.</p>
+
+ <p><!-- Symbol table stuff --> </p></li>
+</ul>
+
+<hr>
+
+<ul>
+ <li><tt><a href="#SymbolTable">SymbolTable</a> *getSymbolTable()</tt>
+
+ <p>Return a pointer to the <a href="#SymbolTable"><tt>SymbolTable</tt></a>
+ for this <tt>Module</tt>.</p>
+
+ <p><!-- Convenience methods --></p></li>
+</ul>
+
+<hr>
+
+<ul>
+ <li><tt><a href="#Function">Function</a> *getFunction(const std::string
+ &amp;Name, const <a href="#FunctionType">FunctionType</a> *Ty)</tt>
+
+ <p>Look up the specified function in the <tt>Module</tt> <a
+ href="#SymbolTable"><tt>SymbolTable</tt></a>. If it does not exist, return
+ <tt>null</tt>.</p></li>
+
+ <li><tt><a href="#Function">Function</a> *getOrInsertFunction(const
+ std::string &amp;Name, const <a href="#FunctionType">FunctionType</a> *T)</tt>
+
+ <p>Look up the specified function in the <tt>Module</tt> <a
+ href="#SymbolTable"><tt>SymbolTable</tt></a>. If it does not exist, add an
+ external declaration for the function and return it.</p></li>
+
+ <li><tt>std::string getTypeName(const <a href="#Type">Type</a> *Ty)</tt>
+
+ <p>If there is at least one entry in the <a
+ href="#SymbolTable"><tt>SymbolTable</tt></a> for the specified <a
+ href="#Type"><tt>Type</tt></a>, return it. Otherwise return the empty
+ string.</p></li>
+
+ <li><tt>bool addTypeName(const std::string &amp;Name, const <a
+ href="#Type">Type</a> *Ty)</tt>
+
+ <p>Insert an entry in the <a href="#SymbolTable"><tt>SymbolTable</tt></a>
+ mapping <tt>Name</tt> to <tt>Ty</tt>. If there is already an entry for this
+ name, true is returned and the <a
+ href="#SymbolTable"><tt>SymbolTable</tt></a> is not modified.</p></li>
+</ul>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="Value">The <tt>Value</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>#include "<a href="/doxygen/Value_8h-source.html">llvm/Value.h</a>"</tt>
+<br>
+doxygen info: <a href="/doxygen/classllvm_1_1Value.html">Value Class</a></p>
+
+<p>The <tt>Value</tt> class is the most important class in the LLVM Source
+base. It represents a typed value that may be used (among other things) as an
+operand to an instruction. There are many different types of <tt>Value</tt>s,
+such as <a href="#Constant"><tt>Constant</tt></a>s and <a
+href="#Argument"><tt>Argument</tt></a>s. Even <a
+href="#Instruction"><tt>Instruction</tt></a>s and <a
+href="#Function"><tt>Function</tt></a>s are <tt>Value</tt>s.</p>
+
+<p>A particular <tt>Value</tt> may be used many times in the LLVM representation
+for a program. For example, an incoming argument to a function (represented
+with an instance of the <a href="#Argument">Argument</a> class) is "used" by
+every instruction in the function that references the argument. To keep track
+of this relationship, the <tt>Value</tt> class keeps a list of all of the <a
+href="#User"><tt>User</tt></a>s that are using it (the <a
+href="#User"><tt>User</tt></a> class is a base class for all nodes in the LLVM
+graph that can refer to <tt>Value</tt>s). This use list is how LLVM represents
+def-use information in the program, and is accessible through the <tt>use_</tt>*
+methods, shown below.</p>
+
+<p>Because LLVM is a typed representation, every LLVM <tt>Value</tt> is typed,
+and this <a href="#Type">Type</a> is available through the <tt>getType()</tt>
+method. In addition, all LLVM values can be named. The "name" of the
+<tt>Value</tt> is a symbolic string printed in the LLVM code:</p>
+
+<div class="doc_code">
+<pre>
+%<b>foo</b> = add i32 1, 2
+</pre>
+</div>
+
+<p><a name="nameWarning">The name of this instruction is "foo".</a> <b>NOTE</b>
+that the name of any value may be missing (an empty string), so names should
+<b>ONLY</b> be used for debugging (making the source code easier to read,
+debugging printouts), they should not be used to keep track of values or map
+between them. For this purpose, use a <tt>std::map</tt> of pointers to the
+<tt>Value</tt> itself instead.</p>
+
+<p>One important aspect of LLVM is that there is no distinction between an SSA
+variable and the operation that produces it. Because of this, any reference to
+the value produced by an instruction (or the value available as an incoming
+argument, for example) is represented as a direct pointer to the instance of
+the class that
+represents this value. Although this may take some getting used to, it
+simplifies the representation and makes it easier to manipulate.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="m_Value">Important Public Members of the <tt>Value</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<ul>
+ <li><tt>Value::use_iterator</tt> - Typedef for iterator over the
+use-list<br>
+ <tt>Value::use_const_iterator</tt> - Typedef for const_iterator over
+the use-list<br>
+ <tt>unsigned use_size()</tt> - Returns the number of users of the
+value.<br>
+ <tt>bool use_empty()</tt> - Returns true if there are no users.<br>
+ <tt>use_iterator use_begin()</tt> - Get an iterator to the start of
+the use-list.<br>
+ <tt>use_iterator use_end()</tt> - Get an iterator to the end of the
+use-list.<br>
+ <tt><a href="#User">User</a> *use_back()</tt> - Returns the last
+element in the list.
+ <p> These methods are the interface to access the def-use
+information in LLVM. As with all other iterators in LLVM, the naming
+conventions follow the conventions defined by the <a href="#stl">STL</a>.</p>
+ </li>
+ <li><tt><a href="#Type">Type</a> *getType() const</tt>
+ <p>This method returns the Type of the Value.</p>
+ </li>
+ <li><tt>bool hasName() const</tt><br>
+ <tt>std::string getName() const</tt><br>
+ <tt>void setName(const std::string &amp;Name)</tt>
+ <p> This family of methods is used to access and assign a name to a <tt>Value</tt>;
+be aware of the <a href="#nameWarning">precaution above</a>.</p>
+ </li>
+ <li><tt>void replaceAllUsesWith(Value *V)</tt>
+
+ <p>This method traverses the use list of a <tt>Value</tt> changing all <a
+ href="#User"><tt>User</tt>s</a> of the current value to refer to
+ "<tt>V</tt>" instead. For example, if you detect that an instruction always
+ produces a constant value (for example through constant folding), you can
+ replace all uses of the instruction with the constant like this:</p>
+
+<div class="doc_code">
+<pre>
+Inst-&gt;replaceAllUsesWith(ConstVal);
+</pre>
+</div>
+ </li>
+
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="User">The <tt>User</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+<tt>#include "<a href="/doxygen/User_8h-source.html">llvm/User.h</a>"</tt><br>
+doxygen info: <a href="/doxygen/classllvm_1_1User.html">User Class</a><br>
+Superclass: <a href="#Value"><tt>Value</tt></a></p>
+
+<p>The <tt>User</tt> class is the common base class of all LLVM nodes that may
+refer to <a href="#Value"><tt>Value</tt></a>s. It exposes a list of "Operands"
+that are all of the <a href="#Value"><tt>Value</tt></a>s that the User is
+referring to. The <tt>User</tt> class itself is a subclass of
+<tt>Value</tt>.</p>
+
+<p>The operands of a <tt>User</tt> point directly to the LLVM <a
+href="#Value"><tt>Value</tt></a> that it refers to. Because LLVM uses Static
+Single Assignment (SSA) form, there can only be one definition referred to,
+allowing this direct connection. This connection provides the use-def
+information in LLVM.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="m_User">Important Public Members of the <tt>User</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>User</tt> class exposes the operand list in two ways: through
+an index access interface and through an iterator based interface.</p>
+
+<ul>
+ <li><tt>Value *getOperand(unsigned i)</tt><br>
+ <tt>unsigned getNumOperands()</tt>
+ <p> These two methods expose the operands of the <tt>User</tt> in a
+convenient form for direct access.</p></li>
+
+ <li><tt>User::op_iterator</tt> - Typedef for iterator over the operand
+list<br>
+ <tt>op_iterator op_begin()</tt> - Get an iterator to the start of
+the operand list.<br>
+ <tt>op_iterator op_end()</tt> - Get an iterator to the end of the
+operand list.
+ <p> Together, these methods make up the iterator based interface to
+the operands of a <tt>User</tt>.</p></li>
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="Instruction">The <tt>Instruction</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>#include "</tt><tt><a
+href="/doxygen/Instruction_8h-source.html">llvm/Instruction.h</a>"</tt><br>
+doxygen info: <a href="/doxygen/classllvm_1_1Instruction.html">Instruction Class</a><br>
+Superclasses: <a href="#User"><tt>User</tt></a>, <a
+href="#Value"><tt>Value</tt></a></p>
+
+<p>The <tt>Instruction</tt> class is the common base class for all LLVM
+instructions. It provides only a few methods, but is a very commonly used
+class. The primary data tracked by the <tt>Instruction</tt> class itself is the
+opcode (instruction type) and the parent <a
+href="#BasicBlock"><tt>BasicBlock</tt></a> the <tt>Instruction</tt> is embedded
+into. To represent a specific type of instruction, one of many subclasses of
+<tt>Instruction</tt> is used.</p>
+
+<p> Because the <tt>Instruction</tt> class subclasses the <a
+href="#User"><tt>User</tt></a> class, its operands can be accessed in the same
+way as for other <a href="#User"><tt>User</tt></a>s (with the
+<tt>getOperand()</tt>/<tt>getNumOperands()</tt> and
+<tt>op_begin()</tt>/<tt>op_end()</tt> methods).</p> <p> An important file for
+the <tt>Instruction</tt> class is the <tt>llvm/Instruction.def</tt> file. This
+file contains some meta-data about the various different types of instructions
+in LLVM. It describes the enum values that are used as opcodes (for example
+<tt>Instruction::Add</tt> and <tt>Instruction::ICmp</tt>), as well as the
+concrete sub-classes of <tt>Instruction</tt> that implement the instruction (for
+example <tt><a href="#BinaryOperator">BinaryOperator</a></tt> and <tt><a
+href="#CmpInst">CmpInst</a></tt>). Unfortunately, the use of macros in
+this file confuses doxygen, so these enum values don't show up correctly in the
+<a href="/doxygen/classllvm_1_1Instruction.html">doxygen output</a>.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="s_Instruction">Important Subclasses of the <tt>Instruction</tt>
+ class</a>
+</div>
+<div class="doc_text">
+ <ul>
+ <li><tt><a name="BinaryOperator">BinaryOperator</a></tt>
+ <p>This subclass represents all two operand instructions whose operands
+ must be the same type, except for the comparison instructions.</p></li>
+ <li><tt><a name="CastInst">CastInst</a></tt>
+ <p>This subclass is the parent of the 12 casting instructions. It provides
+ common operations on cast instructions.</p></li>
+ <li><tt><a name="CmpInst">CmpInst</a></tt>
+ <p>This subclass represents the two comparison instructions,
+ <a href="LangRef.html#i_icmp">ICmpInst</a> (integer operands), and
+ <a href="LangRef.html#i_fcmp">FCmpInst</a> (floating point operands).</p></li>
+ <li><tt><a name="TerminatorInst">TerminatorInst</a></tt>
+ <p>This subclass is the parent of all terminator instructions (those which
+ can terminate a block).</p></li>
+ </ul>
+ </div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="m_Instruction">Important Public Members of the <tt>Instruction</tt>
+ class</a>
+</div>
+
+<div class="doc_text">
+
+<ul>
+ <li><tt><a href="#BasicBlock">BasicBlock</a> *getParent()</tt>
+ <p>Returns the <a href="#BasicBlock"><tt>BasicBlock</tt></a> that
+this <tt>Instruction</tt> is embedded into.</p></li>
+ <li><tt>bool mayWriteToMemory()</tt>
+ <p>Returns true if the instruction writes to memory, i.e. it is a
+ <tt>call</tt>, <tt>free</tt>, <tt>invoke</tt>, or <tt>store</tt>.</p></li>
+ <li><tt>unsigned getOpcode()</tt>
+ <p>Returns the opcode for the <tt>Instruction</tt>.</p></li>
+ <li><tt><a href="#Instruction">Instruction</a> *clone() const</tt>
+ <p>Returns another instance of the specified instruction, identical
+in all ways to the original except that the instruction has no parent
+(i.e. it's not embedded into a <a href="#BasicBlock"><tt>BasicBlock</tt></a>),
+and it has no name.</p></li>
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="Constant">The <tt>Constant</tt> class and subclasses</a>
+</div>
+
+<div class="doc_text">
+
+<p>Constant represents a base class for different types of constants. It
+is subclassed by ConstantInt, ConstantArray, etc. for representing
+the various types of Constants. <a href="#GlobalValue">GlobalValue</a> is also
+a subclass, which represents the address of a global variable or function.
+</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">Important Subclasses of Constant </div>
+<div class="doc_text">
+<ul>
+ <li>ConstantInt : This subclass of Constant represents an integer constant of
+ any width.
+ <ul>
+ <li><tt>const APInt&amp; getValue() const</tt>: Returns the underlying
+ value of this constant, an APInt value.</li>
+ <li><tt>int64_t getSExtValue() const</tt>: Converts the underlying APInt
+ value to an int64_t via sign extension. If the value (not the bit width)
+ of the APInt is too large to fit in an int64_t, an assertion will result.
+ For this reason, use of this method is discouraged.</li>
+ <li><tt>uint64_t getZExtValue() const</tt>: Converts the underlying APInt
+ value to a uint64_t via zero extension. If the value (not the bit width)
+ of the APInt is too large to fit in a uint64_t, an assertion will result.
+ For this reason, use of this method is discouraged.</li>
+ <li><tt>static ConstantInt* get(const APInt&amp; Val)</tt>: Returns the
+ ConstantInt object that represents the value provided by <tt>Val</tt>.
+ The type is implied as the IntegerType that corresponds to the bit width
+ of <tt>Val</tt>.</li>
+ <li><tt>static ConstantInt* get(const Type *Ty, uint64_t Val)</tt>:
+ Returns the ConstantInt object that represents the value provided by
+ <tt>Val</tt> for integer type <tt>Ty</tt>.</li>
+ </ul>
+ </li>
+ <li>ConstantFP : This class represents a floating point constant.
+ <ul>
+ <li><tt>double getValue() const</tt>: Returns the underlying value of
+ this constant. </li>
+ </ul>
+ </li>
+ <li>ConstantArray : This represents a constant array.
+ <ul>
+ <li><tt>const std::vector&lt;Use&gt; &amp;getValues() const</tt>: Returns
+ a vector of component constants that make up this array. </li>
+ </ul>
+ </li>
+ <li>ConstantStruct : This represents a constant struct.
+ <ul>
+ <li><tt>const std::vector&lt;Use&gt; &amp;getValues() const</tt>: Returns
+ a vector of component constants that make up this struct. </li>
+ </ul>
+ </li>
+ <li>GlobalValue : This represents either a global variable or a function. In
+ either case, the value is a constant fixed address (after linking).
+ </li>
+</ul>
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="GlobalValue">The <tt>GlobalValue</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>#include "<a
+href="/doxygen/GlobalValue_8h-source.html">llvm/GlobalValue.h</a>"</tt><br>
+doxygen info: <a href="/doxygen/classllvm_1_1GlobalValue.html">GlobalValue
+Class</a><br>
+Superclasses: <a href="#Constant"><tt>Constant</tt></a>,
+<a href="#User"><tt>User</tt></a>, <a href="#Value"><tt>Value</tt></a></p>
+
+<p>Global values (<a href="#GlobalVariable"><tt>GlobalVariable</tt></a>s or <a
+href="#Function"><tt>Function</tt></a>s) are the only LLVM values that are
+visible in the bodies of all <a href="#Function"><tt>Function</tt></a>s.
+Because they are visible at global scope, they are also subject to linking with
+other globals defined in different translation units. To control the linking
+process, <tt>GlobalValue</tt>s know their linkage rules. Specifically,
+<tt>GlobalValue</tt>s know whether they have internal or external linkage, as
+defined by the <tt>LinkageTypes</tt> enumeration.</p>
+
+<p>If a <tt>GlobalValue</tt> has internal linkage (equivalent to being
+<tt>static</tt> in C), it is not visible to code outside the current translation
+unit, and does not participate in linking. If it has external linkage, it is
+visible to external code, and does participate in linking. In addition to
+linkage information, <tt>GlobalValue</tt>s keep track of which <a
+href="#Module"><tt>Module</tt></a> they are currently part of.</p>
+
+<p>Because <tt>GlobalValue</tt>s are memory objects, they are always referred to
+by their <b>address</b>. As such, the <a href="#Type"><tt>Type</tt></a> of a
+global is always a pointer to its contents. It is important to remember this
+when using the <tt>GetElementPtrInst</tt> instruction because this pointer must
+be dereferenced first. For example, if you have a <tt>GlobalVariable</tt> (a
+subclass of <tt>GlobalValue</tt>) that is an array of 24 ints, type <tt>[24 x
+i32]</tt>, then the <tt>GlobalVariable</tt> is a pointer to that array. Although
+the address of the first element of this array and the value of the
+<tt>GlobalVariable</tt> are the same, they have different types. The
+<tt>GlobalVariable</tt>'s type is <tt>[24 x i32]*</tt>, while the address of
+the first element has type <tt>i32*</tt>. Because of this, accessing a global value requires you to
+dereference the pointer with <tt>GetElementPtrInst</tt> first, then its elements
+can be accessed. This is explained in the <a href="LangRef.html#globalvars">LLVM
+Language Reference Manual</a>.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="m_GlobalValue">Important Public Members of the <tt>GlobalValue</tt>
+ class</a>
+</div>
+
+<div class="doc_text">
+
+<ul>
+ <li><tt>bool hasInternalLinkage() const</tt><br>
+ <tt>bool hasExternalLinkage() const</tt><br>
+ <tt>void setInternalLinkage(bool HasInternalLinkage)</tt>
+ <p> These methods manipulate the linkage characteristics of the <tt>GlobalValue</tt>.</p>
+ <p> </p>
+ </li>
+ <li><tt><a href="#Module">Module</a> *getParent()</tt>
+ <p> This returns the <a href="#Module"><tt>Module</tt></a> that the
+GlobalValue is currently embedded into.</p></li>
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="Function">The <tt>Function</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>#include "<a
+href="/doxygen/Function_8h-source.html">llvm/Function.h</a>"</tt><br> doxygen
+info: <a href="/doxygen/classllvm_1_1Function.html">Function Class</a><br>
+Superclasses: <a href="#GlobalValue"><tt>GlobalValue</tt></a>,
+<a href="#Constant"><tt>Constant</tt></a>,
+<a href="#User"><tt>User</tt></a>,
+<a href="#Value"><tt>Value</tt></a></p>
+
+<p>The <tt>Function</tt> class represents a single procedure in LLVM. It is
+actually one of the more complex classes in the LLVM hierarchy because it must
+keep track of a large amount of data. The <tt>Function</tt> class keeps track
+of a list of <a href="#BasicBlock"><tt>BasicBlock</tt></a>s, a list of formal
+<a href="#Argument"><tt>Argument</tt></a>s, and a
+<a href="#SymbolTable"><tt>SymbolTable</tt></a>.</p>
+
+<p>The list of <a href="#BasicBlock"><tt>BasicBlock</tt></a>s is the most
+commonly used part of <tt>Function</tt> objects. The list imposes an implicit
+ordering of the blocks in the function, which indicates how the code will be
+laid out by the backend. Additionally, the first <a
+href="#BasicBlock"><tt>BasicBlock</tt></a> is the implicit entry node for the
+<tt>Function</tt>. It is not legal in LLVM to explicitly branch to this initial
+block. There are no implicit exit nodes, and in fact there may be multiple exit
+nodes from a single <tt>Function</tt>. If the <a
+href="#BasicBlock"><tt>BasicBlock</tt></a> list is empty, this indicates that
+the <tt>Function</tt> is actually a function declaration: the actual body of the
+function hasn't been linked in yet.</p>
+
+<p>In addition to a list of <a href="#BasicBlock"><tt>BasicBlock</tt></a>s, the
+<tt>Function</tt> class also keeps track of the list of formal <a
+href="#Argument"><tt>Argument</tt></a>s that the function receives. This
+container manages the lifetime of the <a href="#Argument"><tt>Argument</tt></a>
+nodes, just like the <a href="#BasicBlock"><tt>BasicBlock</tt></a> list does for
+the <a href="#BasicBlock"><tt>BasicBlock</tt></a>s.</p>
+
+<p>The <a href="#SymbolTable"><tt>SymbolTable</tt></a> is a very rarely used
+LLVM feature that is only used when you have to look up a value by name. Aside
+from that, the <a href="#SymbolTable"><tt>SymbolTable</tt></a> is used
+internally to make sure that there are no conflicts between the names of <a
+href="#Instruction"><tt>Instruction</tt></a>s, <a
+href="#BasicBlock"><tt>BasicBlock</tt></a>s, or <a
+href="#Argument"><tt>Argument</tt></a>s in the function body.</p>
+
+<p>Note that <tt>Function</tt> is a <a href="#GlobalValue">GlobalValue</a>
+and therefore also a <a href="#Constant">Constant</a>. The value of the function
+is its address (after linking) which is guaranteed to be constant.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="m_Function">Important Public Members of the <tt>Function</tt>
+ class</a>
+</div>
+
+<div class="doc_text">
+
+<ul>
+ <li><tt>Function(const </tt><tt><a href="#FunctionType">FunctionType</a>
+ *Ty, LinkageTypes Linkage, const std::string &amp;N = "", Module* Parent = 0)</tt>
+
+ <p>Constructor used when you need to create new <tt>Function</tt>s to add
+ to the program. The constructor must specify the type of the function to
+ create and what type of linkage the function should have. The <a
+ href="#FunctionType"><tt>FunctionType</tt></a> argument
+ specifies the formal arguments and return value for the function. The same
+ <a href="#FunctionType"><tt>FunctionType</tt></a> value can be used to
+ create multiple functions. The <tt>Parent</tt> argument specifies the Module
+ in which the function is defined. If this argument is provided, the function
+ will automatically be inserted into that module's list of
+ functions.</p></li>
+
+ <li><tt>bool isExternal()</tt>
+
+ <p>Return whether or not the <tt>Function</tt> has a body defined. If the
+ function is "external", it does not have a body, and thus must be resolved
+ by linking with a function defined in a different translation unit.</p></li>
+
+ <li><tt>Function::iterator</tt> - Typedef for basic block list iterator<br>
+ <tt>Function::const_iterator</tt> - Typedef for const_iterator.<br>
+
+ <tt>begin()</tt>, <tt>end()</tt>
+ <tt>size()</tt>, <tt>empty()</tt>
+
+ <p>These are forwarding methods that make it easy to access the contents of
+ a <tt>Function</tt> object's <a href="#BasicBlock"><tt>BasicBlock</tt></a>
+ list.</p></li>
+
+ <li><tt>Function::BasicBlockListType &amp;getBasicBlockList()</tt>
+
+ <p>Returns the list of <a href="#BasicBlock"><tt>BasicBlock</tt></a>s. This
+ is necessary to use when you need to update the list or perform a complex
+ action that doesn't have a forwarding method.</p></li>
+
+ <li><tt>Function::arg_iterator</tt> - Typedef for the argument list
+iterator<br>
+ <tt>Function::const_arg_iterator</tt> - Typedef for const_iterator.<br>
+
+ <tt>arg_begin()</tt>, <tt>arg_end()</tt>
+ <tt>arg_size()</tt>, <tt>arg_empty()</tt>
+
+ <p>These are forwarding methods that make it easy to access the contents of
+ a <tt>Function</tt> object's <a href="#Argument"><tt>Argument</tt></a>
+ list.</p></li>
+
+ <li><tt>Function::ArgumentListType &amp;getArgumentList()</tt>
+
+ <p>Returns the list of <a href="#Argument"><tt>Argument</tt></a>s. This is
+ necessary to use when you need to update the list or perform a complex
+ action that doesn't have a forwarding method.</p></li>
+
+ <li><tt><a href="#BasicBlock">BasicBlock</a> &amp;getEntryBlock()</tt>
+
+ <p>Returns the entry <a href="#BasicBlock"><tt>BasicBlock</tt></a> for the
+ function. Because the entry block for the function is always the first
+ block, this returns the first block of the <tt>Function</tt>.</p></li>
+
+ <li><tt><a href="#Type">Type</a> *getReturnType()</tt><br>
+ <tt><a href="#FunctionType">FunctionType</a> *getFunctionType()</tt>
+
+ <p>This traverses the <a href="#Type"><tt>Type</tt></a> of the
+ <tt>Function</tt> and returns the return type of the function, or the <a
+ href="#FunctionType"><tt>FunctionType</tt></a> of the actual
+ function.</p></li>
+
+ <li><tt><a href="#SymbolTable">SymbolTable</a> *getSymbolTable()</tt>
+
+ <p> Return a pointer to the <a href="#SymbolTable"><tt>SymbolTable</tt></a>
+ for this <tt>Function</tt>.</p></li>
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="GlobalVariable">The <tt>GlobalVariable</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>#include "<a
+href="/doxygen/GlobalVariable_8h-source.html">llvm/GlobalVariable.h</a>"</tt>
+<br>
+doxygen info: <a href="/doxygen/classllvm_1_1GlobalVariable.html">GlobalVariable
+ Class</a><br>
+Superclasses: <a href="#GlobalValue"><tt>GlobalValue</tt></a>,
+<a href="#Constant"><tt>Constant</tt></a>,
+<a href="#User"><tt>User</tt></a>,
+<a href="#Value"><tt>Value</tt></a></p>
+
+<p>Global variables are represented with the (surprise surprise)
+<tt>GlobalVariable</tt> class. Like functions, <tt>GlobalVariable</tt>s are also
+subclasses of <a href="#GlobalValue"><tt>GlobalValue</tt></a>, and as such are
+always referenced by their address (global values must live in memory, so their
+"name" refers to their constant address). See
+<a href="#GlobalValue"><tt>GlobalValue</tt></a> for more on this. Global
+variables may have an initial value (which must be a
+<a href="#Constant"><tt>Constant</tt></a>), and if they have an initializer,
+they may be marked as "constant" themselves (indicating that their contents
+never change at runtime).</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="m_GlobalVariable">Important Public Members of the
+ <tt>GlobalVariable</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<ul>
+ <li><tt>GlobalVariable(const </tt><tt><a href="#Type">Type</a> *Ty, bool
+ isConstant, LinkageTypes&amp; Linkage, <a href="#Constant">Constant</a>
+ *Initializer = 0, const std::string &amp;Name = "", Module* Parent = 0)</tt>
+
+ <p>Create a new global variable of the specified type. If
+ <tt>isConstant</tt> is true then the global variable will be marked as
+ unchanging for the program. The Linkage parameter specifies the type of
+ linkage (internal, external, weak, linkonce, appending) for the variable. If
+ the linkage is InternalLinkage, WeakLinkage, or LinkOnceLinkage,&nbsp; then
+ the resultant global variable will have internal linkage. AppendingLinkage
+ concatenates together all instances (in different translation units) of the
+ variable into a single variable but is only applicable to arrays. &nbsp;See
+ the <a href="LangRef.html#modulestructure">LLVM Language Reference</a> for
+ further details on linkage types. Optionally an initializer, a name, and the
+ module to put the variable into may be specified for the global variable as
+ well.</p></li>
+
+ <li><tt>bool isConstant() const</tt>
+
+ <p>Returns true if this is a global variable that is known not to
+ be modified at runtime.</p></li>
+
+ <li><tt>bool hasInitializer()</tt>
+
+ <p>Returns true if this <tt>GlobalVariable</tt> has an initializer.</p></li>
+
+ <li><tt><a href="#Constant">Constant</a> *getInitializer()</tt>
+
+ <p>Returns the initial value for a <tt>GlobalVariable</tt>. It is not legal
+ to call this method if there is no initializer.</p></li>
+</ul>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="BasicBlock">The <tt>BasicBlock</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>#include "<a
+href="/doxygen/BasicBlock_8h-source.html">llvm/BasicBlock.h</a>"</tt><br>
+doxygen info: <a href="/doxygen/structllvm_1_1BasicBlock.html">BasicBlock
+Class</a><br>
+Superclass: <a href="#Value"><tt>Value</tt></a></p>
+
+<p>This class represents a single entry multiple exit section of the code,
+commonly known as a basic block by the compiler community. The
+<tt>BasicBlock</tt> class maintains a list of <a
+href="#Instruction"><tt>Instruction</tt></a>s, which form the body of the block.
+Matching the language definition, the last element of this list of instructions
+is always a terminator instruction (a subclass of the <a
+href="#TerminatorInst"><tt>TerminatorInst</tt></a> class).</p>
+
+<p>In addition to tracking the list of instructions that make up the block, the
+<tt>BasicBlock</tt> class also keeps track of the <a
+href="#Function"><tt>Function</tt></a> that it is embedded into.</p>
+
+<p>Note that <tt>BasicBlock</tt>s themselves are <a
+href="#Value"><tt>Value</tt></a>s, because they are referenced by instructions
+like branches and can go in the switch tables. <tt>BasicBlock</tt>s have type
+<tt>label</tt>.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="m_BasicBlock">Important Public Members of the <tt>BasicBlock</tt>
+ class</a>
+</div>
+
+<div class="doc_text">
+<ul>
+
+<li><tt>BasicBlock(const std::string &amp;Name = "", </tt><tt><a
+ href="#Function">Function</a> *Parent = 0)</tt>
+
+<p>The <tt>BasicBlock</tt> constructor is used to create new basic blocks for
+insertion into a function. The constructor optionally takes a name for the new
+block, and a <a href="#Function"><tt>Function</tt></a> to insert it into. If
+the <tt>Parent</tt> parameter is specified, the new <tt>BasicBlock</tt> is
+automatically inserted at the end of the specified <a
+href="#Function"><tt>Function</tt></a>; if not specified, the BasicBlock must be
+manually inserted into the <a href="#Function"><tt>Function</tt></a>.</p></li>
+
+<li><tt>BasicBlock::iterator</tt> - Typedef for instruction list iterator<br>
+<tt>BasicBlock::const_iterator</tt> - Typedef for const_iterator.<br>
+<tt>begin()</tt>, <tt>end()</tt>, <tt>front()</tt>, <tt>back()</tt>,
+<tt>size()</tt>, <tt>empty()</tt>
+STL-style functions for accessing the instruction list.
+
+<p>These methods and typedefs are forwarding functions that have the same
+semantics as the standard library methods of the same names. These methods
+expose the underlying instruction list of a basic block in a way that is easy to
+manipulate. To get the full complement of container operations (including
+operations to update the list), you must use the <tt>getInstList()</tt>
+method.</p></li>
+
+<li><tt>BasicBlock::InstListType &amp;getInstList()</tt>
+
+<p>This method is used to get access to the underlying container that actually
+holds the Instructions. This method must be used when there isn't a forwarding
+function in the <tt>BasicBlock</tt> class for the operation that you would like
+to perform. Because there are no forwarding functions for "updating"
+operations, you need to use this if you want to update the contents of a
+<tt>BasicBlock</tt>.</p></li>
+
+<li><tt><a href="#Function">Function</a> *getParent()</tt>
+
+<p> Returns a pointer to the <a href="#Function"><tt>Function</tt></a> the block is
+embedded into, or a null pointer if it is homeless.</p></li>
+
+<li><tt><a href="#TerminatorInst">TerminatorInst</a> *getTerminator()</tt>
+
+<p> Returns a pointer to the terminator instruction that appears at the end of
+the <tt>BasicBlock</tt>. If there is no terminator instruction, or if the last
+instruction in the block is not a terminator, then a null pointer is
+returned.</p></li>
+
+</ul>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="Argument">The <tt>Argument</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>This subclass of Value defines the interface for incoming formal
+arguments to a function. A Function maintains a list of its formal
+arguments. An argument has a pointer to the parent Function.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+ <a href="mailto:dhurjati@cs.uiuc.edu">Dinakar Dhurjati</a> and
+ <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
+ <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+
+</body>
+</html>
diff --git a/docs/Projects.html b/docs/Projects.html
new file mode 100644
index 0000000..cb7a7c1
--- /dev/null
+++ b/docs/Projects.html
@@ -0,0 +1,460 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>Creating an LLVM Project</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<div class="doc_title">Creating an LLVM Project</div>
+
+<ol>
+<li><a href="#overview">Overview</a></li>
+<li><a href="#create">Create a project from the Sample Project</a></li>
+<li><a href="#source">Source tree layout</a></li>
+<li><a href="#makefiles">Writing LLVM-style Makefiles</a>
+ <ol>
+ <li><a href="#reqVars">Required Variables</a></li>
+ <li><a href="#varsBuildDir">Variables for Building Subdirectories</a></li>
+ <li><a href="#varsBuildLib">Variables for Building Libraries</a></li>
+ <li><a href="#varsBuildProg">Variables for Building Programs</a></li>
+ <li><a href="#miscVars">Miscellaneous Variables</a></li>
+ </ol></li>
+<li><a href="#objcode">Placement of object code</a></li>
+<li><a href="#help">Further help</a></li>
+</ol>
+
+<div class="doc_author">
+ <p>Written by John Criswell</p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="overview">Overview</a></div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>The LLVM build system is designed to facilitate the building of third party
+projects that use LLVM header files, libraries, and tools. In order to use
+these facilities, a Makefile from a project must do the following things:</p>
+
+<ol>
+ <li>Set <tt>make</tt> variables. There are several variables that a Makefile
+ needs to set to use the LLVM build system:
+ <ul>
+ <li><tt>PROJECT_NAME</tt> - The name by which your project is known.</li>
+ <li><tt>LLVM_SRC_ROOT</tt> - The root of the LLVM source tree.</li>
+ <li><tt>LLVM_OBJ_ROOT</tt> - The root of the LLVM object tree.</li>
+ <li><tt>PROJ_SRC_ROOT</tt> - The root of the project's source tree.</li>
+ <li><tt>PROJ_OBJ_ROOT</tt> - The root of the project's object tree.</li>
+ <li><tt>PROJ_INSTALL_ROOT</tt> - The root installation directory.</li>
+ <li><tt>LEVEL</tt> - The relative path from the current directory to the
+ project's root ($PROJ_OBJ_ROOT).</li>
+ </ul></li>
+ <li>Include <tt>Makefile.config</tt> from <tt>$(LLVM_OBJ_ROOT)</tt>.</li>
+ <li>Include <tt>Makefile.rules</tt> from <tt>$(LLVM_SRC_ROOT)</tt>.</li>
+</ol>
+
+<p>There are two ways that you can set all of these variables:</p>
+<ol>
+ <li>You can write your own Makefiles which hard-code these values.</li>
+ <li>You can use the pre-made LLVM sample project. This sample project
+ includes Makefiles, a configure script that can be used to configure the
+ location of LLVM, and the ability to support multiple object directories
+ from a single source directory.</li>
+</ol>
+
+<p>This document assumes that you will base your project on the LLVM sample
+project found in <tt>llvm/projects/sample</tt>. If you want to devise your own
+build system, studying the sample project and LLVM Makefiles will probably
+provide enough information on how to write your own Makefiles.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="create">Create a Project from the Sample Project</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>Follow these simple steps to start your project:</p>
+
+<ol>
+<li>Copy the <tt>llvm/projects/sample</tt> directory to any place of your
+choosing. You can place it anywhere you like. Rename the directory to match
+the name of your project.</li>
+
+<li>
+If you downloaded LLVM using Subversion, remove all the directories named .svn
+(and all the files therein) from your project's new source tree. This will
+keep Subversion from thinking that your project is inside
+<tt>llvm/trunk/projects/sample</tt>.</li>
+
+<li>Add your source code and Makefiles to your source tree.</li>
+
+<li>If you want your project to be configured with the <tt>configure</tt> script
+then you need to edit <tt>autoconf/configure.ac</tt> as follows:
+ <ul>
+ <li><b>AC_INIT</b>. Place the name of your project, its version number and
+ a contact email address for your project as the arguments to this macro.</li>
+ <li><b>AC_CONFIG_AUX_DIR</b>. If your project isn't in the
+ <tt>llvm/projects</tt> directory then you might need to adjust this so that
+ it specifies a relative path to the <tt>llvm/autoconf</tt> directory.</li>
+ <li><b>LLVM_CONFIG_PROJECT</b>. Just leave this alone.</li>
+ <li><b>AC_CONFIG_SRCDIR</b>. Specify a path to a file name that identifies
+ your project; or just leave it at <tt>Makefile.common.in</tt>.</li>
+ <li><b>AC_CONFIG_FILES</b>. Do not change.</li>
+ <li><b>AC_CONFIG_MAKEFILE</b>. Use one of these macros for each Makefile
+ that your project uses. This macro arranges for your makefiles to be copied
+ from the source directory, unmodified, to the build directory.</li>
+ </ul>
+</li>
+
+<li>After updating <tt>autoconf/configure.ac</tt>, regenerate the
+configure script with these commands:
+
+<div class="doc_code">
+<p><tt>% cd autoconf<br>
+ % AutoRegen.sh</tt></p>
+</div>
+
+<p>You must be using Autoconf version 2.59 or later and your aclocal version
+should be 1.9 or later.</p></li>
+
+<li>Run <tt>configure</tt> in the directory in which you want to place
+object code. Use the following options to tell your project where it
+can find LLVM:
+
+ <dl>
+ <dt><tt>--with-llvmsrc=&lt;directory&gt;</tt></dt>
+ <dd>Tell your project where the LLVM source tree is located.</dd>
+ <dt><br/><tt>--with-llvmobj=&lt;directory&gt;</tt></dt>
+ <dd>Tell your project where the LLVM object tree is located.</dd>
+ <dt><br/><tt>--prefix=&lt;directory&gt;</tt></dt>
+ <dd>Tell your project where it should get installed.</dd>
+ </dl>
+</ol>
+
+<p>That's it! Now all you have to do is type <tt>gmake</tt> (or <tt>make</tt>
+if you're on a GNU/Linux system) in the root of your object directory, and your
+project should build.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="source">Source Tree Layout</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>In order to use the LLVM build system, you will want to organize your
+source code so that it can benefit from the build system's features.
+Mainly, you want your source tree layout to look similar to the LLVM
+source tree layout. The best way to do this is to just copy the
+project tree from <tt>llvm/projects/sample</tt> and modify it to meet
+your needs, but you can certainly add to it if you want.</p>
+
+<p>Underneath your top level directory, you should have the following
+directories:</p>
+
+<dl>
+ <dt><b>lib</b>
+ <dd>
+ This subdirectory should contain all of your library source
+ code. For each library that you build, you will have one
+ directory in <b>lib</b> that will contain that library's source
+ code.
+
+ <p>
+ Libraries can be object files, archives, or dynamic libraries.
+ The <b>lib</b> directory is just a convenient place for libraries
+ as it places them all in a directory from which they can be linked
+ later.
+
+ <dt><b>include</b>
+ <dd>
+ This subdirectory should contain any header files that are
+ global to your project. By global, we mean that they are used
+ by more than one library or executable of your project.
+ <p>
+ By placing your header files in <b>include</b>, they will be
+ found automatically by the LLVM build system. For example, if
+ you have a file <b>include/jazz/note.h</b>, then your source
+ files can include it simply with <b>#include "jazz/note.h"</b>.
+
+ <dt><b>tools</b>
+ <dd>
+ This subdirectory should contain all of your source
+ code for executables. For each program that you build, you
+ will have one directory in <b>tools</b> that will contain that
+ program's source code.
+ <p>
+
+ <dt><b>test</b>
+ <dd>
+ This subdirectory should contain tests that verify that your code
+ works correctly. Automated tests are especially useful.
+ <p>
+ Currently, the LLVM build system provides basic support for tests.
+ The LLVM system provides the following:
+ <ul>
+ <li>
+ LLVM provides a tcl procedure that is used by Dejagnu to run
+ tests. It can be found in <tt>llvm/lib/llvm-dg.exp</tt>. This
+ test procedure uses RUN lines in the actual test case to determine
+ how to run the test. See the <a
+ href="TestingGuide.html">TestingGuide</a> for more details. You
+ can easily write Makefile support similar to the Makefiles in
+ <tt>llvm/test</tt> to use Dejagnu to run your project's tests.<br/></li>
+ <li>
+ LLVM contains an optional package called <tt>llvm-test</tt>
+ which provides benchmarks and programs that are known to compile with the
+ LLVM GCC front ends. You can use these
+ programs to test your code, gather statistics information, and
+ compare it to the current LLVM performance statistics.
+ <br/>Currently, there is no way to hook your tests directly into the
+ <tt>llvm/test</tt> testing harness. You will simply
+ need to find a way to use the source provided within that directory
+ on your own.
+ </ul>
+</dl>
+
+<p>Typically, you will want to build your <b>lib</b> directory first followed by
+your <b>tools</b> directory.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="makefiles">Writing LLVM Style Makefiles</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>The LLVM build system provides a convenient way to build libraries and
+executables. Most of your project Makefiles will only need to define a few
+variables. Below is a list of the variables one can set and what they can
+do:</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="reqVars">Required Variables</a>
+</div>
+
+<div class="doc_text">
+
+<dl>
+ <dt>LEVEL
+ <dd>
+ This variable is the relative path from this Makefile to the
+ top directory of your project's source code. For example, if
+ your source code is in <tt>/tmp/src</tt>, then the Makefile in
+ <tt>/tmp/src/jump/high</tt> would set <tt>LEVEL</tt> to <tt>"../.."</tt>.
+</dl>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="varsBuildDir">Variables for Building Subdirectories</a>
+</div>
+
+<div class="doc_text">
+
+<dl>
+ <dt>DIRS
+ <dd>
+ This is a space separated list of subdirectories that should be
+ built. They will be built, one at a time, in the order
+ specified.
+ <p>
+
+ <dt>PARALLEL_DIRS
+ <dd>
+ This is a list of directories that can be built in parallel.
+ These will be built after the directories in DIRS have been
+ built.
+ <p>
+
+ <dt>OPTIONAL_DIRS
+ <dd>
+ This is a list of directories that can be built if they exist,
+ but will not cause an error if they do not exist. They are
+ built serially in the order in which they are listed.
+</dl>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="varsBuildLib">Variables for Building Libraries</a>
+</div>
+
+<div class="doc_text">
+
+<dl>
+ <dt>LIBRARYNAME
+ <dd>
+ This variable contains the base name of the library that will
+ be built. For example, to build a library named
+ <tt>libsample.a</tt>, LIBRARYNAME should be set to
+ <tt>sample</tt>.
+ <p>
+
+ <dt>BUILD_ARCHIVE
+ <dd>
+ By default, a library is a <tt>.o</tt> file that is linked
+ directly into a program. To build an archive (also known as
+ a static library), set the BUILD_ARCHIVE variable.
+ <p>
+
+ <dt>SHARED_LIBRARY
+ <dd>
+ If SHARED_LIBRARY is defined in your Makefile, a shared
+ (or dynamic) library will be built.
+</dl>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="varsBuildProg">Variables for Building Programs</a>
+</div>
+
+<div class="doc_text">
+
+<dl>
+ <dt>TOOLNAME
+ <dd>
+ This variable contains the name of the program that will
+ be built. For example, to build an executable named
+ <tt>sample</tt>, TOOLNAME should be set to <tt>sample</tt>.
+ <p>
+
+ <dt>USEDLIBS
+ <dd>
+ This variable holds a space separated list of libraries that
+ should be linked into the program. These libraries must either
+ be LLVM libraries or libraries that come from your <b>lib</b>
+ directory. The libraries must be specified by their base name.
+ For example, to link libsample.a, you would set USEDLIBS to
+ <tt>sample</tt>.
+ <p>
+ Note that this works only for statically linked libraries.
+ <p>
+
+ <dt>LIBS
+ <dd>
+ To link dynamic libraries, add <tt>-l&lt;library base name&gt;</tt> to
+ the LIBS variable. The LLVM build system will look in the same places
+ for dynamic libraries as it does for static libraries.
+ <p>
+ For example, to link <tt>libsample.so</tt>, you would have the
+ following line in your <tt>Makefile</tt>:
+ <p>
+ <tt>
+ LIBS += -lsample
+ </tt>
+</dl>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="miscVars">Miscellaneous Variables</a>
+</div>
+
+<div class="doc_text">
+
+<dl>
+ <dt>ExtraSource
+ <dd>
+ This variable contains a space separated list of extra source
+ files that need to be built. It is useful for including the
+ output of Lex and Yacc programs.
+ <p>
+
+ <dt>CFLAGS
+ <dt>CPPFLAGS
+ <dd>
+ These variables can be used to add options to the C and C++
+ compilers, respectively. They are typically used to add options
+ that tell the compiler the location of additional directories
+ to search for header files.
+ <p>
+ It is highly suggested that you append to CFLAGS and CPPFLAGS as
+ opposed to overwriting them. The master Makefiles may already
+ have useful options in them that you may not want to overwrite.
+ <p>
+</dl>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="objcode">Placement of Object Code</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>The final location of built libraries and executables will depend upon
+whether you do a Debug, Release, or Profile build.</p>
+
+<dl>
+ <dt>Libraries
+ <dd>
+ All libraries (static and dynamic) will be stored in
+ <tt>PROJ_OBJ_ROOT/&lt;type&gt;/lib</tt>, where type is <tt>Debug</tt>,
+ <tt>Release</tt>, or <tt>Profile</tt> for a debug, optimized, or
+ profiled build, respectively.<p>
+
+ <dt>Executables
+ <dd>All executables will be stored in
+ <tt>PROJ_OBJ_ROOT/&lt;type&gt;/bin</tt>, where type is <tt>Debug</tt>,
+ <tt>Release</tt>, or <tt>Profile</tt> for a debug, optimized, or profiled
+ build, respectively.
+</dl>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="help">Further Help</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>If you have any questions or need any help creating an LLVM project,
+the LLVM team would be more than happy to help. You can always post your
+questions to the <a
+href="http://mail.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVM Developers
+Mailing List</a>.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+ <a href="mailto:criswell@uiuc.edu">John Criswell</a><br>
+ <a href="http://llvm.org">The LLVM Compiler Infrastructure</a>
+ <br>
+ Last modified: $Date$
+</address>
+
+</body>
+</html>
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html
new file mode 100644
index 0000000..03d2db1
--- /dev/null
+++ b/docs/ReleaseNotes.html
@@ -0,0 +1,861 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+ <title>LLVM 2.0 Release Notes</title>
+</head>
+<body>
+
+<div class="doc_title">LLVM 2.0 Release Notes</div>
+
+<ol>
+ <li><a href="#intro">Introduction</a></li>
+ <li><a href="#whatsnew">What's New?</a></li>
+ <li><a href="GettingStarted.html">Installation Instructions</a></li>
+ <li><a href="#portability">Portability and Supported Platforms</a></li>
+ <li><a href="#knownproblems">Known Problems</a></li>
+ <li><a href="#additionalinfo">Additional Information</a></li>
+</ol>
+
+<div class="doc_author">
+ <p>Written by the <a href="http://llvm.org">LLVM Team</a></p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="intro">Introduction</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>This document contains the release notes for the LLVM compiler
+infrastructure, release 2.0. Here we describe the status of LLVM, including
+major improvements from the previous release and any known problems. All LLVM
+releases may be downloaded from the <a href="http://llvm.org/releases/">LLVM
+releases web site</a>.</p>
+
+<p>For more information about LLVM, including information about the latest
+release, please check out the <a href="http://llvm.org/">main LLVM
+web site</a>. If you have questions or comments, the <a
+href="http://mail.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVM developer's mailing
+list</a> is a good place to send them.</p>
+
+<p>Note that if you are reading this file from a Subversion checkout or the
+main LLVM web page,
+this document applies to the <i>next</i> release, not the current one. To see
+the release notes for the current or previous releases, see the <a
+href="http://llvm.org/releases/">releases page</a>.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="whatsnew">What's New?</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>This is the eleventh public release of the LLVM Compiler Infrastructure.
+Being the first major release since 1.0, this release is different in several
+ways from our previous releases:</p>
+
+<ol>
+<li>We took this as an opportunity to
+break backwards compatibility with the LLVM 1.x bytecode and .ll file format.
+If you have LLVM 1.9 .ll files that you would like to upgrade to LLVM 2.x, we
+recommend the use of the stand alone <a href="#llvm-upgrade">llvm-upgrade</a>
+tool (which is included with 2.0). We intend to keep compatibility with .ll
+and .bc formats within the 2.x release series, like we did within the 1.x
+series.</li>
+<li>There are several significant changes to the LLVM IR and internal APIs, such
+ as a major overhaul of the type system, the completely new bitcode file
+ format, etc (described below).</li>
+<li>We designed the release around a 6 month release cycle instead of the usual
+ 3-month cycle. This gave us extra time to develop and test some of the
+ more invasive features in this release.</li>
+<li>LLVM 2.0 no longer supports the llvm-gcc3 front-end. Users are required to
+ upgrade to llvm-gcc4. llvm-gcc4 includes many features over
+ llvm-gcc3, is faster, and is <a href="CFEBuildInstrs.html">much easier to
+ build from source</a>.</li>
+</ol>
+
+<p>Note that while this is a major version bump, this release has been
+ extensively tested on a wide range of software. It is easy to say that this
+ is our best release yet, in terms of both features and correctness. This is
+ the first LLVM release to correctly compile and optimize major software like
+ LLVM itself, Mozilla/Seamonkey, Qt 4.3rc1, kOffice, etc out of the box on
+ linux/x86.
+ </p>
+
+</div>
+
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="newfeatures">New Features in LLVM 2.0</a>
+</div>
+
+<!--_________________________________________________________________________-->
+<div class="doc_subsubsection"><a name="majorchanges">Major Changes</a></div>
+<div class="doc_text">
+
+<p>Changes to the LLVM IR itself:</p>
+
+<ul>
+
+<li>Integer types are now completely signless. This means that we
+ have types like i8/i16/i32 instead of ubyte/sbyte/short/ushort/int
+ etc. LLVM operations that depend on sign have been split up into
+ separate instructions (<a href="http://llvm.org/PR950">PR950</a>). This
+ eliminates cast instructions that just change the sign of the operands (e.g.
+ int -&gt; uint), which reduces the size of the IR and makes optimizers
+ simpler to write.</li>
+
+<li>Integer types with arbitrary bitwidths (e.g. i13, i36, i42, i1057, etc) are
+ now supported in the LLVM IR and optimizations (<a
+ href="http://llvm.org/PR1043">PR1043</a>). However, neither llvm-gcc
+ (<a href="http://llvm.org/PR1284">PR1284</a>) nor the native code generators
+ (<a href="http://llvm.org/PR1270">PR1270</a>) support non-standard width
+ integers yet.</li>
+
+<li>'Type planes' have been removed (<a href="http://llvm.org/PR411">PR411</a>).
+ It is no longer possible to have two values with the same name in the
+ same symbol table. This simplifies LLVM internals, allowing significant
+ speedups.</li>
+
+<li>Global variables and functions in .ll files are now prefixed with
+ @ instead of % (<a href="http://llvm.org/PR645">PR645</a>).</li>
+
+<li>The LLVM 1.x "bytecode" format has been replaced with a
+ completely new binary representation, named 'bitcode'. The <a
+ href="BitCodeFormat.html">Bitcode Format</a> brings a
+ number of advantages to the LLVM over the old bytecode format: it is denser
+ (files are smaller), more extensible, requires less memory to read,
+ is easier to keep backwards compatible (so LLVM 2.5 will read 2.0 .bc
+ files), and has many other nice features.</li>
+
+<li>Load and store instructions now track the alignment of their pointer
+ (<a href="http://www.llvm.org/PR400">PR400</a>). This allows the IR to
+ express loads that are not sufficiently aligned (e.g. due to '<tt>#pragma
+ packed</tt>') or to capture extra alignment information.</li>
+</ul>
+
+<p>Major new features:</p>
+
+<ul>
+
+<li>A number of ELF features are now supported by LLVM, including 'visibility',
+ extern weak linkage, Thread Local Storage (TLS) with the <tt>__thread</tt>
+ keyword, and symbol aliases.
+ Among other things, this means that many of the special options needed to
+ configure llvm-gcc on linux are no longer needed, and special hacks to build
+ large C++ libraries like Qt are not needed.</li>
+
+<li>LLVM now has a new MSIL backend. <tt>llc -march=msil</tt> will now turn LLVM
+ into MSIL (".net") bytecode. This is still fairly early development
+ with a number of limitations.</li>
+
+<li>A new <a href="CommandGuide/html/llvm-upgrade.html">llvm-upgrade</a> tool
+ exists to migrate LLVM 1.9 .ll files to LLVM 2.0 syntax.</li>
+</ul>
+
+</div>
+
+
+<!--_________________________________________________________________________-->
+<div class="doc_subsubsection"><a name="llvmgccfeatures">llvm-gcc
+Improvements</a></div>
+<div class="doc_text">
+<p>New features include:
+</p>
+
+<ul>
+<li>Precompiled Headers (PCH) are now supported.</li>
+
+<li>"<tt>#pragma packed</tt>" is now supported, as are the various features
+ described above (visibility, extern weak linkage, __thread, aliases,
+ etc).</li>
+
+<li>Tracking function parameter/result attributes is now possible.</li>
+
+<li>Many internal enhancements have been added, such as improvements to
+ NON_LVALUE_EXPR, arrays with non-zero base, structs with variable sized
+ fields, VIEW_CONVERT_EXPR, CEIL_DIV_EXPR, nested functions, and many other
+ things. This is primarily to support non-C GCC front-ends, like Ada.</li>
+
+<li>It is simpler to configure llvm-gcc for linux.</li>
+
+</ul>
+
+</div>
+
+<!--_________________________________________________________________________-->
+<div class="doc_subsubsection"><a name="optimizer">Optimizer
+Improvements</a></div>
+
+<div class="doc_text">
+<p>New features include:
+</p>
+
+<ul>
+<li>The <a href="WritingAnLLVMPass.html">pass manager</a> has been entirely
+ rewritten, making it significantly smaller, simpler, and more extensible.
+ Support has been added to run <tt>FunctionPass</tt>es interlaced with
+ <tt>CallGraphSCCPass</tt>es, we now support loop transformations
+ explicitly with <tt>LoopPass</tt>, and <tt>ModulePass</tt>es may now use the
+ result of <tt>FunctionPass</tt>es.</li>
+
+<li>LLVM 2.0 includes a new loop rotation pass, which converts "for loops" into
+ "do/while loops", where the condition is at the bottom of the loop.</li>
+
+<li>The Loop Strength Reduction pass has been improved, and we now support
+ sinking expressions across blocks to reduce register pressure.</li>
+
+<li>The <tt>-scalarrepl</tt> pass can now promote unions containing FP values
+ into a register; it can also handle unions of vectors of the same
+ size.</li>
+
+<li>The [Post]DominatorSet classes have been removed from LLVM and clients
+ switched to use the more-efficient ETForest class instead.</li>
+
+<li>The ImmediateDominator class has also been removed, and clients have been
+ switched to use DominatorTree instead.</li>
+
+<li>The predicate simplifier pass has been improved, making it able to do
+ simple value range propagation and eliminate more conditionals. However,
+ note that predsimplify is not enabled by default in llvm-gcc.</li>
+
+</ul>
+
+</div>
+
+<!--_________________________________________________________________________-->
+<div class="doc_subsubsection"><a name="codegen">Code
+Generator Enhancements</a></div>
+
+<div class="doc_text">
+<p>
+New features include:
+</p>
+
+<ul>
+
+<li>LLVM now supports software floating point, which allows LLVM to target
+ chips that don't have hardware FPUs (e.g. ARM thumb mode).</li>
+
+<li>A new register scavenger has been implemented, which is useful for
+ finding free registers after register allocation. This is useful when
+ rewriting frame references on RISC targets, for example.</li>
+
+<li>Heuristics have been added to avoid coalescing vregs with very large live
+ ranges to physregs. This was bad because it effectively pinned the physical
+ register for the entire lifetime of the virtual register (<a
+ href="http://llvm.org/PR711">PR711</a>).</li>
+
+<li>Support now exists for very simple (but still very useful)
+ rematerialization in the register allocator, enough to move
+ instructions like "load immediate" and constant pool loads.</li>
+
+<li>Switch statement lowering is significantly better, improving codegen for
+ sparse switches that have dense subregions, and implementing support
+ for the shift/and trick.</li>
+
+<li>LLVM now supports tracking physreg sub-registers and super-registers
+ in the code generator, and includes extensive register
+ allocator changes to track them.</li>
+
+<li>There is initial support for virtreg sub-registers
+ (<a href="http://llvm.org/PR1350">PR1350</a>).</li>
+
+</ul>
+
+<p>
+Other improvements include:
+</p>
+
+<ul>
+
+<li>Inline assembly support is much more solid than before.
+ The two primary features still missing are support for 80-bit floating point
+ stack registers on X86 (<a href="http://llvm.org/PR879">PR879</a>), and
+ support for inline asm in the C backend (<a
+ href="http://llvm.org/PR802">PR802</a>).</li>
+
+<li>DWARF debug information generation has been improved. LLVM now passes
+ most of the GDB testsuite on MacOS and debug info is more dense.</li>
+
+<li>Codegen support for Zero-cost DWARF exception handling has been added (<a
+ href="http://llvm.org/PR592">PR592</a>). It is mostly
+ complete and just in need of continued bug fixes and optimizations at
+ this point. However, support in llvm-g++ is disabled with an
+ #ifdef for the 2.0 release (<a
+ href="http://llvm.org/PR870">PR870</a>).</li>
+
+<li>The code generator now has more accurate and general hooks for
+ describing addressing modes ("isLegalAddressingMode") to
+ optimizations like loop strength reduction and code sinking.</li>
+
+<li>Progress has been made on a direct Mach-o .o file writer. Many small
+ apps work, but it is still not quite complete.</li>
+
+</ul>
+
+<p>In addition, the LLVM target description format has itself been extended in
+ several ways:</p>
+
+<ul>
+<li>TargetData now supports better target parameterization in
+ the .ll/.bc files, eliminating the 'pointersize/endianness' attributes
+ in the files (<a href="http://llvm.org/PR761">PR761</a>).</li>
+
+<li>TargetData was generalized for finer grained alignment handling,
+ handling of vector alignment, and handling of preferred alignment.</li>
+
+<li>LLVM now supports describing target calling conventions
+ explicitly in .td files, reducing the amount of C++ code that needs
+ to be written for a port.</li>
+
+</ul>
+
+</div>
+
+<!--_________________________________________________________________________-->
+<div class="doc_subsubsection"><a name="specifictargets">Target-Specific
+Improvements</a></div>
+
+<div class="doc_text">
+
+<p>X86-specific Code Generator Enhancements:
+</p>
+
+<ul>
+<li>The MMX instruction set is now supported through intrinsics.</li>
+<li>The scheduler was improved to better reduce register pressure on
+ X86 and other targets that are register pressure sensitive.</li>
+<li>Linux/x86-64 support is much better.</li>
+<li>PIC support for linux/x86 has been added.</li>
+<li>The X86 backend now supports the GCC regparm attribute.</li>
+<li>LLVM now supports inline asm with multiple constraint letters per operand
+ (like "mri") which is common in X86 inline asms.</li>
+</ul>
+
+<p>ARM-specific Code Generator Enhancements:</p>
+
+<ul>
+<li>The ARM code generator is now stable and fully supported.</li>
+
+<li>There are major new features, including support for ARM
+ v4-v6 chips, vfp support, soft float point support, pre/postinc support,
+ load/store multiple generation, constant pool entry motion (to support
+ large functions), inline asm support, weak linkage support, static
+ ctor/dtor support and many bug fixes.</li>
+
+<li>Added support for Thumb code generation (<tt>llc -march=thumb</tt>).</li>
+
+<li>The ARM backend now supports the ARM AAPCS/EABI ABI and PIC codegen on
+ arm/linux.</li>
+
+<li>Several bugs were fixed for DWARF debug info generation on arm/linux.</li>
+
+</ul>
+
+<p>PowerPC-specific Code Generator Enhancements:</p>
+
+<ul>
+<li>The PowerPC 64 JIT now supports addressing code loaded above the 2G
+ boundary.</li>
+
+<li>Support for the Linux/ppc ABI has been improved, and the linux/ppc JIT is
+ now fully functional. llvm-gcc and static compilation are not fully supported
+ yet though.</li>
+
+<li>Many PowerPC 64 bug fixes.</li>
+
+</ul>
+
+</div>
+
+
+<!--_________________________________________________________________________-->
+<div class="doc_subsubsection"><a name="other">Other Improvements</a></div>
+<div class="doc_text">
+
+<p>More specific changes include:</p>
+
+<ul>
+<li>LLVM no longer relies on static destructors to shut itself down. Instead,
+ it lazily initializes itself and shuts down when <tt>llvm_shutdown()</tt> is
+ explicitly called.</li>
+
+<li>LLVM now has significantly fewer static constructors, reducing startup time.
+ </li>
+
+<li>Several classes have been refactored to reduce the amount of code that
+ gets linked into apps that use the JIT.</li>
+
+<li>Construction of intrinsic function declarations has been simplified.</li>
+
+<li>The gccas/gccld tools have been replaced with small shell scripts.</li>
+
+<li>Support has been added to llvm-test for running on low-memory
+ or slow machines (make SMALL_PROBLEM_SIZE=1).</li>
+
+</ul>
+</div>
+
+<!--_________________________________________________________________________-->
+<div class="doc_subsubsection"><a name="apichanges">API Changes</a></div>
+<div class="doc_text">
+
+<p>LLVM 2.0 contains a revamp of the type system and several other significant
+internal changes. If you are programming to the C++ API, be aware of the
+following major changes:</p>
+
+<ul>
+<li>Pass registration is slightly different in LLVM 2.0 (you now need an
+ <tt>intptr_t</tt> in your constructor), as explained in the <a
+ href="WritingAnLLVMPass.html#basiccode">Writing an LLVM Pass</a>
+ document.</li>
+
+<li><tt>ConstantBool</tt>, <tt>ConstantIntegral</tt> and <tt>ConstantInt</tt>
+ classes have been merged together, we now just have
+ <tt>ConstantInt</tt>.</li>
+
+<li><tt>Type::IntTy</tt>, <tt>Type::UIntTy</tt>, <tt>Type::SByteTy</tt>, ... are
+ replaced by <tt>Type::Int8Ty</tt>, <tt>Type::Int16Ty</tt>, etc. LLVM types
+ have always corresponded to fixed size types
+ (e.g. long was always 64-bits), but the type system no longer includes
+ information about the sign of the type. Also, the
+ <tt>Type::isPrimitiveType()</tt> method now returns false for integers.</li>
+
+<li>Several classes (<tt>CallInst</tt>, <tt>GetElementPtrInst</tt>,
+ <tt>ConstantArray</tt>, etc), that once took <tt>std::vector</tt> as
+ arguments now take ranges instead. For example, you can create a
+ <tt>GetElementPtrInst</tt> with code like:
+
+ <pre>
+ Value *Ops[] = { Op1, Op2, Op3 };
+ GEP = new GetElementPtrInst(BasePtr, Ops, 3);
+ </pre>
+
+ This avoids creation of a temporary vector (and a call to malloc/free). If
+ you have an <tt>std::vector</tt>, use code like this:
+ <pre>
+ std::vector&lt;Value*&gt; Ops = ...;
+ GEP = new GetElementPtrInst(BasePtr, &amp;Ops[0], Ops.size());
+ </pre>
+
+ </li>
+
+<li><tt>CastInst</tt> is now abstract and its functionality is split into
+ several parts, one for each of the <a href="LangRef.html#convertops">new
+ cast instructions</a>.</li>
+
+<li><tt>Instruction::getNext()/getPrev()</tt> are now private (along with
+ <tt>BasicBlock::getNext</tt>, etc), for efficiency reasons (they are now no
+ longer just simple pointers). Please use <tt>BasicBlock::iterator</tt>, etc
+ instead.
+</li>
+
+<li><tt>Module::getNamedFunction()</tt> is now called
+ <tt>Module::getFunction()</tt>.</li>
+
+<li><tt>SymbolTable.h</tt> has been split into <tt>ValueSymbolTable.h</tt> and
+<tt>TypeSymbolTable.h</tt>.</li>
+</ul>
+</div>
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="portability">Portability and Supported Platforms</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>LLVM is known to work on the following platforms:</p>
+
+<ul>
+<li>Intel and AMD machines running Red Hat Linux, Fedora Core and FreeBSD
+ (and probably other unix-like systems).</li>
+<li>PowerPC and X86-based Mac OS X systems, running 10.2 and above in 32-bit and
+ 64-bit modes.</li>
+<li>Intel and AMD machines running on Win32 using MinGW libraries (native)</li>
+<li>Intel and AMD machines running on Win32 with the Cygwin libraries (limited
+ support is available for native builds with Visual C++).</li>
+<li>Sun UltraSPARC workstations running Solaris 8.</li>
+<li>Alpha-based machines running Debian GNU/Linux.</li>
+<li>Itanium-based machines running Linux and HP-UX.</li>
+</ul>
+
+<p>The core LLVM infrastructure uses
+<a href="http://www.gnu.org/software/autoconf/">GNU autoconf</a> to adapt itself
+to the machine and operating system on which it is built. However, minor
+porting may be required to get LLVM to work on new platforms. We welcome your
+portability patches and reports of successful builds or error messages.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="knownproblems">Known Problems</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>This section contains all known problems with the LLVM system, listed by
+component. As new problems are discovered, they will be added to these
+sections. If you run into a problem, please check the <a
+href="http://llvm.org/bugs/">LLVM bug database</a> and submit a bug if
+there isn't already one.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="experimental">Experimental features included with this release</a>
+</div>
+
+<div class="doc_text">
+
+<p>The following components of this LLVM release are either untested, known to
+be broken or unreliable, or are in early development. These components should
+not be relied on, and bugs should not be filed against them, but they may be
+useful to some people. In particular, if you would like to work on one of these
+components, please contact us on the <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVMdev list</a>.</p>
+
+<ul>
+<li>The <tt>-cee</tt> pass is known to be buggy, and may be removed in a
+ future release.</li>
+<li>C++ EH support is disabled for this release.</li>
+<li>The MSIL backend is experimental.</li>
+<li>The IA64 code generator is experimental.</li>
+<li>The Alpha JIT is experimental.</li>
+<li>"<tt>-filetype=asm</tt>" (the default) is the only supported value for the
+ <tt>-filetype</tt> llc option.</li>
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="x86-be">Known problems with the X86 back-end</a>
+</div>
+
+<div class="doc_text">
+
+<ul>
+<li>The X86 backend does not yet support <a href="http://llvm.org/PR879">inline
+ assembly that uses the X86 floating point stack</a>.</li>
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="ppc-be">Known problems with the PowerPC back-end</a>
+</div>
+
+<div class="doc_text">
+
+<ul>
+<li><a href="http://llvm.org/PR642">PowerPC backend does not correctly
+implement ordered FP comparisons</a>.</li>
+<li>The Linux PPC32/ABI support needs testing for the interpreter and static
+compilation, and lacks support for debug information.</li>
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="arm-be">Known problems with the ARM back-end</a>
+</div>
+
+<div class="doc_text">
+
+<ul>
+<li>Thumb mode works only on ARMv6 or higher processors. On sub-ARMv6
+processors, thumb programs can crash or produce wrong
+results (<a href="http://llvm.org/PR1388">PR1388</a>).</li>
+<li>Compilation for ARM Linux OABI (old ABI) is supported, but not fully tested.
+</li>
+<li>There is a bug in QEMU-ARM (&lt;= 0.9.0) which causes it to incorrectly execute
+programs compiled with LLVM. Please use more recent versions of QEMU.</li>
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="sparc-be">Known problems with the SPARC back-end</a>
+</div>
+
+<div class="doc_text">
+
+<ul>
+<li>The SPARC backend only supports the 32-bit SPARC ABI (-m32), it does not
+ support the 64-bit SPARC ABI (-m64).</li>
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="alpha-be">Known problems with the Alpha back-end</a>
+</div>
+
+<div class="doc_text">
+
+<ul>
+
+<li>On 21164s, some rare FP arithmetic sequences which may trap do not have the
+appropriate nops inserted to ensure restartability.</li>
+
+</ul>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="ia64-be">Known problems with the IA64 back-end</a>
+</div>
+
+<div class="doc_text">
+
+<ul>
+
+<li>C++ programs are likely to fail on IA64, as calls to <tt>setjmp</tt> are
+made where the argument is not 16-byte aligned, as required on IA64. (Strictly
+speaking this is not a bug in the IA64 back-end; it will also be encountered
+when building C++ programs using the C back-end.)</li>
+
+<li>The C++ front-end does not use <a href="http://llvm.org/PR406">IA64
+ABI compliant layout of v-tables</a>. In particular, it just stores function
+pointers instead of function descriptors in the vtable. This bug prevents
+mixing C++ code compiled with LLVM with C++ objects compiled by other C++
+compilers.</li>
+
+<li>There are a few ABI violations which will lead to problems when mixing LLVM
+output with code built with other compilers, particularly for floating-point
+programs.</li>
+
+<li>Defining vararg functions is not supported (but calling them is ok).</li>
+
+<li>The Itanium backend has bitrotted somewhat.</li>
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="c-be">Known problems with the C back-end</a>
+</div>
+
+<div class="doc_text">
+
+<ul>
+<li><a href="http://llvm.org/PR802">The C backend does not support inline
+ assembly code</a>.</li>
+</ul>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="c-fe">Known problems with the C front-end</a>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">Bugs</div>
+
+<div class="doc_text">
+
+<p>llvm-gcc4 does not currently support <a href="http://llvm.org/PR869">Link-Time
+Optimization</a> on most platforms "out-of-the-box". Please inquire on the
+llvmdev mailing list if you are interested.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ Notes
+</div>
+
+<div class="doc_text">
+<ul>
+
+<li><p>"long double" is silently transformed by the front-end into "double". There
+is no support for floating point data types of any size other than 32 and 64
+bits.</p></li>
+
+<li><p>llvm-gcc does <b>not</b> support <tt>__builtin_apply</tt> yet.
+ See <a href="http://gcc.gnu.org/onlinedocs/gcc/Constructing-Calls.html#Constructing%20Calls">Constructing Calls</a>: Dispatching a call to another function.</p>
+</li>
+
+<li><p>llvm-gcc <b>partially</b> supports these GCC extensions:</p>
+ <ol>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Nested-Functions.html#Nested%20Functions">Nested Functions</a>: As in Algol and Pascal, lexical scoping of functions.<br>
+ Nested functions are supported, but llvm-gcc does not support non-local
+ gotos or taking the address of a nested function.</li>
+
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html#Function%20Attributes">Function Attributes</a>:
+
+ Declaring that functions have no side effects or that they can never
+ return.<br>
+
+ <b>Supported:</b> <tt>alias</tt>, <tt>always_inline</tt>, <tt>cdecl</tt>,
+ <tt>constructor</tt>, <tt>destructor</tt>,
+ <tt>deprecated</tt>, <tt>fastcall</tt>, <tt>format</tt>,
+ <tt>format_arg</tt>, <tt>non_null</tt>, <tt>noreturn</tt>, <tt>regparm</tt>,
+ <tt>section</tt>, <tt>stdcall</tt>, <tt>unused</tt>, <tt>used</tt>,
+ <tt>visibility</tt>, <tt>warn_unused_result</tt>, <tt>weak</tt><br>
+
+ <b>Ignored:</b> <tt>noinline</tt>, <tt>pure</tt>, <tt>const</tt>, <tt>nothrow</tt>,
+ <tt>malloc</tt>, <tt>no_instrument_function</tt></li>
+ </ol>
+</li>
+
+<li><p>llvm-gcc supports the vast majority of GCC extensions, including:</p>
+
+ <ol>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Pragmas.html#Pragmas">Pragmas</a>: Pragmas accepted by GCC.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Local-Labels.html#Local%20Labels">Local Labels</a>: Labels local to a block.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html#Other%20Builtins">Other Builtins</a>:
+ Other built-in functions.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Variable-Attributes.html#Variable%20Attributes">Variable Attributes</a>:
+ Specifying attributes of variables.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Type-Attributes.html#Type%20Attributes">Type Attributes</a>: Specifying attributes of types.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Thread_002dLocal.html">Thread-Local</a>: Per-thread variables.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Variable-Length.html#Variable%20Length">Variable Length</a>:
+ Arrays whose length is computed at run time.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html#Labels%20as%20Values">Labels as Values</a>: Getting pointers to labels and computed gotos.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Statement-Exprs.html#Statement%20Exprs">Statement Exprs</a>: Putting statements and declarations inside expressions.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Typeof.html#Typeof">Typeof</a>: <code>typeof</code>: referring to the type of an expression.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc-3.4.0/gcc/Lvalues.html#Lvalues">Lvalues</a>: Using <code>?:</code>, "<code>,</code>" and casts in lvalues.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Conditionals.html#Conditionals">Conditionals</a>: Omitting the middle operand of a <code>?:</code> expression.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Long-Long.html#Long%20Long">Long Long</a>: Double-word integers.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Complex.html#Complex">Complex</a>: Data types for complex numbers.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Hex-Floats.html#Hex%20Floats">Hex Floats</a>: Hexadecimal floating-point constants.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html#Zero%20Length">Zero Length</a>: Zero-length arrays.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Empty-Structures.html#Empty%20Structures">Empty Structures</a>: Structures with no members.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Variadic-Macros.html#Variadic%20Macros">Variadic Macros</a>: Macros with a variable number of arguments.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Escaped-Newlines.html#Escaped%20Newlines">Escaped Newlines</a>: Slightly looser rules for escaped newlines.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Extended%20Asm">Extended Asm</a>: Assembler instructions with C expressions as operands.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Constraints.html#Constraints">Constraints</a>: Constraints for asm operands.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Asm-Labels.html#Asm%20Labels">Asm Labels</a>: Specifying the assembler name to use for a C symbol.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Explicit-Reg-Vars.html#Explicit%20Reg%20Vars">Explicit Reg Vars</a>: Defining variables residing in specified registers.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Vector-Extensions.html#Vector%20Extensions">Vector Extensions</a>: Using vector instructions through built-in functions.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Target-Builtins.html#Target%20Builtins">Target Builtins</a>: Built-in functions specific to particular targets.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Subscripting.html#Subscripting">Subscripting</a>: Any array can be subscripted, even if not an lvalue.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Pointer-Arith.html#Pointer%20Arith">Pointer Arith</a>: Arithmetic on <code>void</code>-pointers and function pointers.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Initializers.html#Initializers">Initializers</a>: Non-constant initializers.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Compound-Literals.html#Compound%20Literals">Compound Literals</a>: Compound literals give structures, unions,
+or arrays as values.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Designated-Inits.html#Designated%20Inits">Designated Inits</a>: Labeling elements of initializers.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Cast-to-Union.html#Cast%20to%20Union">Cast to Union</a>: Casting to union type from any member of the union.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Case-Ranges.html#Case%20Ranges">Case Ranges</a>: `case 1 ... 9' and such.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Mixed-Declarations.html#Mixed%20Declarations">Mixed Declarations</a>: Mixing declarations and code.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Function-Prototypes.html#Function%20Prototypes">Function Prototypes</a>: Prototype declarations and old-style definitions.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Comments.html#C_002b_002b-Comments">C++ Comments</a>: C++ comments are recognized.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Dollar-Signs.html#Dollar%20Signs">Dollar Signs</a>: Dollar sign is allowed in identifiers.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Character-Escapes.html#Character%20Escapes">Character Escapes</a>: <code>\e</code> stands for the character &lt;ESC&gt;.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Alignment.html#Alignment">Alignment</a>: Inquiring about the alignment of a type or variable.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Inline.html#Inline">Inline</a>: Defining inline functions (as fast as macros).</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Alternate-Keywords.html#Alternate%20Keywords">Alternate Keywords</a>: <code>__const__</code>, <code>__asm__</code>, etc., for header files.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Incomplete-Enums.html#Incomplete%20Enums">Incomplete Enums</a>: <code>enum foo;</code>, with details to follow.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Function-Names.html#Function%20Names">Function Names</a>: Printable strings which are the name of the current function.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Return-Address.html#Return%20Address">Return Address</a>: Getting the return or frame address of a function.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Unnamed-Fields.html#Unnamed%20Fields">Unnamed Fields</a>: Unnamed struct/union fields within structs/unions.</li>
+ <li><a href="http://gcc.gnu.org/onlinedocs/gcc/Attribute-Syntax.html#Attribute%20Syntax">Attribute Syntax</a>: Formal syntax for attributes.</li>
+ </ol></li>
+
+</ul>
+
+<p>If you run into GCC extensions which have not been included in any of these
+lists, please let us know (also including whether or not they work).</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="c++-fe">Known problems with the C++ front-end</a>
+</div>
+
+<div class="doc_text">
+
+<p>The C++ front-end is considered to be fully
+tested and works for a number of non-trivial programs, including LLVM
+itself, Qt, Mozilla, etc.</p>
+
+<ul>
+<li>llvm-gcc4 only has partial support for <a href="http://llvm.org/PR870">C++
+Exception Handling</a>, and it is not enabled by default.</li>
+
+<!-- NO EH Support!
+
+<li>Destructors for local objects are not always run when a <tt>longjmp</tt> is
+ performed. In particular, destructors for objects in the <tt>longjmp</tt>ing
+ function and in the <tt>setjmp</tt> receiver function may not be run.
+ Objects in intervening stack frames will be destroyed, however (which is
+ better than most compilers).</li>
+
+<li>The LLVM C++ front-end follows the <a
+ href="http://www.codesourcery.com/cxx-abi">Itanium C++ ABI</a>.
+ This document, which is not Itanium specific, specifies a standard for name
+ mangling, class layout, v-table layout, RTTI formats, and other C++
+ representation issues. Because we use this ABI, code generated by the LLVM
+ compilers should be binary compatible with machine code generated by other
+ Itanium ABI C++ compilers (such as G++, the Intel and HP compilers, etc).
+ <i>However</i>, the exception handling mechanism used by llvm-gcc3 is very
+ different from the model used in the Itanium ABI, so <b>exceptions will not
+ interact correctly</b>. </li>
+-->
+</ul>
+
+</div>
+
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="additionalinfo">Additional Information</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>A wide variety of additional information is available on the <a
+href="http://llvm.org">LLVM web page</a>, in particular in the <a
+href="http://llvm.org/docs/">documentation</a> section. The web page also
+contains versions of the API documentation which are up-to-date with the
+Subversion version of the source code.
+You can access versions of these documents specific to this release by going
+into the "<tt>llvm/docs/</tt>" directory in the LLVM tree.</p>
+
+<p>If you have any questions or comments about LLVM, please feel free to contact
+us via the <a href="http://llvm.org/docs/#maillist"> mailing
+lists</a>.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+ <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+
+</body>
+</html>
diff --git a/docs/SourceLevelDebugging.html b/docs/SourceLevelDebugging.html
new file mode 100644
index 0000000..73a45cb
--- /dev/null
+++ b/docs/SourceLevelDebugging.html
@@ -0,0 +1,1782 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>Source Level Debugging with LLVM</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<div class="doc_title">Source Level Debugging with LLVM</div>
+
+<table class="layout" style="width:100%">
+ <tr class="layout">
+ <td class="left">
+<ul>
+ <li><a href="#introduction">Introduction</a>
+ <ol>
+ <li><a href="#phil">Philosophy behind LLVM debugging information</a></li>
+ <li><a href="#consumers">Debug information consumers</a></li>
+ <li><a href="#debugopt">Debugging optimized code</a></li>
+ </ol></li>
+ <li><a href="#format">Debugging information format</a>
+ <ol>
+ <li><a href="#debug_info_descriptors">Debug information descriptors</a>
+ <ul>
+ <li><a href="#format_anchors">Anchor descriptors</a></li>
+ <li><a href="#format_compile_units">Compile unit descriptors</a></li>
+ <li><a href="#format_global_variables">Global variable descriptors</a></li>
+ <li><a href="#format_subprograms">Subprogram descriptors</a></li>
+ <li><a href="#format_blocks">Block descriptors</a></li>
+ <li><a href="#format_basic_type">Basic type descriptors</a></li>
+ <li><a href="#format_derived_type">Derived type descriptors</a></li>
+ <li><a href="#format_composite_type">Composite type descriptors</a></li>
+ <li><a href="#format_subrange">Subrange descriptors</a></li>
+ <li><a href="#format_enumeration">Enumerator descriptors</a></li>
+ <li><a href="#format_variables">Local variables</a></li>
+ </ul></li>
+ <li><a href="#format_common_intrinsics">Debugger intrinsic functions</a>
+ <ul>
+ <li><a href="#format_common_stoppoint">llvm.dbg.stoppoint</a></li>
+ <li><a href="#format_common_func_start">llvm.dbg.func.start</a></li>
+ <li><a href="#format_common_region_start">llvm.dbg.region.start</a></li>
+ <li><a href="#format_common_region_end">llvm.dbg.region.end</a></li>
+ <li><a href="#format_common_declare">llvm.dbg.declare</a></li>
+ </ul></li>
+ <li><a href="#format_common_stoppoints">Representing stopping points in the
+ source program</a></li>
+ </ol></li>
+ <li><a href="#ccxx_frontend">C/C++ front-end specific debug information</a>
+ <ol>
+ <li><a href="#ccxx_compile_units">C/C++ source file information</a></li>
+ <li><a href="#ccxx_global_variable">C/C++ global variable information</a></li>
+ <li><a href="#ccxx_subprogram">C/C++ function information</a></li>
+ <li><a href="#ccxx_basic_types">C/C++ basic types</a></li>
+ <li><a href="#ccxx_derived_types">C/C++ derived types</a></li>
+ <li><a href="#ccxx_composite_types">C/C++ struct/union types</a></li>
+ <li><a href="#ccxx_enumeration_types">C/C++ enumeration types</a></li>
+ </ol></li>
+</ul>
+</td>
+<td class="right">
+<img src="img/venusflytrap.jpg" alt="A leafy and green bug eater" width="247"
+height="369">
+</td>
+</tr></table>
+
+<div class="doc_author">
+ <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>
+ and <a href="mailto:jlaskey@mac.com">Jim Laskey</a></p>
+</div>
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="introduction">Introduction</a></div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>This document is the central repository for all information pertaining to
+debug information in LLVM. It describes the <a href="#format">actual format
+that the LLVM debug information</a> takes, which is useful for those interested
+in creating front-ends or dealing directly with the information. Further, this
+document provides specific examples of what the debug information for C/C++ looks like.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="phil">Philosophy behind LLVM debugging information</a>
+</div>
+
+<div class="doc_text">
+
+<p>The idea of the LLVM debugging information is to capture how the important
+pieces of the source-language's Abstract Syntax Tree map onto LLVM code.
+Several design aspects have shaped the solution that appears here. The
+important ones are:</p>
+
+<ul>
+<li>Debugging information should have very little impact on the rest of the
+compiler. No transformations, analyses, or code generators should need to be
+modified because of debugging information.</li>
+
+<li>LLVM optimizations should interact in <a href="#debugopt">well-defined and
+easily described ways</a> with the debugging information.</li>
+
+<li>Because LLVM is designed to support arbitrary programming languages,
+LLVM-to-LLVM tools should not need to know anything about the semantics of the
+source-level-language.</li>
+
+<li>Source-level languages are often <b>widely</b> different from one another.
+LLVM should not put any restrictions on the flavor of the source-language, and
+the debugging information should work with any language.</li>
+
+<li>With code generator support, it should be possible to use an LLVM compiler
+to compile a program to native machine code and standard debugging formats.
+This allows compatibility with traditional machine-code level debuggers, like
+GDB or DBX.</li>
+
+</ul>
+
+<p>The approach used by the LLVM implementation is to use a small set of <a
+href="#format_common_intrinsics">intrinsic functions</a> to define a mapping
+between LLVM program objects and the source-level objects. The description of
+the source-level program is maintained in LLVM global variables in an <a
+href="#ccxx_frontend">implementation-defined format</a> (the C/C++ front-end
+currently uses working draft 7 of the <a
+href="http://www.eagercon.com/dwarf/dwarf3std.htm">Dwarf 3 standard</a>).</p>
+
+<p>When a program is being debugged, a debugger interacts with the user and
+turns the stored debug information into source-language specific information.
+As such, a debugger must be aware of the source-language, and is thus tied to
+a specific language or family of languages.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="consumers">Debug information consumers</a>
+</div>
+
+<div class="doc_text">
+<p>The role of debug information is to provide meta information normally
+stripped away during the compilation process. This meta information provides an
+LLVM user with a relationship between generated code and the original program source
+code.</p>
+
+<p>Currently, debug information is consumed by the DwarfWriter to produce dwarf
+information used by the gdb debugger. Other targets could use the same
+information to produce stabs or other debug forms.</p>
+
+<p>It would also be reasonable to use debug information to feed profiling tools
+for analysis of generated code, or, tools for reconstructing the original source
+from generated code.</p>
+
+<p>TODO - expound a bit more.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="debugopt">Debugging optimized code</a>
+</div>
+
+<div class="doc_text">
+
+<p>An extremely high priority of LLVM debugging information is to make it
+interact well with optimizations and analysis. In particular, the LLVM debug
+information provides the following guarantees:</p>
+
+<ul>
+
+<li>LLVM debug information <b>always provides information to accurately read the
+source-level state of the program</b>, regardless of which LLVM optimizations
+have been run, and without any modification to the optimizations themselves.
+However, some optimizations may impact the ability to modify the current state
+of the program with a debugger, such as setting program variables, or calling
+functions that have been deleted.</li>
+
+<li>LLVM optimizations gracefully interact with debugging information. If they
+are not aware of debug information, they are automatically disabled as necessary
+in the cases that would invalidate the debug info. This retains the LLVM
+features making it easy to write new transformations.</li>
+
+<li>As desired, LLVM optimizations can be upgraded to be aware of the LLVM
+debugging information, allowing them to update the debugging information as they
+perform aggressive optimizations. This means that, with effort, the LLVM
+optimizers could optimize debug code just as well as non-debug code.</li>
+
+<li>LLVM debug information does not prevent many important optimizations from
+happening (for example inlining, basic block reordering/merging/cleanup, tail
+duplication, etc), further reducing the amount of the compiler that eventually
+is "aware" of debugging information.</li>
+
+<li>LLVM debug information is automatically optimized along with the rest of the
+program, using existing facilities. For example, duplicate information is
+automatically merged by the linker, and unused information is automatically
+removed.</li>
+
+</ul>
+
+<p>Basically, the debug information allows you to compile a program with
+"<tt>-O0 -g</tt>" and get full debug information, allowing you to arbitrarily
+modify the program as it executes from a debugger. Compiling a program with
+"<tt>-O3 -g</tt>" gives you full debug information that is always available and
+accurate for reading (e.g., you get accurate stack traces despite tail call
+elimination and inlining), but you might lose the ability to modify the program
+and call functions which were optimized out of the program, or inlined away
+completely.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="format">Debugging information format</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>LLVM debugging information has been carefully designed to make it possible
+for the optimizer to optimize the program and debugging information without
+necessarily having to know anything about debugging information. In particular,
+the global constant merging pass automatically eliminates duplicated debugging
+information (often caused by header files), the global dead code elimination
+pass automatically deletes debugging information for a function if it decides to
+delete the function, and the linker eliminates debug information when it merges
+<tt>linkonce</tt> functions.</p>
+
+<p>To do this, most of the debugging information (descriptors for types,
+variables, functions, source files, etc) is inserted by the language front-end
+in the form of LLVM global variables. These LLVM global variables are no
+different from any other global variables, except that they have a web of LLVM
+intrinsic functions that point to them. If the last references to a particular
+piece of debugging information are deleted (for example, by the
+<tt>-globaldce</tt> pass), the extraneous debug information will automatically
+become dead and be removed by the optimizer.</p>
+
+<p>Debug information is designed to be agnostic about the target debugger and
+debugging information representation (e.g. DWARF/Stabs/etc). It uses a generic
+machine debug information pass to decode the information that represents
+variables, types, functions, namespaces, etc: this allows for arbitrary
+source-language semantics and type-systems to be used, as long as there is a
+module written for the target debugger to interpret the information. In
+addition, debug global variables are declared in the <tt>"llvm.metadata"</tt>
+section. All values declared in this section are stripped away after target
+debug information is constructed and before the program object is emitted.</p>
+
+<p>To provide basic functionality, the LLVM debugger does have to make some
+assumptions about the source-level language being debugged, though it keeps
+these to a minimum. The only common features that the LLVM debugger assumes
+exist are <a href="#format_compile_units">source files</a>, and <a
+href="#format_global_variables">program objects</a>. These abstract objects are
+used by a debugger to form stack traces, show information about local
+variables, etc.</p>
+
+<p>This section of the documentation first describes the representation aspects
+common to any source-language. The <a href="#ccxx_frontend">next section</a>
+describes the data layout conventions used by the C and C++ front-ends.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="debug_info_descriptors">Debug information descriptors</a>
+</div>
+
+<div class="doc_text">
+<p>In consideration of the complexity and volume of debug information, LLVM
+provides a specification for well formed debug global variables. The constant
+value of each of these globals is one of a limited set of structures, known as
+debug descriptors.</p>
+
+<p>Consumers of LLVM debug information expect the descriptors for program
+objects to start in a canonical format, but the descriptors can include
+additional information appended at the end that is source-language specific. All
+LLVM debugging information is versioned, allowing backwards compatibility in the
+case that the core structures need to change in some way. Also, all debugging
+information objects start with a tag to indicate what type of object it is. The
+source-language is allowed to define its own objects, by using unreserved tag
+numbers. We recommend using tags in the range 0x1000 through 0x2000 (there is
+a defined enum DW_TAG_user_base = 0x1000.)</p>
+
+<p>The fields of debug descriptors used internally by LLVM (MachineModuleInfo)
+are restricted to only the simple data types <tt>int</tt>, <tt>uint</tt>,
+<tt>bool</tt>, <tt>float</tt>, <tt>double</tt>, <tt>sbyte*</tt> and <tt> { }*
+</tt>. References to arbitrary values are handled using a <tt> { }* </tt> and a
+cast to <tt> { }* </tt> expression; typically references to other field
+descriptors, arrays of descriptors or global variables.</p>
+
+<pre>
+ %llvm.dbg.object.type = type {
+ uint, ;; A tag
+ ...
+ }
+</pre>
+
+<p><a name="LLVMDebugVersion">The first field of a descriptor is always an
+<tt>uint</tt> containing a tag value identifying the content of the descriptor.
+The remaining fields are specific to the descriptor. The values of tags are
+loosely bound to the tag values of Dwarf information entries. However, that
+does not restrict the use of the information supplied to Dwarf targets. To
+facilitate versioning of debug information, the tag is augmented with the
+current debug version (LLVMDebugVersion = 4 &lt;&lt; 16 or 0x40000 or 262144.)</a></p>
+
+<p>The details of the various descriptors follow.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="format_anchors">Anchor descriptors</a>
+</div>
+
+<div class="doc_text">
+
+<pre>
+ %<a href="#format_anchors">llvm.dbg.anchor.type</a> = type {
+ uint, ;; Tag = 0 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
+ uint ;; Tag of descriptors grouped by the anchor
+ }
+</pre>
+
+<p>One important aspect of the LLVM debug representation is that it allows the
+LLVM debugger to efficiently index all of the global objects without having the
+to scan the program. To do this, all of the global objects use "anchor"
+descriptors with designated names. All of the global objects of a particular
+type (e.g., compile units) contain a pointer to the anchor. This pointer allows
+a debugger to use def-use chains to find all global objects of that type.</p>
+
+<p>The following names are recognized as anchors by LLVM:</p>
+
+<pre>
+ %<a href="#format_compile_units">llvm.dbg.compile_units</a> = linkonce constant %<a href="#format_anchors">llvm.dbg.anchor.type</a> { uint 0, uint 17 } ;; DW_TAG_compile_unit
+ %<a href="#format_global_variables">llvm.dbg.global_variables</a> = linkonce constant %<a href="#format_anchors">llvm.dbg.anchor.type</a> { uint 0, uint 52 } ;; DW_TAG_variable
+ %<a href="#format_subprograms">llvm.dbg.subprograms</a> = linkonce constant %<a href="#format_anchors">llvm.dbg.anchor.type</a> { uint 0, uint 46 } ;; DW_TAG_subprogram
+</pre>
+
+<p>Using anchors in this way (where the compile unit descriptor points to the
+anchors, as opposed to having a list of compile unit descriptors) allows for the
+standard dead global elimination and merging passes to automatically remove
+unused debugging information. If the globals were kept track of through lists,
+there would always be an object pointing to the descriptors, and thus they would
+never be deleted.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="format_compile_units">Compile unit descriptors</a>
+</div>
+
+<div class="doc_text">
+
+<pre>
+ %<a href="#format_compile_units">llvm.dbg.compile_unit.type</a> = type {
+ uint, ;; Tag = 17 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> (DW_TAG_compile_unit)
+ { }*, ;; Compile unit anchor = cast (%<a href="#format_anchors">llvm.dbg.anchor.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_units</a> to { }*)
+ uint, ;; Dwarf language identifier (ex. DW_LANG_C89)
+ sbyte*, ;; Source file name
+ sbyte*, ;; Source file directory (includes trailing slash)
+ sbyte* ;; Producer (ex. "4.0.1 LLVM (LLVM research group)")
+ }
+</pre>
+
+<p>These descriptors contain a source language ID for the file (we use the Dwarf
+3.0 ID numbers, such as <tt>DW_LANG_C89</tt>, <tt>DW_LANG_C_plus_plus</tt>,
+<tt>DW_LANG_Cobol74</tt>, etc), three strings describing the filename, working
+directory of the compiler, and an identifier string for the compiler that
+produced it.</p>
+
+<p> Compile unit descriptors provide the root context for objects declared in a
+specific source file. Global variables and top level functions would be defined
+using this context. Compile unit descriptors also provide context for source
+line correspondence.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="format_global_variables">Global variable descriptors</a>
+</div>
+
+<div class="doc_text">
+
+<pre>
+ %<a href="#format_global_variables">llvm.dbg.global_variable.type</a> = type {
+ uint, ;; Tag = 52 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> (DW_TAG_variable)
+ { }*, ;; Global variable anchor = cast (%<a href="#format_anchors">llvm.dbg.anchor.type</a>* %<a href="#format_global_variables">llvm.dbg.global_variables</a> to { }*),
+ { }*, ;; Reference to context descriptor
+ sbyte*, ;; Name
+ sbyte*, ;; Display name (fully qualified C++ name)
+ sbyte*, ;; MIPS linkage name (for C++)
+ { }*, ;; Reference to compile unit where defined
+ uint, ;; Line number where defined
+ { }*, ;; Reference to type descriptor
+ bool, ;; True if the global is local to compile unit (static)
+ bool, ;; True if the global is defined in the compile unit (not extern)
+ { }* ;; Reference to the global variable
+ }
+</pre>
+
+<p>These descriptors provide debug information about global variables. They
+provide details such as name, type and where the variable is defined.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="format_subprograms">Subprogram descriptors</a>
+</div>
+
+<div class="doc_text">
+
+<pre>
+ %<a href="#format_subprograms">llvm.dbg.subprogram.type</a> = type {
+ uint, ;; Tag = 46 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> (DW_TAG_subprogram)
+ { }*, ;; Subprogram anchor = cast (%<a href="#format_anchors">llvm.dbg.anchor.type</a>* %<a href="#format_subprograms">llvm.dbg.subprograms</a> to { }*),
+ { }*, ;; Reference to context descriptor
+ sbyte*, ;; Name
+ sbyte*, ;; Display name (fully qualified C++ name)
+ sbyte*, ;; MIPS linkage name (for C++)
+ { }*, ;; Reference to compile unit where defined
+ uint, ;; Line number where defined
+ { }*, ;; Reference to type descriptor
+ bool, ;; True if the global is local to compile unit (static)
+ bool ;; True if the global is defined in the compile unit (not extern)
+ }
+</pre>
+
+<p>These descriptors provide debug information about functions, methods and
+subprograms. They provide details such as name, return types and the source
+location where the subprogram is defined.</p>
+
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="format_blocks">Block descriptors</a>
+</div>
+
+<div class="doc_text">
+
+<pre>
+ %<a href="#format_blocks">llvm.dbg.block</a> = type {
+ uint, ;; Tag = 11 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> (DW_TAG_lexical_block)
+ { }* ;; Reference to context descriptor
+ }
+</pre>
+
+<p>These descriptors provide debug information about nested blocks within a
+subprogram. The array of member descriptors is used to define local variables
+and deeper nested blocks.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="format_basic_type">Basic type descriptors</a>
+</div>
+
+<div class="doc_text">
+
+<pre>
+ %<a href="#format_basic_type">llvm.dbg.basictype.type</a> = type {
+ uint, ;; Tag = 36 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> (DW_TAG_base_type)
+ { }*, ;; Reference to context (typically a compile unit)
+ sbyte*, ;; Name (may be "" for anonymous types)
+ { }*, ;; Reference to compile unit where defined (may be NULL)
+ uint, ;; Line number where defined (may be 0)
+ uint, ;; Size in bits
+ uint, ;; Alignment in bits
+ uint, ;; Offset in bits
+ uint ;; Dwarf type encoding
+ }
+</pre>
+
+<p>These descriptors define primitive types used in the code, for example int,
+bool and float. The context provides the scope of the type, which is usually the top
+level. Since basic types are not usually user defined the compile unit and line
+number can be left as NULL and 0. The size, alignment and offset are expressed
+in bits and can be 64 bit values. The alignment is used to round the offset
+when embedded in a <a href="#format_composite_type">composite type</a>
+(example to keep float doubles on 64 bit boundaries.) The offset is the bit
+offset if embedded in a <a href="#format_composite_type">composite
+type</a>.</p>
+
+<p>The type encoding provides the details of the type. The values are typically
+one of the following;</p>
+
+<pre>
+ DW_ATE_address = 1
+ DW_ATE_boolean = 2
+ DW_ATE_float = 4
+ DW_ATE_signed = 5
+ DW_ATE_signed_char = 6
+ DW_ATE_unsigned = 7
+ DW_ATE_unsigned_char = 8
+</pre>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="format_derived_type">Derived type descriptors</a>
+</div>
+
+<div class="doc_text">
+
+<pre>
+ %<a href="#format_derived_type">llvm.dbg.derivedtype.type</a> = type {
+ uint, ;; Tag (see below)
+ { }*, ;; Reference to context
+ sbyte*, ;; Name (may be "" for anonymous types)
+ { }*, ;; Reference to compile unit where defined (may be NULL)
+ uint, ;; Line number where defined (may be 0)
+ uint, ;; Size in bits
+ uint, ;; Alignment in bits
+ uint, ;; Offset in bits
+ { }* ;; Reference to type derived from
+ }
+</pre>
+
+<p>These descriptors are used to define types derived from other types. The
+value of the tag varies depending on the meaning. The following are possible
+tag values;</p>
+
+<pre>
+ DW_TAG_formal_parameter = 5
+ DW_TAG_member = 13
+ DW_TAG_pointer_type = 15
+ DW_TAG_reference_type = 16
+ DW_TAG_typedef = 22
+ DW_TAG_const_type = 38
+ DW_TAG_volatile_type = 53
+ DW_TAG_restrict_type = 55
+</pre>
+
+<p> <tt>DW_TAG_member</tt> is used to define a member of a <a
+href="#format_composite_type">composite type</a> or <a
+href="#format_subprograms">subprogram</a>. The type of the member is the <a
+href="#format_derived_type">derived type</a>. <tt>DW_TAG_formal_parameter</tt>
+is used to define a member which is a formal argument of a subprogram.</p>
+
+<p><tt>DW_TAG_typedef</tt> is used to
+provide a name for the derived type.</p>
+
+<p><tt>DW_TAG_pointer_type</tt>,
+<tt>DW_TAG_reference_type</tt>, <tt>DW_TAG_const_type</tt>,
+<tt>DW_TAG_volatile_type</tt> and <tt>DW_TAG_restrict_type</tt> are used to
+qualify the <a href="#format_derived_type">derived type</a>. </p>
+
+<p><a href="#format_derived_type">Derived type</a> location can be determined
+from the compile unit and line number. The size, alignment and offset are
+expressed in bits and can be 64 bit values. The alignment is used to round the
+offset when embedded in a <a href="#format_composite_type">composite type</a>
+(example to keep float doubles on 64 bit boundaries.) The offset is the bit
+offset if embedded in a <a href="#format_composite_type">composite
+type</a>.</p>
+
+<p>Note that the <tt>void *</tt> type is expressed as a
+<tt>llvm.dbg.derivedtype.type</tt> with tag of <tt>DW_TAG_pointer_type</tt> and
+NULL derived type.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="format_composite_type">Composite type descriptors</a>
+</div>
+
+<div class="doc_text">
+
+<pre>
+ %<a href="#format_composite_type">llvm.dbg.compositetype.type</a> = type {
+ uint, ;; Tag (see below)
+ { }*, ;; Reference to context
+ sbyte*, ;; Name (may be "" for anonymous types)
+ { }*, ;; Reference to compile unit where defined (may be NULL)
+ uint, ;; Line number where defined (may be 0)
+ uint, ;; Size in bits
+ uint, ;; Alignment in bits
+ uint, ;; Offset in bits
+ { }* ;; Reference to array of member descriptors
+ }
+</pre>
+
+<p>These descriptors are used to define types that are composed of 0 or more
+elements. The value of the tag varies depending on the meaning. The following
+are possible tag values;</p>
+
+<pre>
+ DW_TAG_array_type = 1
+ DW_TAG_enumeration_type = 4
+ DW_TAG_structure_type = 19
+ DW_TAG_union_type = 23
+ DW_TAG_vector_type = 259
+ DW_TAG_subroutine_type = 21
+ DW_TAG_inheritance = 28
+</pre>
+
+<p>The vector flag indicates that an array type is a native packed vector.</p>
+
+<p>The members of array types (tag = <tt>DW_TAG_array_type</tt>) or vector types
+(tag = <tt>DW_TAG_vector_type</tt>) are <a href="#format_subrange">subrange
+descriptors</a>, each representing the range of subscripts at that level of
+indexing.</p>
+
+<p>The members of enumeration types (tag = <tt>DW_TAG_enumeration_type</tt>) are
+<a href="#format_enumeration">enumerator descriptors</a>, each representing the
+definition of an enumeration value
+for the set.</p>
+
+<p>The members of structure (tag = <tt>DW_TAG_structure_type</tt>) or union (tag
+= <tt>DW_TAG_union_type</tt>) types are any one of the <a
+href="#format_basic_type">basic</a>, <a href="#format_derived_type">derived</a>
+or <a href="#format_composite_type">composite</a> type descriptors, each
+representing a field member of the structure or union.</p>
+
+<p>For C++ classes (tag = <tt>DW_TAG_structure_type</tt>), member descriptors
+provide information about base classes, static members and member functions. If
+a member is a <a href="#format_derived_type">derived type descriptor</a> and has
+a tag of <tt>DW_TAG_inheritance</tt>, then the type represents a base class. If
+the member is a <a href="#format_global_variables">global variable
+descriptor</a> then it represents a static member. And, if the member is a <a
+href="#format_subprograms">subprogram descriptor</a> then it represents a member
+function. For static members and member functions, <tt>getName()</tt> returns
+the member's linkage name or the C++ mangled name. <tt>getDisplayName()</tt>
+returns the simplified version of the name.</p>
+
+<p>The first member of subroutine (tag = <tt>DW_TAG_subroutine_type</tt>)
+type elements is the return type for the subroutine. The remaining
+elements are the formal arguments to the subroutine.</p>
+
+<p><a href="#format_composite_type">Composite type</a> location can be
+determined from the compile unit and line number. The size, alignment and
+offset are expressed in bits and can be 64 bit values. The alignment is used to
+round the offset when embedded in a <a href="#format_composite_type">composite
+type</a> (as an example, to keep float doubles on 64 bit boundaries.) The offset
+is the bit offset if embedded in a <a href="#format_composite_type">composite
+type</a>.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="format_subrange">Subrange descriptors</a>
+</div>
+
+<div class="doc_text">
+
+<pre>
+ %<a href="#format_subrange">llvm.dbg.subrange.type</a> = type {
+ uint, ;; Tag = 33 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> (DW_TAG_subrange_type)
+ uint, ;; Low value
+ uint ;; High value
+ }
+</pre>
+
+<p>These descriptors are used to define ranges of array subscripts for an array
+<a href="#format_composite_type">composite type</a>. The low value defines the
+lower bounds typically zero for C/C++. The high value is the upper bounds.
+Values are 64 bit. High - low + 1 is the size of the array. If
+low == high the array will be unbounded.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="format_enumeration">Enumerator descriptors</a>
+</div>
+
+<div class="doc_text">
+
+<pre>
+ %<a href="#format_enumeration">llvm.dbg.enumerator.type</a> = type {
+ uint, ;; Tag = 40 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> (DW_TAG_enumerator)
+ sbyte*, ;; Name
+ uint ;; Value
+ }
+</pre>
+
+<p>These descriptors are used to define members of an enumeration <a
+href="#format_composite_type">composite type</a>, it associates the name to the
+value.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="format_variables">Local variables</a>
+</div>
+
+<div class="doc_text">
+<pre>
+ %<a href="#format_variables">llvm.dbg.variable.type</a> = type {
+ uint, ;; Tag (see below)
+ { }*, ;; Context
+ sbyte*, ;; Name
+ { }*, ;; Reference to compile unit where defined
+ uint, ;; Line number where defined
+ { }* ;; Type descriptor
+ }
+</pre>
+
+<p>These descriptors are used to define variables local to a subprogram. The
+value of the tag depends on the usage of the variable;</p>
+
+<pre>
+ DW_TAG_auto_variable = 256
+ DW_TAG_arg_variable = 257
+ DW_TAG_return_variable = 258
+</pre>
+
+<p>An auto variable is any variable declared in the body of the function. An
+argument variable is any variable that appears as a formal argument to the
+function. A return variable is used to track the result of a function and has
+no source correspondent.</p>
+
+<p>The context is either the subprogram or block where the variable is defined.
+Name is the source variable name. Compile unit and line indicate where the
+variable was defined. Type descriptor defines the declared type of the
+variable.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="format_common_intrinsics">Debugger intrinsic functions</a>
+</div>
+
+<div class="doc_text">
+
+<p>LLVM uses several intrinsic functions (name prefixed with "llvm.dbg") to
+provide debug information at various points in generated code.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="format_common_stoppoint">llvm.dbg.stoppoint</a>
+</div>
+
+<div class="doc_text">
+<pre>
+ void %<a href="#format_common_stoppoint">llvm.dbg.stoppoint</a>( uint, uint, { }* )
+</pre>
+
+<p>This intrinsic is used to provide correspondence between the source file and
+the generated code. The first argument is the line number (base 1), second
+argument is the column number (0 if unknown) and the third argument the source
+<tt>%<a href="#format_compile_units">llvm.dbg.compile_unit</a>*</tt> cast to a
+<tt>{ }*</tt>. Code following a call to this intrinsic will have been defined
+in close proximity of the line, column and file. This information holds until
+the next call to <tt>%<a
+href="#format_common_stoppoint">llvm.dbg.stoppoint</a></tt>.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="format_common_func_start">llvm.dbg.func.start</a>
+</div>
+
+<div class="doc_text">
+<pre>
+ void %<a href="#format_common_func_start">llvm.dbg.func.start</a>( { }* )
+</pre>
+
+<p>This intrinsic is used to link the debug information in <tt>%<a
+href="#format_subprograms">llvm.dbg.subprogram</a></tt> to the function. It also
+defines the beginning of the function's declarative region (scope.) The
+intrinsic should be called early in the function after all the alloca
+instructions. It should be paired off with a closing <tt>%<a
+href="#format_common_region_end">llvm.dbg.region.end</a></tt>. The function's
+single argument is the <tt>%<a
+href="#format_subprograms">llvm.dbg.subprogram.type</a></tt>.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="format_common_region_start">llvm.dbg.region.start</a>
+</div>
+
+<div class="doc_text">
+<pre>
+ void %<a href="#format_common_region_start">llvm.dbg.region.start</a>( { }* )
+</pre>
+
+<p>This intrinsic is used to define the beginning of a declarative scope (ex.
+block) for local language elements. It should be paired off with a closing
+<tt>%<a href="#format_common_region_end">llvm.dbg.region.end</a></tt>. The
+function's single argument is the <tt>%<a
+href="#format_blocks">llvm.dbg.block</a></tt> which is starting.</p>
+
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="format_common_region_end">llvm.dbg.region.end</a>
+</div>
+
+<div class="doc_text">
+<pre>
+ void %<a href="#format_common_region_end">llvm.dbg.region.end</a>( { }* )
+</pre>
+
+<p>This intrinsic is used to define the end of a declarative scope (ex. block)
+for local language elements. It should be paired off with an opening <tt>%<a
+href="#format_common_region_start">llvm.dbg.region.start</a></tt> or <tt>%<a
+href="#format_common_func_start">llvm.dbg.func.start</a></tt>. The function's
+single argument is either the <tt>%<a
+href="#format_blocks">llvm.dbg.block</a></tt> or the <tt>%<a
+href="#format_subprograms">llvm.dbg.subprogram.type</a></tt> which is
+ending.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="format_common_declare">llvm.dbg.declare</a>
+</div>
+
+<div class="doc_text">
+<pre>
+ void %<a href="#format_common_declare">llvm.dbg.declare</a>( { } *, { }* )
+</pre>
+
+<p>This intrinsic provides information about a local element (ex. variable.) The
+first argument is the alloca for the variable, cast to a <tt>{ }*</tt>. The
+second argument is the <tt>%<a
+href="#format_variables">llvm.dbg.variable</a></tt> containing the description
+of the variable, also cast to a <tt>{ }*</tt>.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="format_common_stoppoints">
+ Representing stopping points in the source program
+ </a>
+</div>
+
+<div class="doc_text">
+
+<p>LLVM debugger "stop points" are a key part of the debugging representation
+that allows LLVM to maintain simple semantics for <a
+href="#debugopt">debugging optimized code</a>. The basic idea is that the
+front-end inserts calls to the <a
+href="#format_common_stoppoint">%<tt>llvm.dbg.stoppoint</tt></a> intrinsic
+function at every point in the program where a debugger should be able to
+inspect the program (these correspond to places a debugger stops when you
+"<tt>step</tt>" through it). The front-end can choose to place these as
+fine-grained as it would like (for example, before every subexpression
+evaluated), but it is recommended to only put them after every source statement
+that includes executable code.</p>
+
+<p>Using calls to this intrinsic function to demark legal points for the
+debugger to inspect the program automatically disables any optimizations that
+could potentially confuse debugging information. To non-debug-information-aware
+transformations, these calls simply look like calls to an external function,
+which they must assume to do anything (including reading or writing to any part
+of reachable memory). On the other hand, it does not impact many optimizations,
+such as code motion of non-trapping instructions, nor does it impact
+optimization of subexpressions, code duplication transformations, or basic-block
+reordering transformations.</p>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="format_common_lifetime">Object lifetimes and scoping</a>
+</div>
+
+<div class="doc_text">
+<p>In many languages, the local variables in functions can have their lifetime
+or scope limited to a subset of a function. In the C family of languages, for
+example, variables are only live (readable and writable) within the source block
+that they are defined in. In functional languages, values are only readable
+after they have been defined. Though this is a very obvious concept, it is also
+non-trivial to model in LLVM, because it has no notion of scoping in this sense,
+and does not want to be tied to a language's scoping rules.</p>
+
+<p>In order to handle this, the LLVM debug format uses the notion of "regions"
+of a function, delineated by calls to intrinsic functions. These intrinsic
+functions define new regions of the program and indicate when the region
+lifetime expires. Consider the following C fragment, for example:</p>
+
+<pre>
+1. void foo() {
+2. int X = ...;
+3. int Y = ...;
+4. {
+5. int Z = ...;
+6. ...
+7. }
+8. ...
+9. }
+</pre>
+
+<p>Compiled to LLVM, this function would be represented like this:</p>
+
+<pre>
+void %foo() {
+entry:
+ %X = alloca int
+ %Y = alloca int
+ %Z = alloca int
+
+ ...
+
+ call void %<a href="#format_common_func_start">llvm.dbg.func.start</a>( %<a href="#format_subprograms">llvm.dbg.subprogram.type</a>* %llvm.dbg.subprogram )
+
+ call void %<a href="#format_common_stoppoint">llvm.dbg.stoppoint</a>( uint 2, uint 2, %<a href="#format_compile_units">llvm.dbg.compile_unit</a>* %llvm.dbg.compile_unit )
+
+ call void %<a href="#format_common_declare">llvm.dbg.declare</a>({}* %X, ...)
+ call void %<a href="#format_common_declare">llvm.dbg.declare</a>({}* %Y, ...)
+
+ <i>;; Evaluate expression on line 2, assigning to X.</i>
+
+ call void %<a href="#format_common_stoppoint">llvm.dbg.stoppoint</a>( uint 3, uint 2, %<a href="#format_compile_units">llvm.dbg.compile_unit</a>* %llvm.dbg.compile_unit )
+
+ <i>;; Evaluate expression on line 3, assigning to Y.</i>
+
+ call void %<a href="#format_common_region_start">llvm.dbg.region.start</a>()
+ call void %<a href="#format_common_stoppoint">llvm.dbg.stoppoint</a>( uint 5, uint 4, %<a href="#format_compile_units">llvm.dbg.compile_unit</a>* %llvm.dbg.compile_unit )
+ call void %<a href="#format_common_declare">llvm.dbg.declare</a>({}* %Z, ...)
+
+ <i>;; Evaluate expression on line 5, assigning to Z.</i>
+
+ call void %<a href="#format_common_stoppoint">llvm.dbg.stoppoint</a>( uint 7, uint 2, %<a href="#format_compile_units">llvm.dbg.compile_unit</a>* %llvm.dbg.compile_unit )
+ call void %<a href="#format_common_region_end">llvm.dbg.region.end</a>()
+
+ call void %<a href="#format_common_stoppoint">llvm.dbg.stoppoint</a>( uint 9, uint 2, %<a href="#format_compile_units">llvm.dbg.compile_unit</a>* %llvm.dbg.compile_unit )
+
+ call void %<a href="#format_common_region_end">llvm.dbg.region.end</a>()
+
+ ret void
+}
+</pre>
+
+<p>This example illustrates a few important details about the LLVM debugging
+information. In particular, it shows how the various intrinsics are applied
+together to allow a debugger to analyze the relationship between statements,
+variable definitions, and the code used to implement the function.</p>
+
+<p>The first intrinsic <tt>%<a
+href="#format_common_func_start">llvm.dbg.func.start</a></tt> provides
+a link with the <a href="#format_subprograms">subprogram descriptor</a>
+containing the details of this function. This call also defines the beginning
+of the function region, bounded by the <tt>%<a
+href="#format_common_region_end">llvm.dbg.region.end</a></tt> at the end of
+the function. This region is used to bracket the lifetime of variables declared
+within. For a function, this outer region defines a new stack frame whose
+lifetime ends when the region is ended.</p>
+
+<p>It is possible to define inner regions for short term variables by using the
+%<a href="#format_common_region_start"><tt>llvm.dbg.region.start</tt></a> and <a
+href="#format_common_region_end"><tt>%llvm.dbg.region.end</tt></a> to bound a
+region. The inner region in this example would be for the block containing the
+declaration of Z.</p>
+
+<p>Using regions to represent the boundaries of source-level functions allows
+LLVM interprocedural optimizations to arbitrarily modify LLVM functions without
+having to worry about breaking mapping information between the LLVM code and the
+source-level program. In particular, the inliner requires no modification
+to support inlining with debugging information: there is no explicit correlation
+drawn between LLVM functions and their source-level counterparts (note however,
+that if the inliner inlines all instances of a non-strong-linkage function into
+its caller that it will not be possible for the user to manually invoke the
+inlined function from a debugger).</p>
+
+<p>Once the function has been defined, the <a
+href="#format_common_stoppoint"><tt>stopping point</tt></a> corresponding to
+line #2 (column #2) of the function is encountered. At this point in the
+function, <b>no</b> local variables are live. As lines 2 and 3 of the example
+are executed, their variable definitions are introduced into the program using
+%<a href="#format_common_declare"><tt>llvm.dbg.declare</tt></a>, without the
+need to specify a new region. These variables do not require new regions to be
+introduced because they go out of scope at the same point in the program: line
+9.</p>
+
+<p>In contrast, the <tt>Z</tt> variable goes out of scope at a different time,
+on line 7. For this reason, it is defined within the inner region, which kills
+the availability of <tt>Z</tt> before the code for line 8 is executed. In this
+way, regions can support arbitrary source-language scoping rules, as long as
+they can only be nested (i.e., one scope cannot partially overlap with a part of
+another scope).</p>
+
+<p>It is worth noting that this scoping mechanism is used to control scoping of
+all declarations, not just variable declarations. For example, the scope of a
+C++ using declaration is controlled with this and could change how name lookup is
+performed.</p>
+
+</div>
+
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="ccxx_frontend">C/C++ front-end specific debug information</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>The C and C++ front-ends represent information about the program in a format
+that is effectively identical to <a
+href="http://www.eagercon.com/dwarf/dwarf3std.htm">Dwarf 3.0</a> in terms of
+information content. This allows code generators to trivially support native
+debuggers by generating standard dwarf information, and contains enough
+information for non-dwarf targets to translate it as needed.</p>
+
+<p>This section describes the forms used to represent C and C++ programs. Other
+languages could pattern themselves after this (which itself is tuned to
+representing programs in the same way that Dwarf 3 does), or they could choose
+to provide completely different forms if they don't fit into the Dwarf model.
+As support for debugging information gets added to the various LLVM
+source-language front-ends, the information used should be documented here.</p>
+
+<p>The following sections provide examples of various C/C++ constructs and the
+debug information that would best describe those constructs.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="ccxx_compile_units">C/C++ source file information</a>
+</div>
+
+<div class="doc_text">
+
+<p>Given the source files "MySource.cpp" and "MyHeader.h" located in the
+directory "/Users/mine/sources", the following code;</p>
+
+<pre>
+#include "MyHeader.h"
+
+int main(int argc, char *argv[]) {
+ return 0;
+}
+</pre>
+
+<p>a C/C++ front-end would generate the following descriptors;</p>
+
+<pre>
+...
+;;
+;; Define types used. In this case we need one for compile unit anchors and one
+;; for compile units.
+;;
+%<a href="#format_anchors">llvm.dbg.anchor.type</a> = type { uint, uint }
+%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a> = type { uint, { }*, uint, uint, sbyte*, sbyte*, sbyte* }
+...
+;;
+;; Define the anchor for compile units. Note that the second field of the
+;; anchor is 17, which is the same as the tag for compile units
+;; (17 = DW_TAG_compile_unit.)
+;;
+%<a href="#format_compile_units">llvm.dbg.compile_units</a> = linkonce constant %<a href="#format_anchors">llvm.dbg.anchor.type</a> { uint 0, uint 17 }, section "llvm.metadata"
+
+;;
+;; Define the compile unit for the source file "/Users/mine/sources/MySource.cpp".
+;;
+%<a href="#format_compile_units">llvm.dbg.compile_unit1</a> = internal constant %<a href="#format_compile_units">llvm.dbg.compile_unit.type</a> {
+ uint add(uint 17, uint 262144),
+ { }* cast (%<a href="#format_anchors">llvm.dbg.anchor.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_units</a> to { }*),
+ uint 1,
+ uint 1,
+ sbyte* getelementptr ([13 x sbyte]* %str1, int 0, int 0),
+ sbyte* getelementptr ([21 x sbyte]* %str2, int 0, int 0),
+ sbyte* getelementptr ([33 x sbyte]* %str3, int 0, int 0) }, section "llvm.metadata"
+
+;;
+;; Define the compile unit for the header file "/Users/mine/sources/MyHeader.h".
+;;
+%<a href="#format_compile_units">llvm.dbg.compile_unit2</a> = internal constant %<a href="#format_compile_units">llvm.dbg.compile_unit.type</a> {
+ uint add(uint 17, uint 262144),
+ { }* cast (%<a href="#format_anchors">llvm.dbg.anchor.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_units</a> to { }*),
+ uint 1,
+ uint 1,
+ sbyte* getelementptr ([11 x sbyte]* %str4, int 0, int 0),
+ sbyte* getelementptr ([21 x sbyte]* %str2, int 0, int 0),
+ sbyte* getelementptr ([33 x sbyte]* %str3, int 0, int 0) }, section "llvm.metadata"
+
+;;
+;; Define each of the strings used in the compile units.
+;;
+%str1 = internal constant [13 x sbyte] c"MySource.cpp\00", section "llvm.metadata";
+%str2 = internal constant [21 x sbyte] c"/Users/mine/sources/\00", section "llvm.metadata";
+%str3 = internal constant [33 x sbyte] c"4.0.1 LLVM (LLVM research group)\00", section "llvm.metadata";
+%str4 = internal constant [11 x sbyte] c"MyHeader.h\00", section "llvm.metadata";
+...
+</pre>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="ccxx_global_variable">C/C++ global variable information</a>
+</div>
+
+<div class="doc_text">
+
+<p>Given an integer global variable declared as follows;</p>
+
+<pre>
+int MyGlobal = 100;
+</pre>
+
+<p>a C/C++ front-end would generate the following descriptors;</p>
+
+<pre>
+;;
+;; Define types used. One for global variable anchors, one for the global
+;; variable descriptor, one for the global's basic type and one for the global's
+;; compile unit.
+;;
+%<a href="#format_anchors">llvm.dbg.anchor.type</a> = type { uint, uint }
+%<a href="#format_global_variables">llvm.dbg.global_variable.type</a> = type { uint, { }*, { }*, sbyte*, { }*, uint, { }*, bool, bool, { }*, uint }
+%<a href="#format_basic_type">llvm.dbg.basictype.type</a> = type { uint, { }*, sbyte*, { }*, int, uint, uint, uint, uint }
+%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a> = ...
+...
+;;
+;; Define the global itself.
+;;
+%MyGlobal = global int 100
+...
+;;
+;; Define the anchor for global variables. Note that the second field of the
+;; anchor is 52, which is the same as the tag for global variables
+;; (52 = DW_TAG_variable.)
+;;
+%<a href="#format_global_variables">llvm.dbg.global_variables</a> = linkonce constant %<a href="#format_anchors">llvm.dbg.anchor.type</a> { uint 0, uint 52 }, section "llvm.metadata"
+
+;;
+;; Define the global variable descriptor. Note the reference to the global
+;; variable anchor and the global variable itself.
+;;
+%<a href="#format_global_variables">llvm.dbg.global_variable</a> = internal constant %<a href="#format_global_variables">llvm.dbg.global_variable.type</a> {
+ uint add(uint 52, uint 262144),
+ { }* cast (%<a href="#format_anchors">llvm.dbg.anchor.type</a>* %<a href="#format_global_variables">llvm.dbg.global_variables</a> to { }*),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ sbyte* getelementptr ([9 x sbyte]* %str1, int 0, int 0),
+ sbyte* getelementptr ([1 x sbyte]* %str2, int 0, int 0),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ uint 1,
+ { }* cast (%<a href="#format_basic_type">llvm.dbg.basictype.type</a>* %<a href="#format_basic_type">llvm.dbg.basictype</a> to { }*),
+ bool false,
+ bool true,
+ { }* cast (int* %MyGlobal to { }*) }, section "llvm.metadata"
+
+;;
+;; Define the basic type of 32 bit signed integer. Note that since int is an
+;; intrinsic type the source file is NULL and line 0.
+;;
+%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
+ uint add(uint 36, uint 262144),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ sbyte* getelementptr ([4 x sbyte]* %str3, int 0, int 0),
+ { }* null,
+ int 0,
+ uint 32,
+ uint 32,
+ uint 0,
+ uint 5 }, section "llvm.metadata"
+
+;;
+;; Define the names of the global variable and basic type.
+;;
+%str1 = internal constant [9 x sbyte] c"MyGlobal\00", section "llvm.metadata"
+%str2 = internal constant [1 x sbyte] c"\00", section "llvm.metadata"
+%str3 = internal constant [4 x sbyte] c"int\00", section "llvm.metadata"
+</pre>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="ccxx_subprogram">C/C++ function information</a>
+</div>
+
+<div class="doc_text">
+
+<p>Given a function declared as follows;</p>
+
+<pre>
+int main(int argc, char *argv[]) {
+ return 0;
+}
+</pre>
+
+<p>a C/C++ front-end would generate the following descriptors;</p>
+
+<pre>
+;;
+;; Define types used. One for subprogram anchors, one for the subprogram
+;; descriptor and one for the subprogram's compile unit.
+;;
+;;
+%<a href="#format_subprograms">llvm.dbg.subprogram.type</a> = type { uint, { }*, { }*, sbyte*, { }*, bool, bool }
+%<a href="#format_anchors">llvm.dbg.anchor.type</a> = type { uint, uint }
+%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a> = ...
+
+;;
+;; Define the anchor for subprograms. Note that the second field of the
+;; anchor is 46, which is the same as the tag for subprograms
+;; (46 = DW_TAG_subprogram.)
+;;
+%<a href="#format_subprograms">llvm.dbg.subprograms</a> = linkonce constant %<a href="#format_anchors">llvm.dbg.anchor.type</a> { uint 0, uint 46 }, section "llvm.metadata"
+
+;;
+;; Define the descriptor for the subprogram. TODO - more details.
+;;
+%<a href="#format_subprograms">llvm.dbg.subprogram</a> = internal constant %<a href="#format_subprograms">llvm.dbg.subprogram.type</a> {
+ uint add(uint 46, uint 262144),
+ { }* cast (%<a href="#format_anchors">llvm.dbg.anchor.type</a>* %<a href="#format_subprograms">llvm.dbg.subprograms</a> to { }*),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ sbyte* getelementptr ([5 x sbyte]* %str1, int 0, int 0),
+ sbyte* getelementptr ([1 x sbyte]* %str2, int 0, int 0),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ uint 1,
+ { }* null,
+ bool false,
+ bool true }, section "llvm.metadata"
+
+;;
+;; Define the name of the subprogram.
+;;
+%str1 = internal constant [5 x sbyte] c"main\00", section "llvm.metadata"
+%str2 = internal constant [1 x sbyte] c"\00", section "llvm.metadata"
+
+;;
+;; Define the subprogram itself.
+;;
+int %main(int %argc, sbyte** %argv) {
+...
+}
+</pre>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="ccxx_basic_types">C/C++ basic types</a>
+</div>
+
+<div class="doc_text">
+
+<p>The following are the basic type descriptors for C/C++ core types;</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="ccxx_basic_type_bool">bool</a>
+</div>
+
+<div class="doc_text">
+
+<pre>
+%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
+ uint add(uint 36, uint 262144),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ sbyte* getelementptr ([5 x sbyte]* %str1, int 0, int 0),
+ { }* null,
+ int 0,
+ uint 32,
+ uint 32,
+ uint 0,
+ uint 2 }, section "llvm.metadata"
+%str1 = internal constant [5 x sbyte] c"bool\00", section "llvm.metadata"
+</pre>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="ccxx_basic_char">char</a>
+</div>
+
+<div class="doc_text">
+
+<pre>
+%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
+ uint add(uint 36, uint 262144),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ sbyte* getelementptr ([5 x sbyte]* %str1, int 0, int 0),
+ { }* null,
+ int 0,
+ uint 8,
+ uint 8,
+ uint 0,
+ uint 6 }, section "llvm.metadata"
+%str1 = internal constant [5 x sbyte] c"char\00", section "llvm.metadata"
+</pre>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="ccxx_basic_unsigned_char">unsigned char</a>
+</div>
+
+<div class="doc_text">
+
+<pre>
+%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
+ uint add(uint 36, uint 262144),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ sbyte* getelementptr ([14 x sbyte]* %str1, int 0, int 0),
+ { }* null,
+ int 0,
+ uint 8,
+ uint 8,
+ uint 0,
+ uint 8 }, section "llvm.metadata"
+%str1 = internal constant [14 x sbyte] c"unsigned char\00", section "llvm.metadata"
+</pre>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="ccxx_basic_short">short</a>
+</div>
+
+<div class="doc_text">
+
+<pre>
+%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
+ uint add(uint 36, uint 262144),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ sbyte* getelementptr ([10 x sbyte]* %str1, int 0, int 0),
+ { }* null,
+ int 0,
+ uint 16,
+ uint 16,
+ uint 0,
+ uint 5 }, section "llvm.metadata"
+%str1 = internal constant [10 x sbyte] c"short int\00", section "llvm.metadata"
+</pre>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="ccxx_basic_unsigned_short">unsigned short</a>
+</div>
+
+<div class="doc_text">
+
+<pre>
+%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
+ uint add(uint 36, uint 262144),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ sbyte* getelementptr ([19 x sbyte]* %str1, int 0, int 0),
+ { }* null,
+ int 0,
+ uint 16,
+ uint 16,
+ uint 0,
+ uint 7 }, section "llvm.metadata"
+%str1 = internal constant [19 x sbyte] c"short unsigned int\00", section "llvm.metadata"
+</pre>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="ccxx_basic_int">int</a>
+</div>
+
+<div class="doc_text">
+
+<pre>
+%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
+ uint add(uint 36, uint 262144),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ sbyte* getelementptr ([4 x sbyte]* %str1, int 0, int 0),
+ { }* null,
+ int 0,
+ uint 32,
+ uint 32,
+ uint 0,
+ uint 5 }, section "llvm.metadata"
+%str1 = internal constant [4 x sbyte] c"int\00", section "llvm.metadata"
+</pre>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="ccxx_basic_unsigned_int">unsigned int</a>
+</div>
+
+<div class="doc_text">
+
+<pre>
+%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
+ uint add(uint 36, uint 262144),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ sbyte* getelementptr ([13 x sbyte]* %str1, int 0, int 0),
+ { }* null,
+ int 0,
+ uint 32,
+ uint 32,
+ uint 0,
+ uint 7 }, section "llvm.metadata"
+%str1 = internal constant [13 x sbyte] c"unsigned int\00", section "llvm.metadata"
+</pre>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="ccxx_basic_long_long">long long</a>
+</div>
+
+<div class="doc_text">
+
+<pre>
+%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
+ uint add(uint 36, uint 262144),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ sbyte* getelementptr ([14 x sbyte]* %str1, int 0, int 0),
+ { }* null,
+ int 0,
+ uint 64,
+ uint 64,
+ uint 0,
+ uint 5 }, section "llvm.metadata"
+%str1 = internal constant [14 x sbyte] c"long long int\00", section "llvm.metadata"
+</pre>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="ccxx_basic_unsigned_long_long">unsigned long long</a>
+</div>
+
+<div class="doc_text">
+
+<pre>
+%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
+ uint add(uint 36, uint 262144),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ sbyte* getelementptr ([23 x sbyte]* %str1, int 0, int 0),
+ { }* null,
+ int 0,
+ uint 64,
+ uint 64,
+ uint 0,
+ uint 7 }, section "llvm.metadata"
+%str1 = internal constant [23 x sbyte] c"long long unsigned int\00", section "llvm.metadata"
+</pre>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="ccxx_basic_float">float</a>
+</div>
+
+<div class="doc_text">
+
+<pre>
+%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
+ uint add(uint 36, uint 262144),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ sbyte* getelementptr ([6 x sbyte]* %str1, int 0, int 0),
+ { }* null,
+ int 0,
+ uint 32,
+ uint 32,
+ uint 0,
+ uint 4 }, section "llvm.metadata"
+%str1 = internal constant [6 x sbyte] c"float\00", section "llvm.metadata"
+</pre>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsubsection">
+ <a name="ccxx_basic_double">double</a>
+</div>
+
+<div class="doc_text">
+
+<pre>
+%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
+ uint add(uint 36, uint 262144),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ sbyte* getelementptr ([7 x sbyte]* %str1, int 0, int 0),
+ { }* null,
+ int 0,
+ uint 64,
+ uint 64,
+ uint 0,
+ uint 4 }, section "llvm.metadata"
+%str1 = internal constant [7 x sbyte] c"double\00", section "llvm.metadata"
+</pre>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="ccxx_derived_types">C/C++ derived types</a>
+</div>
+
+<div class="doc_text">
+
+<p>Given the following as an example of C/C++ derived type;</p>
+
+<pre>
+typedef const int *IntPtr;
+</pre>
+
+<p>a C/C++ front-end would generate the following descriptors;</p>
+
+<pre>
+;;
+;; Define the typedef "IntPtr".
+;;
+%<a href="#format_derived_type">llvm.dbg.derivedtype1</a> = internal constant %<a href="#format_derived_type">llvm.dbg.derivedtype.type</a> {
+ uint add(uint 22, uint 262144),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ sbyte* getelementptr ([7 x sbyte]* %str1, int 0, int 0),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ int 1,
+ uint 0,
+ uint 0,
+ uint 0,
+ { }* cast (%<a href="#format_derived_type">llvm.dbg.derivedtype.type</a>* %<a href="#format_derived_type">llvm.dbg.derivedtype2</a> to { }*) }, section "llvm.metadata"
+%str1 = internal constant [7 x sbyte] c"IntPtr\00", section "llvm.metadata"
+
+;;
+;; Define the pointer type.
+;;
+%<a href="#format_derived_type">llvm.dbg.derivedtype2</a> = internal constant %<a href="#format_derived_type">llvm.dbg.derivedtype.type</a> {
+ uint add(uint 15, uint 262144),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ sbyte* null,
+ { }* null,
+ int 0,
+ uint 32,
+ uint 32,
+ uint 0,
+ { }* cast (%<a href="#format_derived_type">llvm.dbg.derivedtype.type</a>* %<a href="#format_derived_type">llvm.dbg.derivedtype3</a> to { }*) }, section "llvm.metadata"
+
+;;
+;; Define the const type.
+;;
+%<a href="#format_derived_type">llvm.dbg.derivedtype3</a> = internal constant %<a href="#format_derived_type">llvm.dbg.derivedtype.type</a> {
+ uint add(uint 38, uint 262144),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ sbyte* null,
+ { }* null,
+ int 0,
+ uint 0,
+ uint 0,
+ uint 0,
+ { }* cast (%<a href="#format_basic_type">llvm.dbg.basictype.type</a>* %<a href="#format_basic_type">llvm.dbg.basictype1</a> to { }*) }, section "llvm.metadata"
+
+;;
+;; Define the int type.
+;;
+%<a href="#format_basic_type">llvm.dbg.basictype1</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
+ uint add(uint 36, uint 262144),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ sbyte* getelementptr ([4 x sbyte]* %str2, int 0, int 0),
+ { }* null,
+ int 0,
+ uint 32,
+ uint 32,
+ uint 0,
+ uint 5 }, section "llvm.metadata"
+%str2 = internal constant [4 x sbyte] c"int\00", section "llvm.metadata"
+</pre>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="ccxx_composite_types">C/C++ struct/union types</a>
+</div>
+
+<div class="doc_text">
+
+<p>Given the following as an example of C/C++ struct type;</p>
+
+<pre>
+struct Color {
+ unsigned Red;
+ unsigned Green;
+ unsigned Blue;
+};
+</pre>
+
+<p>a C/C++ front-end would generate the following descriptors;</p>
+
+<pre>
+;;
+;; Define basic type for unsigned int.
+;;
+%<a href="#format_basic_type">llvm.dbg.basictype</a> = internal constant %<a href="#format_basic_type">llvm.dbg.basictype.type</a> {
+ uint add(uint 36, uint 262144),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ sbyte* getelementptr ([13 x sbyte]* %str1, int 0, int 0),
+ { }* null,
+ int 0,
+ uint 32,
+ uint 32,
+ uint 0,
+ uint 7 }, section "llvm.metadata"
+%str1 = internal constant [13 x sbyte] c"unsigned int\00", section "llvm.metadata"
+
+;;
+;; Define composite type for struct Color.
+;;
+%<a href="#format_composite_type">llvm.dbg.compositetype</a> = internal constant %<a href="#format_composite_type">llvm.dbg.compositetype.type</a> {
+ uint add(uint 19, uint 262144),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ sbyte* getelementptr ([6 x sbyte]* %str2, int 0, int 0),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ int 1,
+ uint 96,
+ uint 32,
+ uint 0,
+ { }* null,
+ { }* cast ([3 x { }*]* %llvm.dbg.array to { }*) }, section "llvm.metadata"
+%str2 = internal constant [6 x sbyte] c"Color\00", section "llvm.metadata"
+
+;;
+;; Define the Red field.
+;;
+%<a href="#format_derived_type">llvm.dbg.derivedtype1</a> = internal constant %<a href="#format_derived_type">llvm.dbg.derivedtype.type</a> {
+ uint add(uint 13, uint 262144),
+ { }* null,
+ sbyte* getelementptr ([4 x sbyte]* %str3, int 0, int 0),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ int 2,
+ uint 32,
+ uint 32,
+ uint 0,
+ { }* cast (%<a href="#format_basic_type">llvm.dbg.basictype.type</a>* %<a href="#format_basic_type">llvm.dbg.basictype</a> to { }*) }, section "llvm.metadata"
+%str3 = internal constant [4 x sbyte] c"Red\00", section "llvm.metadata"
+
+;;
+;; Define the Green field.
+;;
+%<a href="#format_derived_type">llvm.dbg.derivedtype2</a> = internal constant %<a href="#format_derived_type">llvm.dbg.derivedtype.type</a> {
+ uint add(uint 13, uint 262144),
+ { }* null,
+ sbyte* getelementptr ([6 x sbyte]* %str4, int 0, int 0),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ int 3,
+ uint 32,
+ uint 32,
+ uint 32,
+ { }* cast (%<a href="#format_basic_type">llvm.dbg.basictype.type</a>* %<a href="#format_basic_type">llvm.dbg.basictype</a> to { }*) }, section "llvm.metadata"
+%str4 = internal constant [6 x sbyte] c"Green\00", section "llvm.metadata"
+
+;;
+;; Define the Blue field.
+;;
+%<a href="#format_derived_type">llvm.dbg.derivedtype3</a> = internal constant %<a href="#format_derived_type">llvm.dbg.derivedtype.type</a> {
+ uint add(uint 13, uint 262144),
+ { }* null,
+ sbyte* getelementptr ([5 x sbyte]* %str5, int 0, int 0),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ int 4,
+ uint 32,
+ uint 32,
+ uint 64,
+ { }* cast (%<a href="#format_basic_type">llvm.dbg.basictype.type</a>* %<a href="#format_basic_type">llvm.dbg.basictype</a> to { }*) }, section "llvm.metadata"
+%str5 = internal constant [5 x sbyte] c"Blue\00", section "llvm.metadata"
+
+;;
+;; Define the array of fields used by the composite type Color.
+;;
+%llvm.dbg.array = internal constant [3 x { }*] [
+ { }* cast (%<a href="#format_derived_type">llvm.dbg.derivedtype.type</a>* %<a href="#format_derived_type">llvm.dbg.derivedtype1</a> to { }*),
+ { }* cast (%<a href="#format_derived_type">llvm.dbg.derivedtype.type</a>* %<a href="#format_derived_type">llvm.dbg.derivedtype2</a> to { }*),
+ { }* cast (%<a href="#format_derived_type">llvm.dbg.derivedtype.type</a>* %<a href="#format_derived_type">llvm.dbg.derivedtype3</a> to { }*) ], section "llvm.metadata"
+</pre>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="ccxx_enumeration_types">C/C++ enumeration types</a>
+</div>
+
+<div class="doc_text">
+
+<p>Given the following as an example of C/C++ enumeration type;</p>
+
+<pre>
+enum Trees {
+ Spruce = 100,
+ Oak = 200,
+ Maple = 300
+};
+</pre>
+
+<p>a C/C++ front-end would generate the following descriptors;</p>
+
+<pre>
+;;
+;; Define composite type for enum Trees
+;;
+%<a href="#format_composite_type">llvm.dbg.compositetype</a> = internal constant %<a href="#format_composite_type">llvm.dbg.compositetype.type</a> {
+ uint add(uint 4, uint 262144),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ sbyte* getelementptr ([6 x sbyte]* %str1, int 0, int 0),
+ { }* cast (%<a href="#format_compile_units">llvm.dbg.compile_unit.type</a>* %<a href="#format_compile_units">llvm.dbg.compile_unit</a> to { }*),
+ int 1,
+ uint 32,
+ uint 32,
+ uint 0,
+ { }* null,
+ { }* cast ([3 x { }*]* %llvm.dbg.array to { }*) }, section "llvm.metadata"
+%str1 = internal constant [6 x sbyte] c"Trees\00", section "llvm.metadata"
+
+;;
+;; Define Spruce enumerator.
+;;
+%<a href="#format_enumeration">llvm.dbg.enumerator1</a> = internal constant %<a href="#format_enumeration">llvm.dbg.enumerator.type</a> {
+ uint add(uint 40, uint 262144),
+ sbyte* getelementptr ([7 x sbyte]* %str2, int 0, int 0),
+ int 100 }, section "llvm.metadata"
+%str2 = internal constant [7 x sbyte] c"Spruce\00", section "llvm.metadata"
+
+;;
+;; Define Oak enumerator.
+;;
+%<a href="#format_enumeration">llvm.dbg.enumerator2</a> = internal constant %<a href="#format_enumeration">llvm.dbg.enumerator.type</a> {
+ uint add(uint 40, uint 262144),
+ sbyte* getelementptr ([4 x sbyte]* %str3, int 0, int 0),
+ int 200 }, section "llvm.metadata"
+%str3 = internal constant [4 x sbyte] c"Oak\00", section "llvm.metadata"
+
+;;
+;; Define Maple enumerator.
+;;
+%<a href="#format_enumeration">llvm.dbg.enumerator3</a> = internal constant %<a href="#format_enumeration">llvm.dbg.enumerator.type</a> {
+ uint add(uint 40, uint 262144),
+ sbyte* getelementptr ([6 x sbyte]* %str4, int 0, int 0),
+ int 300 }, section "llvm.metadata"
+%str4 = internal constant [6 x sbyte] c"Maple\00", section "llvm.metadata"
+
+;;
+;; Define the array of enumerators used by composite type Trees.
+;;
+%llvm.dbg.array = internal constant [3 x { }*] [
+ { }* cast (%<a href="#format_enumeration">llvm.dbg.enumerator.type</a>* %<a href="#format_enumeration">llvm.dbg.enumerator1</a> to { }*),
+ { }* cast (%<a href="#format_enumeration">llvm.dbg.enumerator.type</a>* %<a href="#format_enumeration">llvm.dbg.enumerator2</a> to { }*),
+ { }* cast (%<a href="#format_enumeration">llvm.dbg.enumerator.type</a>* %<a href="#format_enumeration">llvm.dbg.enumerator3</a> to { }*) ], section "llvm.metadata"
+</pre>
+
+</div>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+ <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
+ <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+
+</body>
+</html>
diff --git a/docs/Stacker.html b/docs/Stacker.html
new file mode 100644
index 0000000..102571a
--- /dev/null
+++ b/docs/Stacker.html
@@ -0,0 +1,1418 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>Stacker: An Example Of Using LLVM</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<div class="doc_title">Stacker: An Example Of Using LLVM</div>
+
+<ol>
+ <li><a href="#abstract">Abstract</a></li>
+ <li><a href="#introduction">Introduction</a></li>
+ <li><a href="#lessons">Lessons I Learned About LLVM</a>
+ <ol>
+ <li><a href="#value">Everything's a Value!</a></li>
+ <li><a href="#terminate">Terminate Those Blocks!</a></li>
+ <li><a href="#blocks">Concrete Blocks</a></li>
+ <li><a href="#push_back">push_back Is Your Friend</a></li>
+ <li><a href="#gep">The Wily GetElementPtrInst</a></li>
+ <li><a href="#linkage">Getting Linkage Types Right</a></li>
+ <li><a href="#constants">Constants Are Easier Than That!</a></li>
+ </ol></li>
+ <li><a href="#lexicon">The Stacker Lexicon</a>
+ <ol>
+ <li><a href="#stack">The Stack</a></li>
+ <li><a href="#punctuation">Punctuation</a></li>
+ <li><a href="#comments">Comments</a></li>
+ <li><a href="#literals">Literals</a></li>
+ <li><a href="#words">Words</a></li>
+ <li><a href="#style">Standard Style</a></li>
+ <li><a href="#builtins">Built-Ins</a></li>
+ </ol></li>
+ <li><a href="#example">Prime: A Complete Example</a></li>
+ <li><a href="#internal">Internal Code Details</a>
+ <ol>
+ <li><a href="#directory">The Directory Structure </a></li>
+ <li><a href="#lexer">The Lexer</a></li>
+ <li><a href="#parser">The Parser</a></li>
+ <li><a href="#compiler">The Compiler</a></li>
+ <li><a href="#runtime">The Runtime</a></li>
+ <li><a href="#driver">Compiler Driver</a></li>
+ <li><a href="#tests">Test Programs</a></li>
+ <li><a href="#exercise">Exercise</a></li>
+ <li><a href="#todo">Things Remaining To Be Done</a></li>
+ </ol></li>
+</ol>
+
+<div class="doc_author">
+ <p>Written by <a href="mailto:rspencer@x10sys.com">Reid Spencer</a></p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_section"><a name="abstract">Abstract</a></div>
+<div class="doc_text">
+<p>This document is another way to learn about LLVM. Unlike the
+<a href="LangRef.html">LLVM Reference Manual</a> or
+<a href="ProgrammersManual.html">LLVM Programmer's Manual</a>, here we learn
+about LLVM through the experience of creating a simple programming language
+named Stacker. Stacker was invented specifically as a demonstration of
+LLVM. The emphasis in this document is not on describing the
+intricacies of LLVM itself but on how to use it to build your own
+compiler system.</p>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_section"> <a name="introduction">Introduction</a> </div>
+<div class="doc_text">
+<p>Amongst other things, LLVM is a platform for compiler writers.
+Because of its exceptionally clean and small IR (intermediate
+representation), compiler writing with LLVM is much easier than with
+other systems. As proof, I wrote the entire compiler (language definition,
+lexer, parser, code generator, etc.) in about <em>four days</em>!
+That's important to know because it shows how quickly you can get a new
+language running when using LLVM. Furthermore, this was the <em>first</em>
+language the author ever created using LLVM. The learning curve is
+included in that four days.</p>
+<p>The language described here, Stacker, is Forth-like. Programs
+are simple collections of word definitions, and the only thing definitions
+can do is manipulate a stack or generate I/O. Stacker is not a "real"
+programming language; it's very simple. Although it is computationally
+complete, you wouldn't use it for your next big project. However,
+the fact that it is complete, it's simple, and it <em>doesn't</em> have
+a C-like syntax make it useful for demonstration purposes. It shows
+that LLVM could be applied to a wide variety of languages.</p>
+<p>The basic notion behind Stacker is very simple. There's a stack of
+integers (or character pointers) that the program manipulates. Pretty
+much the only thing the program can do is manipulate the stack and do
+some limited I/O operations. The language provides you with several
+built-in words that manipulate the stack in interesting ways. To get
+your feet wet, here's how you write the traditional "Hello, World"
+program in Stacker:</p>
+<p><code>: hello_world "Hello, World!" &gt;s DROP CR ;<br>
+: MAIN hello_world ;<br></code></p>
+<p>This has two "definitions" (Stacker manipulates words, not
+functions and words have definitions): <code>MAIN</code> and <code>
+hello_world</code>. The <code>MAIN</code> definition is standard; it
+tells Stacker where to start. Here, <code>MAIN</code> is defined to
+simply invoke the word <code>hello_world</code>. The
+<code>hello_world</code> definition tells stacker to push the
+<code>"Hello, World!"</code> string on to the stack, print it out
+(<code>&gt;s</code>), pop it off the stack (<code>DROP</code>), and
+finally print a carriage return (<code>CR</code>). Although
+<code>hello_world</code> uses the stack, its net effect is null. Well
+written Stacker definitions have that characteristic. </p>
+<p>Exercise for the reader: how could you make this a one line program?</p>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_section"><a name="lessons"></a>Lessons I Learned About LLVM</div>
+<div class="doc_text">
+<p>Stacker was written for two purposes: </p>
+<ol>
+ <li>to get the author over the learning curve, and</li>
+ <li>to provide a simple example of how to write a compiler using LLVM.</li>
+</ol>
+<p>During the development of Stacker, many lessons about LLVM were
+learned. Those lessons are described in the following subsections.</p>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="value"></a>Everything's a Value!</div>
+<div class="doc_text">
+<p>Although I knew that LLVM uses a Static Single Assignment (SSA) format,
+it wasn't obvious to me how prevalent this idea was in LLVM until I really
+started using it. Reading the <a href="ProgrammersManual.html">
+Programmer's Manual</a> and <a href="LangRef.html">Language Reference</a>,
+I noted that most of the important LLVM IR (Intermediate Representation) C++
+classes were derived from the Value class. The full power of that simple
+design only became fully understood once I started constructing executable
+expressions for Stacker.</p>
+
+<p>This really makes your programming go faster. Think about compiling code
+for the following C/C++ expression: <code>(a|b)*((x+1)/(y+1))</code>. Assuming
+the values are on the stack in the order a, b, x, y, this could be
+expressed in stacker as: <code>1 + SWAP 1 + / ROT2 OR *</code>.
+You could write a function using LLVM that computes this expression like
+this: </p>
+
+<div class="doc_code"><pre>
+Value*
+expression(BasicBlock* bb, Value* a, Value* b, Value* x, Value* y )
+{
+ ConstantInt* one = ConstantInt::get(Type::IntTy, 1);
+ BinaryOperator* or1 = BinaryOperator::createOr(a, b, "", bb);
+ BinaryOperator* add1 = BinaryOperator::createAdd(x, one, "", bb);
+ BinaryOperator* add2 = BinaryOperator::createAdd(y, one, "", bb);
+ BinaryOperator* div1 = BinaryOperator::createDiv(add1, add2, "", bb);
+ BinaryOperator* mult1 = BinaryOperator::createMul(or1, div1, "", bb);
+ return mult1;
+}
+</pre></div>
+
+<p>"Okay, big deal," you say? It is a big deal. Here's why. Note that I didn't
+have to tell this function which kinds of Values are being passed in. They could be
+<code>Instruction</code>s, <code>Constant</code>s, <code>GlobalVariable</code>s, or
+any of the other subclasses of <code>Value</code> that LLVM supports.
+Furthermore, if you specify Values that are incorrect for this sequence of
+operations, LLVM will either notice right away (at compilation time) or the LLVM
+Verifier will pick up the inconsistency when the compiler runs. In either case
+LLVM prevents you from making a type error that gets passed through to the
+generated program. This <em>really</em> helps you write a compiler that
+always generates correct code!</p>
+<p>The second point is that we don't have to worry about branching, registers,
+stack variables, saving partial results, etc. The instructions we create
+<em>are</em> the values we use. Note that all that was created in the above
+code is a Constant value and five operators. Each of the instructions <em>is</em>
+the resulting value of that instruction. This saves a lot of time.</p>
+<p>The lesson is this: <em>SSA form is very powerful: there is no difference
+between a value and the instruction that created it.</em> This is fully
+enforced by the LLVM IR. Use it to your best advantage.</p>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="terminate"></a>Terminate Those Blocks!</div>
+<div class="doc_text">
+<p>I had to learn about terminating blocks the hard way: using the debugger
+to figure out what the LLVM verifier was trying to tell me and begging for
+help on the LLVMdev mailing list. I hope you avoid this experience.</p>
+<p>Emblazon this rule in your mind:</p>
+<ul>
+ <li><em>All</em> <code>BasicBlock</code>s in your compiler <b>must</b> be
+ terminated with a terminating instruction (branch, return, etc.).
+ </li>
+</ul>
+<p>Terminating instructions are a semantic requirement of the LLVM IR. There
+is no facility for implicitly chaining together blocks placed into a function
+in the order they occur. Indeed, in the general case, blocks will not be
+added to the function in the order of execution because of the recursive
+way compilers are written.</p>
+<p>Furthermore, if you don't terminate your blocks, your compiler code will
+compile just fine. You won't find out about the problem until you're running
+the compiler and the module you just created fails on the LLVM Verifier.</p>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="blocks"></a>Concrete Blocks</div>
+<div class="doc_text">
+<p>After a little initial fumbling around, I quickly caught on to how blocks
+should be constructed. In general, here's what I learned:
+<ol>
+ <li><em>Create your blocks early.</em> While writing your compiler, you
+ will encounter several situations where you know a priori that you will
+ need several blocks. For example, if-then-else, switch, while, and for
+ statements in C/C++ all need multiple blocks for expression in LLVM.
+ The rule is, create them early.</li>
+ <li><em>Terminate your blocks early.</em> This just reduces the chances
+ that you forget to terminate your blocks which is required (go
+ <a href="#terminate">here</a> for more).</li>
+ <li><em>Use getTerminator() for instruction insertion.</em> I noticed early on
+ that many of the constructors for the Instruction classes take an optional
+ <code>insert_before</code> argument. At first, I thought this was a mistake
+ because clearly the normal mode of inserting instructions would be one at
+ a time <em>after</em> some other instruction, not <em>before</em>. However,
+ if you hold on to your terminating instruction (or use the handy dandy
+ <code>getTerminator()</code> method on a <code>BasicBlock</code>), it can
+ always be used as the <code>insert_before</code> argument to your instruction
+ constructors. This causes the instruction to automatically be inserted in
+ the RightPlace&trade;, just before the terminating instruction. The
+ nice thing about this design is that you can pass blocks around and insert
+ new instructions into them without ever knowing what instructions came
+ before. This makes for some very clean compiler design.</li>
+</ol>
+<p>The foregoing is such an important principle, it's worth making an idiom:</p>
+<pre>
+BasicBlock* bb = new BasicBlock();
+bb->getInstList().push_back( new Branch( ... ) );
+new Instruction(..., bb->getTerminator() );
+</pre>
+<p>To make this clear, consider the typical if-then-else statement
+(see StackerCompiler::handle_if() method). We can set this up
+in a single function using LLVM in the following way: </p>
+<pre>
+using namespace llvm;
+BasicBlock*
+MyCompiler::handle_if( BasicBlock* bb, ICmpInst* condition )
+{
+ // Create the blocks to contain code in the structure of if/then/else
+ BasicBlock* then_bb = new BasicBlock();
+ BasicBlock* else_bb = new BasicBlock();
+ BasicBlock* exit_bb = new BasicBlock();
+
+ // Insert the branch instruction for the "if"
+ bb->getInstList().push_back( new BranchInst( then_bb, else_bb, condition ) );
+
+ // Set up the terminating instructions
+ then_bb->getInstList().push_back( new BranchInst( exit_bb ) );
+ else_bb->getInstList().push_back( new BranchInst( exit_bb ) );
+
+ // Fill in the then part .. details excised for brevity
+ this->fill_in( then_bb );
+
+ // Fill in the else part .. details excised for brevity
+ this->fill_in( else_bb );
+
+ // Return a block to the caller that can be filled in with the code
+ // that follows the if/then/else construct.
+ return exit_bb;
+}
+</pre>
+<p>Presumably in the foregoing, the calls to the "fill_in" method would add
+the instructions for the "then" and "else" parts. They would use the third part
+of the idiom almost exclusively (inserting new instructions before the
+terminator). Furthermore, they could even recurse back to <code>handle_if</code>
+should they encounter another if/then/else statement, and it will just work.</p>
+<p>Note how cleanly this all works out. In particular, note the push_back calls on
+the <code>BasicBlock</code>'s instruction list. These are lists of type
+<code>Instruction</code> (which is also of type <code>Value</code>). To create
+the "if" branch we merely instantiate a <code>BranchInst</code> that takes as
+arguments the blocks to branch to and the condition to branch on. The
+<code>BasicBlock</code> objects act like branch labels! This new
+<code>BranchInst</code> terminates the <code>BasicBlock</code> provided
+as an argument. To give the caller a way to keep inserting after calling
+<code>handle_if</code>, we create an <code>exit_bb</code> block which is
+returned
+to the caller. Note that the <code>exit_bb</code> block is used as the
+terminator for both the <code>then_bb</code> and the <code>else_bb</code>
+blocks. This guarantees that no matter what else <code>handle_if</code>
+or <code>fill_in</code> does, they end up at the <code>exit_bb</code> block.
+</p>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="push_back"></a>push_back Is Your Friend</div>
+<div class="doc_text">
+<p>
+One of the first things I noticed is the frequent use of the "push_back"
+method on the various lists. This is so common that it is worth mentioning.
+The "push_back" inserts a value into an STL list, vector, array, etc. at the
+end. The method might have also been named "insert_tail" or "append".
+Although I've used STL quite frequently, my use of push_back wasn't very
+high in other programs. In LLVM, you'll use it all the time.
+</p>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="gep"></a>The Wily GetElementPtrInst</div>
+<div class="doc_text">
+<p>
+It took a little getting used to and several rounds of postings to the LLVM
+mailing list to wrap my head around this instruction correctly. Even though I had
+read the Language Reference and Programmer's Manual a couple times each, I still
+missed a few <em>very</em> key points:
+</p>
+<ul>
+<li>GetElementPtrInst gives you back a Value for the last thing indexed.</li>
+<li>All global variables in LLVM are <em>pointers</em>.</li>
+<li>Pointers must also be dereferenced with the GetElementPtrInst
+instruction.</li>
+</ul>
+<p>This means that when you look up an element in the global variable (assuming
+it's a struct or array), you <em>must</em> dereference the pointer first! For many
+things, this leads to the idiom:
+</p>
+<pre>
+std::vector&lt;Value*&gt; index_vector;
+index_vector.push_back( ConstantInt::get( Type::LongTy, 0 ) );
+// ... push other indices ...
+GetElementPtrInst* gep = new GetElementPtrInst( ptr, index_vector );
+</pre>
+<p>For example, suppose we have a global variable whose type is [24 x int]. The
+variable itself represents a <em>pointer</em> to that array. To subscript the
+array, we need two indices, not just one. The first index (0) dereferences the
+pointer. The second index subscripts the array. If you're a "C" programmer, this
+will run against your grain because you'll naturally think of the global array
+variable and the address of its first element as the same. That tripped me up
+for a while until I realized that they really do differ .. by <em>type</em>.
+Remember that LLVM is strongly typed. Everything has a type.
+The "type" of the global variable is [24 x int]*. That is, it's
+a pointer to an array of 24 ints. When you dereference that global variable with
+a single (0) index, you now have a "[24 x int]" type. Although
+the pointer value of the dereferenced global and the address of the zero'th element
+in the array will be the same, they differ in their type. The zero'th element has
+type "int" while the pointer value has type "[24 x int]".</p>
+<p>Get this one aspect of LLVM right in your head, and you'll save yourself
+a lot of compiler writing headaches down the road.</p>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="linkage"></a>Getting Linkage Types Right</div>
+<div class="doc_text">
+<p>Linkage types in LLVM can be a little confusing, especially if your compiler
+writing mind has affixed firm concepts to particular words like "weak",
+"external", "global", "linkonce", etc. LLVM does <em>not</em> use the precise
+definitions of, say, ELF or GCC, even though they share common terms. To be fair,
+the concepts are related and similar but not precisely the same. This can lead
+you to think you know what a linkage type represents but in fact it is slightly
+different. I recommend you read the
+<a href="LangRef.html#linkage"> Language Reference on this topic</a> very
+carefully. Then, read it again.</p>
+<p>Here are some handy tips that I discovered along the way:</p>
+<ul>
+ <li><em>Uninitialized means external.</em> That is, the symbol is declared in the current
+ module and can be used by that module, but it is not defined by that module.</li>
+ <li><em>Setting an initializer changes a global's linkage type.</em> Setting an
+ initializer changes a global's linkage type from whatever it was to a normal,
+ defined global (not external). You'll need to call the setLinkage() method to
+ reset it if you specify the initializer after the GlobalValue has been constructed.
+ This is important for LinkOnce and Weak linkage types.</li>
+ <li><em>Appending linkage can keep track of things.</em> Appending linkage can
+ be used to keep track of compilation information at runtime. It could be used,
+ for example, to build a full table of all the C++ virtual tables or hold the
+ C++ RTTI data, or whatever. Appending linkage can only be applied to arrays.
+ All arrays with the same name in each module are concatenated together at link
+ time.</li>
+</ul>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="constants"></a>Constants Are Easier Than That!</div>
+<div class="doc_text">
+<p>
+Constants in LLVM took a little getting used to until I discovered a few utility
+functions in the LLVM IR that make things easier. Here's what I learned: </p>
+<ul>
+ <li>Constants are Values like anything else and can be operands of instructions</li>
+ <li>Integer constants, frequently needed, can be created using the static "get"
+ methods of the ConstantInt class. The nice thing about these is that you can
+ "get" any kind of integer quickly.</li>
+ <li>There's a special method on Constant class which allows you to get the null
+ constant for <em>any</em> type. This is really handy for initializing large
+ arrays or structures, etc.</li>
+</ul>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_section"> <a name="lexicon">The Stacker Lexicon</a></div>
+<div class="doc_text"><p>This section describes the Stacker language</p></div>
+<div class="doc_subsection"><a name="stack"></a>The Stack</div>
+<div class="doc_text">
+<p>Stacker definitions define what they do to the global stack. Before
+proceeding, a few words about the stack are in order. The stack is simply
+a global array of 32-bit integers or pointers. A global index keeps track
+of the location of the top of the stack. All of this is hidden from the
+programmer, but it needs to be noted because it is the foundation of the
+conceptual programming model for Stacker. When you write a definition,
+you are, essentially, saying how you want that definition to manipulate
+the global stack.</p>
+<p>Manipulating the stack can be quite hazardous. There is no distinction
+given and no checking for the various types of values that can be placed
+on the stack. Automatic coercion between types is performed. In many
+cases, this is useful. For example, a boolean value placed on the stack
+can be interpreted as an integer with good results. However, using a
+word that interprets that boolean value as a pointer to a string to
+print out will almost always yield a crash. Stacker simply leaves it
+to the programmer to get it right without any interference or hindering
+on interpretation of the stack values. You've been warned. :) </p>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection"> <a name="punctuation"></a>Punctuation</div>
+<div class="doc_text">
+<p>Punctuation in Stacker is very simple. The colon and semi-colon
+characters are used to introduce and terminate a definition
+(respectively). Except for <em>FORWARD</em> declarations, definitions
+are all you can specify in Stacker. Definitions are read left to right.
+Immediately after the colon comes the name of the word being defined.
+The remaining words in the definition specify what the word does. The definition
+is terminated by a semi-colon.</p>
+<p>So, your typical definition will have the form:</p>
+<pre><code>: name ... ;</code></pre>
+<p>The <code>name</code> is up to you but it must start with a letter and contain
+only letters, numbers, and underscore. Names are case sensitive and must not be
+the same as the name of a built-in word. The <code>...</code> is replaced by
+the stack manipulating words that you wish to define <code>name</code> as.</p>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="comments"></a>Comments</div>
+<div class="doc_text">
+ <p>Stacker supports two types of comments. A hash mark (#) starts a comment
+ that extends to the end of the line. It is identical to the kind of comments
+ commonly used in shell scripts. A pair of parentheses also surrounds a comment.
+ In both cases, the content of the comment is ignored by the Stacker compiler. The
+ following does nothing in Stacker.
+ </p>
+<pre><code>
+# This is a comment to end of line
+( This is an enclosed comment )
+</code></pre>
+<p>See the <a href="#example">example</a> program to see comments in use in
+a real program.</p>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="literals"></a>Literals</div>
+<div class="doc_text">
+ <p>There are three kinds of literal values in Stacker: Integers, Strings,
+ and Booleans. In each case, the stack operation is to simply push the
+ value on to the stack. So, for example:<br/>
+ <code> 42 " is the answer." TRUE </code><br/>
+ will push three values on to the stack: the integer 42, the
+ string " is the answer.", and the boolean TRUE.</p>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="words"></a>Words</div>
+<div class="doc_text">
+<p>Each definition in Stacker is composed of a set of words. Words are
+read and executed in order from left to right. There is very little
+checking in Stacker to make sure you're doing the right thing with
+the stack. It is assumed that the programmer knows how the stack
+transformation he applies will affect the program.</p>
+<p>Words in a definition come in two flavors: built-in and programmer
+defined. Simply mentioning the name of a previously defined or declared
+programmer-defined word causes that word's stack actions to be invoked. It
+is somewhat like a function call in other languages. The built-in
+words have various effects, described <a href="#builtins">below</a>.</p>
+<p>Sometimes you need to call a word before it is defined. For this, you can
+use the <code>FORWARD</code> declaration. It looks like this:</p>
+<p><code>FORWARD name ;</code></p>
+<p>This simply states to Stacker that "name" is the name of a definition
+that is defined elsewhere. Generally it means the definition can be found
+"forward" in the file. But, it doesn't have to be in the current compilation
+unit. Anything declared with <code>FORWARD</code> is an external symbol for
+linking.</p>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="style"></a>Standard Style</div>
+<div class="doc_text">
+<p>TODO</p>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="builtins"></a>Built In Words</div>
+<div class="doc_text">
+<p>The built-in words of the Stacker language are put in several groups
+depending on what they do. The groups are as follows:</p>
+<ol>
+ <li><em>Logical</em>: These words provide the logical operations for
+ comparing stack operands.<br/>The words are: &lt; &gt; &lt;= &gt;=
+ = &lt;&gt; true false.</li>
+ <li><em>Bitwise</em>: These words perform bitwise computations on
+ their operands. <br/> The words are: &lt;&lt; &gt;&gt; OR XOR AND NOT</li>
+ <li><em>Arithmetic</em>: These words perform arithmetic computations on
+ their operands. <br/> The words are: ABS NEG + - * / MOD */ ++ -- MIN MAX</li>
+ <li><em>Stack</em>: These words manipulate the stack directly by moving
+ its elements around.<br/> The words are: DROP DROP2 NIP NIP2 DUP DUP2
+ SWAP SWAP2 OVER OVER2 ROT ROT2 RROT RROT2 TUCK TUCK2 PICK SELECT ROLL</li>
+ <li><em>Memory</em>: These words allocate, free, and manipulate memory
+ areas outside the stack.<br/>The words are: MALLOC FREE GET PUT</li>
+ <li><em>Control</em>: These words alter the normal left to right flow
+ of execution.<br/>The words are: IF ELSE ENDIF WHILE END RETURN EXIT RECURSE</li>
+ <li><em>I/O</em>: These words perform output on the standard output
+ and input on the standard input. No other I/O is possible in Stacker.
+ <br/>The words are: SPACE TAB CR &gt;s &gt;d &gt;c &lt;s &lt;d &lt;c.</li>
+</ol>
+<p>While you may be familiar with many of these operations from other
+programming languages, a careful review of their semantics is important
+for correct programming in Stacker. Of most importance is the effect
+that each of these built-in words has on the global stack. The effect is
+not always intuitive. To better describe the effects, we'll borrow from Forth the idiom of
+describing the effect on the stack with:</p>
+<p><code> BEFORE -- AFTER </code></p>
+<p>That is, to the left of the -- is a representation of the stack before
+the operation. To the right of the -- is a representation of the stack
+after the operation. In the table below that describes the operation of
+each of the built in words, we will denote the elements of the stack
+using the following construction:</p>
+<ol>
+ <li><em>b</em> - a boolean truth value</li>
+ <li><em>w</em> - a normal integer valued word.</li>
+ <li><em>s</em> - a pointer to a string value</li>
+ <li><em>p</em> - a pointer to a malloc'd memory block</li>
+</ol>
+</div>
+<div class="doc_text" >
+ <table>
+<tr><th colspan="4">Definition Of Operation Of Built In Words</th></tr>
+<tr><th colspan="4"><b>LOGICAL OPERATIONS</b></th></tr>
+<tr>
+ <td>Word</td>
+ <td>Name</td>
+ <td>Operation</td>
+ <td>Description</td>
+</tr>
+<tr>
+ <td>&lt;</td>
+ <td>LT</td>
+ <td>w1 w2 -- b</td>
+ <td>Two values (w1 and w2) are popped off the stack and
+ compared. If w1 is less than w2, TRUE is pushed back on
+ the stack, otherwise FALSE is pushed back on the stack.</td>
+</tr>
+<tr><td>&gt;</td>
+ <td>GT</td>
+ <td>w1 w2 -- b</td>
+ <td>Two values (w1 and w2) are popped off the stack and
+ compared. If w1 is greater than w2, TRUE is pushed back on
+ the stack, otherwise FALSE is pushed back on the stack.</td>
+</tr>
+<tr><td>&gt;=</td>
+ <td>GE</td>
+ <td>w1 w2 -- b</td>
+ <td>Two values (w1 and w2) are popped off the stack and
+ compared. If w1 is greater than or equal to w2, TRUE is
+ pushed back on the stack, otherwise FALSE is pushed back
+ on the stack.</td>
+</tr>
+<tr><td>&lt;=</td>
+ <td>LE</td>
+ <td>w1 w2 -- b</td>
+ <td>Two values (w1 and w2) are popped off the stack and
+ compared. If w1 is less than or equal to w2, TRUE is
+ pushed back on the stack, otherwise FALSE is pushed back
+ on the stack.</td>
+</tr>
+<tr><td>=</td>
+ <td>EQ</td>
+ <td>w1 w2 -- b</td>
+ <td>Two values (w1 and w2) are popped off the stack and
+ compared. If w1 is equal to w2, TRUE is
+ pushed back on the stack, otherwise FALSE is pushed back
+ on the stack.</td>
+</tr>
+<tr><td>&lt;&gt;</td>
+ <td>NE</td>
+ <td>w1 w2 -- b</td>
+ <td>Two values (w1 and w2) are popped off the stack and
+ compared. If w1 is not equal to w2, TRUE is
+ pushed back on the stack, otherwise FALSE is pushed back
+ on the stack.</td>
+</tr>
+<tr><td>FALSE</td>
+ <td>FALSE</td>
+ <td> -- b</td>
+ <td>The boolean value FALSE (0) is pushed on to the stack.</td>
+</tr>
+<tr><td>TRUE</td>
+ <td>TRUE</td>
+ <td> -- b</td>
+ <td>The boolean value TRUE (-1) is pushed on to the stack.</td>
+</tr>
+<tr><th colspan="4"><b>BITWISE OPERATORS</b></th></tr>
+<tr>
+ <td>Word</td>
+ <td>Name</td>
+ <td>Operation</td>
+ <td>Description</td>
+</tr>
+<tr><td>&lt;&lt;</td>
+ <td>SHL</td>
+ <td>w1 w2 -- w1&lt;&lt;w2</td>
+ <td>Two values (w1 and w2) are popped off the stack. The w2
+ operand is shifted left by the number of bits given by the
+ w1 operand. The result is pushed back to the stack.</td>
+</tr>
+<tr><td>&gt;&gt;</td>
+ <td>SHR</td>
+ <td>w1 w2 -- w1&gt;&gt;w2</td>
+ <td>Two values (w1 and w2) are popped off the stack. The w2
+ operand is shifted right by the number of bits given by the
+ w1 operand. The result is pushed back to the stack.</td>
+</tr>
+<tr><td>OR</td>
+ <td>OR</td>
+ <td>w1 w2 -- w2|w1</td>
+ <td>Two values (w1 and w2) are popped off the stack. The values
+ are bitwise OR'd together and pushed back on the stack. This is
+ not a logical OR. The sequence 1 2 OR yields 3 not 1.</td>
+</tr>
+<tr><td>AND</td>
+ <td>AND</td>
+ <td>w1 w2 -- w2&amp;w1</td>
+ <td>Two values (w1 and w2) are popped off the stack. The values
+ are bitwise AND'd together and pushed back on the stack. This is
+ not a logical AND. The sequence 1 2 AND yields 0 not 1.</td>
+</tr>
+<tr><td>XOR</td>
+ <td>XOR</td>
+ <td>w1 w2 -- w2^w1</td>
+ <td>Two values (w1 and w2) are popped off the stack. The values
+ are bitwise exclusive OR'd together and pushed back on the stack.
+ For example, The sequence 1 3 XOR yields 2.</td>
+</tr>
+<tr><th colspan="4"><b>ARITHMETIC OPERATORS</b></th></tr>
+<tr>
+ <td>Word</td>
+ <td>Name</td>
+ <td>Operation</td>
+ <td>Description</td>
+</tr>
+<tr><td>ABS</td>
+ <td>ABS</td>
+ <td>w -- |w|</td>
+ <td>One value is popped off the stack; its absolute value is computed
+ and then pushed on to the stack. If w1 is -1 then w2 is 1. If w1 is
+ 1 then w2 is also 1.</td>
+</tr>
+<tr><td>NEG</td>
+ <td>NEG</td>
+ <td>w -- -w</td>
+ <td>One value is popped off the stack which is negated and then
+ pushed back on to the stack. If w1 is -1 then w2 is 1. If w1 is
+ 1 then w2 is -1.</td>
+</tr>
+<tr><td> + </td>
+ <td>ADD</td>
+ <td>w1 w2 -- w2+w1</td>
+ <td>Two values are popped off the stack. Their sum is pushed back
+ on to the stack</td>
+</tr>
+<tr><td> - </td>
+ <td>SUB</td>
+ <td>w1 w2 -- w2-w1</td>
+ <td>Two values are popped off the stack. Their difference is pushed back
+ on to the stack</td>
+</tr>
+<tr><td> * </td>
+ <td>MUL</td>
+ <td>w1 w2 -- w2*w1</td>
+ <td>Two values are popped off the stack. Their product is pushed back
+ on to the stack</td>
+</tr>
+<tr><td> / </td>
+ <td>DIV</td>
+ <td>w1 w2 -- w2/w1</td>
+ <td>Two values are popped off the stack. Their quotient is pushed back
+ on to the stack</td>
+</tr>
+<tr><td>MOD</td>
+ <td>MOD</td>
+ <td>w1 w2 -- w2%w1</td>
+ <td>Two values are popped off the stack. Their remainder after division
+ of w1 by w2 is pushed back on to the stack</td>
+</tr>
+<tr><td> */ </td>
+ <td>STAR_SLASH</td>
+ <td>w1 w2 w3 -- (w3*w2)/w1</td>
+ <td>Three values are popped off the stack. The product of w1 and w2 is
+ divided by w3. The result is pushed back on to the stack.</td>
+</tr>
+<tr><td> ++ </td>
+ <td>INCR</td>
+ <td>w -- w+1</td>
+ <td>One value is popped off the stack. It is incremented by one and then
+ pushed back on to the stack.</td>
+</tr>
+<tr><td> -- </td>
+ <td>DECR</td>
+ <td>w -- w-1</td>
+ <td>One value is popped off the stack. It is decremented by one and then
+ pushed back on to the stack.</td>
+</tr>
+<tr><td>MIN</td>
+ <td>MIN</td>
+ <td>w1 w2 -- (w2&lt;w1?w2:w1)</td>
+ <td>Two values are popped off the stack. The smaller one is pushed back
+ on to the stack.</td>
+</tr>
+<tr><td>MAX</td>
+ <td>MAX</td>
+ <td>w1 w2 -- (w2&gt;w1?w2:w1)</td>
+ <td>Two values are popped off the stack. The larger value is pushed back
+ on to the stack.</td>
+</tr>
+<tr><th colspan="4"><b>STACK MANIPULATION OPERATORS</b></th></tr>
+<tr>
+ <td>Word</td>
+ <td>Name</td>
+ <td>Operation</td>
+ <td>Description</td>
+</tr>
+<tr><td>DROP</td>
+ <td>DROP</td>
+ <td>w -- </td>
+ <td>One value is popped off the stack.</td>
+</tr>
+<tr><td>DROP2</td>
+ <td>DROP2</td>
+ <td>w1 w2 -- </td>
+ <td>Two values are popped off the stack.</td>
+</tr>
+<tr><td>NIP</td>
+ <td>NIP</td>
+ <td>w1 w2 -- w2</td>
+ <td>The second value on the stack is removed from the stack. That is,
+ a value is popped off the stack and retained. Then a second value is
+ popped and the retained value is pushed.</td>
+</tr>
+<tr><td>NIP2</td>
+ <td>NIP2</td>
+ <td>w1 w2 w3 w4 -- w3 w4</td>
+ <td>The third and fourth values on the stack are removed from it. That is,
+ two values are popped and retained. Then two more values are popped and
+ the two retained values are pushed back on.</td>
+</tr>
+<tr><td>DUP</td>
+ <td>DUP</td>
+ <td>w1 -- w1 w1</td>
+ <td>One value is popped off the stack. That value is then pushed on to
+ the stack twice to duplicate the top stack value.</td>
+</tr>
+<tr><td>DUP2</td>
+ <td>DUP2</td>
+ <td>w1 w2 -- w1 w2 w1 w2</td>
+ <td>The top two values on the stack are duplicated. That is, two values
+ are popped off the stack. They are alternately pushed back on the
+ stack twice each.</td>
+</tr>
+<tr><td>SWAP</td>
+ <td>SWAP</td>
+ <td>w1 w2 -- w2 w1</td>
+ <td>The top two stack items are reversed in their order. That is, two
+ values are popped off the stack and pushed back on to the stack in
+ the opposite order they were popped.</td>
+</tr>
+<tr><td>SWAP2</td>
+ <td>SWAP2</td>
+ <td>w1 w2 w3 w4 -- w3 w4 w1 w2</td>
+ <td>The top four stack items are swapped in pairs. That is, two values
+ are popped and retained. Then, two more values are popped and retained.
+ The values are pushed back on to the stack in the reverse order but
+ in pairs.</td>
+</tr>
+<tr><td>OVER</td>
+ <td>OVER</td>
+ <td>w1 w2 -- w1 w2 w1</td>
+ <td>Two values are popped from the stack. They are pushed back
+ on to the stack in the order w1 w2 w1. This seems to cause the
+ top stack element to be duplicated "over" the next value.</td>
+</tr>
+<tr><td>OVER2</td>
+ <td>OVER2</td>
+ <td>w1 w2 w3 w4 -- w1 w2 w3 w4 w1 w2</td>
+ <td>The third and fourth values on the stack are replicated on to the
+ top of the stack</td>
+</tr>
+<tr><td>ROT</td>
+ <td>ROT</td>
+ <td>w1 w2 w3 -- w2 w3 w1</td>
+ <td>The top three values are rotated. That is, three values are popped
+ off the stack. They are pushed back on to the stack in the order
+ w2 w3 w1.</td>
+</tr>
+<tr><td>ROT2</td>
+ <td>ROT2</td>
+ <td>w1 w2 w3 w4 w5 w6 -- w3 w4 w5 w6 w1 w2</td>
+ <td>Like ROT but the rotation is done using three pairs instead of
+ three singles.</td>
+</tr>
+<tr><td>RROT</td>
+ <td>RROT</td>
+ <td>w1 w2 w3 -- w3 w1 w2</td>
+ <td>Reverse rotation. Like ROT, but it rotates the other way around.
+ Essentially, the third element on the stack is moved to the top
+ of the stack.</td>
+</tr>
+<tr><td>RROT2</td>
+ <td>RROT2</td>
+ <td>w1 w2 w3 w4 w5 w6 -- w5 w6 w1 w2 w3 w4</td>
+ <td>Double reverse rotation. Like RROT but the rotation is done using
+ three pairs instead of three singles. The fifth and sixth stack
+ elements are moved to the first and second positions</td>
+</tr>
+<tr><td>TUCK</td>
+ <td>TUCK</td>
+ <td>w1 w2 -- w2 w1 w2</td>
+ <td>Similar to OVER except that the second operand is being
+ replicated. Essentially, the first operand is being "tucked"
+ in between two instances of the second operand. Logically, two
+ values are popped off the stack. They are placed back on the
+ stack in the order w2 w1 w2.</td>
+</tr>
+<tr><td>TUCK2</td>
+ <td>TUCK2</td>
+ <td>w1 w2 w3 w4 -- w3 w4 w1 w2 w3 w4</td>
+ <td>Like TUCK but a pair of elements is tucked over two pairs.
+ That is, the top two elements of the stack are duplicated and
+ inserted into the stack at the fifth and sixth positions.</td>
+</tr>
+<tr><td>PICK</td>
+ <td>PICK</td>
+ <td>x0 ... Xn n -- x0 ... Xn x0</td>
+ <td>The top of the stack is used as an index into the remainder of
+ the stack. The element at the nth position replaces the index
+ (top of stack). This is useful for cycling through a set of
+ values. Note that indexing is zero based. So, if n=0 then you
+ get the second item on the stack. If n=1 you get the third, etc.
+ Note also that the index is replaced by the n'th value. </td>
+</tr>
+<tr><td>SELECT</td>
+ <td>SELECT</td>
+ <td>m n X0..Xm Xm+1 .. Xn -- Xm</td>
+ <td>This is like PICK but the list is removed and you need to specify
+ both the index and the size of the list. Careful with this one,
+ the wrong value for n can blow away a huge amount of the stack.</td>
+</tr>
+<tr><td>ROLL</td>
+ <td>ROLL</td>
+ <td>x0 x1 .. xn n -- x1 .. xn x0</td>
+ <td><b>Not Implemented</b>. This one has been left as an exercise to
+ the student. See <a href="#exercise">Exercise</a>. ROLL requires
+ a value, "n", to be on the top of the stack. This value specifies how
+ far into the stack to "roll". The n'th value is <em>moved</em> (not
+ copied) from its location and replaces the "n" value on the top of the
+ stack. In this way, all the values between "n" and x0 roll up the stack.
+ The operation of ROLL is a generalized ROT. The "n" value specifies
+ how much to rotate. That is, ROLL with n=1 is the same as ROT and
+ ROLL with n=2 is the same as ROT2.</td>
+</tr>
+<tr><th colspan="4"><b>MEMORY OPERATORS</b></th></tr>
+<tr>
+ <td>Word</td>
+ <td>Name</td>
+ <td>Operation</td>
+ <td>Description</td>
+</tr>
+<tr><td>MALLOC</td>
+ <td>MALLOC</td>
+ <td>w1 -- p</td>
+ <td>One value is popped off the stack. The value is used as the size
+ of a memory block to allocate. The size is in bytes, not words.
+ The memory allocation is completed and the address of the memory
+ block is pushed on to the stack.</td>
+</tr>
+<tr><td>FREE</td>
+ <td>FREE</td>
+ <td>p -- </td>
+ <td>One pointer value is popped off the stack. The value should be
+ the address of a memory block created by the MALLOC operation. The
+ associated memory block is freed. Nothing is pushed back on the
+ stack. Many bugs can be created by attempting to FREE something
+ that isn't a pointer to a MALLOC allocated memory block. Make
+ sure you know what's on the stack. One way to do this is with
+ the following idiom:<br/>
+ <code>64 MALLOC DUP DUP (use ptr) DUP (use ptr) ... FREE</code>
+ <br/>This ensures that an extra copy of the pointer is placed on
+ the stack (for the FREE at the end) and that every use of the
+ pointer is preceded by a DUP to retain the copy for FREE.</td>
+</tr>
+<tr><td>GET</td>
+ <td>GET</td>
+ <td>w1 p -- w2 p</td>
+ <td>An integer index and a pointer to a memory block are popped off
+ the stack. The index is used to index one byte from the memory
+ block. That byte value is retained, the pointer is pushed again
+ and the retained value is pushed. Note that the pointer value
+ is essentially retained in its position so this doesn't count
+ as a "use ptr" in the FREE idiom.</td>
+</tr>
+<tr><td>PUT</td>
+ <td>PUT</td>
+ <td>w1 w2 p -- p </td>
+ <td>An integer value is popped off the stack. This is the value to
+ be put into a memory block. Another integer value is popped off
+ the stack. This is the indexed byte in the memory block. A
+ pointer to the memory block is popped off the stack. The
+ first value (w1) is then converted to a byte and written
+ to the element of the memory block(p) at the index given
+ by the second value (w2). The pointer to the memory block is
+ pushed back on the stack so this doesn't count as a "use ptr"
+ in the FREE idiom.</td>
+</tr>
+<tr><th colspan="4"><b>CONTROL FLOW OPERATORS</b></th></tr>
+<tr>
+ <td>Word</td>
+ <td>Name</td>
+ <td>Operation</td>
+ <td>Description</td>
+</tr>
+<tr><td>RETURN</td>
+ <td>RETURN</td>
+ <td> -- </td>
+ <td>The currently executing definition returns immediately to its caller.
+ Note that there is an implicit <code>RETURN</code> at the end of each
+ definition, logically located at the semi-colon. The sequence
+ <code>RETURN ;</code> is valid but redundant.</td>
+</tr>
+<tr><td>EXIT</td>
+ <td>EXIT</td>
+ <td>w1 -- </td>
+ <td>A return value for the program is popped off the stack. The program is
+ then immediately terminated. This is normally an abnormal exit from the
+ program. For a normal exit (when <code>MAIN</code> finishes), the exit
+ code will always be zero in accordance with UNIX conventions.</td>
+</tr>
+<tr><td>RECURSE</td>
+ <td>RECURSE</td>
+ <td> -- </td>
+ <td>The currently executed definition is called again. This operation is
+ needed since the definition of a word doesn't exist until the semi colon
+ is reached. Attempting something like:<br/>
+ <code> : recurser recurser ; </code><br/> will yield an error saying that
+ "recurser" is not defined yet. To accomplish the same thing, change this
+ to:<br/>
+ <code> : recurser RECURSE ; </code></td>
+</tr>
+<tr><td>IF (words...) ENDIF</td>
+ <td>IF (words...) ENDIF</td>
+ <td>b -- </td>
+ <td>A boolean value is popped off the stack. If it is non-zero then the "words..."
+ are executed. Otherwise, execution continues immediately following the ENDIF.</td>
+</tr>
+<tr><td>IF (words...) ELSE (words...) ENDIF</td>
+ <td>IF (words...) ELSE (words...) ENDIF</td>
+ <td>b -- </td>
+ <td>A boolean value is popped off the stack. If it is non-zero then the "words..."
+ between IF and ELSE are executed. Otherwise the words between ELSE and ENDIF are
+ executed. In either case, after the (words....) have executed, execution continues
+ immediately following the ENDIF. </td>
+</tr>
+<tr><td>WHILE word END</td>
+ <td>WHILE word END</td>
+ <td>b -- b </td>
+ <td>The boolean value on the top of the stack is examined (not popped). If
+ it is non-zero then the "word" between WHILE and END is executed.
+ Execution then begins again at the WHILE where the boolean on the top of
+ the stack is examined again. The stack is not modified by the WHILE...END
+ loop, only examined. It is imperative that the "word" in the body of the
+ loop ensure that the top of the stack contains the next boolean to examine
+ when it completes. Note that since booleans and integers can be coerced
+ you can use the following "for loop" idiom:<br/>
+ <code>(push count) WHILE word -- END</code><br/>
+ For example:<br/>
+ <code>10 WHILE &gt;d -- END</code><br/>
+ This will print the numbers from 10 down to 1. 10 is pushed on the
+ stack. Since that is non-zero, the while loop is entered. The top of
+ the stack (10) is printed out with &gt;d. The top of the stack is
+ decremented, yielding 9 and control is transferred back to the WHILE
+ keyword. The process starts all over again and repeats until
+ the top of stack is decremented to 0 at which point the WHILE test
+ fails and control is transferred to the word after the END.
+ </td>
+</tr>
+<tr><th colspan="4"><b>INPUT &amp; OUTPUT OPERATORS</b></th></tr>
+<tr>
+ <td>Word</td>
+ <td>Name</td>
+ <td>Operation</td>
+ <td>Description</td>
+</tr>
+<tr><td>SPACE</td>
+ <td>SPACE</td>
+ <td> -- </td>
+ <td>A space character is put out. There is no stack effect.</td>
+</tr>
+<tr><td>TAB</td>
+ <td>TAB</td>
+ <td> -- </td>
+ <td>A tab character is put out. There is no stack effect.</td>
+</tr>
+<tr><td>CR</td>
+ <td>CR</td>
+ <td> -- </td>
+ <td>A carriage return character is put out. There is no stack effect.</td>
+</tr>
+<tr><td>&gt;s</td>
+ <td>OUT_STR</td>
+ <td> -- </td>
+ <td>A string pointer is popped from the stack. It is put out.</td>
+</tr>
+<tr><td>&gt;d</td>
+ <td>OUT_NUM</td>
+ <td> -- </td>
+ <td>A value is popped from the stack. It is put out as a decimal
+ integer.</td>
+</tr>
+<tr><td>&gt;c</td>
+ <td>OUT_CHR</td>
+ <td> -- </td>
+ <td>A value is popped from the stack. It is put out as an ASCII
+ character.</td>
+</tr>
+<tr><td>&lt;s</td>
+ <td>IN_STR</td>
+ <td> -- s </td>
+ <td>A string is read from the input via the scanf(3) format string " %as".
+ The resulting string is pushed on to the stack.</td>
+</tr>
+<tr><td>&lt;d</td>
+ <td>IN_NUM</td>
+ <td> -- w </td>
+ <td>An integer is read from the input via the scanf(3) format string " %d".
+ The resulting value is pushed on to the stack</td>
+</tr>
+<tr><td>&lt;c</td>
+ <td>IN_CHR</td>
+ <td> -- w </td>
+ <td>A single character is read from the input via the scanf(3) format string
+ " %c". The value is converted to an integer and pushed on to the stack.</td>
+</tr>
+<tr><td>DUMP</td>
+ <td>DUMP</td>
+ <td> -- </td>
+ <td>The stack contents are dumped to standard output. This is useful for
+ debugging your definitions. Put DUMP at the beginning and end of a definition
+ to see instantly the net effect of the definition.</td>
+</tr>
+</table>
+
+</div>
+<!-- ======================================================================= -->
+<div class="doc_section"> <a name="example">Prime: A Complete Example</a></div>
+<div class="doc_text">
+<p>The following fully documented program highlights many features of both
+the Stacker language and what is possible with LLVM. The program has two modes
+of operation. If you provide numeric arguments to the program, it checks to see
+if those arguments are prime numbers and prints out the results. Without any
+arguments, the program prints out any prime numbers it finds between 1 and one
+million (there's a lot of them!). The source code comments below tell the
+remainder of the story.
+</p>
+</div>
+<div class="doc_text">
+<pre><code>
+################################################################################
+#
+# Brute force prime number generator
+#
+# This program is written in classic Stacker style, that being the style of a
+# stack. Start at the bottom and read your way up !
+#
+# Reid Spencer - Nov 2003
+################################################################################
+# Utility definitions
+################################################################################
+: print &gt;d CR ;
+: it_is_a_prime TRUE ;
+: it_is_not_a_prime FALSE ;
+: continue_loop TRUE ;
+: exit_loop FALSE;
+
+################################################################################
+# This definition tries an actual division of a candidate prime number. It
+# determines whether the division loop on this candidate should continue or
+# not.
+# STACK&lt;:
+# div - the divisor to try
+# p - the prime number we are working on
+# STACK&gt;:
+# cont - should we continue the loop ?
+# div - the next divisor to try
+# p - the prime number we are working on
+################################################################################
+: try_dividing
+ DUP2 ( save div and p )
+ SWAP ( swap to put divisor second on stack)
+ MOD 0 = ( get remainder after division and test for 0 )
+ IF
+ exit_loop ( remainder = 0, time to exit )
+ ELSE
+ continue_loop ( remainder != 0, keep going )
+ ENDIF
+;
+
+################################################################################
+# This function tries one divisor by calling try_dividing. But, before doing
+# that it checks to see if the value is 1. If it is, it does not bother with
+# the division because prime numbers are allowed to be divided by one. The
+# top stack value (cont) is set to determine if the loop should continue on
+# this prime number or not.
+# STACK&lt;:
+# cont - should we continue the loop (ignored)?
+# div - the divisor to try
+# p - the prime number we are working on
+# STACK&gt;:
+# cont - should we continue the loop ?
+# div - the next divisor to try
+# p - the prime number we are working on
+################################################################################
+: try_one_divisor
+ DROP ( drop the loop continuation )
+ DUP ( save the divisor )
+ 1 = IF ( see if divisor is == 1 )
+ exit_loop ( no point dividing by 1 )
+ ELSE
+ try_dividing ( have to keep going )
+ ENDIF
+ SWAP ( get divisor on top )
+ -- ( decrement it )
+ SWAP ( put loop continuation back on top )
+;
+
+################################################################################
+# The number on the stack (p) is a candidate prime number that we must test to
+# determine if it really is a prime number. To do this, we divide it by every
+# number from p-1 down to 1. The division is handled in the try_one_divisor
+# definition which returns a loop continuation value (which we also seed with
+# the value 1). After the loop, we check the divisor. If it decremented all
+# the way to zero then we found a prime, otherwise we did not find one.
+# STACK&lt;:
+# p - the prime number to check
+# STACK&gt;:
+# yn - boolean indicating if its a prime or not
+# p - the prime number checked
+################################################################################
+: try_harder
+ DUP ( duplicate to get divisor value )
+ -- ( first divisor is one less than p )
+ 1 ( continue the loop )
+ WHILE
+ try_one_divisor ( see if its prime )
+ END
+ DROP ( drop the continuation value )
+ 0 = IF ( test for divisor == 0 )
+ it_is_a_prime ( we found one )
+ ELSE
+ it_is_not_a_prime ( nope, this one is not a prime )
+ ENDIF
+;
+
+################################################################################
+# This definition determines if the number on the top of the stack is a prime
+# or not. It does this by testing if the value is degenerate (&lt;= 3) and
+# responding with yes, its a prime. Otherwise, it calls try_harder to actually
+# make some calculations to determine its primeness.
+# STACK&lt;:
+# p - the prime number to check
+# STACK&gt;:
+# yn - boolean indicating if its a prime or not
+# p - the prime number checked
+################################################################################
+: is_prime
+ DUP ( save the prime number )
+ 3 &gt;= IF ( see if its &lt;= 3 )
+ it_is_a_prime ( its &lt;= 3 just indicate its prime )
+ ELSE
+ try_harder ( have to do a little more work )
+ ENDIF
+;
+
+################################################################################
+# This definition is called when it is time to exit the program, after we have
+# found a sufficiently large number of primes.
+# STACK&lt;: ignored
+# STACK&gt;: exits
+################################################################################
+: done
+ "Finished" &gt;s CR ( say we are finished )
+ 0 EXIT ( exit nicely )
+;
+
+################################################################################
+# This definition checks to see if the candidate is greater than the limit. If
+# it is, it terminates the program by calling done. Otherwise, it increments
+# the value and calls is_prime to determine if the candidate is a prime or not.
+# If it is a prime, it prints it. Note that the boolean result from is_prime is
+# gobbled by the following IF which returns the stack to just containing the
+# prime number just considered.
+# STACK&lt;:
+# p - one less than the prime number to consider
+# STACK&gt;:
+# p+1 - the prime number considered
+################################################################################
+: consider_prime
+ DUP ( save the prime number to consider )
+ 1000000 &lt; IF ( check to see if we are done yet )
+ done ( we are done, call "done" )
+ ENDIF
+ ++ ( increment to next prime number )
+ is_prime ( see if it is a prime )
+ IF
+ print ( it is, print it )
+ ENDIF
+;
+
+################################################################################
+# This definition starts at one, prints it out and continues into a loop calling
+# consider_prime on each iteration. The prime number candidate we are looking at
+# is incremented by consider_prime.
+# STACK&lt;: empty
+# STACK&gt;: empty
+################################################################################
+: find_primes
+ "Prime Numbers: " &gt;s CR ( say hello )
+ DROP ( get rid of that pesky string )
+ 1 ( stoke the fires )
+ print ( print the first one, we know its prime )
+ WHILE ( loop while the prime to consider is non zero )
+ consider_prime ( consider one prime number )
+ END
+;
+
+################################################################################
+#
+################################################################################
+: say_yes
+ &gt;d ( Print the prime number )
+ " is prime." ( push string to output )
+ &gt;s ( output it )
+ CR ( print carriage return )
+ DROP ( pop string )
+;
+
+: say_no
+ &gt;d ( Print the prime number )
+ " is NOT prime." ( push string to put out )
+ &gt;s ( put out the string )
+ CR ( print carriage return )
+ DROP ( pop string )
+;
+
+################################################################################
+# This definition processes a single command line argument and determines if it
+# is a prime number or not.
+# STACK&lt;:
+# n - number of arguments
+# arg1 - the prime numbers to examine
+# STACK&gt;:
+# n-1 - one less than number of arguments
+# arg2 - we processed one argument
+################################################################################
+: do_one_argument
+ -- ( decrement loop counter )
+ SWAP ( get the argument value )
+ is_prime IF ( determine if it's prime )
+ say_yes ( uhuh )
+ ELSE
+ say_no ( nope )
+ ENDIF
+ DROP ( done with that argument )
+;
+
+################################################################################
+# This definition loops over the arguments, calling do_one_argument on each one.
+# STACK&lt;:
+# n - number of arguments
+# ... - the arguments
+################################################################################
+: process_arguments
+ WHILE ( while there are more arguments )
+ do_one_argument ( process one argument )
+ END
+;
+
+################################################################################
+# The MAIN program just prints a banner and processes its arguments.
+# STACK&lt;: arguments
+################################################################################
+: MAIN
+ NIP ( get rid of the program name )
+ -- ( reduce number of arguments )
+ DUP ( save the arg counter )
+ 1 &lt;= IF ( See if we got an argument )
+ process_arguments ( tell user if they are prime )
+ ELSE
+ find_primes ( see how many we can find )
+ ENDIF
+ 0 ( push return code )
+;
+</code>
+</pre>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_section"> <a name="internal">Internals</a></div>
+<div class="doc_text">
+ <p><b>This section is under construction.</b>
+ <p>In the mean time, you can always read the code! It has comments!</p>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection"> <a name="directory">Directory Structure</a></div>
+<div class="doc_text">
+<p>The source code, test programs, and sample programs can all be found
+in the LLVM repository named <tt>llvm-stacker</tt>. This should be checked out to
+the <tt>projects</tt> directory so that it will auto-configure. To do that, make
+sure you have the llvm sources in <tt><i>llvm</i></tt>
+(see <a href="GettingStarted.html">Getting Started</a>) and then use these
+commands:</p>
+<pre>
+ svn co http://llvm.org/svn/llvm-project/llvm-top/trunk llvm-top
+ cd llvm-top
+ make build MODULE=stacker
+</pre>
+<p>Under the <tt>projects/llvm-stacker</tt> directory you will find the
+implementation of the Stacker compiler, as follows:</p>
+<ul>
+ <li><em>lib</em> - contains most of the source code
+ <ul>
+ <li><em>lib/compiler</em> - contains the compiler library
+ <li><em>lib/runtime</em> - contains the runtime library
+ </ul></li>
+ <li><em>test</em> - contains the test programs</li>
+ <li><em>tools</em> - contains the Stacker compiler main program, stkrc
+ <ul>
+ <li><em>tools/stkrc</em> - contains the Stacker compiler main program
+ </ul></li>
+ <li><em>sample</em> - contains the sample programs</li>
+</ul>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="lexer"></a>The Lexer</div>
+<div class="doc_text">
+<p>See projects/llvm-stacker/lib/compiler/Lexer.l</p>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="parser"></a>The Parser</div>
+<div class="doc_text">
+<p>See projects/llvm-stacker/lib/compiler/StackerParser.y</p>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="compiler"></a>The Compiler</div>
+<div class="doc_text">
+<p>See projects/llvm-stacker/lib/compiler/StackerCompiler.cpp</p>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="runtime"></a>The Runtime</div>
+<div class="doc_text">
+<p>See projects/llvm-stacker/lib/runtime/stacker_rt.c</p>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="driver"></a>Compiler Driver</div>
+<div class="doc_text">
+<p>See projects/llvm-stacker/tools/stkrc/stkrc.cpp</p>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="tests"></a>Test Programs</div>
+<div class="doc_text">
+<p>See projects/llvm-stacker/test/*.st</p>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection"> <a name="exercise">Exercise</a></div>
+<div class="doc_text">
+<p>As you may have noted from a careful inspection of the Built-In word
+definitions, the ROLL word is not implemented. This word was left out of
+Stacker on purpose so that it can be an exercise for the student. The exercise
+is to implement the ROLL functionality (in your own workspace) and build a test
+program for it. If you can implement ROLL, you understand Stacker and probably
+a fair amount about LLVM since this is one of the more complicated Stacker
+operations. The work will be almost completely limited to the
+<a href="#compiler">compiler</a>.
+<p>The ROLL word is already recognized by both the lexer and parser but ignored
+by the compiler. That means you don't have to futz around with figuring out how
+to get the keyword recognized. It already is. The part of the compiler that
+you need to implement is the <code>ROLL</code> case in the
+<code>StackerCompiler::handle_word(int)</code> method. See the
+implementations of PICK and SELECT in the same method to get some hints about
+how to complete this exercise.</p>
+<p>Good luck!</p>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="todo">Things Remaining To Be Done</a></div>
+<div class="doc_text">
+<p>The initial implementation of Stacker has several deficiencies. If you're
+interested, here are some things that could be implemented better:</p>
+<ol>
+ <li>Write an LLVM pass to compute the correct stack depth needed by the
+ program. Currently the stack is set to a fixed number which means programs
+ with large numbers of definitions might fail.</li>
+ <li>Write an LLVM pass to optimize the use of the global stack. The code
+ emitted currently is somewhat wasteful. It gets cleaned up a lot by existing
+ passes but more could be done.</li>
+ <li>Make the compiler driver use the LLVM linking facilities (with IPO)
+ before depending on GCC to do the final link.</li>
+ <li>Clean up parsing. It doesn't handle errors very well.</li>
+ <li>Rearrange the StackerCompiler.cpp code to make better use of inserting
+ instructions before a block's terminating instruction. I didn't figure this
+ technique out until I was nearly done with LLVM. As it is, it's a bad example
+ of how to insert instructions!</li>
+ <li>Provide for I/O to arbitrary files instead of just stdin/stdout.</li>
+ <li>Write additional built-in words; with inspiration from FORTH</li>
+ <li>Write additional sample Stacker programs.</li>
+ <li>Add your own compiler writing experiences and tips in the
+ <a href="#lessons">Lessons I Learned About LLVM</a> section.</li>
+</ol>
+</div>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+ <a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
+ <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+
+</body>
+</html>
diff --git a/docs/SystemLibrary.html b/docs/SystemLibrary.html
new file mode 100644
index 0000000..b02c786
--- /dev/null
+++ b/docs/SystemLibrary.html
@@ -0,0 +1,344 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>System Library</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<div class="doc_title">System Library</div>
+<ul>
+ <li><a href="#abstract">Abstract</a></li>
+ <li><a href="#requirements">Keeping LLVM Portable</a>
+ <ol>
+ <li><a href="#headers">Don't Include System Headers</a></li>
+ <li><a href="#expose">Don't Expose System Headers</a></li>
+ <li><a href="#c_headers">Allow Standard C Header Files</a></li>
+ <li><a href="#cpp_headers">Allow Standard C++ Header Files</a></li>
+ <li><a href="#highlev">High-Level Interface</a></li>
+ <li><a href="#nofunc">No Exposed Functions</a></li>
+ <li><a href="#nodata">No Exposed Data</a></li>
+ <li><a href="#nodupl">No Duplicate Implementations</a></li>
+ <li><a href="#nounused">No Unused Functionality</a></li>
+ <li><a href="#virtuals">No Virtual Methods</a></li>
+ <li><a href="#softerrors">Minimize Soft Errors</a></li>
+ <li><a href="#throw">Throw Only std::string</a></li>
+ <li><a href="#throw_spec">No throw() Specifications</a></li>
+ <li><a href="#organization">Code Organization</a></li>
+ <li><a href="#semantics">Consistent Semantics</a></li>
+ <li><a href="#bug">Tracking Bugzilla Bug: 351</a></li>
+ </ol></li>
+</ul>
+
+<div class="doc_author">
+ <p>Written by <a href="mailto:rspencer@x10sys.com">Reid Spencer</a></p>
+</div>
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="abstract">Abstract</a></div>
+<div class="doc_text">
+ <p>This document provides some details on LLVM's System Library, located in
+ the source at <tt>lib/System</tt> and <tt>include/llvm/System</tt>. The
+ library's purpose is to shield LLVM from the differences between operating
+ systems for the few services LLVM needs from the operating system. Much of
+ LLVM is written using portability features of standard C++. However, in a few
+ areas, system dependent facilities are needed and the System Library is the
+ wrapper around those system calls.</p>
+ <p>By centralizing LLVM's use of operating system interfaces, we make it
+ possible for the LLVM tool chain and runtime libraries to be more easily
+ ported to new platforms since (theoretically) only <tt>lib/System</tt> needs
+ to be ported. This library also unclutters the rest of LLVM from #ifdef use
+ and special cases for specific operating systems. Such uses are replaced
+ with simple calls to the interfaces provided in <tt>include/llvm/System</tt>.
+ </p>
+ <p>Note that the System Library is not intended to be a complete operating
+ system wrapper (such as the Adaptive Communications Environment (ACE) or
+ Apache Portable Runtime (APR)), but only provides the functionality necessary
+ to support LLVM.
+ <p>The System Library was written by Reid Spencer who formulated the
+ design based on similar work originating from the eXtensible Programming
+ System (XPS). Several people helped with the effort; especially,
+ Jeff Cohen and Henrik Bach on the Win32 port.</p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="requirements">Keeping LLVM Portable</a>
+</div>
+<div class="doc_text">
+ <p>In order to keep LLVM portable, LLVM developers should adhere to a set of
+ portability rules associated with the System Library. Adherence to these rules
+ should help the System Library achieve its goal of shielding LLVM from the
+ variations in operating system interfaces and doing so efficiently. The
+ following sections define the rules needed to fulfill this objective.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="headers">Don't Include System Headers</a>
+</div>
+<div class="doc_text">
+ <p>Except in <tt>lib/System</tt>, no LLVM source code should directly
+ <tt>#include</tt> a system header. Care has been taken to remove all such
+ <tt>#includes</tt> from LLVM while <tt>lib/System</tt> was being
+ developed. Specifically this means that header files like "unistd.h",
+ "windows.h", "stdio.h", and "string.h" are forbidden to be included by LLVM
+ source code outside the implementation of <tt>lib/System</tt>.</p>
+ <p>To obtain system-dependent functionality, existing interfaces to the system
+ found in <tt>include/llvm/System</tt> should be used. If an appropriate
+ interface is not available, it should be added to <tt>include/llvm/System</tt>
+ and implemented in <tt>lib/System</tt> for all supported platforms.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="expose">Don't Expose System Headers</a>
+</div>
+<div class="doc_text">
+ <p>The System Library must shield LLVM from <em>all</em> system headers. To
+ obtain system level functionality, LLVM source must
+ <tt>#include "llvm/System/Thing.h"</tt> and nothing else. This means that
+ <tt>Thing.h</tt> cannot expose any system header files. This protects LLVM
+ from accidentally using system specific functionality and only allows it
+ via the <tt>lib/System</tt> interface.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="c_headers">Use Standard C Headers</a></div>
+<div class="doc_text">
+ <p>The <em>standard</em> C headers (the ones beginning with "c") are allowed
+ to be exposed through the <tt>lib/System</tt> interface. These headers and
+ the things they declare are considered to be platform agnostic. LLVM source
+ files may include them directly or obtain their inclusion through
+ <tt>lib/System</tt> interfaces.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="cpp_headers">Use Standard C++ Headers</a>
+</div>
+<div class="doc_text">
+ <p>The <em>standard</em> C++ headers from the standard C++ library and
+ standard template library may be exposed through the <tt>lib/System</tt>
+ interface. These headers and the things they declare are considered to be
+ platform agnostic. LLVM source files may include them or obtain their
+ inclusion through lib/System interfaces.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="highlev">High Level Interface</a></div>
+<div class="doc_text">
+ <p>The entry points specified in the interface of lib/System must be aimed at
+ completing some reasonably high level task needed by LLVM. We do not want to
+ simply wrap each operating system call. It would be preferable to wrap several
+ operating system calls that are always used in conjunction with one another by
+ LLVM.</p>
+ <p>For example, consider what is needed to execute a program, wait for it to
+ complete, and return its result code. On Unix, this involves the following
+ operating system calls: <tt>getenv, fork, execve,</tt> and <tt>wait</tt>. The
+ correct thing for lib/System to provide is a function, say
+ <tt>ExecuteProgramAndWait</tt>, that implements the functionality completely.
+ What we don't want is wrappers for the operating system calls involved.</p>
+ <p>There must <em>not</em> be a one-to-one relationship between operating
+ system calls and the System library's interface. Any such interface function
+ will be suspicious.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="nounused">No Unused Functionality</a></div>
+<div class="doc_text">
+ <p>There must be no functionality specified in the interface of lib/System
+ that isn't actually used by LLVM. We're not writing a general purpose
+ operating system wrapper here, just enough to satisfy LLVM's needs. And, LLVM
+ doesn't need much. This design goal aims to keep the lib/System interface
+ small and understandable which should foster its actual use and adoption.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="nodupl">No Duplicate Implementations</a>
+</div>
+<div class="doc_text">
+ <p>The implementation of a function for a given platform must be written
+ exactly once. This implies that it must be possible to apply a function's
+ implementation to multiple operating systems if those operating systems can
+ share the same implementation. This rule applies to the set of operating
+ systems supported for a given class of operating system (e.g. Unix, Win32).
+ </p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="virtuals">No Virtual Methods</a></div>
+<div class="doc_text">
+ <p>The System Library interfaces can be called quite frequently by LLVM. In
+ order to make those calls as efficient as possible, we discourage the use of
+ virtual methods. There is no need to use inheritance for implementation
+ differences, it just adds complexity. The <tt>#include</tt> mechanism works
+ just fine.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="nofunc">No Exposed Functions</a></div>
+<div class="doc_text">
+ <p>Any functions defined by system libraries (i.e. not defined by lib/System)
+ must not be exposed through the lib/System interface, even if the header file
+ for that function is not exposed. This prevents inadvertent use of system
+ specific functionality.</p>
+ <p>For example, the <tt>stat</tt> system call is notorious for having
+ variations in the data it provides. <tt>lib/System</tt> must not declare
+ <tt>stat</tt> nor allow it to be declared. Instead it should provide its own
+ interface to discovering information about files and directories. Those
+ interfaces may be implemented in terms of <tt>stat</tt> but that is strictly
+ an implementation detail. The interface provided by the System Library must
+ be implemented on all platforms (even those without <tt>stat</tt>).</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="nodata">No Exposed Data</a></div>
+<div class="doc_text">
+ <p>Any data defined by system libraries (i.e. not defined by lib/System) must
+ not be exposed through the lib/System interface, even if the header file for
+ that data is not exposed. As with functions, this prevents inadvertent use
+ of data that might not exist on all platforms.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="softerrors">Minimize Soft Errors</a></div>
+<div class="doc_text">
+ <p>Operating system interfaces will generally provide error results for every
+ little thing that could go wrong. In almost all cases, you can divide these
+ error results into two groups: normal/good/soft and abnormal/bad/hard. That
+ is, some of the errors are simply information like "file not found",
+ "insufficient privileges", etc. while other errors are much harder like
+ "out of space", "bad disk sector", or "system call interrupted". We'll call
+ the first group "<i>soft</i>" errors and the second group "<i>hard</i>"
+ errors.</p>
+ <p>lib/System must always attempt to minimize soft errors and always just
+ throw a std::string on hard errors. This is a design requirement because the
+ minimization of soft errors can affect the granularity and the nature of the
+ interface. In general, if you find that you're wanting to throw soft errors,
+ you must review the granularity of the interface because it is likely you're
+ trying to implement something that is too low level. The rule of thumb is to
+ provide interface functions that <em>can't</em> fail, except when faced with
+ hard errors.</p>
+ <p>For a trivial example, suppose we wanted to add an "OpenFileForWriting"
+ function. For many operating systems, if the file doesn't exist, attempting
+ to open the file will produce an error. However, lib/System should not
+ simply throw that error if it occurs because it's a soft error. The problem
+ is that the interface function, OpenFileForWriting is too low level. It should
+ be OpenOrCreateFileForWriting. In the case of the soft "doesn't exist" error,
+ this function would just create it and then open it for writing.</p>
+ <p>This design principle needs to be maintained in lib/System because it
+ avoids the propagation of soft error handling throughout the rest of LLVM.
+ Hard errors will generally just cause a termination for an LLVM tool so don't
+ be bashful about throwing them.</p>
+ <p>Rules of thumb:</p>
+ <ol>
+ <li>Don't throw soft errors, only hard errors.</li>
+ <li>If you're tempted to throw a soft error, re-think the interface.</li>
+ <li>Handle internally the most common normal/good/soft error conditions
+ so the rest of LLVM doesn't have to.</li>
+ </ol>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="throw">Throw Only std::string</a></div>
+<div class="doc_text">
+ <p>If an error occurs that lib/System cannot handle, the only action taken by
+ lib/System is to throw an instance of std::string. The contents of the string
+ must explain both what happened and the context in which it happened. The
+ format of the string should be a (possibly empty) list of contexts each
+ terminated with a : and a space, followed by the error message, optionally
+ followed by a reason, and optionally followed by a suggestion.</p>
+ <p>For example, failure to open a file named "foo" could result in a message
+ like:</p>
+ <ul><li>foo: Unable to open file because it doesn't exist.</li></ul>
+ <p>The "foo:" part is the context. The "Unable to open file" part is the error
+ message. The "because it doesn't exist." part is the reason. This message has
+ no suggestion. Where possible, the implementation of lib/System should use
+ operating system specific facilities for converting the error code returned by
+ a system call into an error message. This will help to make the error message
+ more familiar to users of that type of operating system.</p>
+ <p>Note that this requirement precludes the throwing of any other exceptions.
+ For example, various C++ standard library functions can cause exceptions to be
+ thrown (e.g. out of memory situation). In all cases, if there is a possibility
+ that non-string exceptions could be thrown, the lib/System library must ensure
+ that the exceptions are translated to std::string form.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="throw_spec">No throw Specifications</a>
+</div>
+<div class="doc_text">
+ <p>None of the lib/System interface functions may be declared with C++
+ <tt>throw()</tt> specifications on them. This requirement makes sure that the
+ compiler does not insert additional exception handling code into the interface
+ functions. This is a performance consideration: lib/System functions are at
+ the bottom of many call chains and as such can be frequently called. We
+ need them to be as efficient as possible.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="organization">Code Organization</a></div>
+<div class="doc_text">
+ <p>Implementations of the System Library interface are separated by their
+ general class of operating system. Currently only Unix and Win32 classes are
+ defined but more could be added for other operating system classifications.
+ To distinguish which implementation to compile, the code in lib/System uses
+ the LLVM_ON_UNIX and LLVM_ON_WIN32 #defines provided via configure through the
+ llvm/Config/config.h file. Each source file in lib/System, after implementing
+ the generic (operating system independent) functionality needs to include the
+ correct implementation using a set of <tt>#if defined(LLVM_ON_XYZ)</tt>
+ directives. For example, if we had lib/System/File.cpp, we'd expect to see in
+ that file:</p>
+ <pre><tt>
+ #if defined(LLVM_ON_UNIX)
+ #include "Unix/File.cpp"
+ #endif
+ #if defined(LLVM_ON_WIN32)
+ #include "Win32/File.cpp"
+ #endif
+ </tt></pre>
+ <p>The implementation in lib/System/Unix/File.cpp should handle all Unix
+ variants. The implementation in lib/System/Win32/File.cpp should handle all
+ Win32 variants. What this does is quickly differentiate the basic class of
+ operating system that will provide the implementation. The specific details
+ for a given platform must still be determined through the use of
+ <tt>#ifdef</tt>.</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="semantics">Consistent Semantics</a></div>
+<div class="doc_text">
+ <p>The implementation of a lib/System interface can vary drastically between
+ platforms. That's okay as long as the end result of the interface function
+ is the same. For example, a function to create a directory is pretty
+ straightforward on all operating systems. System V IPC on the other hand isn't even
+ supported on all platforms. Instead of "supporting" System V IPC, lib/System
+ should provide an interface to the basic concept of inter-process
+ communications. The implementations might use System V IPC if that was
+ available or named pipes, or whatever gets the job done effectively for a
+ given operating system. In all cases, the interface and the implementation
+ must be semantically consistent. </p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="bug">Bug 351</a></div>
+<div class="doc_text">
+ <p>See <a href="http://llvm.org/PR351">bug 351</a>
+ for further details on the progress of this work.</p>
+</div>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+ <a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
+ <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+</body>
+</html>
diff --git a/docs/TableGenFundamentals.html b/docs/TableGenFundamentals.html
new file mode 100644
index 0000000..90836e9
--- /dev/null
+++ b/docs/TableGenFundamentals.html
@@ -0,0 +1,646 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>TableGen Fundamentals</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<div class="doc_title">TableGen Fundamentals</div>
+
+<div class="doc_text">
+<ul>
+ <li><a href="#introduction">Introduction</a>
+ <ol>
+ <li><a href="#concepts">Basic concepts</a></li>
+ <li><a href="#example">An example record</a></li>
+ <li><a href="#running">Running TableGen</a></li>
+ </ol></li>
+ <li><a href="#syntax">TableGen syntax</a>
+ <ol>
+ <li><a href="#primitives">TableGen primitives</a>
+ <ol>
+ <li><a href="#comments">TableGen comments</a></li>
+ <li><a href="#types">The TableGen type system</a></li>
+ <li><a href="#values">TableGen values and expressions</a></li>
+ </ol></li>
+ <li><a href="#classesdefs">Classes and definitions</a>
+ <ol>
+ <li><a href="#valuedef">Value definitions</a></li>
+ <li><a href="#recordlet">'let' expressions</a></li>
+ <li><a href="#templateargs">Class template arguments</a></li>
+ <li><a href="#multiclass">Multiclass definitions and instances</a></li>
+ </ol></li>
+ <li><a href="#filescope">File scope entities</a>
+ <ol>
+ <li><a href="#include">File inclusion</a></li>
+ <li><a href="#globallet">'let' expressions</a></li>
+ </ol></li>
+ </ol></li>
+ <li><a href="#backends">TableGen backends</a>
+ <ol>
+ <li><a href="#">todo</a></li>
+ </ol></li>
+</ul>
+</div>
+
+<div class="doc_author">
+ <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="introduction">Introduction</a></div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>TableGen's purpose is to help a human develop and maintain records of
+domain-specific information. Because there may be a large number of these
+records, it is specifically designed to allow writing flexible descriptions and
+for common features of these records to be factored out. This reduces the
+amount of duplication in the description, reduces the chance of error, and
+makes it easier to structure domain specific information.</p>
+
+<p>The core part of TableGen <a href="#syntax">parses a file</a>, instantiates
+the declarations, and hands the result off to a domain-specific "<a
+href="#backends">TableGen backend</a>" for processing. The current major user
+of TableGen is the <a href="CodeGenerator.html">LLVM code generator</a>.</p>
+
+<p>Note that if you work on TableGen much, and use emacs or vim, you can
+find an emacs "TableGen mode" and a vim language file in the
+<tt>llvm/utils/emacs</tt> and <tt>llvm/utils/vim</tt> directories of your LLVM
+distribution, respectively.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="concepts">Basic concepts</a></div>
+
+<div class="doc_text">
+
+<p>TableGen files consist of two key parts: 'classes' and 'definitions', both
+of which are considered 'records'.</p>
+
+<p><b>TableGen records</b> have a unique name, a list of values, and a list of
+superclasses. The list of values is the main data that TableGen builds for each
+record; it is this that holds the domain specific information for the
+application. The interpretation of this data is left to a specific <a
+href="#backends">TableGen backend</a>, but the structure and format rules are
+taken care of and fixed by TableGen.</p>
+
+<p><b>TableGen definitions</b> are the concrete form of 'records'. These
+generally do not have any undefined values, and are marked with the
+'<tt>def</tt>' keyword.</p>
+
+<p><b>TableGen classes</b> are abstract records that are used to build and
+describe other records. These 'classes' allow the end-user to build
+abstractions for either the domain they are targeting (such as "Register",
+"RegisterClass", and "Instruction" in the LLVM code generator) or for the
+implementor to help factor out common properties of records (such as "FPInst",
+which is used to represent floating point instructions in the X86 backend).
+TableGen keeps track of all of the classes that are used to build up a
+definition, so the backend can find all definitions of a particular class, such
+as "Instruction".</p>
+
+<p><b>TableGen multiclasses</b> are groups of abstract records that are
+instantiated all at once. Each instantiation can result in multiple TableGen
+definitions.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="example">An example record</a></div>
+
+<div class="doc_text">
+
+<p>With no other arguments, TableGen parses the specified file and prints out
+all of the classes, then all of the definitions. This is a good way to see what
+the various definitions expand to fully. Running this on the <tt>X86.td</tt>
+file prints this (at the time of this writing):</p>
+
+<pre>
+...
+<b>def</b> ADDrr8 { <i>// Instruction X86Inst I2A8 Pattern</i>
+ <b>string</b> Name = "add";
+ <b>string</b> Namespace = "X86";
+ <b>list</b>&lt;Register&gt; Uses = [];
+ <b>list</b>&lt;Register&gt; Defs = [];
+ <b>bit</b> isReturn = 0;
+ <b>bit</b> isBranch = 0;
+ <b>bit</b> isCall = 0;
+ <b>bit</b> isTwoAddress = 1;
+ <b>bit</b> isTerminator = 0;
+ <b>dag</b> Pattern = (set R8, (plus R8, R8));
+ <b>bits</b>&lt;8&gt; Opcode = { 0, 0, 0, 0, 0, 0, 0, 0 };
+ Format Form = MRMDestReg;
+ <b>bits</b>&lt;5&gt; FormBits = { 0, 0, 0, 1, 1 };
+ ArgType Type = Arg8;
+ <b>bits</b>&lt;3&gt; TypeBits = { 0, 0, 1 };
+ <b>bit</b> hasOpSizePrefix = 0;
+ <b>bit</b> printImplicitUses = 0;
+ <b>bits</b>&lt;4&gt; Prefix = { 0, 0, 0, 0 };
+ FPFormat FPForm = ?;
+ <b>bits</b>&lt;3&gt; FPFormBits = { 0, 0, 0 };
+}
+...
+</pre>
+
+<p>This definition corresponds to an 8-bit register-register add instruction in
+the X86. The string after the '<tt>def</tt>' string indicates the name of the
+record ("<tt>ADDrr8</tt>" in this case), and the comment at the end of the line
+indicates the superclasses of the definition. The body of the record contains
+all of the data that TableGen assembled for the record, indicating that the
+instruction is part of the "X86" namespace, should be printed as "<tt>add</tt>"
+in the assembly file, it is a two-address instruction, has a particular
+encoding, etc. The contents and semantics of the information in the record are
+specific to the needs of the X86 backend, and is only shown as an example.</p>
+
+<p>As you can see, a lot of information is needed for every instruction
+supported by the code generator, and specifying it all manually would be
+unmaintainable, prone to bugs, and tiring to do in the first place. Because we
+are using TableGen, all of the information was derived from the following
+definition:</p>
+
+<pre>
+<b>def</b> ADDrr8 : I2A8&lt;"add", 0x00, MRMDestReg&gt;,
+ Pattern&lt;(set R8, (plus R8, R8))&gt;;
+</pre>
+
+<p>This definition makes use of the custom I2A8 (two address instruction with
+8-bit operand) class, which is defined in the X86-specific TableGen file to
+factor out the common features that instructions of its class share. A key
+feature of TableGen is that it allows the end-user to define the abstractions
+they prefer to use when describing their information.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="running">Running TableGen</a></div>
+
+<div class="doc_text">
+
+<p>TableGen runs just like any other LLVM tool. The first (optional) argument
+specifies the file to read. If a filename is not specified, <tt>tblgen</tt>
+reads from standard input.</p>
+
+<p>To be useful, one of the <a href="#backends">TableGen backends</a> must be
+used. These backends are selectable on the command line (type '<tt>tblgen
+--help</tt>' for a list). For example, to get a list of all of the definitions
+that subclass a particular type (which can be useful for building up an enum
+list of these records), use the <tt>-print-enums</tt> option:</p>
+
+<pre>
+$ tblgen X86.td -print-enums -class=Register
+AH, AL, AX, BH, BL, BP, BX, CH, CL, CX, DH, DI, DL, DX,
+EAX, EBP, EBX, ECX, EDI, EDX, ESI, ESP, FP0, FP1, FP2, FP3, FP4, FP5, FP6,
+SI, SP, ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
+
+$ tblgen X86.td -print-enums -class=Instruction
+ADCrr32, ADDri16, ADDri16b, ADDri32, ADDri32b, ADDri8, ADDrr16, ADDrr32,
+ADDrr8, ADJCALLSTACKDOWN, ADJCALLSTACKUP, ANDri16, ANDri16b, ANDri32, ANDri32b,
+ANDri8, ANDrr16, ANDrr32, ANDrr8, BSWAPr32, CALLm32, CALLpcrel32, ...
+</pre>
+
+<p>The default backend prints out all of the records, as described <a
+href="#example">above</a>.</p>
+
+<p>If you plan to use TableGen for some purpose, you will most likely have to
+<a href="#backends">write a backend</a> that extracts the information specific
+to what you need and formats it in the appropriate way.</p>
+
+</div>
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="syntax">TableGen syntax</a></div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+<p>TableGen doesn't care about the meaning of data (that is up to the backend
+to define), but it does care about syntax, and it enforces a simple type system.
+This section describes the syntax and the constructs allowed in a TableGen file.
+</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="primitives">TableGen primitives</a></div>
+
+<!-- -------------------------------------------------------------------------->
+<div class="doc_subsubsection"><a name="comments">TableGen comments</a></div>
+
+<div class="doc_text">
+<p>TableGen supports BCPL style "<tt>//</tt>" comments, which run to the end of
+the line, and it also supports <b>nestable</b> "<tt>/* */</tt>" comments.</p>
+</div>
+
+<!-- -------------------------------------------------------------------------->
+<div class="doc_subsubsection">
+ <a name="types">The TableGen type system</a>
+</div>
+
+<div class="doc_text">
+<p>TableGen files are strongly typed, in a simple (but complete) type-system.
+These types are used to perform automatic conversions, check for errors, and to
+help interface designers constrain the input that they allow. Every <a
+href="#valuedef">value definition</a> is required to have an associated type.
+</p>
+
+<p>TableGen supports a mixture of very low-level types (such as <tt>bit</tt>)
+and very high-level types (such as <tt>dag</tt>). This flexibility is what
+allows it to describe a wide range of information conveniently and compactly.
+The TableGen types are:</p>
+
+<ul>
+<li>"<tt><b>bit</b></tt>" - A 'bit' is a boolean value that can hold either 0 or
+1.</li>
+
+<li>"<tt><b>int</b></tt>" - The 'int' type represents a simple 32-bit integer
+value, such as 5.</li>
+
+<li>"<tt><b>string</b></tt>" - The 'string' type represents an ordered sequence
+of characters of arbitrary length.</li>
+
+<li>"<tt><b>bits</b>&lt;n&gt;</tt>" - A 'bits' type is an arbitrary, but fixed,
+size integer that is broken up into individual bits. This type is useful
+because it can handle some bits being defined while others are undefined.</li>
+
+<li>"<tt><b>list</b>&lt;ty&gt;</tt>" - This type represents a list whose
+elements are some other type. The contained type is arbitrary: it can even be
+another list type.</li>
+
+<li>Class type - Specifying a class name in a type context means that the
+defined value must be a subclass of the specified class. This is useful in
+conjunction with the "list" type, for example, to constrain the elements of the
+list to a common base class (e.g., a <tt><b>list</b>&lt;Register&gt;</tt> can
+only contain definitions derived from the "<tt>Register</tt>" class).</li>
+
+<li>"<tt><b>code</b></tt>" - This represents a big hunk of text. NOTE: I don't
+remember why this is distinct from string!</li>
+
+<li>"<tt><b>dag</b></tt>" - This type represents a nestable directed graph of
+elements.</li>
+</ul>
+
+<p>To date, these types have been sufficient for describing things that
+TableGen has been used for, but it is straightforward to extend this list if
+needed.</p>
+
+</div>
+
+<!-- -------------------------------------------------------------------------->
+<div class="doc_subsubsection">
+ <a name="values">TableGen values and expressions</a>
+</div>
+
+<div class="doc_text">
+
+<p>TableGen allows for a pretty reasonable number of different expression forms
+when building up values. These forms allow the TableGen file to be written in a
+natural syntax and flavor for the application. The current expression forms
+supported include:</p>
+
+<ul>
+<li><tt>?</tt> - uninitialized field</li>
+<li><tt>0b1001011</tt> - binary integer value</li>
+<li><tt>07654321</tt> - octal integer value (indicated by a leading 0)</li>
+<li><tt>7</tt> - decimal integer value</li>
+<li><tt>0x7F</tt> - hexadecimal integer value</li>
+<li><tt>"foo"</tt> - string value</li>
+<li><tt>[{ ... }]</tt> - code fragment</li>
+<li><tt>[ X, Y, Z ]</tt> - list value.</li>
+<li><tt>{ a, b, c }</tt> - initializer for a "bits&lt;3&gt;" value</li>
+<li><tt>value</tt> - value reference</li>
+<li><tt>value{17}</tt> - access to one bit of a value</li>
+<li><tt>value{15-17}</tt> - access to multiple bits of a value</li>
+<li><tt>DEF</tt> - reference to a record definition</li>
+<li><tt>CLASS&lt;val list&gt;</tt> - reference to a new anonymous definition of
+ CLASS with the specified template arguments.</li>
+<li><tt>X.Y</tt> - reference to the subfield of a value</li>
+<li><tt>list[4-7,17,2-3]</tt> - A slice of the 'list' list, including elements
+4,5,6,7,17,2, and 3 from it. Elements may be included multiple times.</li>
+<li><tt>(DEF a, b)</tt> - a dag value. The first element is required to be a
+record definition, the remaining elements in the list may be arbitrary other
+values, including nested `<tt>dag</tt>' values.</li>
+<li><tt>!strconcat(a, b)</tt> - A string value that is the result of
+ concatenating the 'a' and 'b' strings.</li>
+</ul>
+
+<p>Note that all of the values have rules specifying how they convert to values
+for different types. These rules allow you to assign a value like "7" to a
+"bits&lt;4&gt;" value, for example.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="classesdefs">Classes and definitions</a>
+</div>
+
+<div class="doc_text">
+
+<p>As mentioned in the <a href="#concepts">intro</a>, classes and definitions
+(collectively known as 'records') in TableGen are the main high-level unit of
+information that TableGen collects. Records are defined with a <tt>def</tt> or
+<tt>class</tt> keyword, the record name, and an optional list of "<a
+href="#templateargs">template arguments</a>". If the record has superclasses,
+they are specified as a comma separated list that starts with a colon character
+(":"). If <a href="#valuedef">value definitions</a> or <a href="#recordlet">let
+expressions</a> are needed for the class, they are enclosed in curly braces
+("{}"); otherwise, the record ends with a semicolon. Here is a simple TableGen
+file:</p>
+
+<pre>
+<b>class</b> C { <b>bit</b> V = 1; }
+<b>def</b> X : C;
+<b>def</b> Y : C {
+ <b>string</b> Greeting = "hello";
+}
+</pre>
+
+<p>This example defines two definitions, <tt>X</tt> and <tt>Y</tt>, both of
+which derive from the <tt>C</tt> class. Because of this, they both get the
+<tt>V</tt> bit value. The <tt>Y</tt> definition gets the Greeting member
+as well.</p>
+
+<p>In general, classes are useful for collecting together the commonality
+between a group of records and isolating it in a single place. Also, classes
+permit the specification of default values for their subclasses, allowing the
+subclasses to override them as they wish.</p>
+
+</div>
+
+<!---------------------------------------------------------------------------->
+<div class="doc_subsubsection">
+ <a name="valuedef">Value definitions</a>
+</div>
+
+<div class="doc_text">
+<p>Value definitions define named entries in records. A value must be defined
+before it can be referred to as the operand for another value definition or
+before the value is reset with a <a href="#recordlet">let expression</a>. A
+value is defined by specifying a <a href="#types">TableGen type</a> and a name.
+If an initial value is available, it may be specified after the type with an
+equal sign. Value definitions require terminating semicolons.</p>
+</div>
+
+<!-- -------------------------------------------------------------------------->
+<div class="doc_subsubsection">
+ <a name="recordlet">'let' expressions</a>
+</div>
+
+<div class="doc_text">
+<p>A record-level let expression is used to change the value of a value
+definition in a record. This is primarily useful when a superclass defines a
+value that a derived class or definition wants to override. Let expressions
+consist of the '<tt>let</tt>' keyword followed by a value name, an equal sign
+("="), and a new value. For example, a new class could be added to the example
+above, redefining the <tt>V</tt> field for all of its subclasses:</p>
+
+<pre>
+<b>class</b> D : C { let V = 0; }
+<b>def</b> Z : D;
+</pre>
+
+<p>In this case, the <tt>Z</tt> definition will have a zero value for its "V"
+value, despite the fact that it derives (indirectly) from the <tt>C</tt> class,
+because the <tt>D</tt> class overrode its value.</p>
+
+</div>
+
+<!-- -------------------------------------------------------------------------->
+<div class="doc_subsubsection">
+ <a name="templateargs">Class template arguments</a>
+</div>
+
+<div class="doc_text">
+<p>TableGen permits the definition of parameterized classes as well as normal
+concrete classes. Parameterized TableGen classes specify a list of variable
+bindings (which may optionally have defaults) that are bound when used. Here is
+a simple example:</p>
+
+<pre>
+<b>class</b> FPFormat&lt;<b>bits</b>&lt;3&gt; val&gt; {
+ <b>bits</b>&lt;3&gt; Value = val;
+}
+<b>def</b> NotFP : FPFormat&lt;0&gt;;
+<b>def</b> ZeroArgFP : FPFormat&lt;1&gt;;
+<b>def</b> OneArgFP : FPFormat&lt;2&gt;;
+<b>def</b> OneArgFPRW : FPFormat&lt;3&gt;;
+<b>def</b> TwoArgFP : FPFormat&lt;4&gt;;
+<b>def</b> SpecialFP : FPFormat&lt;5&gt;;
+</pre>
+
+<p>In this case, template arguments are used as a space efficient way to specify
+a list of "enumeration values", each with a "Value" field set to the specified
+integer.</p>
+
+<p>The more esoteric forms of <a href="#values">TableGen expressions</a> are
+useful in conjunction with template arguments. As an example:</p>
+
+<pre>
+<b>class</b> ModRefVal&lt;<b>bits</b>&lt;2&gt; val&gt; {
+ <b>bits</b>&lt;2&gt; Value = val;
+}
+
+<b>def</b> None : ModRefVal&lt;0&gt;;
+<b>def</b> Mod : ModRefVal&lt;1&gt;;
+<b>def</b> Ref : ModRefVal&lt;2&gt;;
+<b>def</b> ModRef : ModRefVal&lt;3&gt;;
+
+<b>class</b> Value&lt;ModRefVal MR&gt; {
+ <i>// decode some information into a more convenient format, while providing
+ // a nice interface to the user of the "Value" class.</i>
+ <b>bit</b> isMod = MR.Value{0};
+ <b>bit</b> isRef = MR.Value{1};
+
+ <i>// other stuff...</i>
+}
+
+<i>// Example uses</i>
+<b>def</b> bork : Value&lt;Mod&gt;;
+<b>def</b> zork : Value&lt;Ref&gt;;
+<b>def</b> hork : Value&lt;ModRef&gt;;
+</pre>
+
+<p>This is obviously a contrived example, but it shows how template arguments
+can be used to decouple the interface provided to the user of the class from the
+actual internal data representation expected by the class. In this case,
+running <tt>tblgen</tt> on the example prints the following definitions:</p>
+
+<pre>
+<b>def</b> bork { <i>// Value</i>
+ <b>bit</b> isMod = 1;
+ <b>bit</b> isRef = 0;
+}
+<b>def</b> hork { <i>// Value</i>
+ <b>bit</b> isMod = 1;
+ <b>bit</b> isRef = 1;
+}
+<b>def</b> zork { <i>// Value</i>
+ <b>bit</b> isMod = 0;
+ <b>bit</b> isRef = 1;
+}
+</pre>
+
+<p> This shows that TableGen was able to dig into the argument and extract a
+piece of information that was requested by the designer of the "Value" class.
+For more realistic examples, please see existing users of TableGen, such as the
+X86 backend.</p>
+
+</div>
+
+<!-- -------------------------------------------------------------------------->
+<div class="doc_subsubsection">
+ <a name="multiclass">Multiclass definitions and instances</a>
+</div>
+
+<div class="doc_text">
+
+<p>
+While classes with template arguments are a good way to factor commonality
+between two instances of a definition, multiclasses allow a convenient notation
+for defining multiple definitions at once (instances of implicitly constructed
+classes). For example, consider a 3-address instruction set whose instructions
+come in two forms: "reg = reg op reg" and "reg = reg op imm" (e.g. SPARC). In
+this case, you'd like to specify in one place that this commonality exists, then
+in a separate place indicate what all the ops are.
+</p>
+
+<p>
+Here is an example TableGen fragment that shows this idea:
+</p>
+
+<pre>
+<b>def</b> ops;
+<b>def</b> GPR;
+<b>def</b> Imm;
+<b>class</b> inst&lt;<b>int</b> opc, <b>string</b> asmstr, <b>dag</b> operandlist&gt;;
+
+<b>multiclass</b> ri_inst&lt;<b>int</b> opc, <b>string</b> asmstr&gt; {
+ def _rr : inst&lt;opc, !strconcat(asmstr, " $dst, $src1, $src2"),
+ (ops GPR:$dst, GPR:$src1, GPR:$src2)&gt;;
+ def _ri : inst&lt;opc, !strconcat(asmstr, " $dst, $src1, $src2"),
+ (ops GPR:$dst, GPR:$src1, Imm:$src2)&gt;;
+}
+
+// Instantiations of the ri_inst multiclass.
+<b>defm</b> ADD : ri_inst&lt;0b111, "add"&gt;;
+<b>defm</b> SUB : ri_inst&lt;0b101, "sub"&gt;;
+<b>defm</b> MUL : ri_inst&lt;0b100, "mul"&gt;;
+...
+</pre>
+
+<p>The names of the resultant definitions have the multidef fragment names
+ appended to them, so this defines ADD_rr, ADD_ri, SUB_rr, etc. Using a
+ multiclass this way is exactly equivalent to instantiating the
+ classes multiple times yourself, e.g. by writing:</p>
+
+<pre>
+<b>def</b> ops;
+<b>def</b> GPR;
+<b>def</b> Imm;
+<b>class</b> inst&lt;<b>int</b> opc, <b>string</b> asmstr, <b>dag</b> operandlist&gt;;
+
+<b>class</b> rrinst&lt;<b>int</b> opc, <b>string</b> asmstr&gt;
+ : inst&lt;opc, !strconcat(asmstr, " $dst, $src1, $src2"),
+ (ops GPR:$dst, GPR:$src1, GPR:$src2)&gt;;
+
+<b>class</b> riinst&lt;<b>int</b> opc, <b>string</b> asmstr&gt;
+ : inst&lt;opc, !strconcat(asmstr, " $dst, $src1, $src2"),
+ (ops GPR:$dst, GPR:$src1, Imm:$src2)&gt;;
+
+// Instantiations of the ri_inst multiclass.
+<b>def</b> ADD_rr : rrinst&lt;0b111, "add"&gt;;
+<b>def</b> ADD_ri : riinst&lt;0b111, "add"&gt;;
+<b>def</b> SUB_rr : rrinst&lt;0b101, "sub"&gt;;
+<b>def</b> SUB_ri : riinst&lt;0b101, "sub"&gt;;
+<b>def</b> MUL_rr : rrinst&lt;0b100, "mul"&gt;;
+<b>def</b> MUL_ri : riinst&lt;0b100, "mul"&gt;;
+...
+</pre>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="filescope">File scope entities</a>
+</div>
+
+<!-- -------------------------------------------------------------------------->
+<div class="doc_subsubsection">
+ <a name="include">File inclusion</a>
+</div>
+
+<div class="doc_text">
+<p>TableGen supports the '<tt>include</tt>' token, which textually substitutes
+the specified file in place of the include directive. The filename should be
+specified as a double quoted string immediately after the '<tt>include</tt>'
+keyword. Example:</p>
+
+<pre>
+<b>include</b> "foo.td"
+</pre>
+
+</div>
+
+<!-- -------------------------------------------------------------------------->
+<div class="doc_subsubsection">
+ <a name="globallet">'let' expressions</a>
+</div>
+
+<div class="doc_text">
+<p> "let" expressions at file scope are similar to <a href="#recordlet">"let"
+expressions within a record</a>, except they can specify a value binding for
+multiple records at a time, and may be useful in certain other cases.
+File-scope let expressions are really just another way that TableGen allows the
+end-user to factor out commonality from the records.</p>
+
+<p>File-scope "let" expressions take a comma-separated list of bindings to
+apply, and one or more records to bind the values in. Here are some
+examples:</p>
+
+<pre>
+<b>let</b> isTerminator = 1, isReturn = 1 <b>in</b>
+ <b>def</b> RET : X86Inst&lt;"ret", 0xC3, RawFrm, NoArg&gt;;
+
+<b>let</b> isCall = 1 <b>in</b>
+ <i>// All calls clobber the non-callee saved registers...</i>
+ <b>let</b> Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6] <b>in</b> {
+ <b>def</b> CALLpcrel32 : X86Inst&lt;"call", 0xE8, RawFrm, NoArg&gt;;
+ <b>def</b> CALLr32 : X86Inst&lt;"call", 0xFF, MRMS2r, Arg32&gt;;
+ <b>def</b> CALLm32 : X86Inst&lt;"call", 0xFF, MRMS2m, Arg32&gt;;
+ }
+</pre>
+
+<p>File-scope "let" expressions are often useful when a couple of definitions
+need to be added to several records, and the records do not otherwise need to be
+opened, as in the case with the CALL* instructions above.</p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section"><a name="backends">TableGen backends</a></div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+<p>How they work, how to write one. This section should not contain details
+about any particular backend, except maybe -print-enums as an example. This
+should highlight the APIs in <tt>TableGen/Record.h</tt>.</p>
+</div>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+ <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
+ <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+
+</body>
+</html>
diff --git a/docs/TestingGuide.html b/docs/TestingGuide.html
new file mode 100644
index 0000000..cb88037
--- /dev/null
+++ b/docs/TestingGuide.html
@@ -0,0 +1,813 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>LLVM Test Suite Guide</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<div class="doc_title">
+ LLVM Test Suite Guide
+</div>
+
+<ol>
+ <li><a href="#overview">Overview</a></li>
+ <li><a href="#Requirements">Requirements</a></li>
+ <li><a href="#quick">Quick Start</a></li>
+ <li><a href="#org">LLVM Test Suite Organization</a>
+ <ul>
+ <li><a href="#codefragments">Code Fragments</a></li>
+ <li><a href="#wholeprograms">Whole Programs</a></li>
+ </ul>
+ </li>
+ <li><a href="#tree">LLVM Test Suite Tree</a></li>
+ <li><a href="#dgstructure">DejaGNU Structure</a></li>
+ <li><a href="#progstructure"><tt>llvm-test</tt> Structure</a></li>
+ <li><a href="#run">Running the LLVM Tests</a>
+ <ul>
+ <li><a href="#customtest">Writing custom tests for llvm-test</a></li>
+ </ul>
+ </li>
+ <li><a href="#nightly">Running the nightly tester</a></li>
+</ol>
+
+<div class="doc_author">
+ <p>Written by John T. Criswell, <a
+ href="http://llvm.x10sys.com/rspencer">Reid Spencer</a>, and Tanya Lattner</p>
+</div>
+
+<!--=========================================================================-->
+<div class="doc_section"><a name="overview">Overview</a></div>
+<!--=========================================================================-->
+
+<div class="doc_text">
+
+<p>This document is the reference manual for the LLVM test suite. It documents
+the structure of the LLVM test suite, the tools needed to use it, and how to add
+and run tests.</p>
+
+</div>
+
+<!--=========================================================================-->
+<div class="doc_section"><a name="Requirements">Requirements</a></div>
+<!--=========================================================================-->
+
+<div class="doc_text">
+
+<p>In order to use the LLVM test suite, you will need all of the software
+required to build LLVM, plus the following:</p>
+
+<dl>
+<dt><a href="http://www.gnu.org/software/dejagnu/">DejaGNU</a></dt>
+<dd>The Features and Regression tests are organized and run by DejaGNU.</dd>
+<dt><a href="http://expect.nist.gov/">Expect</a></dt>
+<dd>Expect is required by DejaGNU.</dd>
+<dt><a href="http://www.tcl.tk/software/tcltk/">tcl</a></dt>
+<dd>Tcl is required by DejaGNU. </dd>
+
+<dt><a href="http://www.netlib.org/f2c">F2C</a></dt>
+<dd>For now, LLVM does not have a Fortran front-end, but using F2C, we can run
+Fortran benchmarks. F2C support must be enabled via <tt>configure</tt> if not
+installed in a standard place. F2C requires three items: the <tt>f2c</tt>
+executable, <tt>f2c.h</tt> to compile the generated code, and <tt>libf2c.a</tt>
+to link generated code. By default, given an F2C directory <tt>$DIR</tt>, the
+configure script will search <tt>$DIR/bin</tt> for <tt>f2c</tt>,
+<tt>$DIR/include</tt> for <tt>f2c.h</tt>, and <tt>$DIR/lib</tt> for
+<tt>libf2c.a</tt>. The default <tt>$DIR</tt> values are: <tt>/usr</tt>,
+<tt>/usr/local</tt>, <tt>/sw</tt>, and <tt>/opt</tt>. If you installed F2C in a
+different location, you must tell <tt>configure</tt>:
+
+<ul>
+<li><tt>./configure --with-f2c=$DIR</tt><br>
+This will specify a new <tt>$DIR</tt> for the above-described search
+process. This will only work if the binary, header, and library are in their
+respective subdirectories of <tt>$DIR</tt>.</li>
+
+<li><tt>./configure --with-f2c-bin=/binary/path --with-f2c-inc=/include/path
+--with-f2c-lib=/lib/path</tt><br>
+This allows you to specify the F2C components separately. Note: if you choose
+this route, you MUST specify all three components, and you need to only specify
+<em>directories</em> where the files are located; do NOT include the
+filenames themselves on the <tt>configure</tt> line.</li>
+</ul></dd>
+</dl>
+
+<p>Darwin (Mac OS X) developers can simplify the installation of Expect and tcl
+by using fink. <tt>fink install expect</tt> will install both. Alternatively,
+Darwinports users can use <tt>sudo port install expect</tt> to install Expect
+and tcl.</p>
+
+</div>
+
+<!--=========================================================================-->
+<div class="doc_section"><a name="quick">Quick Start</a></div>
+<!--=========================================================================-->
+
+<div class="doc_text">
+
+ <p>The tests are located in two separate Subversion modules. The basic feature
+ and regression tests are in the main "llvm" module under the directory
+ <tt>llvm/test</tt>. A more comprehensive test suite that includes whole
+programs in C and C++ is in the <tt>test-suite</tt> module. This module should
+be checked out to the <tt>llvm/projects</tt> directory as llvm-test (for
+historical reasons). When you <tt>configure</tt> the <tt>llvm</tt> module,
+the <tt>llvm-test</tt> directory will be automatically configured.
+Alternatively, you can configure the <tt>test-suite</tt> module manually.</p>
+<p>To run all of the simple tests in LLVM using DejaGNU, use the master Makefile
+ in the <tt>llvm/test</tt> directory:</p>
+<pre>
+% gmake -C llvm/test
+</pre>
+or<br>
+<pre>
+% gmake check
+</pre>
+
+<p>To run only a subdirectory of tests in llvm/test using DejaGNU (e.g.
+Regression/Transforms), just set the TESTSUITE variable to the path of the
+subdirectory (relative to <tt>llvm/test</tt>):</p>
+<pre>
+% gmake -C llvm/test TESTSUITE=Regression/Transforms
+</pre>
+
+<p><b>Note: If you are running the tests with <tt>objdir != srcdir</tt>, you
+must have run the complete testsuite before you can specify a
+subdirectory.</b></p>
+
+<p>To run the comprehensive test suite (tests that compile and execute whole
+programs), run the <tt>llvm-test</tt> tests:</p>
+
+<pre>
+% cd llvm/projects
+% svn co http://llvm.org/svn/llvm-project/test-suite/trunk llvm-test
+% cd llvm-test
+% ./configure --with-llvmsrc=$LLVM_SRC_ROOT --with-llvmobj=$LLVM_OBJ_ROOT
+% gmake
+</pre>
+
+</div>
+
+<!--=========================================================================-->
+<div class="doc_section"><a name="org">LLVM Test Suite Organization</a></div>
+<!--=========================================================================-->
+
+<div class="doc_text">
+
+<p>The LLVM test suite contains two major categories of tests: code
+fragments and whole programs. Code fragments are in the <tt>llvm</tt> module
+under the <tt>llvm/test</tt> directory. The whole programs
+test suite is in the <tt>llvm-test</tt> module under the main directory.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="codefragments">Code Fragments</a></div>
+<!-- _______________________________________________________________________ -->
+
+<div class="doc_text">
+
+<p>Code fragments are small pieces of code that test a specific feature of LLVM
+or trigger a specific bug in LLVM. They are usually written in LLVM assembly
+language, but can be written in other languages if the test targets a particular
+language front end.</p>
+
+<p>Code fragments are not complete programs, and they are never executed to
+determine correct behavior.</p>
+
+<p>These code fragment tests are located in the <tt>llvm/test/Features</tt> and
+<tt>llvm/test/Regression</tt> directories.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="wholeprograms">Whole Programs</a></div>
+<!-- _______________________________________________________________________ -->
+
+<div class="doc_text">
+
+<p>Whole Programs are pieces of code which can be compiled and linked into a
+stand-alone program that can be executed. These programs are generally written
+in high level languages such as C or C++, but sometimes they are written
+straight in LLVM assembly.</p>
+
+<p>These programs are compiled and then executed using several different
+methods (native compiler, LLVM C backend, LLVM JIT, LLVM native code generation,
+etc). The output of these programs is compared to ensure that LLVM is compiling
+the program correctly.</p>
+
+<p>In addition to compiling and executing programs, whole program tests serve as
+a way of benchmarking LLVM performance, both in terms of the efficiency of the
+programs generated as well as the speed with which LLVM compiles, optimizes, and
+generates code.</p>
+
+<p>All "whole program" tests are located in the <tt>test-suite</tt> Subversion
+module.</p>
+
+</div>
+
+<!--=========================================================================-->
+<div class="doc_section"><a name="tree">LLVM Test Suite Tree</a></div>
+<!--=========================================================================-->
+
+<div class="doc_text">
+
+<p>Each type of test in the LLVM test suite has its own directory. The major
+subtrees of the test suite directory tree are as follows:</p>
+
+<ul>
+ <li><tt>llvm/test</tt>
+ <p>This directory contains a large array of small tests
+ that exercise various features of LLVM and ensure that regressions do not
+ occur. The directory is broken into several sub-directories, each focused on
+ a particular area of LLVM. A few of the important ones are:</p><ul>
+ <li><tt>Analysis</tt>: checks Analysis passes.</li>
+ <li><tt>Archive</tt>: checks the Archive library.</li>
+ <li><tt>Assembler</tt>: checks Assembly reader/writer functionality.</li>
+ <li><tt>Bitcode</tt>: checks Bitcode reader/writer functionality.</li>
+ <li><tt>CodeGen</tt>: checks code generation and each target.</li>
+ <li><tt>Features</tt>: checks various features of the LLVM language.</li>
+ <li><tt>Linker</tt>: tests bitcode linking.</li>
+ <li><tt>Transforms</tt>: tests each of the scalar, IPO, and utility
+ transforms to ensure they make the right transformations.</li>
+ <li><tt>Verifier</tt>: tests the IR verifier.</li>
+ </ul>
+ <p>Typically when a bug is found in LLVM, a regression test containing
+ just enough code to reproduce the problem should be written and placed
+ somewhere underneath this directory. In most cases, this will be a small
+ piece of LLVM assembly language code, often distilled from an actual
+ application or benchmark.</p></li>
+
+<li><tt>test-suite</tt>
+<p>The <tt>test-suite</tt> module contains programs that can be compiled
+with LLVM and executed. These programs are compiled using the native compiler
+and various LLVM backends. The output from the program compiled with the
+native compiler is assumed correct; the results from the other programs are
+compared to the native program output and pass if they match.</p>
+
+<p>In addition to testing correctness, the <tt>llvm-test</tt> directory also
+performs timing tests of various LLVM optimizations. It also records
+compilation times for the compilers and the JIT. This information can be
+used to compare the effectiveness of LLVM's optimizations and code
+generation.</p></li>
+
+<li><tt>llvm-test/SingleSource</tt>
+<p>The SingleSource directory contains test programs that are only a single
+source file in size. These are usually small benchmark programs or small
+programs that calculate a particular value. Several such programs are grouped
+together in each directory.</p></li>
+
+<li><tt>llvm-test/MultiSource</tt>
+<p>The MultiSource directory contains subdirectories which contain entire
+programs with multiple source files. Large benchmarks and whole applications
+go here.</p></li>
+
+<li><tt>llvm-test/External</tt>
+<p>The External directory contains Makefiles for building code that is external
+to (i.e., not distributed with) LLVM. The most prominent members of this
+directory are the SPEC 95 and SPEC 2000 benchmark suites. The presence and
+location of these external programs is configured by the llvm-test
+<tt>configure</tt> script.</p></li>
+
+</ul>
+
+</div>
+<!--=========================================================================-->
+<div class="doc_section"><a name="dgstructure">DejaGNU Structure</a></div>
+<!--=========================================================================-->
+<div class="doc_text">
+ <p>The LLVM test suite is partially driven by DejaGNU and partially driven by
+ GNU Make. Specifically, the Features and Regression tests are all driven by
+ DejaGNU. The <tt>llvm-test</tt> module is currently driven by a set of
+ Makefiles.</p>
+
+ <p>The DejaGNU structure is very simple, but does require some information to
+ be set. This information is gathered via <tt>configure</tt> and is written
+ to a file, <tt>site.exp</tt> in <tt>llvm/test</tt>. The <tt>llvm/test</tt>
+ Makefile does this work for you.</p>
+
+ <p>In order for DejaGNU to work, each directory of tests must have a
+ <tt>dg.exp</tt> file. DejaGNU looks for this file to determine how to run the
+ tests. This file is just a Tcl script and it can do anything you want, but
+ we've standardized it for the LLVM regression tests. It simply loads a Tcl
+ library (<tt>test/lib/llvm.exp</tt>) and calls the <tt>llvm_runtests</tt>
+ function defined in that library with a list of file names to run. The names
+ are obtained by using Tcl's glob command. Any directory that contains only
+ directories does not need the <tt>dg.exp</tt> file.</p>
+
+ <p>The <tt>llvm-runtests</tt> function looks at each file that is passed to
+ it and gathers any lines together that match "RUN:". These are the "RUN" lines
+ that specify how the test is to be run. So, each test script must contain
+ RUN lines if it is to do anything. If there are no RUN lines, the
+ <tt>llvm-runtests</tt> function will issue an error and the test will
+ fail.</p>
+
+ <p>RUN lines are specified in the comments of the test program using the
+ keyword <tt>RUN</tt> followed by a colon, and lastly the command (pipeline)
+ to execute. Together, these lines form the "script" that
+ <tt>llvm-runtests</tt> executes to run the test case. The syntax of the
+ RUN lines is similar to a shell's syntax for pipelines including I/O
+ redirection and variable substitution. However, even though these lines
+ may <i>look</i> like a shell script, they are not. RUN lines are interpreted
+ directly by the Tcl <tt>exec</tt> command. They are never executed by a
+ shell. Consequently the syntax differs from normal shell script syntax in a
+ few ways. You can specify as many RUN lines as needed.</p>
+
+ <p>Each RUN line is executed on its own, distinct from other lines unless
+ its last character is <tt>\</tt>. This continuation character causes the RUN
+ line to be concatenated with the next one. In this way you can build up long
+ pipelines of commands without making huge line lengths. The lines ending in
+ <tt>\</tt> are concatenated until a RUN line that doesn't end in <tt>\</tt> is
+ found. This concatenated set of RUN lines then constitutes one execution.
+ Tcl will substitute variables and arrange for the pipeline to be executed. If
+ any process in the pipeline fails, the entire line (and test case) fails too.
+ </p>
+
+ <p> Below is an example of legal RUN lines in a <tt>.ll</tt> file:</p>
+ <pre>
+ ; RUN: llvm-as &lt; %s | llvm-dis &gt; %t1
+ ; RUN: llvm-dis &lt; %s.bc-13 &gt; %t2
+ ; RUN: diff %t1 %t2
+ </pre>
+
+ <p>As with a Unix shell, the RUN: lines permit pipelines and I/O redirection
+ to be used. However, the usage is slightly different than for Bash. To check
+ what's legal, see the documentation for the
+ <a href="http://www.tcl.tk/man/tcl8.5/TclCmd/exec.htm#M2">Tcl exec</a>
+ command and the
+ <a href="http://www.tcl.tk/man/tcl8.5/tutorial/Tcl26.html">tutorial</a>.
+ The major differences are:</p>
+ <ul>
+ <li>You can't do <tt>2&gt;&amp;1</tt>. That will cause Tcl to write to a
+ file named <tt>&amp;1</tt>. Usually this is done to get stderr to go through
+ a pipe. You can do that in tcl with <tt>|&amp;</tt> so replace this idiom:
+ <tt>... 2&gt;&amp;1 | grep</tt> with <tt>... |&amp; grep</tt></li>
+ <li>You can only redirect to a file, not to another descriptor and not from
+ a here document.</li>
+ <li>tcl supports redirecting to open files with the @ syntax but you
+ shouldn't use that here.</li>
+ </ul>
+
+ <p>There are some quoting rules that you must pay attention to when writing
+ your RUN lines. In general nothing needs to be quoted. Tcl won't strip off any
+ ' or " so they will get passed to the invoked program. For example:</p>
+ <pre>
+ ... | grep 'find this string'
+ </pre>
+ <p>This will fail because the ' characters are passed to grep. This would
+ instruct grep to look for <tt>'find</tt> in the files <tt>this</tt> and
+ <tt>string'</tt>. To avoid this use curly braces to tell Tcl that it should
+ treat everything enclosed as one value. So our example would become:</p>
+ <pre>
+ ... | grep {find this string}
+ </pre>
+ <p>Additionally, the characters <tt>[</tt> and <tt>]</tt> are treated
+ specially by Tcl. They tell Tcl to interpret the content as a command to
+ execute. Since these characters are often used in regular expressions this can
+ have disastrous results and cause the entire test run in a directory to fail.
+ For example, a common idiom is to look for some basicblock number:</p>
+ <pre>
+ ... | grep bb[2-8]
+ </pre>
+ <p>This, however, will cause Tcl to fail because it's going to try to execute
+ a program named "2-8". Instead, what you want is this:</p>
+ <pre>
+ ... | grep {bb\[2-8\]}
+ </pre>
+ <p>Finally, if you need to pass the <tt>\</tt> character down to a program,
+ then it must be doubled. This is another Tcl special character. So, suppose
+ you had:</p>
+ <pre>
+ ... | grep 'i32\*'
+ </pre>
+ <p>This will fail to match what you want (a pointer to i32). First, the
+ <tt>'</tt> do not get stripped off. Second, the <tt>\</tt> gets stripped off
+ by Tcl so what grep sees is: <tt>'i32*'</tt>. That's not likely to match
+ anything. To resolve this you must use <tt>\\</tt> and the <tt>{}</tt>, like
+ this:</p>
+ <pre>
+ ... | grep {i32\\*}
+ </pre>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="dgvars">Vars And Substitutions</a></div>
+<div class="doc_text">
+ <p>With a RUN line there are a number of substitutions that are permitted. In
+ general, any Tcl variable that is available in the <tt>substitute</tt>
+ function (in <tt>test/lib/llvm.exp</tt>) can be substituted into a RUN line.
+ To make a substitution just write the variable's name preceded by a $.
+ Additionally, for compatibility reasons with previous versions of the test
+ library, certain names can be accessed with an alternate syntax: a % prefix.
+ These alternates are deprecated and may go away in a future version.
+ </p>
+ <p>Here are the available variable names. The alternate syntax is listed in
+ parentheses.</p>
+ <dl style="margin-left: 25px">
+ <dt><b>$test</b> (%s)</dt>
+ <dd>The full path to the test case's source. This is suitable for passing
+ on the command line as the input to an llvm tool.</dd>
+ <dt><b>$srcdir</b></dt>
+ <dd>The source directory from where the "<tt>make check</tt>" was run.</dd>
+ <dt><b>objdir</b></dt>
+ <dd>The object directory that corresponds to the <tt>$srcdir</tt>.</dd>
+ <dt><b>subdir</b></dt>
+ <dd>A partial path from the <tt>test</tt> directory that contains the
+ sub-directory that contains the test source being executed.</dd>
+ <dt><b>srcroot</b></dt>
+ <dd>The root directory of the LLVM src tree.</dd>
+ <dt><b>objroot</b></dt>
+ <dd>The root directory of the LLVM object tree. This could be the same
+ as the srcroot.</dd>
+ <dt><b>path</b></dt>
+ <dd>The path to the directory that contains the test case source. This is
+ for locating any supporting files that are not generated by the test, but
+ used by the test.</dd>
+ <dt><b>tmp</b></dt>
+ <dd>The path to a temporary file name that could be used for this test case.
+ The file name won't conflict with other test cases. You can append to it if
+ you need multiple temporaries. This is useful as the destination of some
+ redirected output.</dd>
+ <dt><b>llvmlibsdir</b> (%llvmlibsdir)</dt>
+ <dd>The directory where the LLVM libraries are located.</dd>
+ <dt><b>target_triplet</b> (%target_triplet)</dt>
+ <dd>The target triplet that corresponds to the current host machine (the one
+ running the test cases). This should probably be called "host".</dd>
+ <dt><b>prcontext</b> (%prcontext)</dt>
+ <dd>Path to the prcontext tcl script that prints some context around a
+ line that matches a pattern. This isn't strictly necessary as the test suite
+ is run with its PATH altered to include the test/Scripts directory where
+ the prcontext script is located. Note that this script is similar to
+ <tt>grep -C</tt> but you should use the <tt>prcontext</tt> script because
+ not all platforms support <tt>grep -C</tt>.</dd>
+ <dt><b>llvmgcc</b> (%llvmgcc)</dt>
+ <dd>The full path to the <tt>llvm-gcc</tt> executable as specified in the
+ configured LLVM environment</dd>
+ <dt><b>llvmgxx</b> (%llvmgxx)</dt>
+ <dd>The full path to the <tt>llvm-gxx</tt> executable as specified in the
+ configured LLVM environment</dd>
+ <dt><b>llvmgcc_version</b> (%llvmgcc_version)</dt>
+ <dd>The full version number of the <tt>llvm-gcc</tt> executable.</dd>
+ <dt><b>llvmgccmajvers</b> (%llvmgccmajvers)</dt>
+ <dd>The major version number of the <tt>llvm-gcc</tt> executable.</dd>
+ <dt><b>gccpath</b></dt>
+ <dd>The full path to the C compiler used to <i>build </i> LLVM. Note that
+ this might not be gcc.</dd>
+ <dt><b>gxxpath</b></dt>
+ <dd>The full path to the C++ compiler used to <i>build </i> LLVM. Note that
+ this might not be g++.</dd>
+ <dt><b>compile_c</b> (%compile_c)</dt>
+ <dd>The full command line used to compile LLVM C source code. This has all
+ the configured -I, -D and optimization options.</dd>
+ <dt><b>compile_cxx</b> (%compile_cxx)</dt>
+ <dd>The full command used to compile LLVM C++ source code. This has
+ all the configured -I, -D and optimization options.</dd>
+ <dt><b>link</b> (%link)</dt>
+ <dd>The full link command used to link LLVM executables. This has all the
+ configured -I, -L and -l options.</dd>
+ <dt><b>shlibext</b> (%shlibext)</dt>
+ <dd>The suffix for the host platform's shared library (dll) files. This
+ includes the period as the first character.</dd>
+ </dl>
+ <p>To add more variables, two things need to be changed. First, add a line in
+ the <tt>test/Makefile</tt> that creates the <tt>site.exp</tt> file. This will
+ "set" the variable as a global in the site.exp file. Second, in the
+ <tt>test/lib/llvm.exp</tt> file, in the substitute proc, add the variable name
+ to the list of "global" declarations at the beginning of the proc. That's it,
+ the variable can then be used in test scripts.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection"><a name="dgfeatures">Other Features</a></div>
+<div class="doc_text">
+ <p>To make RUN line writing easier, there are several shell scripts located
+ in the <tt>llvm/test/Scripts</tt> directory. For example:</p>
+ <dl>
+ <dt><b>ignore</b></dt>
+ <dd>This script runs its arguments and then always returns 0. This is useful
+ in cases where the test needs to cause a tool to generate an error (e.g. to
+ check the error output). However, any program in a pipeline that returns a
+ non-zero result will cause the test to fail. This script overcomes that
+ issue and nicely documents that the test case is purposefully ignoring the
+ result code of the tool</dd>
+ <dt><b>not</b></dt>
+ <dd>This script runs its arguments and then inverts the result code from
+ it. Zero result codes become 1. Non-zero result codes become 0. This is
+ useful to invert the result of a grep. For example "not grep X" means
+ succeed only if you don't find X in the input.</dd>
+ </dl>
+
+ <p>Sometimes it is necessary to mark a test case as "expected fail" or XFAIL.
+ You can easily mark a test as XFAIL just by including <tt>XFAIL: </tt> on a
+ line near the top of the file. This signals that the test case should succeed
+ if the test fails. Such test cases are counted separately by DejaGnu. To
+ specify an expected fail, use the XFAIL keyword in the comments of the test
+ program followed by a colon and one or more regular expressions (separated by
+ a comma). The regular expressions allow you to XFAIL the test conditionally
+ by host platform. The regular expressions following the : are matched against
+ the target triplet or llvmgcc version number for the host machine. If there is
+ a match, the test is expected to fail. If not, the test is expected to
+ succeed. To XFAIL everywhere just specify <tt>XFAIL: *</tt>. When matching
+ the llvm-gcc version, you can specify the major (e.g. 3) or full version
+ (e.g. 3.4) number. Here is an example of an <tt>XFAIL</tt> line:</p>
+ <pre>
+ ; XFAIL: darwin,sun,llvmgcc4
+ </pre>
+
+ <p>To make the output more useful, the <tt>llvm_runtest</tt> function will
+ scan the lines of the test case for ones that contain a pattern that matches
+ PR[0-9]+. This is the syntax for specifying a PR (Problem Report) number that
+ is related to the test case. The number after "PR" specifies the LLVM bugzilla
+ number. When a PR number is specified, it will be used in the pass/fail
+ reporting. This is useful to quickly get some context when a test fails.</p>
+
+ <p>Finally, any line that contains "END." will cause the special
+ interpretation of lines to terminate. This is generally done right after the
+ last RUN: line. This has two side effects: (a) it prevents special
+ interpretation of lines that are part of the test program, not the
+ instructions to the test case, and (b) it speeds things up for really big test
+ cases by avoiding interpretation of the remainder of the file.</p>
+
+</div>
+
+<!--=========================================================================-->
+<div class="doc_section"><a name="progstructure"><tt>llvm-test</tt>
+Structure</a></div>
+<!--=========================================================================-->
+
+<div class="doc_text">
+
+<p>As mentioned previously, the <tt>llvm-test</tt> module provides three types
+of tests: MultiSource, SingleSource, and External. Each tree is then subdivided
+into several categories, including applications, benchmarks, regression tests,
+code that is strange grammatically, etc. These organizations should be
+relatively self explanatory.</p>
+
+<p>In addition to the regular "whole program" tests, the <tt>llvm-test</tt>
+module also provides a mechanism for compiling the programs in different ways.
+If the variable TEST is defined on the gmake command line, the test system will
+include a Makefile named <tt>TEST.&lt;value of TEST variable&gt;.Makefile</tt>.
+This Makefile can modify build rules to yield different results.</p>
+
+<p>For example, the LLVM nightly tester uses <tt>TEST.nightly.Makefile</tt> to
+create the nightly test reports. To run the nightly tests, run <tt>gmake
+TEST=nightly</tt>.</p>
+
+<p>There are several TEST Makefiles available in the tree. Some of them are
+designed for internal LLVM research and will not work outside of the LLVM
+research group. They may still be valuable, however, as a guide to writing your
+own TEST Makefile for any optimization or analysis passes that you develop with
+LLVM.</p>
+
+<p>Note, when configuring the <tt>llvm-test</tt> module, you might want to
+specify the following configuration options:</p>
+<dl>
+ <dt><i>--enable-spec2000</i>
+ <dt><i>--enable-spec2000=&lt;<tt>directory</tt>&gt;</i>
+ <dd>
+ Enable the use of SPEC2000 when testing LLVM. This is disabled by default
+ (unless <tt>configure</tt> finds SPEC2000 installed). By specifying
+ <tt>directory</tt>, you can tell configure where to find the SPEC2000
+ benchmarks. If <tt>directory</tt> is left unspecified, <tt>configure</tt>
+ uses the default value
+ <tt>/home/vadve/shared/benchmarks/speccpu2000/benchspec</tt>.
+ <p>
+ <dt><i>--enable-spec95</i>
+ <dt><i>--enable-spec95=&lt;<tt>directory</tt>&gt;</i>
+ <dd>
+ Enable the use of SPEC95 when testing LLVM. It is similar to the
+ <i>--enable-spec2000</i> option.
+ <p>
+ <dt><i>--enable-povray</i>
+ <dt><i>--enable-povray=&lt;<tt>directory</tt>&gt;</i>
+ <dd>
+ Enable the use of Povray as an external test. Versions of Povray written
+ in C should work. This option is similar to the <i>--enable-spec2000</i>
+ option.
+</dl>
+</div>
+
+<!--=========================================================================-->
+<div class="doc_section"><a name="run">Running the LLVM Tests</a></div>
+<!--=========================================================================-->
+
+<div class="doc_text">
+
+<p>First, all tests are executed within the LLVM object directory tree. They
+<i>are not</i> executed inside of the LLVM source tree. This is because the
+test suite creates temporary files during execution.</p>
+
+<p>The master Makefile in llvm/test is capable of running only the DejaGNU
+driven tests. By default, it will run all of these tests.</p>
+
+<p>To run only the DejaGNU driven tests, run <tt>gmake</tt> at the
+command line in <tt>llvm/test</tt>. To run a specific directory of tests, use
+the TESTSUITE variable.
+</p>
+
+<p>For example, to run the Regression tests, type
+<tt>gmake TESTSUITE=Regression</tt> in <tt>llvm/test</tt>.</p>
+
+<p>Note that there are no Makefiles in <tt>llvm/test/Features</tt> and
+<tt>llvm/test/Regression</tt>. You must use DejaGNU from the <tt>llvm/test</tt>
+directory to run them.</p>
+
+<p>To run the <tt>llvm-test</tt> suite, you need to use the following steps:
+</p>
+<ol>
+ <li>cd into the llvm/projects directory</li>
+ <li>check out the <tt>test-suite</tt> module with:<br/>
+ <tt>svn co http://llvm.org/svn/llvm-project/test-suite/trunk llvm-test</tt><br/>
+ This will get the test suite into <tt>llvm/projects/llvm-test</tt></li>
+ <li>configure the test suite. You can do this one of two ways:
+ <ol>
+ <li>Use the regular llvm configure:<br/>
+ <tt>cd $LLVM_OBJ_ROOT ; $LLVM_SRC_ROOT/configure</tt><br/>
+ This will ensure that the <tt>projects/llvm-test</tt> directory is also
+ properly configured.</li>
+ <li>Use the <tt>configure</tt> script found in the <tt>llvm-test</tt> source
+ directory:<br/>
+ <tt>$LLVM_SRC_ROOT/projects/llvm-test/configure
+ --with-llvmsrc=$LLVM_SRC_ROOT --with-llvmobj=$LLVM_OBJ_ROOT</tt>
+ </li>
+ </ol>
+ <li>gmake</li>
+</ol>
+<p>Note that the second and third steps only need to be done once. After you
+have the suite checked out and configured, you don't need to do it again (unless
+the test code or configure script changes).</p>
+
+<p>To make a specialized test (use one of the
+<tt>llvm-test/TEST.&lt;type&gt;.Makefile</tt>s), just run:<br/>
+<tt>gmake TEST=&lt;type&gt; test</tt><br/>For example, you could run the
+nightly tester tests using the following commands:</p>
+
+<pre>
+ % cd llvm/projects/llvm-test
+ % gmake TEST=nightly test
+</pre>
+
+<p>Regardless of which test you're running, the results are printed on standard
+output and standard error. You can redirect these results to a file if you
+choose.</p>
+
+<p>Some tests are known to fail. Some are bugs that we have not fixed yet;
+others are features that we haven't added yet (or may never add). In DejaGNU,
+the result for such tests will be XFAIL (eXpected FAILure). In this way, you
+can tell the difference between an expected and unexpected failure.</p>
+
+<p>The tests in <tt>llvm-test</tt> have no such feature at this time. If the
+test passes, only warnings and other miscellaneous output will be generated. If
+a test fails, a large &lt;program&gt; FAILED message will be displayed. This
+will help you separate benign warnings from actual test failures.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection">
+<a name="customtest">Writing custom tests for llvm-test</a></div>
+<!-- _______________________________________________________________________ -->
+
+<div class="doc_text">
+
+<p>Assuming you can run llvm-test, (e.g. "<tt>gmake TEST=nightly report</tt>"
+should work), it is really easy to run optimizations or code generator
+components against every program in the tree, collecting statistics or running
+custom checks for correctness. At base, this is how the nightly tester works;
+it's just one example of a general framework.</p>
+
+<p>Let's say that you have an LLVM optimization pass, and you want to see how
+many times it triggers. The first thing you should do is add an LLVM
+<a href="ProgrammersManual.html#Statistic">statistic</a> to your pass, which
+will tally counts of things you care about.</p>
+
+<p>Following this, you can set up a test and a report that collects these and
+formats them for easy viewing. This consists of two files, an
+"<tt>llvm-test/TEST.XXX.Makefile</tt>" fragment (where XXX is the name of your
+test) and an "<tt>llvm-test/TEST.XXX.report</tt>" file that indicates how to
+format the output into a table. There are many example reports of various
+levels of sophistication included with llvm-test, and the framework is very
+general.</p>
+
+<p>If you are interested in testing an optimization pass, check out the
+"libcalls" test as an example. It can be run like this:</p>
+
+<div class="doc_code">
+<pre>
+% cd llvm/projects/llvm-test/MultiSource/Benchmarks # or some other level
+% make TEST=libcalls report
+</pre>
+</div>
+
+<p>This will do a bunch of stuff, then eventually print a table like this:</p>
+
+<div class="doc_code">
+<pre>
+Name | total | #exit |
+...
+FreeBench/analyzer/analyzer | 51 | 6 |
+FreeBench/fourinarow/fourinarow | 1 | 1 |
+FreeBench/neural/neural | 19 | 9 |
+FreeBench/pifft/pifft | 5 | 3 |
+MallocBench/cfrac/cfrac | 1 | * |
+MallocBench/espresso/espresso | 52 | 12 |
+MallocBench/gs/gs | 4 | * |
+Prolangs-C/TimberWolfMC/timberwolfmc | 302 | * |
+Prolangs-C/agrep/agrep | 33 | 12 |
+Prolangs-C/allroots/allroots | * | * |
+Prolangs-C/assembler/assembler | 47 | * |
+Prolangs-C/bison/mybison | 74 | * |
+...
+</pre>
+</div>
+
+<p>This basically is grepping the -stats output and displaying it in a table.
+You can also use the "TEST=libcalls report.html" target to get the table in HTML
+form, similarly for report.csv and report.tex.</p>
+
+<p>The source for this is in llvm-test/TEST.libcalls.*. The format is pretty
+simple: the Makefile indicates how to run the test (in this case,
+"<tt>opt -simplify-libcalls -stats</tt>"), and the report contains one line for
+each column of the output. The first value is the header for the column and the
+second is the regex to grep the output of the command for. There are lots of
+example reports that can do fancy stuff.</p>
+
+</div>
+
+
+<!--=========================================================================-->
+<div class="doc_section"><a name="nightly">Running the nightly tester</a></div>
+<!--=========================================================================-->
+
+<div class="doc_text">
+
+<p>
+The <a href="http://llvm.org/nightlytest/">LLVM Nightly Testers</a>
+automatically check out an LLVM tree, build it, run the "nightly"
+program test (described above), run all of the feature and regression tests,
+delete the checked out tree, and then submit the results to
+<a href="http://llvm.org/nightlytest/">http://llvm.org/nightlytest/</a>.
+After test results are submitted to
+<a href="http://llvm.org/nightlytest/">http://llvm.org/nightlytest/</a>,
+they are processed and displayed on the tests page. An email to
+<a href="http://lists.cs.uiuc.edu/pipermail/llvm-testresults/">
+llvm-testresults@cs.uiuc.edu</a> summarizing the results is also generated.
+This testing scheme is designed to ensure that programs don't break as well
+as keep track of LLVM's progress over time.</p>
+
+<p>If you'd like to set up an instance of the nightly tester to run on your
+machine, take a look at the comments at the top of the
+<tt>utils/NewNightlyTest.pl</tt> file. If you decide to set up a nightly tester
+please choose a unique nickname and invoke <tt>utils/NewNightlyTest.pl</tt>
+with the "-nickname [yournickname]" command line option.</p>
+
+<p>You can create a shell script to encapsulate the running of the script.
+The optimized x86 Linux nightly test is run from just such a script:</p>
+
+<div class="doc_code">
+<pre>
+#!/bin/bash
+BASE=/proj/work/llvm/nightlytest
+export BUILDDIR=$BASE/build
+export WEBDIR=$BASE/testresults
+export LLVMGCCDIR=/proj/work/llvm/cfrontend/install
+export PATH=/proj/install/bin:$LLVMGCCDIR/bin:$PATH
+export LD_LIBRARY_PATH=/proj/install/lib
+cd $BASE
+cp /proj/work/llvm/llvm/utils/NewNightlyTest.pl .
+nice ./NewNightlyTest.pl -nice -release -verbose -parallel -enable-linscan \
+ -nickname NightlyTester -noexternals &gt; output.log 2&gt;&amp;1
+</pre>
+</div>
+
+<p>It is also possible to specify the location where your nightly test results
+are submitted. You can do this by passing the command line option
+"-submit-server [server_address]" and "-submit-script [script_on_server]" to
+<tt>utils/NewNightlyTest.pl</tt>. For example, to submit to the llvm.org
+nightly test results page, you would invoke the nightly test script with
+"-submit-server llvm.org -submit-script /nightlytest/NightlyTestAccept.cgi".
+If these options are not specified, the nightly test script sends the results
+to the llvm.org nightly test results page.</p>
+
+<p>Take a look at the <tt>NewNightlyTest.pl</tt> file to see what all of the
+flags and strings do. If you start running the nightly tests, please let us
+know. Thanks!</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!" /></a>
+
+ John T. Criswell, Reid Spencer, and Tanya Lattner<br>
+ <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br/>
+ Last modified: $Date$
+</address>
+</body>
+</html>
diff --git a/docs/UsingLibraries.html b/docs/UsingLibraries.html
new file mode 100644
index 0000000..af57907
--- /dev/null
+++ b/docs/UsingLibraries.html
@@ -0,0 +1,449 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>Using The LLVM Libraries</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+<div class="doc_title">Using The LLVM Libraries</div>
+<ol>
+ <li><a href="#abstract">Abstract</a></li>
+ <li><a href="#introduction">Introduction</a></li>
+ <li><a href="#descriptions">Library Descriptions</a></li>
+ <li><a href="#dependencies">Library Dependencies</a></li>
+ <li><a href="#rot">Linkage Rules Of Thumb</a>
+ <ol>
+ <li><a href="#always">Always link LLVMCore, LLVMSupport, LLVMSystem</a>
+ <li><a href="#onlyone">Never link both archive and re-linked</a>
+ </ol>
+ </li>
+</ol>
+
+<div class="doc_author">
+ <p>Written by <a href="mailto:rspencer@x10sys.com">Reid Spencer</a></p>
+</div>
+
+<p class="doc_warning">Warning: This document is out of date; please see <a href="CommandGuide/html/llvm-config.html">llvm-config</a> for more information.</p>
+
+<!-- ======================================================================= -->
+<div class="doc_section"><a name="abstract">Abstract</a></div>
+<div class="doc_text">
+ <p>Amongst other things, LLVM is a toolkit for building compilers, linkers,
+ runtime executives, virtual machines, and other program execution related
+ tools. In addition to the LLVM tool set, the functionality of LLVM is
+ available through a set of libraries. To use LLVM as a toolkit for
+ constructing tools, a developer needs to understand what is contained in the
+ various libraries, what they depend on, and how to use them. Fortunately,
+ there is a tool, <tt>llvm-config</tt>, to aid with this. This document
+ describes the contents of the libraries and how to use <tt>llvm-config</tt>
+ to generate command line options.
+</p>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_section"> <a name="introduction">Introduction</a></div>
+<div class="doc_text">
+ <p>If you're writing a compiler, virtual machine, or any other utility based
+ on LLVM, you'll need to figure out which of the many library files you will
+ need to link with to be successful. An understanding of the contents of these
+ libraries will be useful in coming up with an optimal specification for the
+ libraries to link with. The purpose of this document is to reduce some of
+ the trial and error that the author experienced in using LLVM.</p>
+ <p>LLVM produces two types of libraries: archives (ending in <tt>.a</tt>) and
+ objects (ending in <tt>.o</tt>). However, both are libraries. Libraries ending
+ in <tt>.o</tt> are known as re-linked libraries because they contain all the
+ compilation units of the library linked together as a single <tt>.o</tt> file.
+ Furthermore, several of the libraries have <em>both</em> forms of library. The
+ re-linked libraries are used whenever you want to include all symbols from the
+ library. The archive libraries are used whenever you want to only resolve
+ outstanding symbols at that point in the link without including everything in
+ the library. </p>
+ <p>If you're using the LLVM Makefile system to link your tools, you will use
+ the <tt>LLVMLIBS</tt> make variable.
+ (see the <a href="MakefileGuide.html#LLVMLIBS">Makefile Guide</a> for
+ details). This variable specifies which LLVM libraries to link into your tool
+ and the order in which they will be linked. You specify re-linked libraries by
+ naming the library without a suffix. You specify archive libraries by naming
+ the library with a <tt>.a</tt> suffix but without the <tt>lib</tt> prefix. The
+ order in which the libraries appear in the <tt>LLVMLIBS</tt> variable
+ definition is the order in which they will be linked. Getting this order
+ correct for your tool can sometimes be challenging.</p>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_section"><a name="descriptions"></a>Library Descriptions</div>
+<div class="doc_text">
+ <p>The table below categorizes each library and lists its forms and a short
+ description.</p>
+<table style="text-align:left">
+ <tr><th>Library</th><th>Forms</th><th>Description</th></tr>
+ <tr><th colspan="3">Core Libraries</th></tr>
+ <tr><td>LLVMArchive</td><td><tt>.a</tt></td>
+ <td>LLVM archive reading and writing</td></tr>
+ <tr><td>LLVMAsmParser</td><td><tt>.a</tt></td>
+ <td>LLVM assembly parsing</td></tr>
+ <tr><td>LLVMBCReader</td><td><tt>.a</tt></td>
+ <td>LLVM bitcode reading</td></tr>
+ <tr><td>LLVMBCWriter</td><td><tt>.a</tt></td>
+ <td>LLVM bitcode writing</td></tr>
+ <tr><td>LLVMCore</td><td><tt>.a</tt></td>
+ <td>LLVM core intermediate representation</td></tr>
+ <tr><td>LLVMDebugger</td><td><tt>.a</tt></td>
+ <td>Source level debugging support</td></tr>
+ <tr><td>LLVMLinker</td><td><tt>.a</tt></td>
+ <td>Bitcode and archive linking interface</td></tr>
+ <tr><td>LLVMSupport</td><td><tt>.a</tt></td>
+ <td>General support utilities</td></tr>
+ <tr><td>LLVMSystem</td><td><tt>.a</tt></td>
+ <td>Operating system abstraction layer</td></tr>
+ <tr><td>LLVMbzip2</td><td><tt>.a</tt></td>
+ <td>BZip2 compression library</td></tr>
+
+ <tr><th colspan="3">Analysis Libraries</th></tr>
+ <tr><td>LLVMAnalysis</td><td><tt>.a</tt></td>
+ <td>Various analysis passes.</td></tr>
+ <tr><td>LLVMDataStructure</td><td><tt>.o</tt></td>
+ <td>Data structure analysis passes.</td></tr>
+ <tr><td>LLVMipa</td><td><tt>.a</tt></td>
+ <td>Inter-procedural analysis passes.</td></tr>
+
+ <tr><th colspan="3">Transformation Libraries</th></tr>
+ <tr><td>LLVMInstrumentation</td><td><tt>.a</tt></td>
+ <td>Instrumentation passes.</td></tr>
+ <tr><td>LLVMipo</td><td><tt>.a</tt></td>
+ <td>All inter-procedural optimization passes.</td></tr>
+ <tr><td>LLVMScalarOpts</td><td><tt>.a</tt></td>
+ <td>All scalar optimization passes.</td></tr>
+ <tr><td>LLVMTransformUtils</td><td><tt>.a</tt></td>
+ <td>Transformation utilities used by many passes.</td></tr>
+
+ <tr><th colspan="3">Code Generation Libraries </th></tr>
+ <tr><td>LLVMCodeGen</td><td><tt>.o</tt></td>
+ <td>Native code generation infrastructure</td></tr>
+ <tr><td>LLVMSelectionDAG</td><td><tt>.o</tt></td>
+ <td>Aggressive instruction selector for directed acyclic graphs</td></tr>
+
+ <tr><th colspan="3">Target Libraries</th></tr>
+ <tr><td>LLVMAlpha</td><td><tt>.o</tt></td>
+ <td>Code generation for Alpha architecture</td></tr>
+ <tr><td>LLVMARM</td><td><tt>.o</tt></td>
+ <td>Code generation for ARM architecture</td></tr>
+ <tr><td>LLVMCBackend</td><td><tt>.o</tt></td>
+ <td>'C' language code generator.</td></tr>
+ <tr><td>LLVMIA64</td><td><tt>.o</tt></td>
+ <td>Code generation for IA64 architecture</td></tr>
+ <tr><td>LLVMPowerPC</td><td><tt>.o</tt></td>
+ <td>Code generation for PowerPC architecture</td></tr>
+ <tr><td>LLVMSparc</td><td><tt>.o</tt></td>
+ <td>Code generation for Sparc architecture</td></tr>
+ <tr><td>LLVMTarget</td><td><tt>.a</tt></td>
+ <td>Generic code generation utilities.</td></tr>
+ <tr><td>LLVMX86</td><td><tt>.o</tt></td>
+ <td>Code generation for Intel x86 architecture</td></tr>
+
+ <tr><th colspan="3">Runtime Libraries</th></tr>
+ <tr><td>LLVMInterpreter</td><td><tt>.o</tt></td>
+ <td>Bitcode Interpreter</td></tr>
+ <tr><td>LLVMJIT</td><td><tt>.o</tt></td>
+ <td>Bitcode JIT Compiler</td></tr>
+ <tr><td>LLVMExecutionEngine</td><td><tt>.o</tt></td>
+ <td>Virtual machine engine</td></tr>
+</table>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_section"><a name="dependencies"></a>Using llvm-config</div>
+<div class="doc_text">
+ <p>The <tt>llvm-config</tt> tool is a Perl script that produces on its output
+ various kinds of information. For example, the source or object directories
+ used to build LLVM can be accessed by passing options to <tt>llvm-config</tt>.
+ For complete details on this tool, please see the
+ <a href="CommandGuide/html/llvm-config.html">manual page</a>.</p>
+ <p>To understand the relationships between libraries, the <tt>llvm-config</tt>
+ tool can be very useful. If all you know is that you want certain libraries to
+ be available, you can generate the complete set of libraries to link with
+ using one of four options, as below:</p>
+ <ol>
+ <li><tt>--ldflags</tt>. This generates the command line options necessary to
+ be passed to the <tt>ld</tt> tool in order to link with LLVM. Most notably,
+ the <tt>-L</tt> option is provided to specify a library search directory
+ that contains the LLVM libraries.</li>
+ <li><tt>--libs</tt>. This generates command line options suitable for
+ use with a gcc-style linker. That is, libraries are given with a -l option
+ and object files are given with a full path.</li>
+ <li><tt>--libnames</tt>. This generates a list of just the library file
+ names. If you know the directory in which these files reside (see --ldflags)
+ then you can find the libraries there.</li>
+ <li><tt>--libfiles</tt>. This generates the full path names of the
+ LLVM library files.</li>
+ </ol>
+ <p>If you wish to delve further into how <tt>llvm-config</tt> generates the
+ correct order (based on library dependencies), please see the tool named
+ <tt>GenLibDeps.pl</tt> in the <tt>utils</tt> source directory of LLVM.</p>
+
+ <!-- =======NOTE: =========================================================-->
+ <!-- === The following graphs and <dl> list are generated automatically ===-->
+ <!-- === by the util named GenLibDeps.pl in the llvm/utils directory. ===-->
+ <!-- === This should be updated whenever new libraries are added, ===-->
+ <!-- === removed, or changed ===-->
+ <!-- =======NOTE: =========================================================-->
+ <h2>Dependency Relationships Of Libraries</h2>
+ <p>This graph shows the dependency of archive libraries on other archive
+ libraries or objects. Where a library has both archive and object forms, only
+ the archive form is shown.</p>
+ <img src="img/libdeps.gif" alt="Library Dependencies"/>
+ <h2>Dependency Relationships Of Object Files</h2>
+ <p>This graph shows the dependency of object files on archive libraries or
+ other objects. Where a library has both object and archive forms, only the
+ dependency to the archive form is shown.</p>
+ <img src="img/objdeps.gif" alt="Object File Dependencies"/>
+ <p>The following list shows the dependency relationships between libraries in
+ textual form. The information is the same as shown on the graphs but arranged
+ alphabetically.</p>
+<dl>
+ <dt><b>libLLVMAnalysis.a</b></dt><dd><ul>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMSystem.a</li>
+ <li>libLLVMTarget.a</li>
+ </ul></dd>
+ <dt><b>libLLVMArchive.a</b></dt><dd><ul>
+ <li>libLLVMBCReader.a</li>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMSystem.a</li>
+ </ul></dd>
+ <dt><b>libLLVMAsmParser.a</b></dt><dd><ul>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMSystem.a</li>
+ </ul></dd>
+ <dt><b>libLLVMBCReader.a</b></dt><dd><ul>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMSystem.a</li>
+ </ul></dd>
+ <dt><b>libLLVMBCWriter.a</b></dt><dd><ul>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMSystem.a</li>
+ </ul></dd>
+ <dt><b>libLLVMCodeGen.a</b></dt><dd><ul>
+ <li>libLLVMAnalysis.a</li>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMScalarOpts.a</li>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMSystem.a</li>
+ <li>libLLVMTarget.a</li>
+ <li>libLLVMTransformUtils.a</li>
+ </ul></dd>
+ <dt><b>libLLVMCore.a</b></dt><dd><ul>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMSystem.a</li>
+ </ul></dd>
+ <dt><b>libLLVMDebugger.a</b></dt><dd><ul>
+ <li>libLLVMBCReader.a</li>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMSystem.a</li>
+ </ul></dd>
+ <dt><b>libLLVMInstrumentation.a</b></dt><dd><ul>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMScalarOpts.a</li>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMTransformUtils.a</li>
+ </ul></dd>
+ <dt><b>libLLVMLinker.a</b></dt><dd><ul>
+ <li>libLLVMArchive.a</li>
+ <li>libLLVMBCReader.a</li>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMSystem.a</li>
+ </ul></dd>
+ <dt><b>libLLVMScalarOpts.a</b></dt><dd><ul>
+ <li>libLLVMAnalysis.a</li>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMSystem.a</li>
+ <li>libLLVMTarget.a</li>
+ <li>libLLVMTransformUtils.a</li>
+ </ul></dd>
+ <dt><b>libLLVMSelectionDAG.a</b></dt><dd><ul>
+ <li>libLLVMAnalysis.a</li>
+ <li>libLLVMCodeGen.a</li>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMSystem.a</li>
+ <li>libLLVMTarget.a</li>
+ <li>libLLVMTransformUtils.a</li>
+ </ul></dd>
+ <dt><b>libLLVMSupport.a</b></dt><dd><ul>
+ <li>libLLVMSystem.a</li>
+ <li>libLLVMbzip2.a</li>
+ </ul></dd>
+ <dt><b>libLLVMSystem.a</b></dt><dd><ul>
+ </ul></dd>
+ <dt><b>libLLVMTarget.a</b></dt><dd><ul>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMSystem.a</li>
+ </ul></dd>
+ <dt><b>libLLVMTransformUtils.a</b></dt><dd><ul>
+ <li>libLLVMAnalysis.a</li>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMSystem.a</li>
+ <li>libLLVMTarget.a</li>
+ <li>libLLVMipa.a</li>
+ </ul></dd>
+ <dt><b>libLLVMbzip2.a</b></dt><dd><ul>
+ </ul></dd>
+ <dt><b>libLLVMipa.a</b></dt><dd><ul>
+ <li>libLLVMAnalysis.a</li>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMSystem.a</li>
+ </ul></dd>
+ <dt><b>libLLVMipo.a</b></dt><dd><ul>
+ <li>libLLVMAnalysis.a</li>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMSystem.a</li>
+ <li>libLLVMTarget.a</li>
+ <li>libLLVMTransformUtils.a</li>
+ <li>libLLVMipa.a</li>
+ </ul></dd>
+ <dt><b>libLLVMlto.a</b></dt><dd><ul>
+ <li>libLLVMAnalysis.a</li>
+ <li>libLLVMBCReader.a</li>
+ <li>libLLVMBCWriter.a</li>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMLinker.a</li>
+ <li>libLLVMScalarOpts.a</li>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMSystem.a</li>
+ <li>libLLVMTarget.a</li>
+ <li>libLLVMipa.a</li>
+ <li>libLLVMipo.a</li>
+ </ul></dd>
+ <dt><b>LLVMARM.o</b></dt><dd><ul>
+ <li>libLLVMCodeGen.a</li>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMSelectionDAG.a</li>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMSystem.a</li>
+ <li>libLLVMTarget.a</li>
+ </ul></dd>
+ <dt><b>LLVMAlpha.o</b></dt><dd><ul>
+ <li>libLLVMCodeGen.a</li>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMSelectionDAG.a</li>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMSystem.a</li>
+ <li>libLLVMTarget.a</li>
+ </ul></dd>
+ <dt><b>LLVMCBackend.o</b></dt><dd><ul>
+ <li>libLLVMAnalysis.a</li>
+ <li>libLLVMCodeGen.a</li>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMScalarOpts.a</li>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMSystem.a</li>
+ <li>libLLVMTarget.a</li>
+ <li>libLLVMTransformUtils.a</li>
+ <li>libLLVMipa.a</li>
+ </ul></dd>
+ <dt><b>LLVMExecutionEngine.o</b></dt><dd><ul>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMSystem.a</li>
+ <li>libLLVMTarget.a</li>
+ </ul></dd>
+ <dt><b>LLVMIA64.o</b></dt><dd><ul>
+ <li>libLLVMCodeGen.a</li>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMSelectionDAG.a</li>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMSystem.a</li>
+ <li>libLLVMTarget.a</li>
+ </ul></dd>
+ <dt><b>LLVMInterpreter.o</b></dt><dd><ul>
+ <li>LLVMExecutionEngine.o</li>
+ <li>libLLVMCodeGen.a</li>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMSystem.a</li>
+ <li>libLLVMTarget.a</li>
+ </ul></dd>
+ <dt><b>LLVMJIT.o</b></dt><dd><ul>
+ <li>LLVMExecutionEngine.o</li>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMSystem.a</li>
+ <li>libLLVMTarget.a</li>
+ </ul></dd>
+ <dt><b>LLVMPowerPC.o</b></dt><dd><ul>
+ <li>libLLVMCodeGen.a</li>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMSelectionDAG.a</li>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMSystem.a</li>
+ <li>libLLVMTarget.a</li>
+ </ul></dd>
+ <dt><b>LLVMSparc.o</b></dt><dd><ul>
+ <li>libLLVMCodeGen.a</li>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMSelectionDAG.a</li>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMSystem.a</li>
+ <li>libLLVMTarget.a</li>
+ </ul></dd>
+ <dt><b>LLVMX86.o</b></dt><dd><ul>
+ <li>libLLVMCodeGen.a</li>
+ <li>libLLVMCore.a</li>
+ <li>libLLVMSelectionDAG.a</li>
+ <li>libLLVMSupport.a</li>
+ <li>libLLVMSystem.a</li>
+ <li>libLLVMTarget.a</li>
+ </ul></dd>
+</dl>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_section"><a name="rot">Linkage Rules Of Thumb</a></div>
+<div class="doc_text">
+ <p>This section contains various "rules of thumb" about what files you
+ should link into your programs.</p>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="always">Always Link LLVMCore, LLVMSupport,
+ and LLVMSystem</a></div>
+<div class="doc_text">
+ <p>No matter what you do with LLVM, the last three entries in the value of
+ your LLVMLIBS make variable should always be:
+ <tt>LLVMCore LLVMSupport.a LLVMSystem.a</tt>. There are no <tt>LLVM</tt>
+ programs that don't depend on these three.</p>
+</div>
+<!-- ======================================================================= -->
+<div class="doc_subsection"><a name="onlyone">Never link both archive and
+ re-linked library</a></div>
+<div class="doc_text">
+ <p>There is never any point to linking both the re-linked (<tt>.o</tt>) and
+ the archive (<tt>.a</tt>) versions of a library. Since the re-linked version
+ includes the entire library, the archive version will not resolve any symbols.
+ You could even end up with a link error if you place the archive version before
+ the re-linked version on the linker's command line.</p>
+</div>
+<!-- ======================================================================= -->
+<hr>
+<div class="doc_footer">
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"/></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!" /></a>
+ <a href="mailto:rspencer@x10sys.com">Reid Spencer</a>
+</address>
+<a href="http://llvm.org">The LLVM Compiler Infrastructure</a>
+<br>Last modified: $Date$ </div>
+</body>
+</html>
+<!-- vim: sw=2 ts=2 ai
+-->
diff --git a/docs/WritingAnLLVMBackend.html b/docs/WritingAnLLVMBackend.html
new file mode 100644
index 0000000..8826ee7
--- /dev/null
+++ b/docs/WritingAnLLVMBackend.html
@@ -0,0 +1,260 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>Writing an LLVM backend</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+
+<body>
+
+<div class="doc_title">
+ Writing an LLVM backend
+</div>
+
+<ol>
+ <li><a href="#intro">Introduction</a>
+ <li><a href="#backends">Writing a backend</a>
+ <ol>
+ <li><a href="#machine">Machine backends</a>
+ <ol>
+ <li><a href="#machineTOC">Outline</a></li>
+ <li><a href="#machineDetails">Implementation details</a></li>
+ </ol></li>
+ <li><a href="#lang">Language backends</a></li>
+ </ol></li>
+ <li><a href="#related">Related reading material</a>
+</ol>
+
+<div class="doc_author">
+ <p>Written by <a href="http://misha.brukman.net">Misha Brukman</a></p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="intro">Introduction</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>This document describes techniques for writing backends for LLVM which
+convert the LLVM representation to machine assembly code or other languages.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="backends">Writing a backend</a>
+</div>
+<!-- *********************************************************************** -->
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="machine">Machine backends</a>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="machineTOC">Outline</a>
+</div>
+
+<div class="doc_text">
+
+<p>In general, you want to follow the format of SPARC, X86 or PowerPC (in
+<tt>lib/Target</tt>). SPARC is the simplest backend, and is RISC, so if
+you're working on a RISC target, it is a good one to start with.</p>
+
+<p>To create a static compiler (one that emits text assembly), you need to
+implement the following:</p>
+
+<ul>
+<li>Describe the register set.
+ <ul>
+ <li>Create a <a href="TableGenFundamentals.html">TableGen</a> description of
+ the register set and register classes</li>
+ <li>Implement a subclass of <tt><a
+ href="CodeGenerator.html#mregisterinfo">MRegisterInfo</a></tt></li>
+ </ul></li>
+<li>Describe the instruction set.
+ <ul>
+ <li>Create a <a href="TableGenFundamentals.html">TableGen</a> description of
+ the instruction set</li>
+ <li>Implement a subclass of <tt><a
+ href="CodeGenerator.html#targetinstrinfo">TargetInstrInfo</a></tt></li>
+ </ul></li>
+<li>Describe the target machine.
+ <ul>
+ <li>Create a <a href="TableGenFundamentals.html">TableGen</a> description of
+ the target that describes the pointer size and references the instruction
+ set</li>
+ <li>Implement a subclass of <tt><a
+ href="CodeGenerator.html#targetmachine">TargetMachine</a></tt>, which
+ configures <tt><a href="CodeGenerator.html#targetdata">TargetData</a></tt>
+ correctly</li>
+ <li>Register your new target using the <tt>RegisterTarget</tt>
+ template:<br><br>
+<div class="doc_code"><pre>
+RegisterTarget&lt;<em>MyTargetMachine</em>&gt; M("short_name", " Target name");
+</pre></div>
+ <br>Here, <em>MyTargetMachine</em> is the name of your implemented
+ subclass of <tt><a
+ href="CodeGenerator.html#targetmachine">TargetMachine</a></tt>,
+ <em>short_name</em> is the option that will be active following
+ <tt>-march=</tt> to select a target in llc and lli, and the last string
+ is the description of your target to appear in <tt>-help</tt>
+ listing.</li>
+ </ul></li>
+<li>Implement the assembly printer for the architecture.
+ <ul>
+ <li>Define all of the assembly strings for your target, adding them to the
+ instructions in your *InstrInfo.td file.</li>
+ <li>Implement the <tt>llvm::AsmPrinter</tt> interface.</li>
+ </ul>
+</li>
+<li>Implement an instruction selector for the architecture.
+ <ul>
+ <li>The recommended method is the <a href="CodeGenerator.html#instselect">
+ pattern-matching DAG-to-DAG instruction selector</a> (for example, see
+ the PowerPC backend in PPCISelDAGtoDAG.cpp). Parts of instruction
+ selector creation can be performed by adding patterns to the instructions
+ in your <tt>.td</tt> file.</li>
+ </ul>
+</li>
+<li>Optionally, add subtarget support.
+<ul>
+ <li>If your target has multiple subtargets (e.g. variants with different
+ capabilities), implement the <tt>llvm::TargetSubtarget</tt> interface
+ for your architecture. This allows you to add <tt>-mcpu=</tt> and
+ <tt>-mattr=</tt> options.</li>
+</ul>
+<li>Optionally, add JIT support.
+ <ul>
+ <li>Create a subclass of <tt><a
+ href="CodeGenerator.html#targetjitinfo">TargetJITInfo</a></tt></li>
+ <li>Create a machine code emitter that will be used to emit binary code
+ directly into memory, given <tt>MachineInstr</tt>s</li>
+ </ul>
+</ul>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="machineDetails">Implementation details</a>
+</div>
+
+<div class="doc_text">
+
+<ul>
+
+<li><p><b>TableGen register info description</b> - describe a class which
+will store the register's number in the binary encoding of the instruction
+(e.g., for JIT purposes).</p>
+
+<p>You also need to define register classes to contain these registers, such as
+the integer register class and floating-point register class, so that you can
+allocate virtual registers to instructions from these sets, and let the
+target-independent register allocator automatically choose the actual
+architected registers.</p>
+
+<div class="doc_code">
+<pre>
+// class Register is defined in Target.td
+<b>class</b> <em>Target</em>Reg&lt;string name&gt; : Register&lt;name&gt; {
+ <b>let</b> Namespace = "<em>Target</em>";
+}
+
+<b>class</b> IntReg&lt;<b>bits</b>&lt;5&gt; num, string name&gt; : <em>Target</em>Reg&lt;name&gt; {
+ <b>field</b> <b>bits</b>&lt;5&gt; Num = num;
+}
+
+<b>def</b> R0 : IntReg&lt;0, "%R0"&gt;;
+...
+
+// class RegisterClass is defined in Target.td
+<b>def</b> IReg : RegisterClass&lt;i64, 64, [R0, ... ]&gt;;
+</pre>
+</div>
+</li>
+
+<li><p><b>TableGen instruction info description</b> - break up instructions into
+classes; usually that's already done by the manufacturer (see the instruction
+manual). Define a class for each instruction category. Define each opcode as a
+subclass of the category, with appropriate parameters such as the fixed binary
+encoding of opcodes and extended opcodes, and map the register bits to the bits
+of the instruction which they are encoded in (for the JIT). Also specify how
+the instruction should be printed so it can use the automatic assembly printer,
+e.g.:</p>
+
+<div class="doc_code">
+<pre>
+// class Instruction is defined in Target.td
+<b>class</b> Form&lt;<b>bits</b>&lt;6&gt; opcode, <b>dag</b> OL, <b>string</b> asmstr&gt; : Instruction {
+ <b>field</b> <b>bits</b>&lt;42&gt; Inst;
+
+ <b>let</b> Namespace = "<em>Target</em>";
+ <b>let</b> Inst{0-6} = opcode;
+ <b>let</b> OperandList = OL;
+ <b>let</b> AsmString = asmstr;
+}
+
+<b>def</b> ADD : Form&lt;42, (ops IReg:$rD, IReg:$rA, IReg:$rB), "add $rD, $rA, $rB"&gt;;
+</pre>
+</div>
+</li>
+
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="lang">Language backends</a>
+</div>
+
+<div class="doc_text">
+
+<p>For now, just take a look at <tt>lib/Target/CBackend</tt> for an example of
+how the C backend is written.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="related">Related reading material</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<ul>
+<li><a href="CodeGenerator.html">Code generator</a> -
+ describes some of the classes in code generation at a high level, but
+ it is not (yet) complete</li>
+<li><a href="TableGenFundamentals.html">TableGen fundamentals</a> -
+ describes how to use TableGen to describe your target information
+ succinctly</li>
+<li><a href="HowToSubmitABug.html#codegen">Debugging code generation with
+ bugpoint</a> - shows bugpoint usage scenarios to simplify backend
+ development</li>
+</ul>
+
+</div>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!" /></a>
+
+ <a href="http://misha.brukman.net">Misha Brukman</a><br>
+ <a href="http://llvm.org">The LLVM Compiler Infrastructure</a>
+ <br>
+ Last modified: $Date$
+</address>
+
+</body>
+</html>
diff --git a/docs/WritingAnLLVMPass.html b/docs/WritingAnLLVMPass.html
new file mode 100644
index 0000000..c967000
--- /dev/null
+++ b/docs/WritingAnLLVMPass.html
@@ -0,0 +1,1817 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+ <title>Writing an LLVM Pass</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<div class="doc_title">
+ Writing an LLVM Pass
+</div>
+
+<ol>
+ <li><a href="#introduction">Introduction - What is a pass?</a></li>
+ <li><a href="#quickstart">Quick Start - Writing hello world</a>
+ <ul>
+ <li><a href="#makefile">Setting up the build environment</a></li>
+ <li><a href="#basiccode">Basic code required</a></li>
+ <li><a href="#running">Running a pass with <tt>opt</tt></a></li>
+ </ul></li>
+ <li><a href="#passtype">Pass classes and requirements</a>
+ <ul>
+ <li><a href="#ImmutablePass">The <tt>ImmutablePass</tt> class</a></li>
+ <li><a href="#ModulePass">The <tt>ModulePass</tt> class</a>
+ <ul>
+ <li><a href="#runOnModule">The <tt>runOnModule</tt> method</a></li>
+ </ul></li>
+ <li><a href="#CallGraphSCCPass">The <tt>CallGraphSCCPass</tt> class</a>
+ <ul>
+ <li><a href="#doInitialization_scc">The <tt>doInitialization(CallGraph
+ &amp;)</tt> method</a></li>
+ <li><a href="#runOnSCC">The <tt>runOnSCC</tt> method</a></li>
+ <li><a href="#doFinalization_scc">The <tt>doFinalization(CallGraph
+ &amp;)</tt> method</a></li>
+ </ul></li>
+ <li><a href="#FunctionPass">The <tt>FunctionPass</tt> class</a>
+ <ul>
+ <li><a href="#doInitialization_mod">The <tt>doInitialization(Module
+ &amp;)</tt> method</a></li>
+ <li><a href="#runOnFunction">The <tt>runOnFunction</tt> method</a></li>
+ <li><a href="#doFinalization_mod">The <tt>doFinalization(Module
+ &amp;)</tt> method</a></li>
+ </ul></li>
+ <li><a href="#LoopPass">The <tt>LoopPass</tt> class</a>
+ <ul>
+ <li><a href="#doInitialization_loop">The <tt>doInitialization(Loop *,
+ LPPassManager &amp;)</tt> method</a></li>
+ <li><a href="#runOnLoop">The <tt>runOnLoop</tt> method</a></li>
+ <li><a href="#doFinalization_loop">The <tt>doFinalization()
+ </tt> method</a></li>
+ </ul></li>
+ <li><a href="#BasicBlockPass">The <tt>BasicBlockPass</tt> class</a>
+ <ul>
+ <li><a href="#doInitialization_fn">The <tt>doInitialization(Function
+ &amp;)</tt> method</a></li>
+ <li><a href="#runOnBasicBlock">The <tt>runOnBasicBlock</tt>
+ method</a></li>
+ <li><a href="#doFinalization_fn">The <tt>doFinalization(Function
+ &amp;)</tt> method</a></li>
+ </ul></li>
+ <li><a href="#MachineFunctionPass">The <tt>MachineFunctionPass</tt>
+ class</a>
+ <ul>
+ <li><a href="#runOnMachineFunction">The
+ <tt>runOnMachineFunction(MachineFunction &amp;)</tt> method</a></li>
+ </ul></li>
+ </ul></li>
+ <li><a href="#registration">Pass Registration</a>
+ <ul>
+ <li><a href="#print">The <tt>print</tt> method</a></li>
+ </ul></li>
+ <li><a href="#interaction">Specifying interactions between passes</a>
+ <ul>
+ <li><a href="#getAnalysisUsage">The <tt>getAnalysisUsage</tt>
+ method</a></li>
+ <li><a href="#AU::addRequired">The <tt>AnalysisUsage::addRequired&lt;&gt;</tt> and <tt>AnalysisUsage::addRequiredTransitive&lt;&gt;</tt> methods</a></li>
+ <li><a href="#AU::addPreserved">The <tt>AnalysisUsage::addPreserved&lt;&gt;</tt> method</a></li>
+ <li><a href="#AU::examples">Example implementations of <tt>getAnalysisUsage</tt></a></li>
+ <li><a href="#getAnalysis">The <tt>getAnalysis&lt;&gt;</tt> and <tt>getAnalysisToUpdate&lt;&gt;</tt> methods</a></li>
+ </ul></li>
+ <li><a href="#analysisgroup">Implementing Analysis Groups</a>
+ <ul>
+ <li><a href="#agconcepts">Analysis Group Concepts</a></li>
+ <li><a href="#registerag">Using <tt>RegisterAnalysisGroup</tt></a></li>
+ </ul></li>
+ <li><a href="#passStatistics">Pass Statistics</a></li>
+ <li><a href="#passmanager">What PassManager does</a>
+ <ul>
+ <li><a href="#releaseMemory">The <tt>releaseMemory</tt> method</a></li>
+ </ul></li>
+ <li><a href="#registering">Registering dynamically loaded passes</a>
+ <ul>
+ <li><a href="#registering_existing">Using existing registries</a></li>
+ <li><a href="#registering_new">Creating new registries</a></li>
+ </ul></li>
+ <li><a href="#debughints">Using GDB with dynamically loaded passes</a>
+ <ul>
+ <li><a href="#breakpoint">Setting a breakpoint in your pass</a></li>
+ <li><a href="#debugmisc">Miscellaneous Problems</a></li>
+ </ul></li>
+ <li><a href="#future">Future extensions planned</a>
+ <ul>
+ <li><a href="#SMP">Multithreaded LLVM</a></li>
+ </ul></li>
+</ol>
+
+<div class="doc_author">
+ <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a> and
+ <a href="mailto:jlaskey@mac.com">Jim Laskey</a></p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="introduction">Introduction - What is a pass?</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>The LLVM Pass Framework is an important part of the LLVM system, because LLVM
+passes are where most of the interesting parts of the compiler exist. Passes
+perform the transformations and optimizations that make up the compiler, they
+build the analysis results that are used by these transformations, and they are,
+above all, a structuring technique for compiler code.</p>
+
+<p>All LLVM passes are subclasses of the <tt><a
+href="http://llvm.org/doxygen/classllvm_1_1Pass.html">Pass</a></tt>
+class, which implement functionality by overriding virtual methods inherited
+from <tt>Pass</tt>. Depending on how your pass works, you should inherit from
+the <tt><a href="#ModulePass">ModulePass</a></tt>, <tt><a
+href="#CallGraphSCCPass">CallGraphSCCPass</a></tt>, <tt><a
+href="#FunctionPass">FunctionPass</a></tt>, <tt><a
+href="#LoopPass">LoopPass</a></tt>, or <tt><a
+href="#BasicBlockPass">BasicBlockPass</a></tt> classes, which gives the system
+more information about what your pass does, and how it can be combined with
+other passes. One of the main features of the LLVM Pass Framework is that it
+schedules passes to run in an efficient way based on the constraints that your
+pass meets (which are indicated by which class they derive from).</p>
+
+<p>We start by showing you how to construct a pass, everything from setting up
+the code, to compiling, loading, and executing it. After the basics are down,
+more advanced features are discussed.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="quickstart">Quick Start - Writing hello world</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>Here we describe how to write the "hello world" of passes. The "Hello" pass
+is designed to simply print out the name of non-external functions that exist in
+the program being compiled. It does not modify the program at all, it just
+inspects it. The source code and files for this pass are available in the LLVM
+source tree in the <tt>lib/Transforms/Hello</tt> directory.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="makefile">Setting up the build environment</a>
+</div>
+
+<div class="doc_text">
+
+ <p>First, you need to create a new directory somewhere in the LLVM source
+ base. For this example, we'll assume that you made
+ <tt>lib/Transforms/Hello</tt>. Next, you must set up a build script
+ (Makefile) that will compile the source code for the new pass. To do this,
+ copy the following into <tt>Makefile</tt>:</p>
+ <hr>
+
+<div class="doc_code"><pre>
+# Makefile for hello pass
+
+# Path to top level of LLVM hierarchy
+LEVEL = ../../..
+
+# Name of the library to build
+LIBRARYNAME = Hello
+
+# Make the shared library become a loadable module so the tools can
+# dlopen/dlsym on the resulting library.
+LOADABLE_MODULE = 1
+
+# Tell the build system which LLVM libraries your pass needs. You'll probably
+# need at least LLVMSystem.a, LLVMSupport.a, LLVMCore.a but possibly several
+# others too.
+LLVMLIBS = LLVMCore.a LLVMSupport.a LLVMSystem.a
+
+# Include the makefile implementation stuff
+include $(LEVEL)/Makefile.common
+</pre></div>
+
+<p>This makefile specifies that all of the <tt>.cpp</tt> files in the current
+directory are to be compiled and linked together into a
+<tt>Debug/lib/Hello.so</tt> shared object that can be dynamically loaded by
+the <tt>opt</tt> or <tt>bugpoint</tt> tools via their <tt>-load</tt> options.
+If your operating system uses a suffix other than .so (such as Windows or
+Mac OS X), the appropriate extension will be used.</p>
+
+<p>Now that we have the build scripts set up, we just need to write the code for
+the pass itself.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="basiccode">Basic code required</a>
+</div>
+
+<div class="doc_text">
+
+<p>Now that we have a way to compile our new pass, we just have to write it.
+Start out with:</p>
+
+<div class="doc_code"><pre>
+<b>#include</b> "<a href="http://llvm.org/doxygen/Pass_8h-source.html">llvm/Pass.h</a>"
+<b>#include</b> "<a href="http://llvm.org/doxygen/Function_8h-source.html">llvm/Function.h</a>"
+</pre></div>
+
+<p>Which are needed because we are writing a <tt><a
+href="http://llvm.org/doxygen/classllvm_1_1Pass.html">Pass</a></tt>, and
+we are operating on <tt><a
+href="http://llvm.org/doxygen/classllvm_1_1Function.html">Function</a></tt>'s.</p>
+
+<p>Next we have:</p>
+<div class="doc_code"><pre>
+<b>using namespace llvm;</b>
+</pre></div>
+<p>... which is required because the functions from the include files
+live in the llvm namespace.
+</p>
+
+<p>Next we have:</p>
+
+<div class="doc_code"><pre>
+<b>namespace</b> {
+</pre></div>
+
+<p>... which starts out an anonymous namespace. Anonymous namespaces are to C++
+what the "<tt>static</tt>" keyword is to C (at global scope). It makes the
+things declared inside of the anonymous namespace only visible to the current
+file. If you're not familiar with them, consult a decent C++ book for more
+information.</p>
+
+<p>Next, we declare our pass itself:</p>
+
+<div class="doc_code"><pre>
+ <b>struct</b> Hello : <b>public</b> <a href="#FunctionPass">FunctionPass</a> {
+</pre></div>
+
+<p>This declares a "<tt>Hello</tt>" class that is a subclass of <tt><a
+href="http://llvm.org/doxygen/classllvm_1_1FunctionPass.html">FunctionPass</a></tt>.
+The different builtin pass subclasses are described in detail <a
+href="#passtype">later</a>, but for now, know that <a
+href="#FunctionPass"><tt>FunctionPass</tt></a>'s operate on a function at a
+time.</p>
+
+<div class="doc_code"><pre>
+ static char ID;
+ Hello() : FunctionPass((intptr_t)&amp;ID) {}
+</pre></div>
+
+<p>This declares the pass identifier used by LLVM to identify the pass. This allows LLVM to
+avoid using expensive C++ runtime information.</p>
+
+<div class="doc_code"><pre>
+ <b>virtual bool</b> <a href="#runOnFunction">runOnFunction</a>(Function &amp;F) {
+ llvm::cerr &lt;&lt; "<i>Hello: </i>" &lt;&lt; F.getName() &lt;&lt; "\n";
+ <b>return false</b>;
+ }
+ }; <i>// end of struct Hello</i>
+</pre></div>
+
+<p>We declare a "<a href="#runOnFunction"><tt>runOnFunction</tt></a>" method,
+which overrides an abstract virtual method inherited from <a
+href="#FunctionPass"><tt>FunctionPass</tt></a>. This is where we are supposed
+to do our thing, so we just print out our message with the name of each
+function.</p>
+
+<div class="doc_code"><pre>
+ char Hello::ID = 0;
+</pre></div>
+
+<p>We initialize the pass ID here. LLVM uses the address of ID to identify the
+pass, so the initialization value is not important.</p>
+
+<div class="doc_code"><pre>
+ RegisterPass&lt;Hello&gt; X("<i>hello</i>", "<i>Hello World Pass</i>");
+} <i>// end of anonymous namespace</i>
+</pre></div>
+
+<p>Lastly, we <a href="#registration">register our class</a> <tt>Hello</tt>,
+giving it a command line
+argument "<tt>hello</tt>", and a name "<tt>Hello World Pass</tt>".</p>
+
+<p>As a whole, the <tt>.cpp</tt> file looks like:</p>
+
+<div class="doc_code"><pre>
+<b>#include</b> "<a href="http://llvm.org/doxygen/Pass_8h-source.html">llvm/Pass.h</a>"
+<b>#include</b> "<a href="http://llvm.org/doxygen/Function_8h-source.html">llvm/Function.h</a>"
+
+<b>using namespace llvm;</b>
+
+<b>namespace</b> {
+ <b>struct Hello</b> : <b>public</b> <a href="#FunctionPass">FunctionPass</a> {
+
+ static char ID;
+ Hello() : FunctionPass((intptr_t)&amp;ID) {}
+
+ <b>virtual bool</b> <a href="#runOnFunction">runOnFunction</a>(Function &amp;F) {
+ llvm::cerr &lt;&lt; "<i>Hello: </i>" &lt;&lt; F.getName() &lt;&lt; "\n";
+ <b>return false</b>;
+ }
+ };
+
+ char Hello::ID = 0;
+
+ RegisterPass&lt;Hello&gt; X("<i>hello</i>", "<i>Hello World Pass</i>");
+}
+</pre></div>
+
+<p>Now that it's all together, compile the file with a simple "<tt>gmake</tt>"
+command in the local directory and you should get a new
+"<tt>Debug/lib/Hello.so</tt>" file. Note that everything in this file is
+contained in an anonymous namespace: this reflects the fact that passes are self
+contained units that do not need external interfaces (although they can have
+them) to be useful.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="running">Running a pass with <tt>opt</tt></a>
+</div>
+
+<div class="doc_text">
+
+<p>Now that you have a brand new shiny shared object file, we can use the
+<tt>opt</tt> command to run an LLVM program through your pass. Because you
+registered your pass with the <tt>RegisterPass</tt> template, you will be able to
+use the <tt>opt</tt> tool to access it, once loaded.</p>
+
+<p>To test it, follow the example at the end of the <a
+href="GettingStarted.html">Getting Started Guide</a> to compile "Hello World" to
+LLVM. We can now run the bitcode file (<tt>hello.bc</tt>) for the program
+through our transformation like this (of course, any bitcode file will
+work):</p>
+
+<div class="doc_code"><pre>
+$ opt -load ../../../Debug/lib/Hello.so -hello &lt; hello.bc &gt; /dev/null
+Hello: __main
+Hello: puts
+Hello: main
+</pre></div>
+
+<p>The '<tt>-load</tt>' option specifies that '<tt>opt</tt>' should load your
+pass as a shared object, which makes '<tt>-hello</tt>' a valid command line
+argument (which is one reason you need to <a href="#registration">register your
+pass</a>). Because the hello pass does not modify the program in any
+interesting way, we just throw away the result of <tt>opt</tt> (sending it to
+<tt>/dev/null</tt>).</p>
+
+<p>To see what happened to the other string you registered, try running
+<tt>opt</tt> with the <tt>--help</tt> option:</p>
+
+<div class="doc_code"><pre>
+$ opt -load ../../../Debug/lib/Hello.so --help
+OVERVIEW: llvm .bc -&gt; .bc modular optimizer
+
+USAGE: opt [options] &lt;input bitcode&gt;
+
+OPTIONS:
+ Optimizations available:
+...
+ -funcresolve - Resolve Functions
+ -gcse - Global Common Subexpression Elimination
+ -globaldce - Dead Global Elimination
+ <b>-hello - Hello World Pass</b>
+ -indvars - Canonicalize Induction Variables
+ -inline - Function Integration/Inlining
+ -instcombine - Combine redundant instructions
+...
+</pre></div>
+
+<p>The pass name gets added as the information string for your pass, giving some
+documentation to users of <tt>opt</tt>. Now that you have a working pass, you
+would go ahead and make it do the cool transformations you want. Once you get
+it all working and tested, it may become useful to find out how fast your pass
+is. The <a href="#passmanager"><tt>PassManager</tt></a> provides a nice command
+line option (<tt>--time-passes</tt>) that allows you to get information about
+the execution time of your pass along with the other passes you queue up. For
+example:</p>
+
+<div class="doc_code"><pre>
+$ opt -load ../../../Debug/lib/Hello.so -hello -time-passes &lt; hello.bc &gt; /dev/null
+Hello: __main
+Hello: puts
+Hello: main
+===============================================================================
+ ... Pass execution timing report ...
+===============================================================================
+ Total Execution Time: 0.02 seconds (0.0479059 wall clock)
+
+ ---User Time--- --System Time-- --User+System-- ---Wall Time--- --- Pass Name ---
+ 0.0100 (100.0%) 0.0000 ( 0.0%) 0.0100 ( 50.0%) 0.0402 ( 84.0%) Bitcode Writer
+ 0.0000 ( 0.0%) 0.0100 (100.0%) 0.0100 ( 50.0%) 0.0031 ( 6.4%) Dominator Set Construction
+ 0.0000 ( 0.0%) 0.0000 ( 0.0%) 0.0000 ( 0.0%) 0.0013 ( 2.7%) Module Verifier
+ <b> 0.0000 ( 0.0%) 0.0000 ( 0.0%) 0.0000 ( 0.0%) 0.0033 ( 6.9%) Hello World Pass</b>
+ 0.0100 (100.0%) 0.0100 (100.0%) 0.0200 (100.0%) 0.0479 (100.0%) TOTAL
+</pre></div>
+
+<p>As you can see, our implementation above is pretty fast :). The additional
+passes listed are automatically inserted by the '<tt>opt</tt>' tool to verify
+that the LLVM emitted by your pass is still valid and well formed LLVM, which
+hasn't been broken somehow.</p>
+
+<p>Now that you have seen the basics of the mechanics behind passes, we can talk
+about some more details of how they work and how to use them.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="passtype">Pass classes and requirements</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>One of the first things that you should do when designing a new pass is to
+decide what class you should subclass for your pass. The <a
+href="#basiccode">Hello World</a> example uses the <tt><a
+href="#FunctionPass">FunctionPass</a></tt> class for its implementation, but we
+did not discuss why or when this should occur. Here we talk about the classes
+available, from the most general to the most specific.</p>
+
+<p>When choosing a superclass for your Pass, you should choose the <b>most
+specific</b> class possible, while still being able to meet the requirements
+listed. This gives the LLVM Pass Infrastructure information necessary to
+optimize how passes are run, so that the resultant compiler isn't unnecessarily
+slow.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="ImmutablePass">The <tt>ImmutablePass</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>The most plain and boring type of pass is the "<tt><a
+href="http://llvm.org/doxygen/classllvm_1_1ImmutablePass.html">ImmutablePass</a></tt>"
+class. This pass type is used for passes that do not have to be run, do not
+change state, and never need to be updated. This is not a normal type of
+transformation or analysis, but can provide information about the current
+compiler configuration.</p>
+
+<p>Although this pass class is very infrequently used, it is important for
+providing information about the current target machine being compiled for, and
+other static information that can affect the various transformations.</p>
+
+<p><tt>ImmutablePass</tt>es never invalidate other transformations, are never
+invalidated, and are never "run".</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="ModulePass">The <tt>ModulePass</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>The "<tt><a
+href="http://llvm.org/doxygen/classllvm_1_1ModulePass.html">ModulePass</a></tt>"
+class is the most general of all superclasses that you can use. Deriving from
+<tt>ModulePass</tt> indicates that your pass uses the entire program as a unit,
+referring to function bodies in no predictable order, or adding and removing
+functions. Because nothing is known about the behavior of <tt>ModulePass</tt>
+subclasses, no optimization can be done for their execution. A module pass
+can use function level passes (e.g. dominators) using the getAnalysis interface
+<tt> getAnalysis&lt;DominatorTree&gt;(Function)</tt>. </p>
+
+<p>To write a correct <tt>ModulePass</tt> subclass, derive from
+<tt>ModulePass</tt> and override the <tt>runOnModule</tt> method with the
+following signature:</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="runOnModule">The <tt>runOnModule</tt> method</a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><pre>
+ <b>virtual bool</b> runOnModule(Module &amp;M) = 0;
+</pre></div>
+
+<p>The <tt>runOnModule</tt> method performs the interesting work of the pass.
+It should return true if the module was modified by the transformation and
+false otherwise.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="CallGraphSCCPass">The <tt>CallGraphSCCPass</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>The "<tt><a
+href="http://llvm.org/doxygen/classllvm_1_1CallGraphSCCPass.html">CallGraphSCCPass</a></tt>"
+is used by passes that need to traverse the program bottom-up on the call graph
+(callees before callers). Deriving from CallGraphSCCPass provides some
+mechanics for building and traversing the CallGraph, but also allows the system
+to optimize execution of CallGraphSCCPass's. If your pass meets the
+requirements outlined below, and doesn't meet the requirements of a <tt><a
+href="#FunctionPass">FunctionPass</a></tt> or <tt><a
+href="#BasicBlockPass">BasicBlockPass</a></tt>, you should derive from
+<tt>CallGraphSCCPass</tt>.</p>
+
+<p><b>TODO</b>: explain briefly what SCC, Tarjan's algo, and B-U mean.</p>
+
+<p>To be explicit, <tt>CallGraphSCCPass</tt> subclasses are:</p>
+
+<ol>
+
+<li>... <em>not allowed</em> to modify any <tt>Function</tt>s that are not in
+the current SCC.</li>
+
+<li>... <em>not allowed</em> to inspect any Function's other than those in the
+current SCC and the direct callees of the SCC.</li>
+
+<li>... <em>required</em> to preserve the current CallGraph object, updating it
+to reflect any changes made to the program.</li>
+
+<li>... <em>not allowed</em> to add or remove SCC's from the current Module,
+though they may change the contents of an SCC.</li>
+
+<li>... <em>allowed</em> to add or remove global variables from the current
+Module.</li>
+
+<li>... <em>allowed</em> to maintain state across invocations of
+ <a href="#runOnSCC"><tt>runOnSCC</tt></a> (including global data).</li>
+</ol>
+
+<p>Implementing a <tt>CallGraphSCCPass</tt> is slightly tricky in some cases
+because it has to handle SCCs with more than one node in them. All of the virtual
+methods described below should return true if they modified the program, or
+false if they didn't.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="doInitialization_scc">The <tt>doInitialization(CallGraph &amp;)</tt>
+ method</a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><pre>
+ <b>virtual bool</b> doInitialization(CallGraph &amp;CG);
+</pre></div>
+
+<p>The <tt>doInitialization</tt> method is allowed to do most of the things that
+<tt>CallGraphSCCPass</tt>'s are not allowed to do. They can add and remove
+functions, get pointers to functions, etc. The <tt>doInitialization</tt> method
+is designed to do simple initialization type of stuff that does not depend on
+the SCCs being processed. The <tt>doInitialization</tt> method call is not
+scheduled to overlap with any other pass executions (thus it should be very
+fast).</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="runOnSCC">The <tt>runOnSCC</tt> method</a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><pre>
+ <b>virtual bool</b> runOnSCC(const std::vector&lt;CallGraphNode *&gt; &amp;SCCM) = 0;
+</pre></div>
+
+<p>The <tt>runOnSCC</tt> method performs the interesting work of the pass, and
+should return true if the module was modified by the transformation, false
+otherwise.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="doFinalization_scc">The <tt>doFinalization(CallGraph
+ &amp;)</tt> method</a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><pre>
+ <b>virtual bool</b> doFinalization(CallGraph &amp;CG);
+</pre></div>
+
+<p>The <tt>doFinalization</tt> method is an infrequently used method that is
+called when the pass framework has finished calling <a
+href="#runOnSCC"><tt>runOnSCC</tt></a> for every SCC in the
+program being compiled.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="FunctionPass">The <tt>FunctionPass</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>In contrast to <tt>ModulePass</tt> subclasses, <tt><a
+href="http://llvm.org/doxygen/classllvm_1_1FunctionPass.html">FunctionPass</a></tt>
+subclasses do have a predictable, local behavior that can be expected by the
+system. All <tt>FunctionPass</tt> execute on each function in the program
+independent of all of the other functions in the program.
+<tt>FunctionPass</tt>'s do not require that they are executed in a particular
+order, and <tt>FunctionPass</tt>'s do not modify external functions.</p>
+
+<p>To be explicit, <tt>FunctionPass</tt> subclasses are not allowed to:</p>
+
+<ol>
+<li>Modify a Function other than the one currently being processed.</li>
+<li>Add or remove Function's from the current Module.</li>
+<li>Add or remove global variables from the current Module.</li>
+<li>Maintain state across invocations of
+ <a href="#runOnFunction"><tt>runOnFunction</tt></a> (including global data)</li>
+</ol>
+
+<p>Implementing a <tt>FunctionPass</tt> is usually straightforward (See the <a
+href="#basiccode">Hello World</a> pass for example). <tt>FunctionPass</tt>'s
+may override three virtual methods to do their work. All of these methods
+should return true if they modified the program, or false if they didn't.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="doInitialization_mod">The <tt>doInitialization(Module &amp;)</tt>
+ method</a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><pre>
+ <b>virtual bool</b> doInitialization(Module &amp;M);
+</pre></div>
+
+<p>The <tt>doInitialization</tt> method is allowed to do most of the things that
+<tt>FunctionPass</tt>'s are not allowed to do. They can add and remove
+functions, get pointers to functions, etc. The <tt>doInitialization</tt> method
+is designed to do simple initialization type of stuff that does not depend on
+the functions being processed. The <tt>doInitialization</tt> method call is not
+scheduled to overlap with any other pass executions (thus it should be very
+fast).</p>
+
+<p>A good example of how this method should be used is the <a
+href="http://llvm.org/doxygen/LowerAllocations_8cpp-source.html">LowerAllocations</a>
+pass. This pass converts <tt>malloc</tt> and <tt>free</tt> instructions into
+platform dependent <tt>malloc()</tt> and <tt>free()</tt> function calls. It
+uses the <tt>doInitialization</tt> method to get a reference to the malloc and
+free functions that it needs, adding prototypes to the module if necessary.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="runOnFunction">The <tt>runOnFunction</tt> method</a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><pre>
+ <b>virtual bool</b> runOnFunction(Function &amp;F) = 0;
+</pre></div>
+
+<p>The <tt>runOnFunction</tt> method must be implemented by your subclass to do
+the transformation or analysis work of your pass. As usual, a true value should
+be returned if the function is modified.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="doFinalization_mod">The <tt>doFinalization(Module
+ &amp;)</tt> method</a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><pre>
+ <b>virtual bool</b> doFinalization(Module &amp;M);
+</pre></div>
+
+<p>The <tt>doFinalization</tt> method is an infrequently used method that is
+called when the pass framework has finished calling <a
+href="#runOnFunction"><tt>runOnFunction</tt></a> for every function in the
+program being compiled.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="LoopPass">The <tt>LoopPass</tt> class </a>
+</div>
+
+<div class="doc_text">
+
+<p> All <tt>LoopPass</tt> execute on each loop in the function independent of
+all of the other loops in the function. <tt>LoopPass</tt> processes loops in
+loop nest order such that the outermost loop is processed last. </p>
+
+<p> <tt>LoopPass</tt> subclasses are allowed to update the loop nest using the
+<tt>LPPassManager</tt> interface. Implementing a loop pass is usually
+straightforward. <tt>LoopPass</tt>'s may override three virtual methods to
+do their work. All these methods should return true if they modified the
+program, or false if they didn't. </p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="doInitialization_loop">The <tt>doInitialization(Loop *,
+ LPPassManager &amp;)</tt>
+ method</a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><pre>
+ <b>virtual bool</b> doInitialization(Loop *, LPPassManager &amp;LPM);
+</pre></div>
+
+<p>The <tt>doInitialization</tt> method is designed to do simple initialization
+type of stuff that does not depend on the functions being processed. The
+<tt>doInitialization</tt> method call is not scheduled to overlap with any
+other pass executions (thus it should be very fast). LPPassManager
+interface should be used to access Function or Module level analysis
+information.</p>
+
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="runOnLoop">The <tt>runOnLoop</tt> method</a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><pre>
+ <b>virtual bool</b> runOnLoop(Loop *, LPPassManager &amp;LPM) = 0;
+</pre></div>
+
+<p>The <tt>runOnLoop</tt> method must be implemented by your subclass to do
+the transformation or analysis work of your pass. As usual, a true value should
+be returned if the function is modified. <tt>LPPassManager</tt> interface
+should be used to update loop nest.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="doFinalization_loop">The <tt>doFinalization()</tt> method</a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><pre>
+ <b>virtual bool</b> doFinalization();
+</pre></div>
+
+<p>The <tt>doFinalization</tt> method is an infrequently used method that is
+called when the pass framework has finished calling <a
+href="#runOnLoop"><tt>runOnLoop</tt></a> for every loop in the
+program being compiled. </p>
+
+</div>
+
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="BasicBlockPass">The <tt>BasicBlockPass</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p><tt>BasicBlockPass</tt>'s are just like <a
+href="#FunctionPass"><tt>FunctionPass</tt></a>'s, except that they must limit
+their scope of inspection and modification to a single basic block at a time.
+As such, they are <b>not</b> allowed to do any of the following:</p>
+
+<ol>
+<li>Modify or inspect any basic blocks outside of the current one</li>
+<li>Maintain state across invocations of
+ <a href="#runOnBasicBlock"><tt>runOnBasicBlock</tt></a></li>
+<li>Modify the control flow graph (by altering terminator instructions)</li>
+<li>Any of the things forbidden for
+ <a href="#FunctionPass"><tt>FunctionPass</tt></a>es.</li>
+</ol>
+
+<p><tt>BasicBlockPass</tt>es are useful for traditional local and "peephole"
+optimizations. They may override the same <a
+href="#doInitialization_mod"><tt>doInitialization(Module &amp;)</tt></a> and <a
+href="#doFinalization_mod"><tt>doFinalization(Module &amp;)</tt></a> methods that <a
+href="#FunctionPass"><tt>FunctionPass</tt></a>'s have, but also have the following virtual methods that may also be implemented:</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="doInitialization_fn">The <tt>doInitialization(Function
+ &amp;)</tt> method</a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><pre>
+ <b>virtual bool</b> doInitialization(Function &amp;F);
+</pre></div>
+
+<p>The <tt>doInitialization</tt> method is allowed to do most of the things that
+<tt>BasicBlockPass</tt>'s are not allowed to do, but that
+<tt>FunctionPass</tt>'s can. The <tt>doInitialization</tt> method is designed
+to do simple initialization that does not depend on the
+BasicBlocks being processed. The <tt>doInitialization</tt> method call is not
+scheduled to overlap with any other pass executions (thus it should be very
+fast).</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="runOnBasicBlock">The <tt>runOnBasicBlock</tt> method</a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><pre>
+ <b>virtual bool</b> runOnBasicBlock(BasicBlock &amp;BB) = 0;
+</pre></div>
+
+<p>Override this function to do the work of the <tt>BasicBlockPass</tt>. This
+function is not allowed to inspect or modify basic blocks other than the
+parameter, and is not allowed to modify the CFG. A true value must be returned
+if the basic block is modified.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="doFinalization_fn">The <tt>doFinalization(Function &amp;)</tt>
+ method</a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><pre>
+ <b>virtual bool</b> doFinalization(Function &amp;F);
+</pre></div>
+
+<p>The <tt>doFinalization</tt> method is an infrequently used method that is
+called when the pass framework has finished calling <a
+href="#runOnBasicBlock"><tt>runOnBasicBlock</tt></a> for every BasicBlock in the
+program being compiled. This can be used to perform per-function
+finalization.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+ <a name="MachineFunctionPass">The <tt>MachineFunctionPass</tt> class</a>
+</div>
+
+<div class="doc_text">
+
+<p>A <tt>MachineFunctionPass</tt> is a part of the LLVM code generator that
+executes on the machine-dependent representation of each LLVM function in the
+program. A <tt>MachineFunctionPass</tt> is also a <tt>FunctionPass</tt>, so all
+the restrictions that apply to a <tt>FunctionPass</tt> also apply to it.
+<tt>MachineFunctionPass</tt>es also have additional restrictions. In particular,
+<tt>MachineFunctionPass</tt>es are not allowed to do any of the following:</p>
+
+<ol>
+<li>Modify any LLVM Instructions, BasicBlocks or Functions.</li>
+<li>Modify a MachineFunction other than the one currently being processed.</li>
+<li>Add or remove MachineFunctions from the current Module.</li>
+<li>Add or remove global variables from the current Module.</li>
+<li>Maintain state across invocations of <a
+href="#runOnMachineFunction"><tt>runOnMachineFunction</tt></a> (including global
+data)</li>
+</ol>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="runOnMachineFunction">The <tt>runOnMachineFunction(MachineFunction
+ &amp;MF)</tt> method</a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><pre>
+ <b>virtual bool</b> runOnMachineFunction(MachineFunction &amp;MF) = 0;
+</pre></div>
+
+<p><tt>runOnMachineFunction</tt> can be considered the main entry point of a
+<tt>MachineFunctionPass</tt>; that is, you should override this method to do the
+work of your <tt>MachineFunctionPass</tt>.</p>
+
+<p>The <tt>runOnMachineFunction</tt> method is called on every
+<tt>MachineFunction</tt> in a <tt>Module</tt>, so that the
+<tt>MachineFunctionPass</tt> may perform optimizations on the machine-dependent
+representation of the function. If you want to get at the LLVM <tt>Function</tt>
+for the <tt>MachineFunction</tt> you're working on, use
+<tt>MachineFunction</tt>'s <tt>getFunction()</tt> accessor method -- but
+remember, you may not modify the LLVM <tt>Function</tt> or its contents from a
+<tt>MachineFunctionPass</tt>.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="registration">Pass registration</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>In the <a href="#basiccode">Hello World</a> example pass we illustrated how
+pass registration works, and discussed some of the reasons that it is used and
+what it does. Here we discuss how and why passes are registered.</p>
+
+<p>As we saw above, passes are registered with the <b><tt>RegisterPass</tt></b>
+template, which requires you to pass at least two
+parameters. The first parameter is the name of the pass that is to be used on
+the command line to specify that the pass should be added to a program (for
+example, with <tt>opt</tt> or <tt>bugpoint</tt>). The second argument is the
+name of the pass, which is to be used for the <tt>--help</tt> output of
+programs, as
+well as for debug output generated by the <tt>--debug-pass</tt> option.</p>
+
+<p>If you want your pass to be easily dumpable, you should
+implement the virtual <tt>print</tt> method:</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="print">The <tt>print</tt> method</a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><pre>
+ <b>virtual void</b> print(llvm::OStream &amp;O, <b>const</b> Module *M) <b>const</b>;
+</pre></div>
+
+<p>The <tt>print</tt> method must be implemented by "analyses" in order to print
+a human readable version of the analysis results. This is useful for debugging
+an analysis itself, as well as for other people to figure out how an analysis
+works. Use the <tt>opt -analyze</tt> argument to invoke this method.</p>
+
+<p>The <tt>llvm::OStream</tt> parameter specifies the stream to write the results on,
+and the <tt>Module</tt> parameter gives a pointer to the top level module of the
+program that has been analyzed. Note however that this pointer may be null in
+certain circumstances (such as calling the <tt>Pass::dump()</tt> from a
+debugger), so it should only be used to enhance debug output; it should not be
+depended on.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="interaction">Specifying interactions between passes</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>One of the main responsibilities of the <tt>PassManager</tt> is to make sure
+that passes interact with each other correctly. Because <tt>PassManager</tt>
+tries to <a href="#passmanager">optimize the execution of passes</a> it must
+know how the passes interact with each other and what dependencies exist between
+the various passes. To track this, each pass can declare the set of passes that
+are required to be executed before the current pass, and the passes which are
+invalidated by the current pass.</p>
+
+<p>Typically this functionality is used to require that analysis results are
+computed before your pass is run. Running arbitrary transformation passes can
+invalidate the computed analysis results, which is what the invalidation set
+specifies. If a pass does not implement the <tt><a
+href="#getAnalysisUsage">getAnalysisUsage</a></tt> method, it defaults to not
+having any prerequisite passes, and invalidating <b>all</b> other passes.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="getAnalysisUsage">The <tt>getAnalysisUsage</tt> method</a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><pre>
+ <b>virtual void</b> getAnalysisUsage(AnalysisUsage &amp;Info) <b>const</b>;
+</pre></div>
+
+<p>By implementing the <tt>getAnalysisUsage</tt> method, the required and
+invalidated sets may be specified for your transformation. The implementation
+should fill in the <tt><a
+href="http://llvm.org/doxygen/classllvm_1_1AnalysisUsage.html">AnalysisUsage</a></tt>
+object with information about which passes are required and not invalidated. To
+do this, a pass may call any of the following methods on the AnalysisUsage
+object:</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="AU::addRequired">The <tt>AnalysisUsage::addRequired&lt;&gt;</tt> and <tt>AnalysisUsage::addRequiredTransitive&lt;&gt;</tt> methods</a>
+</div>
+
+<div class="doc_text">
+<p>
+If your pass requires a previous pass to be executed (an analysis for example),
+it can use one of these methods to arrange for it to be run before your pass.
+LLVM has many different types of analyses and passes that can be required,
+spanning the range from <tt>DominatorSet</tt> to <tt>BreakCriticalEdges</tt>.
+Requiring <tt>BreakCriticalEdges</tt>, for example, guarantees that there will
+be no critical edges in the CFG when your pass has been run.
+</p>
+
+<p>
+Some analyses chain to other analyses to do their job. For example, an <a
+href="AliasAnalysis.html">AliasAnalysis</a> implementation is required to <a
+href="AliasAnalysis.html#chaining">chain</a> to other alias analysis passes. In
+cases where analyses chain, the <tt>addRequiredTransitive</tt> method should be
+used instead of the <tt>addRequired</tt> method. This informs the PassManager
+that the transitively required pass should be alive as long as the requiring
+pass is.
+</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="AU::addPreserved">The <tt>AnalysisUsage::addPreserved&lt;&gt;</tt> method</a>
+</div>
+
+<div class="doc_text">
+<p>
+One of the jobs of the PassManager is to optimize how and when analyses are run.
+In particular, it attempts to avoid recomputing data unless it needs to. For
+this reason, passes are allowed to declare that they preserve (i.e., they don't
+invalidate) an existing analysis if it's available. For example, a simple
+constant folding pass would not modify the CFG, so it can't possibly affect the
+results of dominator analysis. By default, all passes are assumed to invalidate
+all others.
+</p>
+
+<p>
+The <tt>AnalysisUsage</tt> class provides several methods which are useful in
+certain circumstances that are related to <tt>addPreserved</tt>. In particular,
+the <tt>setPreservesAll</tt> method can be called to indicate that the pass does
+not modify the LLVM program at all (which is true for analyses), and the
+<tt>setPreservesCFG</tt> method can be used by transformations that change
+instructions in the program but do not modify the CFG or terminator instructions
+(note that this property is implicitly set for <a
+href="#BasicBlockPass">BasicBlockPass</a>'s).
+</p>
+
+<p>
+<tt>addPreserved</tt> is particularly useful for transformations like
+<tt>BreakCriticalEdges</tt>. This pass knows how to update a small set of loop
+and dominator related analyses if they exist, so it can preserve them, despite
+the fact that it hacks on the CFG.
+</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="AU::examples">Example implementations of <tt>getAnalysisUsage</tt></a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><pre>
+ <i>// This is an example implementation from an analysis, which does not modify
+ // the program at all, yet has a prerequisite.</i>
+ <b>void</b> <a href="http://llvm.org/doxygen/classllvm_1_1PostDominanceFrontier.html">PostDominanceFrontier</a>::getAnalysisUsage(AnalysisUsage &amp;AU) <b>const</b> {
+ AU.setPreservesAll();
+ AU.addRequired&lt;<a href="http://llvm.org/doxygen/classllvm_1_1PostDominatorTree.html">PostDominatorTree</a>&gt;();
+ }
+</pre></div>
+
+<p>and:</p>
+
+<div class="doc_code"><pre>
+ <i>// This example modifies the program, but does not modify the CFG</i>
+ <b>void</b> <a href="http://llvm.org/doxygen/structLICM.html">LICM</a>::getAnalysisUsage(AnalysisUsage &amp;AU) <b>const</b> {
+ AU.setPreservesCFG();
+ AU.addRequired&lt;<a href="http://llvm.org/doxygen/classllvm_1_1LoopInfo.html">LoopInfo</a>&gt;();
+ }
+</pre></div>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="getAnalysis">The <tt>getAnalysis&lt;&gt;</tt> and <tt>getAnalysisToUpdate&lt;&gt;</tt> methods</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>Pass::getAnalysis&lt;&gt;</tt> method is automatically inherited by
+your class, providing you with access to the passes that you declared that you
+required with the <a href="#getAnalysisUsage"><tt>getAnalysisUsage</tt></a>
+method. It takes a single template argument that specifies which pass class you
+want, and returns a reference to that pass. For example:</p>
+
+<div class="doc_code"><pre>
+ bool LICM::runOnFunction(Function &amp;F) {
+ LoopInfo &amp;LI = getAnalysis&lt;LoopInfo&gt;();
+ ...
+ }
+</pre></div>
+
+<p>This method call returns a reference to the pass desired. You may get a
+runtime assertion failure if you attempt to get an analysis that you did not
+declare as required in your <a
+href="#getAnalysisUsage"><tt>getAnalysisUsage</tt></a> implementation. This
+method can be called by your <tt>run*</tt> method implementation, or by any
+other local method invoked by your <tt>run*</tt> method.
+
+A module level pass can use function level analysis info using this interface.
+For example:</p>
+
+<div class="doc_code"><pre>
+ bool ModuleLevelPass::runOnModule(Module &amp;M) {
+ ...
+ DominatorTree &amp;DT = getAnalysis&lt;DominatorTree&gt;(Func);
+ ...
+ }
+</pre></div>
+
+<p>In the above example, runOnFunction for DominatorTree is called by the pass manager
+before returning a reference to the desired pass.</p>
+
+<p>
+If your pass is capable of updating analyses if they exist (e.g.,
+<tt>BreakCriticalEdges</tt>, as described above), you can use the
+<tt>getAnalysisToUpdate</tt> method, which returns a pointer to the analysis if
+it is active. For example:</p>
+
+<div class="doc_code"><pre>
+ ...
+ if (DominatorSet *DS = getAnalysisToUpdate&lt;DominatorSet&gt;()) {
+ <i>// A DominatorSet is active. This code will update it.</i>
+ }
+ ...
+</pre></div>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="analysisgroup">Implementing Analysis Groups</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>Now that we understand the basics of how passes are defined, how they are
+used, and how they are required from other passes, it's time to get a little bit
+fancier. All of the pass relationships that we have seen so far are very
+simple: one pass depends on one other specific pass to be run before it can run.
+For many applications, this is great, for others, more flexibility is
+required.</p>
+
+<p>In particular, some analyses are defined such that there is a single simple
+interface to the analysis results, but multiple ways of calculating them.
+Consider alias analysis for example. The most trivial alias analysis returns
+"may alias" for any alias query. The most sophisticated analysis is a
+flow-sensitive, context-sensitive interprocedural analysis that can take a
+significant amount of time to execute (and obviously, there is a lot of room
+between these two extremes for other implementations). To cleanly support
+situations like this, the LLVM Pass Infrastructure supports the notion of
+Analysis Groups.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="agconcepts">Analysis Group Concepts</a>
+</div>
+
+<div class="doc_text">
+
+<p>An Analysis Group is a single simple interface that may be implemented by
+multiple different passes. Analysis Groups can be given human readable names
+just like passes, but unlike passes, they need not derive from the <tt>Pass</tt>
+class. An analysis group may have one or more implementations, one of which is
+the "default" implementation.</p>
+
+<p>Analysis groups are used by client passes just like other passes are: the
+<tt>AnalysisUsage::addRequired()</tt> and <tt>Pass::getAnalysis()</tt> methods.
+In order to resolve this requirement, the <a href="#passmanager">PassManager</a>
+scans the available passes to see if any implementations of the analysis group
+are available. If none is available, the default implementation is created for
+the pass to use. All standard rules for <a href="#interaction">interaction
+between passes</a> still apply.</p>
+
+<p>Although <a href="#registration">Pass Registration</a> is optional for normal
+passes, all analysis group implementations must be registered, and must use the
+<a href="#registerag"><tt>RegisterAnalysisGroup</tt></a> template to join the
+implementation pool. Also, a default implementation of the interface
+<b>must</b> be registered with <a
+href="#registerag"><tt>RegisterAnalysisGroup</tt></a>.</p>
+
+<p>As a concrete example of an Analysis Group in action, consider the <a
+href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html">AliasAnalysis</a>
+analysis group. The default implementation of the alias analysis interface (the
+<tt><a
+href="http://llvm.org/doxygen/structBasicAliasAnalysis.html">basicaa</a></tt>
+pass) just does a few simple checks that don't require significant analysis to
+compute (such as: two different globals can never alias each other, etc).
+Passes that use the <tt><a
+href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html">AliasAnalysis</a></tt>
+interface (for example the <tt><a
+href="http://llvm.org/doxygen/structGCSE.html">gcse</a></tt> pass), do
+not care which implementation of alias analysis is actually provided, they just
+use the designated interface.</p>
+
+<p>From the user's perspective, commands work just like normal. Issuing the
+command '<tt>opt -gcse ...</tt>' will cause the <tt>basicaa</tt> class to be
+instantiated and added to the pass sequence. Issuing the command '<tt>opt
+-somefancyaa -gcse ...</tt>' will cause the <tt>gcse</tt> pass to use the
+<tt>somefancyaa</tt> alias analysis (which doesn't actually exist, it's just a
+hypothetical example) instead.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="registerag">Using <tt>RegisterAnalysisGroup</tt></a>
+</div>
+
+<div class="doc_text">
+
+<p>The <tt>RegisterAnalysisGroup</tt> template is used to register the analysis
+group itself as well as add pass implementations to the analysis group. First,
+an analysis should be registered, with a human readable name provided for it.
+Unlike registration of passes, there is no command line argument to be specified
+for the Analysis Group Interface itself, because it is "abstract":</p>
+
+<div class="doc_code"><pre>
+ <b>static</b> RegisterAnalysisGroup&lt;<a href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html">AliasAnalysis</a>&gt; A("<i>Alias Analysis</i>");
+</pre></div>
+
+<p>Once the analysis is registered, passes can declare that they are valid
+implementations of the interface by using the following code:</p>
+
+<div class="doc_code"><pre>
+<b>namespace</b> {
+ //<i> Analysis Group implementations <b>must</b> be registered normally...</i>
+ RegisterPass&lt;FancyAA&gt;
+ B("<i>somefancyaa</i>", "<i>A more complex alias analysis implementation</i>");
+
+ //<i> Declare that we implement the AliasAnalysis interface</i>
+ RegisterAnalysisGroup&lt;<a href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html">AliasAnalysis</a>&gt; C(B);
+}
+</pre></div>
+
+<p>This just shows a class <tt>FancyAA</tt> that is registered normally, then
+uses the <tt>RegisterAnalysisGroup</tt> template to "join" the <tt><a
+href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html">AliasAnalysis</a></tt>
+analysis group. Every implementation of an analysis group should join using
+this template. A single pass may join multiple different analysis groups with
+no problem.</p>
+
+<div class="doc_code"><pre>
+<b>namespace</b> {
+ //<i> Analysis Group implementations <b>must</b> be registered normally...</i>
+ RegisterPass&lt;<a href="http://llvm.org/doxygen/structBasicAliasAnalysis.html">BasicAliasAnalysis</a>&gt;
+ D("<i>basicaa</i>", "<i>Basic Alias Analysis (default AA impl)</i>");
+
+ //<i> Declare that we implement the AliasAnalysis interface</i>
+ RegisterAnalysisGroup&lt;<a href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html">AliasAnalysis</a>, <b>true</b>&gt; E(D);
+}
+</pre></div>
+
+<p>Here we show how the default implementation is specified (using the extra
+argument to the <tt>RegisterAnalysisGroup</tt> template). There must be exactly
+one default implementation available at all times for an Analysis Group to be
+used. Only the default implementation can derive from <tt>ImmutablePass</tt>.
+Here we declare that the
+ <tt><a href="http://llvm.org/doxygen/structBasicAliasAnalysis.html">BasicAliasAnalysis</a></tt>
+pass is the default implementation for the interface.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="passStatistics">Pass Statistics</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+<p>The <a
+href="http://llvm.org/doxygen/Statistic_8h-source.html"><tt>Statistic</tt></a>
+class is designed to be an easy way to expose various success
+metrics from passes. These statistics are printed at the end of a
+run, when the <tt>-stats</tt> option is enabled on the command
+line. See the <a href="http://llvm.org/docs/ProgrammersManual.html#Statistic">Statistics section</a> in the Programmer's Manual for details.</p>
+
+</div>
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="passmanager">What PassManager does</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>The <a
+href="http://llvm.org/doxygen/PassManager_8h-source.html"><tt>PassManager</tt></a>
+<a
+href="http://llvm.org/doxygen/classllvm_1_1PassManager.html">class</a>
+takes a list of passes, ensures their <a href="#interaction">prerequisites</a>
+are set up correctly, and then schedules passes to run efficiently. All of the
+LLVM tools that run passes use the <tt>PassManager</tt> for execution of these
+passes.</p>
+
+<p>The <tt>PassManager</tt> does two main things to try to reduce the execution
+time of a series of passes:</p>
+
+<ol>
+<li><b>Share analysis results</b> - The PassManager attempts to avoid
+recomputing analysis results as much as possible. This means keeping track of
+which analyses are available already, which analyses get invalidated, and which
+analyses are needed to be run for a pass. An important part of this work is that the
+<tt>PassManager</tt> tracks the exact lifetime of all analysis results, allowing
+it to <a href="#releaseMemory">free memory</a> allocated to holding analysis
+results as soon as they are no longer needed.</li>
+
+<li><b>Pipeline the execution of passes on the program</b> - The
+<tt>PassManager</tt> attempts to get better cache and memory usage behavior out
+of a series of passes by pipelining the passes together. This means that, given
+a series of consecutive <a href="#FunctionPass"><tt>FunctionPass</tt></a>'s, it
+will execute all of the <a href="#FunctionPass"><tt>FunctionPass</tt></a>'s on
+the first function, then all of the <a
+href="#FunctionPass"><tt>FunctionPass</tt></a>es on the second function,
+etc... until the entire program has been run through the passes.
+
+<p>This improves the cache behavior of the compiler, because it is only touching
+the LLVM program representation for a single function at a time, instead of
+traversing the entire program. It reduces the memory consumption of the compiler,
+because, for example, only one <a
+href="http://llvm.org/doxygen/classllvm_1_1DominatorSet.html"><tt>DominatorSet</tt></a>
+needs to be calculated at a time. This also makes it possible to perform some <a
+href="#SMP">interesting enhancements</a> in the future.</p></li>
+
+</ol>
+
+<p>The effectiveness of the <tt>PassManager</tt> is influenced directly by how
+much information it has about the behaviors of the passes it is scheduling. For
+example, the "preserved" set is intentionally conservative in the face of an
+unimplemented <a href="#getAnalysisUsage"><tt>getAnalysisUsage</tt></a> method.
+Not implementing it when it should be implemented will have the effect of not
+allowing any analysis results to live across the execution of your pass.</p>
+
+<p>The <tt>PassManager</tt> class exposes a <tt>--debug-pass</tt> command line
+option that is useful for debugging pass execution, seeing how things work, and
+diagnosing when you should be preserving more analyses than you currently are
+(To get information about all of the variants of the <tt>--debug-pass</tt>
+option, just type '<tt>opt --help-hidden</tt>').</p>
+
+<p>By using the <tt>--debug-pass=Structure</tt> option, for example, we can see
+how our <a href="#basiccode">Hello World</a> pass interacts with other passes.
+Let's try it out with the <tt>gcse</tt> and <tt>licm</tt> passes:</p>
+
+<div class="doc_code"><pre>
+$ opt -load ../../../Debug/lib/Hello.so -gcse -licm --debug-pass=Structure &lt; hello.bc &gt; /dev/null
+Module Pass Manager
+ Function Pass Manager
+ Dominator Set Construction
+ Immediate Dominators Construction
+ Global Common Subexpression Elimination
+-- Immediate Dominators Construction
+-- Global Common Subexpression Elimination
+ Natural Loop Construction
+ Loop Invariant Code Motion
+-- Natural Loop Construction
+-- Loop Invariant Code Motion
+ Module Verifier
+-- Dominator Set Construction
+-- Module Verifier
+ Bitcode Writer
+--Bitcode Writer
+</pre></div>
+
+<p>This output shows us when passes are constructed and when the analysis
+results are known to be dead (prefixed with '<tt>--</tt>'). Here we see that
+GCSE uses dominator and immediate dominator information to do its job. The LICM
+pass uses natural loop information, which uses dominator sets, but not immediate
+dominators. Because immediate dominators are no longer useful after the GCSE
+pass, they are immediately destroyed. The dominator sets are then reused to
+compute natural loop information, which is then used by the LICM pass.</p>
+
+<p>After the LICM pass, the module verifier runs (which is automatically added
+by the '<tt>opt</tt>' tool), which uses the dominator set to check that the
+resultant LLVM code is well formed. After it finishes, the dominator set
+information is destroyed, after being computed once, and shared by three
+passes.</p>
+
+<p>Let's see how this changes when we run the <a href="#basiccode">Hello
+World</a> pass in between the two passes:</p>
+
+<div class="doc_code"><pre>
+$ opt -load ../../../Debug/lib/Hello.so -gcse -hello -licm --debug-pass=Structure &lt; hello.bc &gt; /dev/null
+Module Pass Manager
+ Function Pass Manager
+ Dominator Set Construction
+ Immediate Dominators Construction
+ Global Common Subexpression Elimination
+<b>-- Dominator Set Construction</b>
+-- Immediate Dominators Construction
+-- Global Common Subexpression Elimination
+<b> Hello World Pass
+-- Hello World Pass
+ Dominator Set Construction</b>
+ Natural Loop Construction
+ Loop Invariant Code Motion
+-- Natural Loop Construction
+-- Loop Invariant Code Motion
+ Module Verifier
+-- Dominator Set Construction
+-- Module Verifier
+ Bitcode Writer
+--Bitcode Writer
+Hello: __main
+Hello: puts
+Hello: main
+</pre></div>
+
+<p>Here we see that the <a href="#basiccode">Hello World</a> pass has killed the
+Dominator Set pass, even though it doesn't modify the code at all! To fix this,
+we need to add the following <a
+href="#getAnalysisUsage"><tt>getAnalysisUsage</tt></a> method to our pass:</p>
+
+<div class="doc_code"><pre>
+ <i>// We don't modify the program, so we preserve all analyses</i>
+ <b>virtual void</b> getAnalysisUsage(AnalysisUsage &amp;AU) <b>const</b> {
+ AU.setPreservesAll();
+ }
+</pre></div>
+
+<p>Now when we run our pass, we get this output:</p>
+
+<div class="doc_code"><pre>
+$ opt -load ../../../Debug/lib/Hello.so -gcse -hello -licm --debug-pass=Structure &lt; hello.bc &gt; /dev/null
+Pass Arguments: -gcse -hello -licm
+Module Pass Manager
+ Function Pass Manager
+ Dominator Set Construction
+ Immediate Dominators Construction
+ Global Common Subexpression Elimination
+-- Immediate Dominators Construction
+-- Global Common Subexpression Elimination
+ Hello World Pass
+-- Hello World Pass
+ Natural Loop Construction
+ Loop Invariant Code Motion
+-- Loop Invariant Code Motion
+-- Natural Loop Construction
+ Module Verifier
+-- Dominator Set Construction
+-- Module Verifier
+ Bitcode Writer
+--Bitcode Writer
+Hello: __main
+Hello: puts
+Hello: main
+</pre></div>
+
+<p>Which shows that we don't accidentally invalidate dominator information
+anymore, and therefore do not have to compute it twice.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="releaseMemory">The <tt>releaseMemory</tt> method</a>
+</div>
+
+<div class="doc_text">
+
+<div class="doc_code"><pre>
+ <b>virtual void</b> releaseMemory();
+</pre></div>
+
+<p>The <tt>PassManager</tt> automatically determines when to compute analysis
+results, and how long to keep them around for. Because the lifetime of the pass
+object itself is effectively the entire duration of the compilation process, we
+need some way to free analysis results when they are no longer useful. The
+<tt>releaseMemory</tt> virtual method is the way to do this.</p>
+
+<p>If you are writing an analysis or any other pass that retains a significant
+amount of state (for use by another pass which "requires" your pass and uses the
+<a href="#getAnalysis">getAnalysis</a> method) you should implement
+<tt>releaseMemory</tt> to, well, release the memory allocated to maintain this
+internal state. This method is called after the <tt>run*</tt> method for the
+class, before the next call of <tt>run*</tt> in your pass.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="registering">Registering dynamically loaded passes</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p><i>Size matters</i> when constructing production quality tools using LLVM,
+both for the purposes of distribution, and for regulating the resident code size
+when running on the target system. Therefore, it becomes desirable to
+selectively use some passes, while omitting others and maintain the flexibility
+to change configurations later on. You want to be able to do all this, and
+provide feedback to the user. This is where pass registration comes into
+play.</p>
+
+<p>The fundamental mechanisms for pass registration are the
+<tt>MachinePassRegistry</tt> class and subclasses of
+<tt>MachinePassRegistryNode</tt>.</p>
+
+<p>An instance of <tt>MachinePassRegistry</tt> is used to maintain a list of
+<tt>MachinePassRegistryNode</tt> objects. This instance maintains the list and
+communicates additions and deletions to the command line interface.</p>
+
+<p>An instance of a <tt>MachinePassRegistryNode</tt> subclass is used to maintain
+information provided about a particular pass. This information includes the
+command line name, the command help string and the address of the function used
+to create an instance of the pass. A global static constructor of one of these
+instances <i>registers</i> with a corresponding <tt>MachinePassRegistry</tt>,
+the static destructor <i>unregisters</i>. Thus a pass that is statically linked
+in the tool will be registered at start up. A dynamically loaded pass will
+register on load and unregister at unload.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection">
+ <a name="registering_existing">Using existing registries</a>
+</div>
+
+<div class="doc_text">
+
+<p>There are predefined registries to track instruction scheduling
+(<tt>RegisterScheduler</tt>) and register allocation (<tt>RegisterRegAlloc</tt>)
+machine passes. Here we will describe how to <i>register</i> a register
+allocator machine pass.</p>
+
+<p>Implement your register allocator machine pass. In your register allocator
+.cpp file add the following include:</p>
+
+<div class="doc_code"><pre>
+ #include "llvm/CodeGen/RegAllocRegistry.h"
+</pre></div>
+
+<p>Also in your register allocator .cpp file, define a creator function in the
+form:</p>
+
+<div class="doc_code"><pre>
+ FunctionPass *createMyRegisterAllocator() {
+ return new MyRegisterAllocator();
+ }
+</pre></div>
+
+<p>Note that the signature of this function should match the type of
+<tt>RegisterRegAlloc::FunctionPassCtor</tt>. In the same file add the
+"installing" declaration, in the form:</p>
+
+<div class="doc_code"><pre>
+ static RegisterRegAlloc myRegAlloc("myregalloc",
+ "  my register allocator help string",
+ createMyRegisterAllocator);
+</pre></div>
+
+<p>Note that the two spaces prior to the help string produce a tidy result on the
+--help query.</p>
+
+<div class="doc_code"><pre>
+$ llc --help
+ ...
+ -regalloc - Register allocator to use: (default = linearscan)
+ =linearscan - linear scan register allocator
+ =local - local register allocator
+ =simple - simple register allocator
+ =myregalloc - my register allocator help string
+ ...
+</pre></div>
+
+<p>And that's it. The user is now free to use <tt>-regalloc=myregalloc</tt> as
+an option. Registering instruction schedulers is similar except use the
+<tt>RegisterScheduler</tt> class. Note that the
+<tt>RegisterScheduler::FunctionPassCtor</tt> is significantly different from
+<tt>RegisterRegAlloc::FunctionPassCtor</tt>.</p>
+
+<p>To force the load/linking of your register allocator into the llc/lli tools,
+add your creator function's global declaration to "Passes.h" and add a "pseudo"
+call line to <tt>llvm/CodeGen/LinkAllCodegenComponents.h</tt>.</p>
+
+</div>
+
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsection">
+ <a name="registering_new">Creating new registries</a>
+</div>
+
+<div class="doc_text">
+
+<p>The easiest way to get started is to clone one of the existing registries; we
+recommend <tt>llvm/CodeGen/RegAllocRegistry.h</tt>. The key things to modify
+are the class name and the <tt>FunctionPassCtor</tt> type.</p>
+
+<p>Then you need to declare the registry. Example: if your pass registry is
+<tt>RegisterMyPasses</tt> then define:</p>
+
+<div class="doc_code"><pre>
+MachinePassRegistry RegisterMyPasses::Registry;
+</pre></div>
+
+<p>And finally, declare the command line option for your passes. Example:</p>
+
+<div class="doc_code"><pre>
+ cl::opt&lt;RegisterMyPasses::FunctionPassCtor, false,
+ RegisterPassParser&lt;RegisterMyPasses&gt; &gt;
+ MyPassOpt("mypass",
+ cl::init(&amp;createDefaultMyPass),
+ cl::desc("my pass option help"));
+</pre></div>
+
+<p>Here the command option is "mypass", with createDefaultMyPass as the default
+creator.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="debughints">Using GDB with dynamically loaded passes</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>Unfortunately, using GDB with dynamically loaded passes is not as easy as it
+should be. First of all, you can't set a breakpoint in a shared object that has
+not been loaded yet, and second of all there are problems with inlined functions
+in shared objects. Here are some suggestions for debugging your pass with
+GDB.</p>
+
+<p>For sake of discussion, I'm going to assume that you are debugging a
+transformation invoked by <tt>opt</tt>, although nothing described here depends
+on that.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="breakpoint">Setting a breakpoint in your pass</a>
+</div>
+
+<div class="doc_text">
+
+<p>First thing you do is start <tt>gdb</tt> on the <tt>opt</tt> process:</p>
+
+<div class="doc_code"><pre>
+$ <b>gdb opt</b>
+GNU gdb 5.0
+Copyright 2000 Free Software Foundation, Inc.
+GDB is free software, covered by the GNU General Public License, and you are
+welcome to change it and/or distribute copies of it under certain conditions.
+Type "show copying" to see the conditions.
+There is absolutely no warranty for GDB. Type "show warranty" for details.
+This GDB was configured as "sparc-sun-solaris2.6"...
+(gdb)
+</pre></div>
+
+<p>Note that <tt>opt</tt> has a lot of debugging information in it, so it takes
+time to load. Be patient. Since we cannot set a breakpoint in our pass yet
+(the shared object isn't loaded until runtime), we must execute the process, and
+have it stop before it invokes our pass, but after it has loaded the shared
+object. The most foolproof way of doing this is to set a breakpoint in
+<tt>PassManager::run</tt> and then run the process with the arguments you
+want:</p>
+
+<div class="doc_code"><pre>
+(gdb) <b>break llvm::PassManager::run</b>
+Breakpoint 1 at 0x2413bc: file Pass.cpp, line 70.
+(gdb) <b>run test.bc -load $(LLVMTOP)/llvm/Debug/lib/[libname].so -[passoption]</b>
+Starting program: opt test.bc -load $(LLVMTOP)/llvm/Debug/lib/[libname].so -[passoption]
+Breakpoint 1, PassManager::run (this=0xffbef174, M=@0x70b298) at Pass.cpp:70
+70 bool PassManager::run(Module &amp;M) { return PM-&gt;run(M); }
+(gdb)
+</pre></div>
+
+<p>Once <tt>opt</tt> stops in the <tt>PassManager::run</tt> method you are
+now free to set breakpoints in your pass so that you can trace through execution
+or do other standard debugging stuff.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="debugmisc">Miscellaneous Problems</a>
+</div>
+
+<div class="doc_text">
+
+<p>Once you have the basics down, there are a couple of problems that GDB has,
+some with solutions, some without.</p>
+
+<ul>
+<li>Inline functions have bogus stack information. In general, GDB does a
+pretty good job getting stack traces and stepping through inline functions.
+When a pass is dynamically loaded however, it somehow completely loses this
+capability. The only solution I know of is to de-inline a function (move it
+from the body of a class to a .cpp file).</li>
+
+<li>Restarting the program breaks breakpoints. After following the information
+above, you have succeeded in getting some breakpoints planted in your pass. Next
+thing you know, you restart the program (i.e., you type '<tt>run</tt>' again),
+and you start getting errors about breakpoints being unsettable. The only way I
+have found to "fix" this problem is to <tt>delete</tt> the breakpoints that are
+already set in your pass, run the program, and re-set the breakpoints once
+execution stops in <tt>PassManager::run</tt>.</li>
+
+</ul>
+
+<p>Hopefully these tips will help with common case debugging situations. If
+you'd like to contribute some tips of your own, just contact <a
+href="mailto:sabre@nondot.org">Chris</a>.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+ <a name="future">Future extensions planned</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>Although the LLVM Pass Infrastructure is very capable as it stands, and does
+some nifty stuff, there are things we'd like to add in the future. Here is
+where we are going:</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="SMP">Multithreaded LLVM</a>
+</div>
+
+<div class="doc_text">
+
+<p>Multiple CPU machines are becoming more common and compilation can never be
+fast enough: obviously we should allow for a multithreaded compiler. Because of
+the semantics defined for passes above (specifically they cannot maintain state
+across invocations of their <tt>run*</tt> methods), a nice clean way to
+implement a multithreaded compiler would be for the <tt>PassManager</tt> class
+to create multiple instances of each pass object, and allow the separate
+instances to be hacking on different parts of the program at the same time.</p>
+
+<p>This implementation would prevent each of the passes from having to implement
+multithreaded constructs, requiring only the LLVM core to have locking in a few
+places (for global resources). Although this is a simple extension, we simply
+haven't had time (or multiprocessor machines, thus a reason) to implement this.
+Despite that, we have kept the LLVM passes SMP ready, and you should too.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+ <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
+ <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+
+</body>
+</html>
diff --git a/docs/doxygen.cfg.in b/docs/doxygen.cfg.in
new file mode 100644
index 0000000..e6df859
--- /dev/null
+++ b/docs/doxygen.cfg.in
@@ -0,0 +1,1230 @@
+# Doxyfile 1.4.4
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project
+#
+# All text after a hash (#) is considered a comment and will be ignored
+# The format is:
+# TAG = value [value, ...]
+# For lists items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ")
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
+# by quotes) that should identify the project.
+
+PROJECT_NAME = LLVM
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
+# if some version control system is used.
+
+PROJECT_NUMBER = @PACKAGE_VERSION@
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY = @abs_top_builddir@/docs/doxygen
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
+# 4096 sub-directories (in 2 levels) under the output directory of each output
+# format and will distribute the generated files over these directories.
+# Enabling this option can be useful when feeding doxygen a huge amount of
+# source files, where putting all generated files in the same directory would
+# otherwise cause performance problems for the file system.
+
+CREATE_SUBDIRS = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# The default language is English, other supported languages are:
+# Brazilian, Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish,
+# Dutch, Finnish, French, German, Greek, Hungarian, Italian, Japanese,
+# Japanese-en (Japanese with English messages), Korean, Korean-en, Norwegian,
+# Polish, Portuguese, Romanian, Russian, Serbian, Slovak, Slovene, Spanish,
+# Swedish, and Ukrainian.
+
+OUTPUT_LANGUAGE = English
+
+# This tag can be used to specify the encoding used in the generated output.
+# The encoding is not always determined by the language that is chosen,
+# but also whether or not the output is meant for Windows or non-Windows users.
+# In case there is a difference, setting the USE_WINDOWS_ENCODING tag to YES
+# forces the Windows encoding (this is the default for the Windows binary),
+# whereas setting the tag to NO uses a Unix-style encoding (the default for
+# all platforms other than Windows).
+
+USE_WINDOWS_ENCODING = NO
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
+# include brief member descriptions after the members that are listed in
+# the file and class documentation (similar to JavaDoc).
+# Set to NO to disable this.
+
+BRIEF_MEMBER_DESC = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
+# the brief description of a member or function before the detailed description.
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+
+REPEAT_BRIEF = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator
+# that is used to form the text in various listings. Each string
+# in this list, if found as the leading text of the brief description, will be
+# stripped from the text and the result after processing the whole list, is
+# used as the annotated text. Otherwise, the brief description is used as-is.
+# If left blank, the following values are used ("$name" is automatically
+# replaced with the name of the entity): "The $name class" "The $name widget"
+# "The $name file" "is" "provides" "specifies" "contains"
+# "represents" "a" "an" "the"
+
+ABBREVIATE_BRIEF =
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# Doxygen will generate a detailed section even if there is only a brief
+# description.
+
+ALWAYS_DETAILED_SEC = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+
+INLINE_INHERITED_MEMB = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
+# path before files name in the file list and in the header files. If set
+# to NO the shortest path that makes the file name unique will be used.
+
+FULL_PATH_NAMES = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
+# can be used to strip a user-defined part of the path. Stripping is
+# only done if one of the specified strings matches the left-hand part of
+# the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the
+# path to strip.
+
+STRIP_FROM_PATH = ../..
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
+# the path mentioned in the documentation of a class, which tells
+# the reader which header file to include in order to use a class.
+# If left blank only the name of the header file containing the class
+# definition is used. Otherwise one should specify the include paths that
+# are normally passed to the compiler using the -I flag.
+
+STRIP_FROM_INC_PATH =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
+# (but less readable) file names. This can be useful if your file system
+# doesn't support long names like on DOS, Mac, or CD-ROM.
+
+SHORT_NAMES = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like the Qt-style comments (thus requiring an
+# explicit @brief command for a brief description).
+
+JAVADOC_AUTOBRIEF = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
+# treat a multi-line C++ special comment block (i.e. a block of //! or ///
+# comments) as a brief description. This used to be the default behaviour.
+# The new default is to treat a multi-line C++ comment block as a detailed
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the DETAILS_AT_TOP tag is set to YES then Doxygen
+# will output the detailed description near the top, like JavaDoc.
+# If set to NO, the detailed description appears after the member
+# documentation.
+
+DETAILS_AT_TOP = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
+# member inherits the documentation from any documented member that it
+# re-implements.
+
+INHERIT_DOCS = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+
+DISTRIBUTE_GROUP_DOC = NO
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
+# a new page for each member. If set to NO, the documentation of a member will
+# be part of the file/class/namespace that contains it.
+
+SEPARATE_MEMBER_PAGES = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab.
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
+TAB_SIZE = 2
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
+# sources only. Doxygen will then generate output that is more tailored for C.
+# For instance, some of the names that are used will be different. The list
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java sources
+# only. Doxygen will then generate output that is more tailored for Java.
+# For instance, namespaces will be presented as packages, qualified scopes
+# will look different, etc.
+
+OPTIMIZE_OUTPUT_JAVA = NO
+
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
+# the same type (for instance a group of public functions) to be put as a
+# subgroup of that type (e.g. under the Public Functions section). Set it to
+# NO to prevent subgrouping. Alternatively, this can be done per class using
+# the \nosubgrouping command.
+
+SUBGROUPING = YES
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available.
+# Private class members and static file members will be hidden unless
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+
+EXTRACT_ALL = YES
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
+# will be included in the documentation.
+
+EXTRACT_PRIVATE = NO
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file
+# will be included in the documentation.
+
+EXTRACT_STATIC = YES
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
+# defined locally in source files will be included in the documentation.
+# If set to NO only classes defined in header files are included.
+
+EXTRACT_LOCAL_CLASSES = YES
+
+# This flag is only useful for Objective-C code. When set to YES local
+# methods, which are defined in the implementation section but not in
+# the interface are included in the documentation.
+# If set to NO (the default) only methods in the interface are included.
+
+EXTRACT_LOCAL_METHODS = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
+# undocumented members of documented classes, files or namespaces.
+# If set to NO (the default) these members will be included in the
+# various overviews, but no documentation section is generated.
+# This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_MEMBERS = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy.
+# If set to NO (the default) these classes will be included in the various
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_CLASSES = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
+# friend (class|struct|union) declarations.
+# If set to NO (the default) these declarations will be included in the
+# documentation.
+
+HIDE_FRIEND_COMPOUNDS = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
+# documentation blocks found inside the body of a function.
+# If set to NO (the default) these blocks will be appended to the
+# function's detailed documentation block.
+
+HIDE_IN_BODY_DOCS = NO
+
+# The INTERNAL_DOCS tag determines if documentation
+# that is typed after a \internal command is included. If the tag is set
+# to NO (the default) then the documentation will be excluded.
+# Set it to YES to include the internal documentation.
+
+INTERNAL_DOCS = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
+# file names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+
+CASE_SENSE_NAMES = YES
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
+# will show members with their full class and namespace scopes in the
+# documentation. If set to YES the scope will be hidden.
+
+HIDE_SCOPE_NAMES = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
+# will put a list of the files that are included by a file in the documentation
+# of that file.
+
+SHOW_INCLUDE_FILES = YES
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
+# is inserted in the documentation for inline members.
+
+INLINE_INFO = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
+# will sort the (detailed) documentation of file and class members
+# alphabetically by member name. If set to NO the members will appear in
+# declaration order.
+
+SORT_MEMBER_DOCS = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
+# brief documentation of file, namespace and class members alphabetically
+# by member name. If set to NO (the default) the members will appear in
+# declaration order.
+
+SORT_BRIEF_DOCS = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
+# sorted by fully-qualified names, including namespaces. If set to
+# NO (the default), the class list will be sorted only by class name,
+# not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the
+# alphabetical list.
+
+SORT_BY_SCOPE_NAME = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or
+# disable (NO) the todo list. This list is created by putting \todo
+# commands in the documentation.
+
+GENERATE_TODOLIST = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or
+# disable (NO) the test list. This list is created by putting \test
+# commands in the documentation.
+
+GENERATE_TESTLIST = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or
+# disable (NO) the bug list. This list is created by putting \bug
+# commands in the documentation.
+
+GENERATE_BUGLIST = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
+# disable (NO) the deprecated list. This list is created by putting
+# \deprecated commands in the documentation.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
+# the initial value of a variable or define consists of for it to appear in
+# the documentation. If the initializer consists of more lines than specified
+# here it will be hidden. Use a value of 0 to hide initializers completely.
+# The appearance of the initializer of individual variables and defines in the
+# documentation can be controlled using \showinitializer or \hideinitializer
+# command in the documentation regardless of this setting.
+
+MAX_INITIALIZER_LINES = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
+# at the bottom of the documentation of classes and structs. If set to YES the
+# list will mention the files that were used to generate the documentation.
+
+SHOW_USED_FILES = YES
+
+# If the sources in your project are distributed over multiple directories
+# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
+# in the documentation. The default is YES.
+
+SHOW_DIRECTORIES = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from the
+# version control system). Doxygen will invoke the program by executing (via
+# popen()) the command <command> <input-file>, where <command> is the value of
+# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
+# provided by doxygen. Whatever the program writes to standard output
+# is used as the file version. See the manual for examples.
+
+FILE_VERSION_FILTER =
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated by doxygen. Possible values are YES and NO. If left blank
+# NO is used.
+
+WARNINGS = NO
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
+# automatically be disabled.
+
+WARN_IF_UNDOCUMENTED = NO
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some
+# parameters in a documented function, or documenting parameters that
+# don't exist or using markup commands wrongly.
+
+WARN_IF_DOC_ERROR = YES
+
+# This WARN_NO_PARAMDOC option can be enabled to get warnings for
+# functions that are documented, but have no documentation for their parameters
+# or return value. If set to NO (the default) doxygen will only warn about
+# wrong or incomplete parameter documentation, but not about the absence of
+# documentation.
+
+WARN_NO_PARAMDOC = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that
+# doxygen can produce. The string should contain the $file, $line, and $text
+# tags, which will be replaced by the file and line number from which the
+# warning originated and the warning text. Optionally the format may contain
+# $version, which will be replaced by the version of the file (if it could
+# be obtained via FILE_VERSION_FILTER)
+
+WARN_FORMAT =
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning
+# and error messages should be written. If left blank the output is written
+# to stderr.
+
+WARN_LOGFILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT = @abs_top_srcdir@/include \
+ @abs_top_srcdir@/lib \
+ @abs_top_srcdir@/docs/doxygen.intro
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm
+
+FILE_PATTERNS =
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories
+# should be searched for input files as well. Possible values are YES and NO.
+# If left blank NO is used.
+
+RECURSIVE = YES
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# be excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE =
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix filesystem feature) are excluded
+# from the input.
+
+EXCLUDE_SYMLINKS = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH = @abs_top_srcdir@/examples
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
+EXAMPLE_PATTERNS =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude
+# commands irrespective of the value of the RECURSIVE tag.
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE = YES
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain images that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH = @abs_top_srcdir@/docs/img
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command <filter> <input-file>, where <filter>
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output. If FILTER_PATTERNS is specified, this tag will be
+# ignored.
+
+INPUT_FILTER =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis. Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match. The filters are a list of the form:
+# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
+# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER
+# is applied to all files.
+
+FILTER_PATTERNS =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will
+# be generated. Documented entities will be cross-referenced with these sources.
+# Note: To get rid of all source code in the generated output, make sure also
+# VERBATIM_HEADERS is set to NO.
+
+SOURCE_BROWSER = YES
+
+# Setting the INLINE_SOURCES tag to YES will include the body
+# of functions and classes directly in the documentation.
+
+INLINE_SOURCES = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
+# doxygen to hide any special comment blocks from generated source code
+# fragments. Normal C and C++ comments will always remain visible.
+
+STRIP_CODE_COMMENTS = NO
+
+# If the REFERENCED_BY_RELATION tag is set to YES (the default)
+# then for each documented function all documented
+# functions referencing it will be listed.
+
+REFERENCED_BY_RELATION = YES
+
+# If the REFERENCES_RELATION tag is set to YES (the default)
+# then for each documented function all documented entities
+# called/used by that function will be listed.
+
+REFERENCES_RELATION = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code
+# will point to the HTML generated by the htags(1) tool instead of doxygen's
+# built-in source browser. The htags tool is part of GNU's global source
+# tagging system (see http://www.gnu.org/software/global/global.html). You
+# will need version 4.8.6 or higher.
+
+USE_HTAGS = NO
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
+# will generate a verbatim copy of the header file for each class for
+# which an include is specified. Set to NO to disable this.
+
+VERBATIM_HEADERS = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
+# of all compounds will be generated. Enable this if the project
+# contains a lot of classes, structs, unions or interfaces.
+
+ALPHABETICAL_INDEX = YES
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
+# in which this list will be split (can be a number in the range [1..20])
+
+COLS_IN_ALPHA_INDEX = 4
+
+# In case all classes in a project start with a common prefix, all
+# classes will be put under the same header in the alphabetical index.
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
+# should be ignored while generating the index headers.
+
+IGNORE_PREFIX = llvm::
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
+GENERATE_HTML = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `html' will be used as the default path.
+
+HTML_OUTPUT = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
+# doxygen will generate files with .html extension.
+
+HTML_FILE_EXTENSION = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard header.
+
+HTML_HEADER = @abs_top_srcdir@/docs/doxygen.header
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard footer.
+
+HTML_FOOTER = @abs_top_srcdir@/docs/doxygen.footer
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
+# style sheet that is used by each HTML page. It can be used to
+# fine-tune the look of the HTML output. If the tag is left blank doxygen
+# will generate a default style sheet. Note that doxygen will try to copy
+# the style sheet file to the HTML output directory, so don't put your own
+# stylesheet in the HTML output directory as well, or it will be erased!
+
+HTML_STYLESHEET = @abs_top_srcdir@/docs/doxygen.css
+
+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
+# files or namespaces will be aligned in HTML using tables. If set to
+# NO a bullet list will be used.
+
+HTML_ALIGN_MEMBERS = YES
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compressed HTML help file (.chm)
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
+# be used to specify the file name of the resulting .chm file. You
+# can add a path in front of the file if the result should not be
+# written to the html output directory.
+
+CHM_FILE =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
+# be used to specify the location (absolute path including file name) of
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
+# the HTML help compiler on the generated index.hhp.
+
+HHC_LOCATION =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
+# controls if a separate .chi index file is generated (YES) or that
+# it should be included in the master .chm file (NO).
+
+GENERATE_CHI = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
+# controls whether a binary table of contents is generated (YES) or a
+# normal table of contents (NO) in the .chm file.
+
+BINARY_TOC = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members
+# to the contents of the HTML help documentation and to the tree view.
+
+TOC_EXPAND = NO
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
+# top of each HTML page. The value NO (the default) enables the index and
+# the value YES disables it.
+
+DISABLE_INDEX = NO
+
+# This tag can be used to set the number of enum values (range [1..20])
+# that doxygen will group on one line in the generated HTML documentation.
+
+ENUM_VALUES_PER_LINE = 4
+
+# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be
+# generated containing a tree-like index structure (just like the one that
+# is generated for HTML Help). For this to work a browser that supports
+# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+,
+# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are
+# probably better off using the HTML help feature.
+
+GENERATE_TREEVIEW = NO
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
+# used to set the initial width (in pixels) of the frame in which the tree
+# is shown.
+
+TREEVIEW_WIDTH = 250
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate Latex output.
+
+GENERATE_LATEX = NO
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT =
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked. If left blank `latex' will be used as the default command name.
+
+LATEX_CMD_NAME = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
+# default command name.
+
+MAKEINDEX_CMD_NAME = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_LATEX = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, a4wide, letter, legal and
+# executive. If left blank a4wide will be used.
+
+PAPER_TYPE = letter
+
+# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS = NO
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
+USE_PDFLATEX = NO
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
+# in the output.
+
+LATEX_HIDE_INDICES = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output.
+# The RTF output is optimized for Word 97 and may not look very pretty with
+# other RTF readers or editors.
+
+GENERATE_RTF = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT =
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_RTF = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using WORD or other
+# programs which support those fields.
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE =
+
+# Set optional variables used in the generation of an rtf document.
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages
+
+GENERATE_MAN = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `man' will be used as the default path.
+
+MAN_OUTPUT =
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION =
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will
+# generate an XML file that captures the structure of
+# the code including all documentation.
+
+GENERATE_XML = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `xml' will be used as the default path.
+
+XML_OUTPUT = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_SCHEMA =
+
+# The XML_DTD tag can be used to specify an XML DTD,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_DTD =
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
+# dump the program listings (including syntax highlighting
+# and cross-referencing information) to the XML output. Note that
+# enabling this will significantly increase the size of the XML output.
+
+XML_PROGRAMLISTING = YES
+
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
+# generate an AutoGen Definitions (see autogen.sf.net) file
+# that captures the structure of the code including all
+# documentation. Note that this feature is still experimental
+# and incomplete at the moment.
+
+GENERATE_AUTOGEN_DEF = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will
+# generate a Perl module file that captures the structure of
+# the code including all documentation. Note that this
+# feature is still experimental and incomplete at the
+# moment.
+
+GENERATE_PERLMOD = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able
+# to generate PDF and DVI output from the Perl module output.
+
+PERLMOD_LATEX = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
+# nicely formatted so it can be parsed by a human reader. This is useful
+# if you want to understand what is going on. On the other hand, if this
+# tag is set to NO the size of the Perl module output will be much smaller
+# and Perl will parse it just the same.
+
+PERLMOD_PRETTY = YES
+
+# The names of the make variables in the generated doxyrules.make file
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
+# This is useful so different doxyrules.make files included by the same
+# Makefile don't overwrite each other's variables.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
+ENABLE_PREPROCESSING = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_PREDEFINED tags.
+
+EXPAND_ONLY_PREDEF = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the include files
+# in the INCLUDE_PATH (see below) will be searched if a #include is found.
+
+SEARCH_INCLUDES = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH = ../include
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
+INCLUDE_FILE_PATTERNS =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED =
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all function-like macros that are alone
+# on a line, have an all uppercase name, and do not end with a semicolon. Such
+# function macros are typically used for boiler-plate code, and will confuse
+# the parser if not removed.
+
+SKIP_FUNCTION_MACROS = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles.
+# Optionally an initial location of the external documentation
+# can be added for each tagfile. The format of a tag file without
+# this location is as follows:
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where "loc1" and "loc2" can be relative or absolute paths or
+# URLs. If a location is present for each tag, the installdox tool
+# does not have to be run to correct the links.
+# Note that each tag file must have a unique name
+# (where the name does NOT include the path)
+# If a tag file is not located in the directory in which doxygen
+# is run, you must also specify the path to the tagfile here.
+
+TAGFILES =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE =
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
+# will be listed.
+
+ALLEXTERNALS = YES
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will
+# be listed.
+
+EXTERNAL_GROUPS = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base
+# or super classes. Setting the tag to NO turns the diagrams off. Note that
+# this option is superseded by the HAVE_DOT option below. This is only a
+# fallback. It is recommended to install and use dot, since it yields more
+# powerful graphs.
+
+CLASS_DIAGRAMS = YES
+
+# If set to YES, the inheritance and collaboration graphs will hide
+# inheritance and usage relations if the target is undocumented
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS = NO
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
+# have no effect if this option is set to NO (the default)
+
+HAVE_DOT = YES
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force the
+# CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
+# class references variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH = YES
+
+# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for groups, showing the direct group dependencies.
+
+GROUP_GRAPHS = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+
+UML_LOOK = NO
+
+# If set to YES, the inheritance and collaboration graphs will show the
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
+# tags are set to YES then doxygen will generate a graph for each documented
+# file showing the direct and indirect include dependencies of the file with
+# other documented files.
+
+INCLUDE_GRAPH = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
+# documented header file showing the documented files that directly or
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH = YES
+
+# If the CALL_GRAPH and HAVE_DOT tags are set to YES then doxygen will
+# generate a call dependency graph for every global function or class method.
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable call graphs for selected
+# functions only using the \callgraph command.
+
+CALL_GRAPH = NO
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
+# will generate a graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY = YES
+
+# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
+# then doxygen will show the dependencies a directory has on other directories
+# in a graphical way. The dependency relations are determined by the #include
+# relations between the files in the directories.
+
+DIRECTORY_GRAPH = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. Possible values are png, jpg, or gif
+# If left blank png will be used.
+
+DOT_IMAGE_FORMAT = png
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+
+DOT_PATH = @DOT@
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS =
+
+# The MAX_DOT_GRAPH_WIDTH tag can be used to set the maximum allowed width
+# (in pixels) of the graphs generated by dot. If a graph becomes larger than
+# this value, doxygen will try to truncate the graph, so that it fits within
+# the specified constraint. Beware that most browsers cannot cope with very
+# large images.
+
+MAX_DOT_GRAPH_WIDTH = 1024
+
+# The MAX_DOT_GRAPH_HEIGHT tag can be used to set the maximum allowed height
+# (in pixels) of the graphs generated by dot. If a graph becomes larger than
+# this value, doxygen will try to truncate the graph, so that it fits within
+# the specified constraint. Beware that most browsers cannot cope with very
+# large images.
+
+MAX_DOT_GRAPH_HEIGHT = 1024
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
+# graphs generated by dot. A depth value of 3 means that only nodes reachable
+# from the root by following a path via at most 3 edges will be shown. Nodes
+# that lie further from the root node will be omitted. Note that setting this
+# option to 1 or 2 may greatly reduce the computation time needed for large
+# code bases. Also note that a graph may be further truncated if the graph's
+# image dimensions are not sufficient to fit the graph (see MAX_DOT_GRAPH_WIDTH
+# and MAX_DOT_GRAPH_HEIGHT). If 0 is used for the depth value (the default),
+# the graph is not depth-constrained.
+
+MAX_DOT_GRAPH_DEPTH = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, which results in a white background.
+# Warning: Depending on the platform used, enabling this option may lead to
+# badly anti-aliased labels on the edges of a graph (i.e. they become hard to
+# read).
+
+DOT_TRANSPARENT = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10)
+# support this, this feature is disabled by default.
+
+DOT_MULTI_TARGETS = NO
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
+# remove the intermediate dot files that are used to generate
+# the various graphs.
+
+DOT_CLEANUP = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to the search engine
+#---------------------------------------------------------------------------
+
+# The SEARCHENGINE tag specifies whether or not a search engine should be
+# used. If set to NO the values of all tags below this one will be ignored.
+
+SEARCHENGINE = NO
diff --git a/docs/doxygen.css b/docs/doxygen.css
new file mode 100644
index 0000000..83b049b
--- /dev/null
+++ b/docs/doxygen.css
@@ -0,0 +1,378 @@
+BODY,H1,H2,H3,H4,H5,H6,P,CENTER,TD,TH,UL,DL,DIV {
+ font-family: Verdana,Geneva,Arial,Helvetica,sans-serif;
+}
+BODY,TD {
+ font-size: 90%;
+}
+H1 {
+ text-align: center;
+ font-size: 140%;
+ font-weight: bold;
+}
+H2 {
+ font-size: 120%;
+ font-style: italic;
+}
+H3 {
+ font-size: 100%;
+}
+CAPTION { font-weight: bold }
+DIV.qindex {
+ width: 100%;
+ background-color: #eeeeff;
+ border: 1px solid #b0b0b0;
+ text-align: center;
+ margin: 2px;
+ padding: 2px;
+ line-height: 140%;
+}
+DIV.nav {
+ width: 100%;
+ background-color: #eeeeff;
+ border: 1px solid #b0b0b0;
+ text-align: center;
+ margin: 2px;
+ padding: 2px;
+ line-height: 140%;
+}
+DIV.navtab {
+ background-color: #eeeeff;
+ border: 1px solid #b0b0b0;
+ text-align: center;
+ margin: 2px;
+ margin-right: 15px;
+ padding: 2px;
+}
+TD.navtab {
+ font-size: 70%;
+}
+A.qindex {
+ text-decoration: none;
+ font-weight: bold;
+ color: #1A419D;
+}
+A.qindex:visited {
+ text-decoration: none;
+ font-weight: bold;
+ color: #1A419D
+}
+A.qindex:hover {
+ text-decoration: none;
+ background-color: #ddddff;
+}
+A.qindexHL {
+ text-decoration: none;
+ font-weight: bold;
+ background-color: #6666cc;
+ color: #ffffff;
+ border: 1px double #9295C2;
+}
+A.qindexHL:hover {
+ text-decoration: none;
+ background-color: #6666cc;
+ color: #ffffff;
+}
+A.qindexHL:visited {
+ text-decoration: none; background-color: #6666cc; color: #ffffff }
+A.el { text-decoration: none; font-weight: bold }
+A.elRef { font-weight: bold }
+A.code:link { text-decoration: none; font-weight: normal; color: #0000FF}
+A.code:visited { text-decoration: none; font-weight: normal; color: #0000FF}
+A.codeRef:link { font-weight: normal; color: #0000FF}
+A.codeRef:visited { font-weight: normal; color: #0000FF}
+A:hover { text-decoration: none; background-color: #f2f2ff }
+DL.el { margin-left: -1cm }
+.fragment {
+ font-family: Fixed, monospace;
+ font-size: 95%;
+}
+PRE.fragment {
+ border: 1px solid #CCCCCC;
+ background-color: #f5f5f5;
+ margin-top: 4px;
+ margin-bottom: 4px;
+ margin-left: 2px;
+ margin-right: 8px;
+ padding-left: 6px;
+ padding-right: 6px;
+ padding-top: 4px;
+ padding-bottom: 4px;
+}
+DIV.ah { background-color: black; font-weight: bold; color: #ffffff; margin-bottom: 3px; margin-top: 3px }
+TD.md { background-color: #F4F4FB; font-weight: bold; }
+TD.mdPrefix {
+ background-color: #F4F4FB;
+ color: #606060;
+ font-size: 80%;
+}
+TD.mdname1 { background-color: #F4F4FB; font-weight: bold; color: #602020; }
+TD.mdname { background-color: #F4F4FB; font-weight: bold; color: #602020; width: 600px; }
+DIV.groupHeader {
+ margin-left: 16px;
+ margin-top: 12px;
+ margin-bottom: 6px;
+ font-weight: bold;
+}
+DIV.groupText { margin-left: 16px; font-style: italic; font-size: 90% }
+BODY {
+ background: white;
+ color: black;
+ margin-right: 20px;
+ margin-left: 20px;
+}
+TD.indexkey {
+ background-color: #eeeeff;
+ font-weight: bold;
+ padding-right : 10px;
+ padding-top : 2px;
+ padding-left : 10px;
+ padding-bottom : 2px;
+ margin-left : 0px;
+ margin-right : 0px;
+ margin-top : 2px;
+ margin-bottom : 2px;
+ border: 1px solid #CCCCCC;
+}
+TD.indexvalue {
+ background-color: #eeeeff;
+ font-style: italic;
+ padding-right : 10px;
+ padding-top : 2px;
+ padding-left : 10px;
+ padding-bottom : 2px;
+ margin-left : 0px;
+ margin-right : 0px;
+ margin-top : 2px;
+ margin-bottom : 2px;
+ border: 1px solid #CCCCCC;
+}
+TR.memlist {
+ background-color: #f0f0f0;
+}
+P.formulaDsp { text-align: center; }
+IMG.formulaDsp { }
+IMG.formulaInl { vertical-align: middle; }
+SPAN.keyword { color: #008000 }
+SPAN.keywordtype { color: #604020 }
+SPAN.keywordflow { color: #e08000 }
+SPAN.comment { color: #800000 }
+SPAN.preprocessor { color: #806020 }
+SPAN.stringliteral { color: #002080 }
+SPAN.charliteral { color: #008080 }
+.mdTable {
+ border: 1px solid #868686;
+ background-color: #F4F4FB;
+}
+.mdRow {
+ padding: 8px 10px;
+}
+.mdescLeft {
+ padding: 0px 8px 4px 8px;
+ font-size: 80%;
+ font-style: italic;
+ background-color: #FAFAFA;
+ border-top: 1px none #E0E0E0;
+ border-right: 1px none #E0E0E0;
+ border-bottom: 1px none #E0E0E0;
+ border-left: 1px none #E0E0E0;
+ margin: 0px;
+}
+.mdescRight {
+ padding: 0px 8px 4px 8px;
+ font-size: 80%;
+ font-style: italic;
+ background-color: #FAFAFA;
+ border-top: 1px none #E0E0E0;
+ border-right: 1px none #E0E0E0;
+ border-bottom: 1px none #E0E0E0;
+ border-left: 1px none #E0E0E0;
+ margin: 0px;
+}
+.memItemLeft {
+ padding: 1px 0px 0px 8px;
+ margin: 4px;
+ border-top-width: 1px;
+ border-right-width: 1px;
+ border-bottom-width: 1px;
+ border-left-width: 1px;
+ border-top-color: #E0E0E0;
+ border-right-color: #E0E0E0;
+ border-bottom-color: #E0E0E0;
+ border-left-color: #E0E0E0;
+ border-top-style: solid;
+ border-right-style: none;
+ border-bottom-style: none;
+ border-left-style: none;
+ background-color: #FAFAFA;
+ font-size: 80%;
+}
+.memItemRight {
+ padding: 1px 8px 0px 8px;
+ margin: 4px;
+ border-top-width: 1px;
+ border-right-width: 1px;
+ border-bottom-width: 1px;
+ border-left-width: 1px;
+ border-top-color: #E0E0E0;
+ border-right-color: #E0E0E0;
+ border-bottom-color: #E0E0E0;
+ border-left-color: #E0E0E0;
+ border-top-style: solid;
+ border-right-style: none;
+ border-bottom-style: none;
+ border-left-style: none;
+ background-color: #FAFAFA;
+ font-size: 80%;
+}
+.memTemplItemLeft {
+ padding: 1px 0px 0px 8px;
+ margin: 4px;
+ border-top-width: 1px;
+ border-right-width: 1px;
+ border-bottom-width: 1px;
+ border-left-width: 1px;
+ border-top-color: #E0E0E0;
+ border-right-color: #E0E0E0;
+ border-bottom-color: #E0E0E0;
+ border-left-color: #E0E0E0;
+ border-top-style: none;
+ border-right-style: none;
+ border-bottom-style: none;
+ border-left-style: none;
+ background-color: #FAFAFA;
+ font-size: 80%;
+}
+.memTemplItemRight {
+ padding: 1px 8px 0px 8px;
+ margin: 4px;
+ border-top-width: 1px;
+ border-right-width: 1px;
+ border-bottom-width: 1px;
+ border-left-width: 1px;
+ border-top-color: #E0E0E0;
+ border-right-color: #E0E0E0;
+ border-bottom-color: #E0E0E0;
+ border-left-color: #E0E0E0;
+ border-top-style: none;
+ border-right-style: none;
+ border-bottom-style: none;
+ border-left-style: none;
+ background-color: #FAFAFA;
+ font-size: 80%;
+}
+.memTemplParams {
+ padding: 1px 0px 0px 8px;
+ margin: 4px;
+ border-top-width: 1px;
+ border-right-width: 1px;
+ border-bottom-width: 1px;
+ border-left-width: 1px;
+ border-top-color: #E0E0E0;
+ border-right-color: #E0E0E0;
+ border-bottom-color: #E0E0E0;
+ border-left-color: #E0E0E0;
+ border-top-style: solid;
+ border-right-style: none;
+ border-bottom-style: none;
+ border-left-style: none;
+ color: #606060;
+ background-color: #FAFAFA;
+ font-size: 80%;
+}
+.search { color: #003399;
+ font-weight: bold;
+}
+FORM.search {
+ margin-bottom: 0px;
+ margin-top: 0px;
+}
+INPUT.search { font-size: 75%;
+ color: #000080;
+ font-weight: normal;
+ background-color: #eeeeff;
+}
+TD.tiny { font-size: 75%;
+}
+a {
+ color: #252E78;
+}
+a:visited {
+ color: #3D2185;
+}
+.dirtab { padding: 4px;
+ border-collapse: collapse;
+ border: 1px solid #b0b0b0;
+}
+TH.dirtab { background: #eeeeff;
+ font-weight: bold;
+}
+HR { height: 1px;
+ border: none;
+ border-top: 1px solid black;
+}
+
+/*
+ * LLVM Modifications.
+ * Note: Everything above here is generated with the "doxygen -w html" command. See
+ * "doxygen --help" for details. What follows are CSS overrides for LLVM
+ * specific formatting. We want to keep the above so it can be replaced with
+ * subsequent doxygen upgrades.
+ */
+
+.footer {
+ font-size: 80%;
+ font-weight: bold;
+ text-align: center;
+ vertical-align: middle;
+}
+.title {
+ font-size: 25pt;
+ color: black; background: url("../img/lines.gif");
+ font-weight: bold;
+ border-width: 1px;
+ border-style: solid none solid none;
+ text-align: center;
+ vertical-align: middle;
+ padding-left: 8pt;
+ padding-top: 1px;
+ padding-bottom: 2px
+}
+A:link {
+ cursor: pointer;
+ text-decoration: none;
+ font-weight: bolder;
+}
+A:visited {
+ cursor: pointer;
+ text-decoration: underline;
+ font-weight: bolder;
+}
+A:hover {
+ cursor: pointer;
+ text-decoration: underline;
+ font-weight: bolder;
+}
+A:active {
+ cursor: pointer;
+ text-decoration: underline;
+ font-weight: bolder;
+ font-style: italic;
+}
+H1 {
+ text-align: center;
+ font-size: 140%;
+ font-weight: bold;
+}
+H2 {
+ font-size: 120%;
+ font-style: italic;
+}
+H3 {
+ font-size: 100%;
+}
+A.qindex {}
+A.qindexRef {}
+A.el { text-decoration: none; font-weight: bold }
+A.elRef { font-weight: bold }
+A.code { text-decoration: none; font-weight: normal; color: #4444ee }
+A.codeRef { font-weight: normal; color: #4444ee }
diff --git a/docs/doxygen.footer b/docs/doxygen.footer
new file mode 100644
index 0000000..ddf7523
--- /dev/null
+++ b/docs/doxygen.footer
@@ -0,0 +1,13 @@
+<hr>
+<p class="footer">
+Generated on $datetime for <a href="http://llvm.org">$projectname</a> by
+<a href="http://www.doxygen.org"><img src="doxygen.png" alt="Doxygen"
+align="middle" border="0"/>$doxygenversion</a><br/>
+Copyright &copy; 2003-2007 University of Illinois at Urbana-Champaign.
+All Rights Reserved.</p>
+
+<hr>
+<!--#include virtual="/attrib.incl" -->
+
+</body>
+</html>
diff --git a/docs/doxygen.header b/docs/doxygen.header
new file mode 100644
index 0000000..a520434
--- /dev/null
+++ b/docs/doxygen.header
@@ -0,0 +1,9 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+<html><head>
+<meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1"/>
+<meta name="keywords" content="LLVM,Low Level Virtual Machine,C++,doxygen,API,documentation"/>
+<meta name="description" content="C++ source code API documentation for the Low Level Virtual Machine (LLVM)."/>
+<title>LLVM: $title</title>
+<link href="doxygen.css" rel="stylesheet" type="text/css"/>
+</head><body>
+<p class="title">LLVM API Documentation</p>
diff --git a/docs/doxygen.intro b/docs/doxygen.intro
new file mode 100644
index 0000000..547730c
--- /dev/null
+++ b/docs/doxygen.intro
@@ -0,0 +1,18 @@
+/// @mainpage Low Level Virtual Machine
+///
+/// @section main_intro Introduction
+/// Welcome to the Low Level Virtual Machine (LLVM).
+///
+/// This documentation describes the @b internal software that makes
+/// up LLVM, not the @b external use of LLVM. There are no instructions
+/// here on how to use LLVM, only the APIs that make up the software. For usage
+/// instructions, please see the programmer's guide or reference manual.
+///
+/// @section main_caveat Caveat
+/// This documentation is generated directly from the source code with doxygen.
+/// Since LLVM is constantly under active development, what you're about to
+/// read is out of date! However, it may still be useful since certain portions
+/// of LLVM are very stable.
+///
+/// @section main_changelog Change Log
+/// - Original content written 12/30/2003 by Reid Spencer
diff --git a/docs/img/Debugging.gif b/docs/img/Debugging.gif
new file mode 100644
index 0000000..662d35a
--- /dev/null
+++ b/docs/img/Debugging.gif
Binary files differ
diff --git a/docs/img/libdeps.gif b/docs/img/libdeps.gif
new file mode 100644
index 0000000..c5c0ed4
--- /dev/null
+++ b/docs/img/libdeps.gif
Binary files differ
diff --git a/docs/img/lines.gif b/docs/img/lines.gif
new file mode 100644
index 0000000..88f491e
--- /dev/null
+++ b/docs/img/lines.gif
Binary files differ
diff --git a/docs/img/objdeps.gif b/docs/img/objdeps.gif
new file mode 100644
index 0000000..57c3e2e
--- /dev/null
+++ b/docs/img/objdeps.gif
Binary files differ
diff --git a/docs/img/venusflytrap.jpg b/docs/img/venusflytrap.jpg
new file mode 100644
index 0000000..59340ef
--- /dev/null
+++ b/docs/img/venusflytrap.jpg
Binary files differ
diff --git a/docs/index.html b/docs/index.html
new file mode 100644
index 0000000..e3da6ac
--- /dev/null
+++ b/docs/index.html
@@ -0,0 +1,273 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>Documentation for the LLVM System</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<div class="doc_title">Documentation for the LLVM System</div>
+
+<div class="doc_text">
+<table class="layout" width="95%"><tr class="layout"><td class="left">
+<ul>
+ <li><a href="#llvmdesign">LLVM Design</a></li>
+ <li><a href="/pubs/">LLVM Publications</a></li>
+ <li><a href="#userguide">LLVM User Guides</a></li>
+ <li><a href="#llvmprog">General LLVM Programming Documentation</a></li>
+ <li><a href="#subsystems">LLVM Subsystem Documentation</a></li>
+ <li><a href="#maillist">LLVM Mailing Lists</a></li>
+</ul>
+</td><td class="right">
+ <form action="http://www.google.com/search" method=get>
+ <input type="hidden" name="sitesearch" value="llvm.org/docs">
+ <input type=text name=q size=25><br>
+ <input type=submit value="Search the LLVM Docs" name="submit">
+ </form>
+</td></tr></table>
+</div>
+
+<div class="doc_author">
+ <p>Written by <a href="http://llvm.org">The LLVM Team</a></p>
+</div>
+
+<!--=======================================================================-->
+<div class="doc_section"><a name="llvmdesign">LLVM Design &amp; Overview</a></div>
+<!--=======================================================================-->
+
+<ul>
+<li><a href="LangRef.html">LLVM Language Reference Manual</a> - Defines the LLVM
+intermediate representation.</li>
+<li><a href="http://llvm.org/pubs/2006-04-25-GelatoLLVMIntro.html">Introduction to the LLVM Compiler Infrastructure</a> - Presentation describing LLVM.</li>
+<li><a href="http://llvm.org/pubs/2004-09-22-LCPCLLVMTutorial.html">The LLVM Compiler Framework and
+Infrastructure Tutorial</a> - Tutorial for writing passes, exploring the system.</li>
+<li><a href="http://llvm.org/pubs/2004-01-30-CGO-LLVM.html">LLVM: A Compilation Framework for
+Lifelong Program Analysis &amp; Transformation</a> - Design overview.</li>
+<li><a href="http://llvm.org/pubs/2002-12-LattnerMSThesis.html">LLVM: An Infrastructure for
+Multi-Stage Optimization</a> - More details (somewhat old now).</li>
+<li><a href="GetElementPtr.html">GetElementPtr FAQ</a> - Answers to some very
+frequent questions about LLVM's most frequently misunderstood instruction.</li>
+</ul>
+
+<!--=======================================================================-->
+<div class="doc_section"><a name="userguide">LLVM User Guides</a></div>
+<!--=======================================================================-->
+
+<ul>
+<li><a href="GettingStarted.html">The LLVM Getting Started Guide</a> -
+Discusses how to get up and running quickly with the LLVM infrastructure.
+Everything from unpacking and compilation of the distribution to execution of
+some tools.</li>
+
+<li><a href="GettingStartedVS.html">Getting Started with the LLVM System using
+Microsoft Visual Studio</a> - An addendum to the main Getting Started guide for
+those using Visual Studio on Windows.</li>
+
+<li><a href="DeveloperPolicy.html">Developer Policy</a> - The LLVM project's
+policy towards developers and their contributions.</li>
+
+<li><a href="/cmds/index.html">LLVM Command Guide</a> - A reference
+manual for the LLVM command line utilities ("man" pages for LLVM tools).<br/>
+Current tools:
+ <a href="/cmds/html/llvm-ar.html">llvm-ar</a>,
+ <a href="/cmds/html/llvm-as.html">llvm-as</a>,
+ <a href="/cmds/html/llvm-dis.html">llvm-dis</a>,
+ <a href="/cmds/html/llvm-extract.html">llvm-extract</a>,
+ <a href="/cmds/html/llvm-ld.html">llvm-ld</a>,
+ <a href="/cmds/html/llvm-link.html">llvm-link</a>,
+ <a href="/cmds/html/llvm-nm.html">llvm-nm</a>,
+ <a href="/cmds/html/llvm-prof.html">llvm-prof</a>,
+ <a href="/cmds/html/llvm-ranlib.html">llvm-ranlib</a>,
+ <a href="/cmds/html/opt.html">opt</a>,
+ <a href="/cmds/html/llc.html">llc</a>,
+ <a href="/cmds/html/lli.html">lli</a>,
+ <a href="/cmds/html/llvmc.html">llvmc</a>,
+ <a href="/cmds/html/llvmgcc.html">llvm-gcc</a>,
+ <a href="/cmds/html/llvmgxx.html">llvm-g++</a>,
+ <a href="/cmds/html/stkrc.html">stkrc</a>,
+ <a href="/cmds/html/bugpoint.html">bugpoint</a>,
+ <a href="/cmds/html/llvm-bcanalyzer.html">llvm-bcanalyzer</a>
+</li>
+
+<li><a href="FAQ.html">Frequently Asked Questions</a> - A list of common
+questions and problems and their solutions.</li>
+
+<li><a href="ReleaseNotes.html">Release notes for the current release</a>
+- This describes new features, known bugs, and other limitations.</li>
+
+<li><a href="HowToSubmitABug.html">How to Submit A Bug Report</a> -
+Instructions for properly submitting information about any bugs you run into in
+the LLVM system.</li>
+
+<li><a href="TestingGuide.html">LLVM Test Suite Guide</a> - A reference
+manual for using the LLVM test suite.</li>
+
+<li><a href="CFEBuildInstrs.html">How to build the C/C++ front-end</a> -
+Instructions for building the front-end from source.</li>
+
+<li><a href="Lexicon.html">The LLVM Lexicon</a> - Definition of acronyms, terms
+and concepts used in LLVM.</li>
+
+<li><a name="irc">You can probably find help on the unofficial LLVM IRC
+channel</a>. We often are on irc.oftc.net in the #llvm channel. If you are
+using the mozilla browser, and have chatzilla installed, you can <a
+href="irc://irc.oftc.net/llvm">join #llvm on irc.oftc.net</a> directly.</li>
+
+</ul>
+
+
+<!--=======================================================================-->
+<div class="doc_section"><a name="llvmprog">General LLVM Programming Documentation</a></div>
+<!--=======================================================================-->
+
+<ul>
+<li><a href="LangRef.html">LLVM Language Reference Manual</a> - Defines the LLVM
+intermediate representation and the assembly form of the different nodes.</li>
+
+<li><a href="ProgrammersManual.html">The LLVM Programmers Manual</a> -
+Introduction to the general layout of the LLVM sourcebase, important classes
+and APIs, and some tips &amp; tricks.</li>
+
+<li><a href="Projects.html">LLVM Project Guide</a> - How-to guide and
+templates for new projects that <em>use</em> the LLVM infrastructure. The
+templates (directory organization, Makefiles, and test tree) allow the project
+code to be located outside (or inside) the <tt>llvm/</tt> tree, while using LLVM
+header files and libraries.</li>
+
+<li><a href="MakefileGuide.html">LLVM Makefile Guide</a> - Describes how the
+LLVM makefiles work and how to use them.</li>
+
+<li><a href="CommandLine.html">CommandLine library Reference Manual</a> -
+Provides information on using the command line parsing library.</li>
+
+<li><a href="CodingStandards.html">LLVM Coding standards</a> -
+Details the LLVM coding standards and provides useful information on writing
+efficient C++ code.</li>
+
+<li><a href="ExtendingLLVM.html">Extending LLVM</a> - Look here to see how
+to add instructions and intrinsics to LLVM.</li>
+
+<li><a href="UsingLibraries.html">Using LLVM Libraries</a> - Look here to
+understand how to use the libraries produced when LLVM is compiled.</li>
+
+<li><a href="HowToReleaseLLVM.html">How To Release LLVM To The Public</a> - This
+is a guide to preparing LLVM releases. Most developers can ignore it.</li>
+
+<li><a href="http://llvm.org/doxygen/">Doxygen generated
+documentation</a> (<a
+href="http://llvm.org/doxygen/inherits.html">classes</a>)
+
+(<a href="http://llvm.org/doxygen/doxygen.tar.gz">tarball</a>)
+</li>
+
+<li><a href="http://llvm.org/viewvc/">ViewVC Repository Browser</a></li>
+
+</ul>
+
+<!--=======================================================================-->
+<div class="doc_section"><a name="subsystems">LLVM Subsystem Documentation</a></div>
+<!--=======================================================================-->
+
+<ul>
+
+<li><a href="WritingAnLLVMPass.html">Writing an LLVM Pass</a> - Information
+on how to write LLVM transformations and analyses.</li>
+
+<li><a href="WritingAnLLVMBackend.html">Writing an LLVM Backend</a> - Information
+on how to write LLVM backends for machine targets.</li>
+
+<li><a href="CodeGenerator.html">The LLVM Target-Independent Code
+Generator</a> - The design and implementation of the LLVM code generator.
+Useful if you are working on retargeting LLVM to a new architecture, designing
+a new codegen pass, or enhancing existing components.</li>
+
+<li><a href="TableGenFundamentals.html">TableGen Fundamentals</a> -
+Describes the TableGen tool, which is used heavily by the LLVM code
+generator.</li>
+
+<li><a href="AliasAnalysis.html">Alias Analysis in LLVM</a> - Information
+on how to write a new alias analysis implementation or how to use existing
+analyses.</li>
+
+<li><a href="Stacker.html">The Stacker Chronicles</a> - This document
+describes not only the Stacker language and LLVM frontend, but also some
+details about LLVM useful for those writing front-ends.</li>
+
+<li><a href="GarbageCollection.html">Accurate Garbage Collection with
+LLVM</a> - The interfaces source-language compilers should use for compiling
+GC'd programs.</li>
+
+<li><a href="SourceLevelDebugging.html">Source Level Debugging with
+LLVM</a> - This document describes the design and philosophy behind the LLVM
+source-level debugger.</li>
+
+<li><a href="ExceptionHandling.html">Zero Cost Exception handling in LLVM</a>
+- This document describes the design and implementation of exception handling
+in LLVM.</li>
+
+<li><a href="Bugpoint.html">Bugpoint</a> - automatic bug finder and test-case
+reducer description and usage information.</li>
+
+<li><a href="CompilerDriver.html">Compiler Driver (llvmc)</a> - This document
+describes the design and configuration of the LLVM compiler driver tool,
+<tt>llvmc</tt>.</li>
+
+<li><a href="BitCodeFormat.html">LLVM Bitcode File Format</a></li>
+
+<li><a href="SystemLibrary.html">System Library</a> - This document describes
+the LLVM System Library (<tt>lib/System</tt>) and how to keep LLVM source code
+portable.</li>
+
+<li><a href="LinkTimeOptimization.html">Link Time Optimization</a> - This
+document describes the interface between the LLVM intermodular optimizer and
+the linker and its design.</li>
+
+</ul>
+
+
+<!--=======================================================================-->
+<div class="doc_section"><a name="maillist">LLVM Mailing Lists</a></div>
+<!--=======================================================================-->
+
+<ul>
+<li>The <a href="http://mail.cs.uiuc.edu/mailman/listinfo/llvm-announce">
+LLVM Announcements List</a>: This is a low volume list that provides important
+announcements regarding LLVM. It gets email about once a month.</li>
+
+<li>The <a href="http://mail.cs.uiuc.edu/mailman/listinfo/llvmdev">Developer's
+List</a>: This list is for people who want to be included in technical
+discussions of LLVM. People post to this list when they have questions about
+writing code for or using the LLVM tools. It is relatively low volume.</li>
+
+<li>The <a href="http://mail.cs.uiuc.edu/pipermail/llvmbugs/">Bugs &amp;
+Patches Archive</a>: This list gets emailed every time a bug is opened or
+closed, and when people submit patches to be included in LLVM. It is higher
+volume than the LLVMdev list.</li>
+
+<li>The <a href="http://mail.cs.uiuc.edu/pipermail/llvm-commits/">Commits
+Archive</a>: This list contains all commit messages that are made when LLVM
+developers commit code changes to the repository. It is useful for those who
+want to stay on the bleeding edge of LLVM development. This list is very high
+volume.</li>
+
+<li>The <a href="http://mail.cs.uiuc.edu/pipermail/llvm-testresults/">
+Test Results Archive</a>: A message is automatically sent to this list by every
+active nightly tester when it completes. As such, this list gets email several
+times each day, making it a high volume list.</li>
+
+</ul>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
+
+ <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+</body></html>
diff --git a/docs/llvm.css b/docs/llvm.css
new file mode 100644
index 0000000..69ae455
--- /dev/null
+++ b/docs/llvm.css
@@ -0,0 +1,84 @@
+/*
+ * LLVM documentation style sheet
+ */
+
+/* Common styles */
+.body { color: black; background: white; margin: 0 0 0 0 }
+
+/* No borders on image links */
+a:link img, a:visited img {border-style: none}
+
+address img { float: right; width: 88px; height: 31px; }
+address { clear: right; }
+
+TR, TD { border: 2px solid gray; padding: 4pt 4pt 2pt 2pt; }
+TH { border: 2px solid gray; font-weight: bold; font-size: 105%;
+ color: black; background: url("img/lines.gif"); text-align: center;
+ vertical-align: middle; /* font list unquoted: each name is its own fallback */
+ font-family: Georgia, Palatino, Times, Roman, sans-serif; }
+TABLE { text-align: center; border: 2px solid black;
+ border-collapse: collapse; margin-top: 1em; margin-left: 1em;
+ margin-right: 1em; margin-bottom: 1em; }
+/*
+ * Documentation
+ */
+/* Common for title and header */
+.doc_title, .doc_section, .doc_subsection {
+ color: black; background: url("img/lines.gif");
+ /* Font list is unquoted so each name is a separate fallback family. */
+ font-family: Georgia, Palatino, Times, Roman, sans-serif; font-weight: bold;
+ border-width: 1px; border-style: solid none solid none;
+ text-align: center;
+ vertical-align: middle;
+ padding-left: 8pt;
+ padding-top: 1px;
+ padding-bottom: 2px
+}
+
+.doc_title { text-align: left; font-size: 25pt }
+.doc_section { text-align: center; font-size: 22pt;
+ margin: 20pt 0pt 5pt 0pt; }
+.doc_subsection { width: 75%;
+ text-align: left; font-size: 12pt; padding: 4pt 4pt 4pt 4pt;
+ margin: 1.5em 0.5em 0.5em 0.5em }
+
+.doc_subsubsection { margin: 2.0em 0.5em 0.5em 0.5em;
+ font-weight: bold; font-style: oblique;
+ border-bottom: 1px solid #999999; font-size: 12pt;
+ width: 75%; }
+.doc_author { text-align: left; font-weight: bold; padding-left: 20pt }
+.doc_text { text-align: left; padding-left: 20pt; padding-right: 10pt }
+
+.doc_footer { text-align: left; padding: 0 0 0 0 }
+
+.doc_hilite { color: blue; font-weight: bold; }
+
+.doc_table { text-align: center; width: 90%;
+ padding: 1px 1px 1px 1px; border: 1px; }
+
+.doc_table_nw { text-align: center; border: 1px;
+ padding: 1px 1px 1px 1px; }
+
+.doc_warning { color: red; font-weight: bold }
+
+.doc_code { border: solid 1px gray; background: #eeeeee;
+ margin: 0 1em 0 1em;
+ padding: 0 1em 0 1em;
+ display:table;
+ }
+.doc_notes { background: #fafafa; border: 1px solid #cecece; padding: 0.1em }
+
+TABLE.layout { text-align: left; border: none; border-collapse: collapse;
+ padding: 4px 4px 4px 4px; }
+TR.layout { border: none; padding: 4pt 4pt 2pt 2pt; }
+TD.layout { border: none; padding: 4pt 4pt 2pt 2pt;
+ vertical-align: top;}
+TD.left { border: none; padding: 4pt 4pt 2pt 2pt; text-align: left;
+ vertical-align: top;}
+TD.right { border: none; padding: 4pt 4pt 2pt 2pt; text-align: right;
+ vertical-align: top;}
+TH.layout { border: none; font-weight: bold; font-size: 105%;
+ text-align:center; vertical-align: middle; }
+
+/* Left align table cell */
+.td_left { border: 2px solid gray; text-align: left; }