From dce4a407a24b04eebc6a376f8e62b41aaa7b071f Mon Sep 17 00:00:00 2001
From: Stephen Hines <srhines@google.com>
Date: Thu, 29 May 2014 02:49:00 -0700
Subject: Update LLVM for 3.5 rebase (r209712).

Change-Id: I149556c940fb7dc92d075273c87ff584f400941f
---
 docs/ARM-BE-bitcastfail.png           | Bin 0 -> 29373 bytes
 docs/ARM-BE-bitcastsuccess.png        | Bin 0 -> 41468 bytes
 docs/ARM-BE-ld1.png                   | Bin 0 -> 22561 bytes
 docs/ARM-BE-ldr.png                   | Bin 0 -> 16516 bytes
 docs/AliasAnalysis.rst                |  20 +++-
 docs/BigEndianNEON.rst                | 205 ++++++++++++++++++++++++++++++++++
 docs/BitCodeFormat.rst                |   2 -
 docs/BlockFrequencyTerminology.rst    | 130 +++++++++++++++++++++
 docs/BranchWeightMetadata.rst         |   9 +-
 docs/Bugpoint.rst                     |   2 +-
 docs/CMake.rst                        |  40 ++++++-
 docs/CMakeLists.txt                   |  15 +++
 docs/CodeGenerator.rst                |  14 +--
 docs/CodingStandards.rst              |  21 ++--
 docs/CommandGuide/index.rst           |   1 +
 docs/CommandGuide/llvm-cov.rst        | 114 ++++++++++++++++---
 docs/CommandGuide/llvm-dwarfdump.rst  |  30 +++++
 docs/CommandGuide/llvm-symbolizer.rst |   7 +-
 docs/CommandGuide/tblgen.rst          |   3 +-
 docs/CompilerWriterInfo.rst           |   2 +-
 docs/DeveloperPolicy.rst              |   2 +-
 docs/Extensions.rst                   |  31 +++++
 docs/GettingStartedVS.rst             |   8 +-
 docs/LLVMBuild.rst                    |   4 +-
 docs/LangRef.rst                      | 187 ++++++++++++++++++++++++-------
 docs/Passes.rst                       |  28 ++---
 docs/Phabricator.rst                  |   9 +-
 docs/ProgrammersManual.rst            |  90 +++++++++++++--
 docs/README.txt                       |   9 ++
 docs/ReleaseNotes.rst                 |   3 +
 docs/SegmentedStacks.rst              |   5 +-
 docs/TableGen/LangIntro.rst           |  10 +-
 docs/TableGen/LangRef.rst             |   6 +-
 docs/WritingAnLLVMBackend.rst         |   2 +-
 docs/YamlIO.rst                       |  40 ++++++-
 docs/index.rst                        |  13 ++-
 36 files changed, 917 insertions(+), 145 deletions(-)
 create mode 100644 docs/ARM-BE-bitcastfail.png
 create mode 100644 docs/ARM-BE-bitcastsuccess.png
 create mode 100644 docs/ARM-BE-ld1.png
 create mode 100644 docs/ARM-BE-ldr.png
 create mode 100644 docs/BigEndianNEON.rst
 create mode 100644 docs/BlockFrequencyTerminology.rst
 create mode 100644 docs/CommandGuide/llvm-dwarfdump.rst

(limited to 'docs')
diff --git a/docs/ARM-BE-bitcastfail.png b/docs/ARM-BE-bitcastfail.png
new file mode 100644
index 0000000..0c83f0b
Binary files /dev/null and b/docs/ARM-BE-bitcastfail.png differ
diff --git a/docs/ARM-BE-bitcastsuccess.png b/docs/ARM-BE-bitcastsuccess.png
new file mode 100644
index 0000000..8f3414d
Binary files /dev/null and b/docs/ARM-BE-bitcastsuccess.png differ
diff --git a/docs/ARM-BE-ld1.png b/docs/ARM-BE-ld1.png
new file mode 100644
index 0000000..f58c99d
Binary files /dev/null and b/docs/ARM-BE-ld1.png differ
diff --git a/docs/ARM-BE-ldr.png b/docs/ARM-BE-ldr.png
new file mode 100644
index 0000000..73db07e
Binary files /dev/null and b/docs/ARM-BE-ldr.png differ
diff --git a/docs/AliasAnalysis.rst b/docs/AliasAnalysis.rst
index 712d57d..1cbaee7 100644
--- a/docs/AliasAnalysis.rst
+++ b/docs/AliasAnalysis.rst
@@ -51,7 +51,7 @@ starting address and size, and function calls are represented as the actual
 get mod/ref information for arbitrary instructions.
 
 All ``AliasAnalysis`` interfaces require that in queries involving multiple
-values, values which are not `constants <LangRef.html#constants>`_ are all
+values, values which are not :ref:`constants <constants>` are all
 defined within the same function.
 
 Representation of Pointers
@@ -111,7 +111,7 @@ returns MustAlias, PartialAlias, MayAlias, or NoAlias as appropriate.
 
 Like all ``AliasAnalysis`` interfaces, the ``alias`` method requires that either
 the two pointer values be defined within the same function, or at least one of
-the values is a `constant <LangRef.html#constants>`_.
+the values is a :ref:`constant <constants>`.
 
 .. _Must, May, or No:
 
@@ -126,7 +126,7 @@ used for reading memory. Another is when the memory is freed and reallocated
 between accesses through one pointer and accesses through the other --- in this
 case, there is a dependence, but it's mediated by the free and reallocation.
 
-As an exception to this is with the `noalias <LangRef.html#noalias>`_ keyword;
+As an exception to this is with the :ref:`noalias <noalias>` keyword;
 the "irrelevant" dependencies are ignored.
 
 The ``MayAlias`` response is used whenever the two pointers might refer to the
@@ -246,6 +246,20 @@ analysis run method (``run`` for a ``Pass``, ``runOnFunction`` for a
     return false;
   }
 
+Required methods to override
+----------------------------
+
+You must override the ``getAdjustedAnalysisPointer`` method on all subclasses
+of ``AliasAnalysis``. An example implementation of this method would look like:
+
+.. code-block:: c++
+
+  void *getAdjustedAnalysisPointer(const void* ID) override {
+    if (ID == &AliasAnalysis::ID)
+      return (AliasAnalysis*)this;
+    return this;
+  }
+
 Interfaces which may be specified
 ---------------------------------
 
diff --git a/docs/BigEndianNEON.rst b/docs/BigEndianNEON.rst
new file mode 100644
index 0000000..242eb0e
--- /dev/null
+++ b/docs/BigEndianNEON.rst
@@ -0,0 +1,205 @@
+==============================================
+Using ARM NEON instructions in big endian mode
+==============================================
+
+.. contents::
+    :local:
+
+Introduction
+============
+
+Generating code for big endian ARM processors is for the most part straightforward. NEON loads and stores however have some interesting properties that make code generation decisions less obvious in big endian mode.
+
+The aim of this document is to explain the problem with NEON loads and stores, and the solution that has been implemented in LLVM.
+
+In this document the term "vector" refers to what the ARM ABI calls a "short vector", which is a sequence of items that can fit in a NEON register. This sequence can be 64 or 128 bits in length, and can constitute 8, 16, 32 or 64 bit items. This document refers to A64 instructions throughout, but is almost applicable to the A32/ARMv7 instruction sets also. The ABI format for passing vectors in A32 is sligtly different to A64. Apart from that, the same concepts apply.
+
+Example: C-level intrinsics -> assembly
+---------------------------------------
+
+It may be helpful first to illustrate how C-level ARM NEON intrinsics are lowered to instructions.
+
+This trivial C function takes a vector of four ints and sets the zero'th lane to the value "42"::
+
+    #include <arm_neon.h>
+    int32x4_t f(int32x4_t p) {
+        return vsetq_lane_s32(42, p, 0);
+    }
+
+arm_neon.h intrinsics generate "generic" IR where possible (that is, normal IR instructions not ``llvm.arm.neon.*`` intrinsic calls). The above generates::
+
+    define <4 x i32> @f(<4 x i32> %p) {
+      %vset_lane = insertelement <4 x i32> %p, i32 42, i32 0
+      ret <4 x i32> %vset_lane
+    }
+
+Which then becomes the following trivial assembly::
+
+    f:                                      // @f
+            movz	w8, #0x2a
+            ins 	v0.s[0], w8
+            ret
+
+Problem
+=======
+
+The main problem is how vectors are represented in memory and in registers.
+
+First, a recap. The "endianness" of an item affects its representation in memory only. In a register, a number is just a sequence of bits - 64 bits in the case of AArch64 general purpose registers. Memory, however, is a sequence of addressable units of 8 bits in size. Any number greater than 8 bits must therefore be split up into 8-bit chunks, and endianness describes the order in which these chunks are laid out in memory.
+
+A "little endian" layout has the least significant byte first (lowest in memory address). A "big endian" layout has the *most* significant byte first. This means that when loading an item from big endian memory, the lowest 8-bits in memory must go in the most significant 8-bits, and so forth.
+
+``LDR`` and ``LD1``
+===================
+
+.. figure:: ARM-BE-ldr.png
+    :align: right
+    
+    Big endian vector load using ``LDR``.
+
+
+A vector is a consecutive sequence of items that are operated on simultaneously. To load a 64-bit vector, 64 bits need to be read from memory. In little endian mode, we can do this by just performing a 64-bit load - ``LDR q0, [foo]``. However if we try this in big endian mode, because of the byte swapping the lane indices end up being swapped! The zero'th item as laid out in memory becomes the n'th lane in the vector.
+
+.. figure:: ARM-BE-ld1.png
+    :align: right
+
+    Big endian vector load using ``LD1``. Note that the lanes retain the correct ordering.
+
+
+Because of this, the instruction ``LD1`` performs a vector load but performs byte swapping not on the entire 64 bits, but on the individual items within the vector. This means that the register content is the same as it would have been on a little endian system.
+
+It may seem that ``LD1`` should suffice to peform vector loads on a big endian machine. However there are pros and cons to the two approaches that make it less than simple which register format to pick.
+
+There are two options:
+
+    1. The content of a vector register is the same *as if* it had been loaded with an ``LDR`` instruction.
+    2. The content of a vector register is the same *as if* it had been loaded with an ``LD1`` instruction.
+
+Because ``LD1 == LDR + REV`` and similarly ``LDR == LD1 + REV`` (on a big endian system), we can simulate either type of load with the other type of load plus a ``REV`` instruction. So we're not deciding which instructions to use, but which format to use (which will then influence which instruction is best to use).
+
+.. The 'clearer' container is required to make the following section header come after the floated
+   images above.
+.. container:: clearer
+
+    Note that throughout this section we only mention loads. Stores have exactly the same problems as their associated loads, so have been skipped for brevity.
+ 
+
+Considerations
+==============
+
+LLVM IR Lane ordering
+---------------------
+
+LLVM IR has first class vector types. In LLVM IR, the zero'th element of a vector resides at the lowest memory address. The optimizer relies on this property in certain areas, for example when concatenating vectors together. The intention is for arrays and vectors to have identical memory layouts - ``[4 x i8]`` and ``<4 x i8>`` should be represented the same in memory. Without this property there would be many special cases that the optimizer would have to cleverly handle.
+
+Use of ``LDR`` would break this lane ordering property. This doesn't preclude the use of ``LDR``, but we would have to do one of two things:
+
+   1. Insert a ``REV`` instruction to reverse the lane order after every ``LDR``.
+   2. Disable all optimizations that rely on lane layout, and for every access to an individual lane (``insertelement``/``extractelement``/``shufflevector``) reverse the lane index.
+
+AAPCS
+-----
+
+The ARM procedure call standard (AAPCS) defines the ABI for passing vectors between functions in registers. It states:
+
+    When a short vector is transferred between registers and memory it is treated as an opaque object. That is a short vector is stored in memory as if it were stored with a single ``STR`` of the entire register; a short vector is loaded from memory using the corresponding ``LDR`` instruction. On a little-endian system this means that element 0 will always contain the lowest addressed element of a short vector; on a big-endian system element 0 will contain the highest-addressed element of a short vector.
+
+    -- Procedure Call Standard for the ARM 64-bit Architecture (AArch64), 4.1.2 Short Vectors
+
+The use of ``LDR`` and ``STR`` as the ABI defines has at least one advantage over ``LD1`` and ``ST1``. ``LDR`` and ``STR`` are oblivious to the size of the individual lanes of a vector. ``LD1`` and ``ST1`` are not - the lane size is encoded within them. This is important across an ABI boundary, because it would become necessary to know the lane width the callee expects. Consider the following code:
+
+.. code-block:: c
+
+    <callee.c>
+    void callee(uint32x2_t v) {
+      ...
+    }
+
+    <caller.c>
+    extern void callee(uint32x2_t);
+    void caller() {
+      callee(...);
+    }
+
+If ``callee`` changed its signature to ``uint16x4_t``, which is equivalent in register content, if we passed as ``LD1`` we'd break this code until ``caller`` was updated and recompiled.
+
+There is an argument that if the signatures of the two functions are different then the behaviour should be undefined. But there may be functions that are agnostic to the lane layout of the vector, and treating the vector as an opaque value (just loading it and storing it) would be impossible without a common format across ABI boundaries.
+
+So to preserve ABI compatibility, we need to use the ``LDR`` lane layout across function calls.
+
+Alignment
+---------
+
+In strict alignment mode, ``LDR qX`` requires its address to be 128-bit aligned, whereas ``LD1`` only requires it to be as aligned as the lane size. If we canonicalised on using ``LDR``, we'd still need to use ``LD1`` in some places to avoid alignment faults (the result of the ``LD1`` would then need to be reversed with ``REV``).
+
+Most operating systems however do not run with alignment faults enabled, so this is often not an issue.
+
+Summary
+-------
+
+The following table summarises the instructions that are required to be emitted for each property mentioned above for each of the two solutions.
+
++-------------------------------+-------------------------------+---------------------+
+|                               | ``LDR`` layout                | ``LD1`` layout      |
++===============================+===============================+=====================+
+| Lane ordering                 |   ``LDR + REV``               |    ``LD1``          |
++-------------------------------+-------------------------------+---------------------+
+| AAPCS                         |   ``LDR``                     |    ``LD1 + REV``    |
++-------------------------------+-------------------------------+---------------------+
+| Alignment for strict mode     |   ``LDR`` / ``LD1 + REV``     |    ``LD1``          |
++-------------------------------+-------------------------------+---------------------+
+
+Neither approach is perfect, and choosing one boils down to choosing the lesser of two evils. The issue with lane ordering, it was decided, would have to change target-agnostic compiler passes and would result in a strange IR in which lane indices were reversed. It was decided that this was worse than the changes that would have to be made to support ``LD1``, so ``LD1`` was chosen as the canonical vector load instruction (and by inference, ``ST1`` for vector stores).
+
+Implementation
+==============
+
+There are 3 parts to the implementation:
+
+    1. Predicate ``LDR`` and ``STR`` instructions so that they are never allowed to be selected to generate vector loads and stores. The exception is one-lane vectors [1]_ - these by definition cannot have lane ordering problems so are fine to use ``LDR``/``STR``. 
+
+    2. Create code generation patterns for bitconverts that create ``REV`` instructions.
+
+    3. Make sure appropriate bitconverts are created so that vector values get passed over call boundaries as 1-element vectors (which is the same as if they were loaded with ``LDR``).
+
+Bitconverts
+-----------
+
+.. image:: ARM-BE-bitcastfail.png
+    :align: right
+
+The main problem with the ``LD1`` solution is dealing with bitconverts (or bitcasts, or reinterpret casts). These are pseudo instructions that only change the compiler's interpretation of data, not the underlying data itself. A requirement is that if data is loaded and then saved again (called a "round trip"), the memory contents should be the same after the store as before the load. If a vector is loaded and is then bitconverted to a different vector type before storing, the round trip will currently be broken.
+
+Take for example this code sequence::
+
+    %0 = load <4 x i32> %x
+    %1 = bitcast <4 x i32> %0 to <2 x i64>
+         store <2 x i64> %1, <2 x i64>* %y
+
+This would produce a code sequence such as that in the figure on the right. The mismatched ``LD1`` and ``ST1`` cause the stored data to differ from the loaded data.
+
+.. container:: clearer
+
+    When we see a bitcast from type ``X`` to type ``Y``, what we need to do is to change the in-register representation of the data to be *as if* it had just been loaded by a ``LD1`` of type ``Y``.
+
+.. image:: ARM-BE-bitcastsuccess.png
+    :align: right
+
+Conceptually this is simple - we can insert a ``REV`` undoing the ``LD1`` of type ``X`` (converting the in-register representation to the same as if it had been loaded by ``LDR``) and then insert another ``REV`` to change the representation to be as if it had been loaded by an ``LD1`` of type ``Y``.
+
+For the previous example, this would be::
+
+    LD1   v0.4s, [x]
+
+    REV64 v0.4s, v0.4s                  // There is no REV128 instruction, so it must be synthesizedcd 
+    EXT   v0.16b, v0.16b, v0.16b, #8    // with a REV64 then an EXT to swap the two 64-bit elements.
+
+    REV64 v0.2d, v0.2d
+    EXT   v0.16b, v0.16b, v0.16b, #8
+
+    ST1   v0.2d, [y]
+
+It turns out that these ``REV`` pairs can, in almost all cases, be squashed together into a single ``REV``. For the example above, a ``REV128 4s`` + ``REV128 2d`` is actually a ``REV64 4s``, as shown in the figure on the right.
+
+.. [1] One lane vectors may seem useless as a concept but they serve to distinguish between values held in general purpose registers and values held in NEON/VFP registers. For example, an ``i64`` would live in an ``x`` register, but ``<1 x i64>`` would live in a ``d`` register.
+
diff --git a/docs/BitCodeFormat.rst b/docs/BitCodeFormat.rst
index 86436ff..fce1e37 100644
--- a/docs/BitCodeFormat.rst
+++ b/docs/BitCodeFormat.rst
@@ -747,8 +747,6 @@ function. The operand fields are:
   * ``arm_apcscc``: code 66
   * ``arm_aapcscc``: code 67
   * ``arm_aapcs_vfpcc``: code 68
-  * ``x86_thiscallcc``: code 70
-  * ``x86_cdeclmethodcc``: code 80
 
 * isproto*: Non-zero if this entry represents a declaration rather than a
   definition
diff --git a/docs/BlockFrequencyTerminology.rst b/docs/BlockFrequencyTerminology.rst
new file mode 100644
index 0000000..41f89f8
--- /dev/null
+++ b/docs/BlockFrequencyTerminology.rst
@@ -0,0 +1,130 @@
+================================
+LLVM Block Frequency Terminology
+================================
+
+.. contents::
+   :local:
+
+Introduction
+============
+
+Block Frequency is a metric for estimating the relative frequency of different
+basic blocks.  This document describes the terminology that the
+``BlockFrequencyInfo`` and ``MachineBlockFrequencyInfo`` analysis passes use.
+
+Branch Probability
+==================
+
+Blocks with multiple successors have probabilities associated with each
+outgoing edge.  These are called branch probabilities.  For a given block, the
+sum of its outgoing branch probabilities should be 1.0.
+
+Branch Weight
+=============
+
+Rather than storing fractions on each edge, we store an integer weight.
+Weights are relative to the other edges of a given predecessor block.  The
+branch probability associated with a given edge is its own weight divided by
+the sum of the weights on the predecessor's outgoing edges.
+
+For example, consider this IR:
+
+.. code-block:: llvm
+
+   define void @foo() {
+       ; ...
+       A:
+           br i1 %cond, label %B, label %C, !prof !0
+       ; ...
+   }
+   !0 = metadata !{metadata !"branch_weights", i32 7, i32 8}
+
+and this simple graph representation::
+
+   A -> B  (edge-weight: 7)
+   A -> C  (edge-weight: 8)
+
+The probability of branching from block A to block B is 7/15, and the
+probability of branching from block A to block C is 8/15.
+
+See :doc:`BranchWeightMetadata` for details about the branch weight IR
+representation.
+
+Block Frequency
+===============
+
+Block frequency is a relative metric that represents the number of times a
+block executes.  The ratio of a block frequency to the entry block frequency is
+the expected number of times the block will execute per entry to the function.
+
+Block frequency is the main output of the ``BlockFrequencyInfo`` and
+``MachineBlockFrequencyInfo`` analysis passes.
+
+Implementation: a series of DAGs
+================================
+
+The implementation of the block frequency calculation analyses each loop,
+bottom-up, ignoring backedges; i.e., as a DAG.  After each loop is processed,
+it's packaged up to act as a pseudo-node in its parent loop's (or the
+function's) DAG analysis.
+
+Block Mass
+==========
+
+For each DAG, the entry node is assigned a mass of ``UINT64_MAX`` and mass is
+distributed to successors according to branch weights.  Block Mass uses a
+fixed-point representation where ``UINT64_MAX`` represents ``1.0`` and ``0``
+represents a number just above ``0.0``.
+
+After mass is fully distributed, in any cut of the DAG that separates the exit
+nodes from the entry node, the sum of the block masses of the nodes succeeded
+by a cut edge should equal ``UINT64_MAX``.  In other words, mass is conserved
+as it "falls" through the DAG.
+
+If a function's basic block graph is a DAG, then block masses are valid block
+frequencies.  This works poorly in practise though, since downstream users rely
+on adding block frequencies together without hitting the maximum.
+
+Loop Scale
+==========
+
+Loop scale is a metric that indicates how many times a loop iterates per entry.
+As mass is distributed through the loop's DAG, the (otherwise ignored) backedge
+mass is collected.  This backedge mass is used to compute the exit frequency,
+and thus the loop scale.
+
+Implementation: Getting from mass and scale to frequency
+========================================================
+
+After analysing the complete series of DAGs, each block has a mass (local to
+its containing loop, if any), and each loop pseudo-node has a loop scale and
+its own mass (from its parent's DAG).
+
+We can get an initial frequency assignment (with entry frequency of 1.0) by
+multiplying these masses and loop scales together.  A given block's frequency
+is the product of its mass, the mass of containing loops' pseudo nodes, and the
+containing loops' loop scales.
+
+Since downstream users need integers (not floating point), this initial
+frequency assignment is shifted as necessary into the range of ``uint64_t``.
+
+Block Bias
+==========
+
+Block bias is a proposed *absolute* metric to indicate a bias toward or away
+from a given block during a function's execution.  The idea is that bias can be
+used in isolation to indicate whether a block is relatively hot or cold, or to
+compare two blocks to indicate whether one is hotter or colder than the other.
+
+The proposed calculation involves calculating a *reference* block frequency,
+where:
+
+* every branch weight is assumed to be 1 (i.e., every branch probability
+  distribution is even) and
+
+* loop scales are ignored.
+
+This reference frequency represents what the block frequency would be in an
+unbiased graph.
+
+The bias is the ratio of the block frequency to this reference block frequency.
diff --git a/docs/BranchWeightMetadata.rst b/docs/BranchWeightMetadata.rst
index 71ecd34..aff7923 100644
--- a/docs/BranchWeightMetadata.rst
+++ b/docs/BranchWeightMetadata.rst
@@ -8,10 +8,11 @@ LLVM Branch Weight Metadata
 Introduction
 ============
 
-Branch Weight Metadata represents branch weights as its likeliness to be
-taken. Metadata is assigned to the ``TerminatorInst`` as a ``MDNode`` of the
-``MD_prof`` kind. The first operator is always a ``MDString`` node with the
-string "branch_weights". Number of operators depends on the terminator type.
+Branch Weight Metadata represents branch weights as its likeliness to be taken
+(see :doc:`BlockFrequencyTerminology`). Metadata is assigned to the
+``TerminatorInst`` as a ``MDNode`` of the ``MD_prof`` kind. The first operator
+is always a ``MDString`` node with the string "branch_weights".  Number of
+operators depends on the terminator type.
 
 Branch weights might be fetch from the profiling file, or generated based on
 `__builtin_expect`_ instruction.
diff --git a/docs/Bugpoint.rst b/docs/Bugpoint.rst
index 1a5fc8c..8fa64bc 100644
--- a/docs/Bugpoint.rst
+++ b/docs/Bugpoint.rst
@@ -17,7 +17,7 @@ optimization (or combination of optimizations) that causes the crash, and reduce
 the file down to a small example which triggers the crash.
 
 For detailed case scenarios, such as debugging ``opt``, or one of the LLVM code
-generators, see `How To Submit a Bug Report document <HowToSubmitABug.html>`_.
+generators, see :doc:`HowToSubmitABug`.
 
 Design Philosophy
 =================
diff --git a/docs/CMake.rst b/docs/CMake.rst
index cbca1db..fed283d 100644
--- a/docs/CMake.rst
+++ b/docs/CMake.rst
@@ -87,7 +87,7 @@ names are case-sensitive. Example:
 
 .. code-block:: console
 
-  $ cmake -G "Visual Studio 10" path/to/llvm/source/root
+  $ cmake -G "Visual Studio 11" path/to/llvm/source/root
 
 For a given development platform there can be more than one adequate
 generator. If you use Visual Studio "NMake Makefiles" is a generator you can use
@@ -211,8 +211,8 @@ LLVM-specific variables
 **LLVM_ENABLE_THREADS**:BOOL
   Build with threads support, if available. Defaults to ON.
 
-**LLVM_ENABLE_CXX11**:BOOL
-  Build in C++11 mode, if available. Defaults to OFF.
+**LLVM_ENABLE_CXX1Y**:BOOL
+  Build in C++1y mode, if available. Defaults to OFF.
 
 **LLVM_ENABLE_ASSERTIONS**:BOOL
   Enables code assertions. Defaults to OFF if and only if ``CMAKE_BUILD_TYPE``
@@ -283,6 +283,12 @@ LLVM-specific variables
   are ``Address``, ``Memory`` and ``MemoryWithOrigins``. Defaults to empty
   string.
 
+**LLVM_BUILD_DOCS**:BOOL
+  Enables all enabled documentation targets (i.e. Doxgyen and Sphinx targets) to
+  be built as part of the normal build. If the ``install`` target is run then
+  this also enables all built documentation targets to be installed. Defaults to
+  OFF.
+
 **LLVM_ENABLE_DOXYGEN**:BOOL
   Enables the generation of browsable HTML documentation using doxygen.
   Defaults to OFF.
@@ -306,14 +312,13 @@ LLVM-specific variables
 
 **LLVM_DOXYGEN_QHP_NAMESPACE**:STRING
   Namespace under which the intermediate Qt Help Project file lives. See `Qt
-  Help Project <http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace>`_
+  Help Project`_
   for more information. Defaults to "org.llvm". This option is only useful in
   combination with ``-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON``; otherwise
   this has no effect.
     
 **LLVM_DOXYGEN_QHP_CUST_FILTER_NAME**:STRING
-  See `Qt Help Project
-  <http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-filters>`_ for
+  See `Qt Help Project`_ for
   more information. Defaults to the CMake variable ``${PACKAGE_STRING}`` which
   is a combination of the package name and version string. This filter can then
   be used in Qt Creator to select only documentation from LLVM when browsing
@@ -321,12 +326,35 @@ LLVM-specific variables
   useful in combination with ``-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON``;
   otherwise this has no effect.
 
+.. _Qt Help Project: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-filters
+
 **LLVM_DOXYGEN_QHELPGENERATOR_PATH**:STRING
   The path to the ``qhelpgenerator`` executable. Defaults to whatever CMake's
   ``find_program()`` can find. This option is only useful in combination with
   ``-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON``; otherwise this has no
   effect.
 
+**LLVM_ENABLE_SPHINX**:BOOL
+  If enabled CMake will search for the ``sphinx-build`` executable and will make
+  the ``SPHINX_OUTPUT_HTML`` and ``SPHINX_OUTPUT_MAN`` CMake options available.
+  Defaults to OFF.
+
+**SPHINX_EXECUTABLE**:STRING
+  The path to the ``sphinx-build`` executable detected by CMake.
+
+**SPHINX_OUTPUT_HTML**:BOOL
+  If enabled (and ``LLVM_ENABLE_SPHINX`` is enabled) then the targets for
+  building the documentation as html are added (but not built by default unless
+  ``LLVM_BUILD_DOCS`` is enabled). There is a target for each project in the
+  source tree that uses sphinx (e.g.  ``docs-llvm-html``, ``docs-clang-html``
+  and ``docs-lld-html``). Defaults to ON.
+
+**SPHINX_OUTPUT_MAN**:BOOL
+  If enabled (and ``LLVM_ENABLE_SPHINX`` is enabled) the targets for building
+  the man pages are added (but not built by default unless ``LLVM_BUILD_DOCS``
+  is enabled). Currently the only target added is ``docs-llvm-man``. Defaults
+  to ON.
+
 Executing the test suite
 ========================
 
diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt
index f0aa9c2..d310a0a 100644
--- a/docs/CMakeLists.txt
+++ b/docs/CMakeLists.txt
@@ -89,3 +89,18 @@ if (LLVM_ENABLE_DOXYGEN)
   endif()
 endif()
 endif()
+
+if (LLVM_ENABLE_SPHINX)
+  if (SPHINX_FOUND)
+    include(AddSphinxTarget)
+    if (${SPHINX_OUTPUT_HTML})
+      add_sphinx_target(html llvm)
+    endif()
+
+
+    if (${SPHINX_OUTPUT_MAN})
+      add_sphinx_target(man llvm)
+    endif()
+
+  endif()
+endif()
diff --git a/docs/CodeGenerator.rst b/docs/CodeGenerator.rst
index d7d98bc..cc09946 100644
--- a/docs/CodeGenerator.rst
+++ b/docs/CodeGenerator.rst
@@ -70,7 +70,7 @@ different pieces of this will be useful to you.  In any case, you should be
 familiar with the `target description`_ and `machine code representation`_
 classes.  If you want to add a backend for a new target, you will need to
 `implement the target description`_ classes for your new target and understand
-the `LLVM code representation <LangRef.html>`_.  If you are interested in
+the :doc:`LLVM code representation <LangRef>`.  If you are interested in
 implementing a new `code generation algorithm`_, it should only depend on the
 target-description and machine code representation classes, ensuring that it is
 portable.
@@ -172,7 +172,7 @@ architecture.  These target descriptions often have a large amount of common
 information (e.g., an ``add`` instruction is almost identical to a ``sub``
 instruction).  In order to allow the maximum amount of commonality to be
 factored out, the LLVM code generator uses the
-:doc:`TableGen <TableGenFundamentals>` tool to describe big chunks of the
+:doc:`TableGen/index` tool to describe big chunks of the
 target machine, which allows the use of domain-specific and target-specific
 abstractions to reduce the amount of repetition.
 
@@ -277,7 +277,7 @@ an associated register class.  When the register allocator runs, it replaces
 virtual registers with a physical register in the set.
 
 The target-specific implementations of these classes is auto-generated from a
-`TableGen <TableGenFundamentals.html>`_ description of the register file.
+:doc:`TableGen/index` description of the register file.
 
 .. _TargetInstrInfo:
 
@@ -1993,7 +1993,7 @@ Tail Calls
 
 This box indicates whether the target supports guaranteed tail calls.  These are
 calls marked "`tail <LangRef.html#i_call>`_" and use the fastcc calling
-convention.  Please see the `tail call section more more details`_.
+convention.  Please see the `tail call section`_ for more details.
 
 .. _feat_segstacks:
 
@@ -2011,7 +2011,7 @@ Basic support exists on the X86 backend. Currently vararg doesn't work and the
 object files are not marked the way the gold linker expects, but simple Go
 programs can be built by dragonegg.
 
-.. _tail call section more more details:
+.. _tail call section:
 
 Tail call optimization
 ----------------------
@@ -2145,10 +2145,6 @@ The following target-specific calling conventions are known to backend:
   others via stack. Callee is responsible for stack cleaning. This convention is
   used by MSVC by default for methods in its ABI (CC ID = 70).
 
-* **X86_CDeclMethod** --- Identical to the standard x86_32 C calling convention,
-  except that an sret paramter, if present, is placed on the stack after the
-  second parameter, which must an integer or pointer.  (CC ID = 80).
-
 .. _X86 addressing mode:
 
 Representing X86 addressing modes in MachineInstrs
diff --git a/docs/CodingStandards.rst b/docs/CodingStandards.rst
index 2ebdfbc..edbef3a 100644
--- a/docs/CodingStandards.rst
+++ b/docs/CodingStandards.rst
@@ -76,10 +76,7 @@ implemented in the LLVM namespace following the expected standard interface.
 
 There are some exceptions such as the standard I/O streams library which are
 avoided. Also, there is much more detailed information on these subjects in the
-`Programmer's Manual`_.
-
-.. _Programmer's Manual:
-  http://llvm.org/docs/ProgrammersManual.html
+:doc:`ProgrammersManual`.
 
 Supported C++11 Language and Library Features
 ---------------------------------------------
@@ -111,6 +108,9 @@ unlikely to be supported by our host compilers.
 * Lambdas: N2927_
 
   * But *not* ``std::function``, until Clang implements `MSVC-compatible RTTI`_.
+    In many cases, you may be able to use ``llvm::function_ref`` instead, and it
+    is a superior choice in those cases.
+  * And *not* lambdas with default arguments.
 
 * ``decltype``: N2343_
 * Nested closing right angle brackets: N1757_
@@ -119,6 +119,11 @@ unlikely to be supported by our host compilers.
 * Strongly-typed and forward declarable enums: N2347_, N2764_
 * Local and unnamed types as template arguments: N2657_
 * Range-based for-loop: N2930_
+
+  * But ``{}`` are required around inner ``do {} while()`` loops.  As a result,
+    ``{}`` are required around function-like macros inside range-based for
+    loops.
+
 * ``override`` and ``final``: N2928_, N3206_, N3272_
 * Atomic operations and the C++11 memory model: N2429_
 
@@ -605,7 +610,7 @@ is never used for a class.  Because of this, we turn them off globally in the
 code.
 
 That said, LLVM does make extensive use of a hand-rolled form of RTTI that use
-templates like `isa<>, cast<>, and dyn_cast<> <ProgrammersManual.html#isa>`_.
+templates like :ref:`isa\<>, cast\<>, and dyn_cast\<> <isa>`.
 This form of RTTI is opt-in and can be
 :doc:`added to any class <HowToSetUpLLVMStyleRTTI>`. It is also
 substantially more efficient than ``dynamic_cast<>``.
@@ -1281,9 +1286,9 @@ method will never be implemented. This enables other checks like
 ``-Wunused-private-field`` to run correctly on classes that contain these
 methods.
 
-To maintain compatibility with C++03, ``LLVM_DELETED_FUNCTION`` should be used
-which will expand to ``= delete`` if the compiler supports it. These methods
-should still be declared private. Example of the uncopyable pattern:
+For compatibility with MSVC, ``LLVM_DELETED_FUNCTION`` should be used which
+will expand to ``= delete`` on compilers that support it. These methods should
+still be declared private. Example of the uncopyable pattern:
 
 .. code-block:: c++
 
diff --git a/docs/CommandGuide/index.rst b/docs/CommandGuide/index.rst
index ab4788a..ed18cd0 100644
--- a/docs/CommandGuide/index.rst
+++ b/docs/CommandGuide/index.rst
@@ -28,6 +28,7 @@ Basic Commands
    llvm-profdata
    llvm-stress
    llvm-symbolizer
+   llvm-dwarfdump
 
 Debugging Tools
 ~~~~~~~~~~~~~~~
diff --git a/docs/CommandGuide/llvm-cov.rst b/docs/CommandGuide/llvm-cov.rst
index 524f240..e0b2fe9 100644
--- a/docs/CommandGuide/llvm-cov.rst
+++ b/docs/CommandGuide/llvm-cov.rst
@@ -4,32 +4,120 @@ llvm-cov - emit coverage information
 SYNOPSIS
 --------
 
-:program:`llvm-cov` [-gcno=filename] [-gcda=filename] [dump]
+:program:`llvm-cov` [options] SOURCEFILE
 
 DESCRIPTION
 -----------
 
-The experimental :program:`llvm-cov` tool reads in description file generated
-by compiler and coverage data file generated by instrumented program.  This
-program assumes that the description and data file uses same format as gcov
-files.
+The :program:`llvm-cov` tool reads code coverage data files and displays the
+coverage information for a specified source file. It is compatible with the
+``gcov`` tool from version 4.2 of ``GCC`` and may also be compatible with
+some later versions of ``gcov``.
+
+To use llvm-cov, you must first build an instrumented version of your
+application that collects coverage data as it runs. Compile with the
+``-fprofile-arcs`` and ``-ftest-coverage`` options to add the
+instrumentation. (Alternatively, you can use the ``--coverage`` option, which
+includes both of those other options.) You should compile with debugging
+information (``-g``) and without optimization (``-O0``); otherwise, the
+coverage data cannot be accurately mapped back to the source code.
+
+At the time you compile the instrumented code, a ``.gcno`` data file will be
+generated for each object file. These ``.gcno`` files contain half of the
+coverage data. The other half of the data comes from ``.gcda`` files that are
+generated when you run the instrumented program, with a separate ``.gcda``
+file for each object file. Each time you run the program, the execution counts
+are summed into any existing ``.gcda`` files, so be sure to remove any old
+files if you do not want their contents to be included.
+
+By default, the ``.gcda`` files are written into the same directory as the
+object files, but you can override that by setting the ``GCOV_PREFIX`` and
+``GCOV_PREFIX_STRIP`` environment variables. The ``GCOV_PREFIX_STRIP``
+variable specifies a number of directory components to be removed from the
+start of the absolute path to the object file directory. After stripping those
+directories, the prefix from the ``GCOV_PREFIX`` variable is added. These
+environment variables allow you to run the instrumented program on a machine
+where the original object file directories are not accessible, but you will
+then need to copy the ``.gcda`` files back to the object file directories
+where llvm-cov expects to find them.
+
+Once you have generated the coverage data files, run llvm-cov for each main
+source file where you want to examine the coverage results. This should be run
+from the same directory where you previously ran the compiler. The results for
+the specified source file are written to a file named by appending a ``.gcov``
+suffix. A separate output file is also created for each file included by the
+main source file, also with a ``.gcov`` suffix added.
+
+The basic content of an llvm-cov output file is a copy of the source file with
+an execution count and line number prepended to every line. The execution
+count is shown as ``-`` if a line does not contain any executable code. If
+a line contains code but that code was never executed, the count is displayed
+as ``#####``.
+
 
 OPTIONS
 -------
 
-.. option:: -gcno=filename
+.. option:: -a, --all-blocks
+
+ Display all basic blocks. If there are multiple blocks for a single line of
+ source code, this option causes llvm-cov to show the count for each block
+ instead of just one count for the entire line.
+
+.. option:: -b, --branch-probabilities
+
+ Display conditional branch probabilities and a summary of branch information. 
+
+.. option:: -c, --branch-counts
+
+ Display branch counts instead of probabilities (requires -b).
+
+.. option:: -f, --function-summaries
+
+ Show a summary of coverage for each function instead of just one summary for
+ an entire source file.
+
+.. option:: --help
+
+ Display available options (--help-hidden for more).
+
+.. option:: -l, --long-file-names
+
+ For coverage output of files included from the main source file, add the
+ main file name followed by ``##`` as a prefix to the output file names. This
+ can be combined with the --preserve-paths option to use complete paths for
+ both the main file and the included file.
+
+.. option:: -n, --no-output
+
+ Do not output any ``.gcov`` files. Summary information is still
+ displayed.
+
+.. option:: -o=<DIR|FILE>, --object-directory=<DIR>, --object-file=<FILE>
+
+ Find objects in DIR or based on FILE's path. If you specify a particular
+ object file, the coverage data files are expected to have the same base name
+ with ``.gcno`` and ``.gcda`` extensions. If you specify a directory, the
+ files are expected in that directory with the same base name as the source
+ file.
+
+.. option:: -p, --preserve-paths
 
- This option selects input description file generated by compiler while
- instrumenting program.
+ Preserve path components when naming the coverage output files. In addition
+ to the source file name, include the directories from the path to that
+ file. The directories are separate by ``#`` characters, with ``.`` directories
+ removed and ``..`` directories replaced by ``^`` characters. When used with
+ the --long-file-names option, this applies to both the main file name and the
+ included file name.
 
-.. option:: -gcda=filename
+.. option:: -u, --unconditional-branches
 
- This option selects coverage data file generated by instrumented compiler.
+ Include unconditional branches in the output for the --branch-probabilities
+ option.
 
-.. option:: -dump
+.. option:: -version
 
- This options enables output dump that is suitable for a developer to help
- debug :program:`llvm-cov` itself.
+ Display the version of llvm-cov.
 
 EXIT STATUS
 -----------
diff --git a/docs/CommandGuide/llvm-dwarfdump.rst b/docs/CommandGuide/llvm-dwarfdump.rst
new file mode 100644
index 0000000..afaa0be
--- /dev/null
+++ b/docs/CommandGuide/llvm-dwarfdump.rst
@@ -0,0 +1,30 @@
+llvm-dwarfdump - print contents of DWARF sections
+=================================================
+
+SYNOPSIS
+--------
+
+:program:`llvm-dwarfdump` [*options*] [*filenames...*]
+
+DESCRIPTION
+-----------
+
+:program:`llvm-dwarfdump` parses DWARF sections in the object files
+and prints their contents in human-readable form.
+
+OPTIONS
+-------
+
+.. option:: -debug-dump=section
+
+  Specify the DWARF section to dump.
+  For example, use ``abbrev`` to dump the contents of ``.debug_abbrev`` section,
+  ``loc.dwo`` to dump the contents of ``.debug_loc.dwo`` etc.
+  See ``llvm-dwarfdump --help`` for the complete list of supported sections.
+  Use ``all`` to dump all DWARF sections. It is the default.
+
+EXIT STATUS
+-----------
+
+:program:`llvm-dwarfdump` returns 0. Other exit codes imply internal
+program error.
diff --git a/docs/CommandGuide/llvm-symbolizer.rst b/docs/CommandGuide/llvm-symbolizer.rst
index dfbdb3a..ce2d9c0 100644
--- a/docs/CommandGuide/llvm-symbolizer.rst
+++ b/docs/CommandGuide/llvm-symbolizer.rst
@@ -61,11 +61,14 @@ OPTIONS
 -------
 
 .. option:: -obj
+
   Path to object file to be symbolized.
 
-.. option:: -functions
+.. option:: -functions=[none|short|linkage]
 
-  Print function names as well as source file/line locations. Defaults to true.
+  Specify the way function names are printed (omit function name,
+  print short function name, or print full linkage name, respectively).
+  Defaults to ``linkage``.
 
 .. option:: -use-symbol-table
 
diff --git a/docs/CommandGuide/tblgen.rst b/docs/CommandGuide/tblgen.rst
index 1c46828..a42b04d 100644
--- a/docs/CommandGuide/tblgen.rst
+++ b/docs/CommandGuide/tblgen.rst
@@ -15,7 +15,8 @@ users of LLVM will not need to use this program.  It is only for assisting with
 writing an LLVM target backend.
 
 The input and output of :program:`tblgen` is beyond the scope of this short
-introduction.  Please see :doc:`../TableGenFundamentals`.
+introduction; please see the :doc:`introduction to TableGen
+<../TableGen/index>`.
 
 The *filename* argument specifies the name of a Target Description (``.td``)
 file to read as input.
diff --git a/docs/CompilerWriterInfo.rst b/docs/CompilerWriterInfo.rst
index 240271a..606b5f5 100644
--- a/docs/CompilerWriterInfo.rst
+++ b/docs/CompilerWriterInfo.rst
@@ -113,7 +113,7 @@ XCore
 -----
 
 * `The XMOS XS1 Architecture (ISA) <https://www.xmos.com/en/download/public/The-XMOS-XS1-Architecture%28X7879A%29.pdf>`_
-* `Tools Developement Guide (includes ABI) <https://www.xmos.com/download/public/Tools-Development-Guide%28X9114A%29.pdf>`_
+* `Tools Development Guide (includes ABI) <https://www.xmos.com/download/public/Tools-Development-Guide%28X9114A%29.pdf>`_
 
 Other relevant lists
 --------------------
diff --git a/docs/DeveloperPolicy.rst b/docs/DeveloperPolicy.rst
index b9ac576..74a8979 100644
--- a/docs/DeveloperPolicy.rst
+++ b/docs/DeveloperPolicy.rst
@@ -336,7 +336,7 @@ Making a Major Change
 ---------------------
 
 When a developer begins a major new project with the aim of contributing it back
-to LLVM, s/he should inform the community with an email to the `llvmdev
+to LLVM, they should inform the community with an email to the `llvmdev
 <http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_ email list, to the extent
 possible. The reason for this is to:
 
diff --git a/docs/Extensions.rst b/docs/Extensions.rst
index 7d8c521..a49485c 100644
--- a/docs/Extensions.rst
+++ b/docs/Extensions.rst
@@ -159,3 +159,34 @@ different COMDATs:
   .globl Symbol2
   Symbol2:
   .long 1
+
+Target Specific Behaviour
+=========================
+
+Windows on ARM
+--------------
+
+Stack Probe Emission
+^^^^^^^^^^^^^^^^^^^^
+
+The reference implementation (Microsoft Visual Studio 2012) emits stack probes
+in the following fashion:
+
+.. code-block:: gas
+
+  movw r4, #constant
+  bl __chkstk
+  sub.w sp, sp, r4
+
+However, this has the limitation of 32 MiB (±16MiB).  In order to accommodate
+larger binaries, LLVM supports the use of ``-mcode-model=large`` to allow a 4GiB
+range via a slight deviation.  It will generate an indirect jump as follows:
+
+.. code-block:: gas
+
+  movw r4, #constant
+  movw r12, :lower16:__chkstk
+  movt r12, :upper16:__chkstk
+  blx r12
+  sub.w sp, sp, r4
+
diff --git a/docs/GettingStartedVS.rst b/docs/GettingStartedVS.rst
index 628bfdc..aa980d2 100644
--- a/docs/GettingStartedVS.rst
+++ b/docs/GettingStartedVS.rst
@@ -34,7 +34,7 @@ Most of the tools build and work.  ``bugpoint`` does build, but does
 not work.
 
 Additional information about the LLVM directory structure and tool chain
-can be found on the main `Getting Started <GettingStarted.html>`_ page.
+can be found on the main :doc:`GettingStarted` page.
 
 
 Requirements
@@ -97,7 +97,7 @@ Here's the short story for getting up and running quickly with LLVM:
      using LLVM.  Another important option is ``LLVM_TARGETS_TO_BUILD``,
      which controls the LLVM target architectures that are included on the
      build.
-   * See the `LLVM CMake guide <CMake.html>`_ for detailed information about
+   * See the :doc:`LLVM CMake guide <CMake>` for detailed information about
      how to configure the LLVM build.
 
 6. Start Visual Studio
@@ -215,8 +215,8 @@ An Example Using the LLVM Tool Chain
 Common Problems
 ===============
 If you are having problems building or using LLVM, or if you have any other
-general questions about LLVM, please consult the `Frequently Asked Questions
-<FAQ.html>`_ page.
+general questions about LLVM, please consult the :doc:`Frequently Asked Questions
+<FAQ>` page.
 
 
 Links
diff --git a/docs/LLVMBuild.rst b/docs/LLVMBuild.rst
index c0c96d3..58f6f4d 100644
--- a/docs/LLVMBuild.rst
+++ b/docs/LLVMBuild.rst
@@ -86,8 +86,8 @@ LLVM primarily uses the following types of components:
   libraries that they build on top of.
 - *Build Tools* - Build tools are applications which are designed to be run
   as part of the build process (typically to generate other source files).
-  Currently, LLVM uses one main build tool called :doc:`TableGen
-  <TableGenFundamentals>` to generate a variety of source files.
+  Currently, LLVM uses one main build tool called :doc:`TableGen/index`
+  to generate a variety of source files.
 - *Tools* - Command line applications which are built using the LLVM
   component libraries. Most LLVM tools are small and are primarily
   frontends to the library interfaces.
diff --git a/docs/LangRef.rst b/docs/LangRef.rst
index 91692ad..fa40363 100644
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -440,7 +440,10 @@ styles:
     defining module will bind to the local symbol. That is, the symbol
     cannot be overridden by another module.
 
-.. _namedtypes:
+A symbol with ``internal`` or ``private`` linkage must have ``default``
+visibility.
+
+.. _dllstorageclass:
 
 DLL Storage Classes
 -------------------
@@ -461,6 +464,8 @@ DLL storage class:
     exists for defining a dll interface, the compiler, assembler and linker know
     it is externally referenced and must refrain from deleting the symbol.
 
+.. _namedtypes:
+
 Structure Types
 ---------------
 
@@ -802,6 +807,9 @@ Currently, only the following parameter attributes are defined:
     not to trap and to be properly aligned. This may only be applied to
     the first parameter. This is not a valid attribute for return
     values.
+
+.. _noalias:
+
 ``noalias``
     This indicates that pointer values :ref:`based <pointeraliasing>` on
     the argument or return value do not alias pointer values which are
@@ -811,8 +819,8 @@ Currently, only the following parameter attributes are defined:
     "irrelevant" to the ``noalias`` keyword for the arguments and return
     value used in that call. The caller shares the responsibility with
     the callee for ensuring that these requirements are met. For further
-    details, please see the discussion of the NoAlias response in `alias
-    analysis <AliasAnalysis.html#MustMayNo>`_.
+    details, please see the discussion of the NoAlias response in :ref:`alias
+    analysis <Must, May, or No>`.
 
     Note that this definition of ``noalias`` is intentionally similar
     to the definition of ``restrict`` in C99 for function arguments,
@@ -841,6 +849,13 @@ Currently, only the following parameter attributes are defined:
     operands for the :ref:`bitcast instruction <i_bitcast>`. This is not a
     valid attribute for return values and can only be applied to one parameter.
 
+``nonnull``
+    This indicates that the parameter or return pointer is not null. This
+    attribute may only be applied to pointer typed parameters. This is not
+    checked or enforced by LLVM, the caller must ensure that the pointer
+    passed in is non-null, or the callee must ensure that the returned pointer 
+    is non-null.
+
 .. _gc:
 
 Garbage Collector Names
@@ -1986,6 +2001,8 @@ notion of a forward declared structure.
 | ``opaque``   | An opaque type.   |
 +--------------+-------------------+
 
+.. _constants:
+
 Constants
 =========
 
@@ -2770,15 +2787,29 @@ for optimizations are prefixed with ``llvm.mem``.
 '``llvm.mem.parallel_loop_access``' Metadata
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-For a loop to be parallel, in addition to using
-the ``llvm.loop`` metadata to mark the loop latch branch instruction,
-also all of the memory accessing instructions in the loop body need to be
-marked with the ``llvm.mem.parallel_loop_access`` metadata. If there
-is at least one memory accessing instruction not marked with the metadata,
-the loop must be considered a sequential loop. This causes parallel loops to be
-converted to sequential loops due to optimization passes that are unaware of
-the parallel semantics and that insert new memory instructions to the loop
-body.
+The ``llvm.mem.parallel_loop_access`` metadata refers to a loop identifier, 
+or metadata containing a list of loop identifiers for nested loops. 
+The metadata is attached to memory accessing instructions and denotes that 
+no loop carried memory dependence exist between it and other instructions denoted 
+with the same loop identifier.
+
+Precisely, given two instructions ``m1`` and ``m2`` that both have the 
+``llvm.mem.parallel_loop_access`` metadata, with ``L1`` and ``L2`` being the 
+set of loops associated with that metadata, respectively, then there is no loop 
+carried dependence between ``m1`` and ``m2`` for loops ``L1`` or 
+``L2``.
+
+As a special case, if all memory accessing instructions in a loop have 
+``llvm.mem.parallel_loop_access`` metadata that refers to that loop, then the 
+loop has no loop carried memory dependences and is considered to be a parallel 
+loop.  
+
+Note that if not all memory access instructions have such metadata referring to 
+the loop, then the loop is considered not being trivially parallel. Additional 
+memory dependence analysis is required to make that determination.  As a fail 
+safe mechanism, this causes loops that were originally parallel to be considered 
+sequential (if optimization passes that are unaware of the parallel semantics 
+insert new memory instructions into the loop body).
 
 Example of a loop that is considered parallel due to its correct use of
 both ``llvm.loop`` and ``llvm.mem.parallel_loop_access``
@@ -3144,14 +3175,18 @@ The '``llvm.global_ctors``' Global Variable
 
 .. code-block:: llvm
 
-    %0 = type { i32, void ()* }
-    @llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @ctor }]
+    %0 = type { i32, void ()*, i8* }
+    @llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @ctor, i8* @data }]
 
 The ``@llvm.global_ctors`` array contains a list of constructor
-functions and associated priorities. The functions referenced by this
-array will be called in ascending order of priority (i.e. lowest first)
-when the module is loaded. The order of functions with the same priority
-is not defined.
+functions, priorities, and an optional associated global or function.
+The functions referenced by this array will be called in ascending order
+of priority (i.e. lowest first) when the module is loaded. The order of
+functions with the same priority is not defined.
+
+If the third field is present, non-null, and points to a global variable
+or function, the initializer function will only run if the associated
+data from the current module is not discarded.
 
 .. _llvmglobaldtors:
 
@@ -3160,14 +3195,18 @@ The '``llvm.global_dtors``' Global Variable
 
 .. code-block:: llvm
 
-    %0 = type { i32, void ()* }
-    @llvm.global_dtors = appending global [1 x %0] [%0 { i32 65535, void ()* @dtor }]
+    %0 = type { i32, void ()*, i8* }
+    @llvm.global_dtors = appending global [1 x %0] [%0 { i32 65535, void ()* @dtor, i8* @data }]
+
+The ``@llvm.global_dtors`` array contains a list of destructor
+functions, priorities, and an optional associated global or function.
+The functions referenced by this array will be called in descending
+order of priority (i.e. highest first) when the module is unloaded. The
+order of functions with the same priority is not defined.
 
-The ``@llvm.global_dtors`` array contains a list of destructor functions
-and associated priorities. The functions referenced by this array will
-be called in descending order of priority (i.e. highest first) when the
-module is loaded. The order of functions with the same priority is not
-defined.
+If the third field is present, non-null, and points to a global variable
+or function, the destructor function will only run if the associated
+data from the current module is not discarded.
 
 Instruction Reference
 =====================
@@ -4465,7 +4504,7 @@ Syntax:
 
 ::
 
-      <result> = extractelement <n x <ty>> <val>, i32 <idx>    ; yields <ty>
+      <result> = extractelement <n x <ty>> <val>, <ty2> <idx>  ; yields <ty>
 
 Overview:
 """""""""
@@ -4479,7 +4518,7 @@ Arguments:
 The first operand of an '``extractelement``' instruction is a value of
 :ref:`vector <t_vector>` type. The second operand is an index indicating
 the position from which to extract the element. The index may be a
-variable.
+variable of any integer type.
 
 Semantics:
 """"""""""
@@ -4505,7 +4544,7 @@ Syntax:
 
 ::
 
-      <result> = insertelement <n x <ty>> <val>, <ty> <elt>, i32 <idx>    ; yields <n x <ty>>
+      <result> = insertelement <n x <ty>> <val>, <ty> <elt>, <ty2> <idx>    ; yields <n x <ty>>
 
 Overview:
 """""""""
@@ -4520,7 +4559,7 @@ The first operand of an '``insertelement``' instruction is a value of
 :ref:`vector <t_vector>` type. The second operand is a scalar value whose
 type must equal the element type of the first operand. The third operand
 is an index indicating the position at which to insert the value. The
-index may be a variable.
+index may be a variable of any integer type.
 
 Semantics:
 """"""""""
@@ -6156,7 +6195,7 @@ Syntax:
 
 ::
 
-      <result> = [tail] call [cconv] [ret attrs] <ty> [<fnty>*] <fnptrval>(<function args>) [fn attrs]
+      <result> = [tail | musttail] call [cconv] [ret attrs] <ty> [<fnty>*] <fnptrval>(<function args>) [fn attrs]
 
 Overview:
 """""""""
@@ -6168,17 +6207,34 @@ Arguments:
 
 This instruction requires several arguments:
 
-#. The optional "tail" marker indicates that the callee function does
-   not access any allocas or varargs in the caller. Note that calls may
-   be marked "tail" even if they do not occur before a
-   :ref:`ret <i_ret>` instruction. If the "tail" marker is present, the
-   function call is eligible for tail call optimization, but `might not
-   in fact be optimized into a jump <CodeGenerator.html#tailcallopt>`_.
-   The code generator may optimize calls marked "tail" with either 1)
-   automatic `sibling call
-   optimization <CodeGenerator.html#sibcallopt>`_ when the caller and
-   callee have matching signatures, or 2) forced tail call optimization
-   when the following extra requirements are met:
+#. The optional ``tail`` and ``musttail`` markers indicate that the optimizers
+   should perform tail call optimization.  The ``tail`` marker is a hint that
+   `can be ignored <CodeGenerator.html#sibcallopt>`_.  The ``musttail`` marker
+   means that the call must be tail call optimized in order for the program to
+   be correct.  The ``musttail`` marker provides these guarantees:
+
+   #. The call will not cause unbounded stack growth if it is part of a
+      recursive cycle in the call graph.
+   #. Arguments with the :ref:`inalloca <attr_inalloca>` attribute are
+      forwarded in place.
+
+   Both markers imply that the callee does not access allocas or varargs from
+   the caller.  Calls marked ``musttail`` must obey the following additional
+   rules:
+
+   - The call must immediately precede a :ref:`ret <i_ret>` instruction,
+     or a pointer bitcast followed by a ret instruction.
+   - The ret instruction must return the (possibly bitcasted) value
+     produced by the call or void.
+   - The caller and callee prototypes must match.  Pointer types of
+     parameters or return types may differ in pointee type, but not
+     in address space.
+   - The calling conventions of the caller and callee must match.
+   - All ABI-impacting function attributes, such as sret, byval, inreg,
+     returned, and inalloca, must match.
+
+   Tail call optimization for calls marked ``tail`` is guaranteed to occur if
+   the following conditions are met:
 
    -  Caller and callee both have the calling convention ``fastcc``.
    -  The call is in tail position (ret immediately follows call and ret
@@ -6782,6 +6838,51 @@ Note that calling this intrinsic does not prevent function inlining or
 other aggressive transformations, so the value returned may not be that
 of the obvious source-language caller.
 
+.. _int_read_register:
+.. _int_write_register:
+
+'``llvm.read_register``' and '``llvm.write_register``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare i32 @llvm.read_register.i32(metadata)
+      declare i64 @llvm.read_register.i64(metadata)
+      declare void @llvm.write_register.i32(metadata, i32 @value)
+      declare void @llvm.write_register.i64(metadata, i64 @value)
+      !0 = metadata !{metadata !"sp\00"}
+
+Overview:
+"""""""""
+
+The '``llvm.read_register``' and '``llvm.write_register``' intrinsics
+provides access to the named register. The register must be valid on
+the architecture being compiled to. The type needs to be compatible
+with the register being read.
+
+Semantics:
+""""""""""
+
+The '``llvm.read_register``' intrinsic returns the current value of the
+register, where possible. The '``llvm.write_register``' intrinsic sets
+the current value of the register, where possible.
+
+This is useful to implement named register global variables that need
+to always be mapped to a specific register, as is common practice on
+bare-metal programs including OS kernels.
+
+The compiler doesn't check for register availability or use of the used
+register in surrounding code, including inline assembly. Because of that,
+allocatable registers are not supported.
+
+Warning: So far it only works with the stack pointer on selected
+architectures (ARM, AArch64, PowerPC and x86_64). Significant amount of
+work is needed to support other registers and even more so, allocatable
+registers.
+
 .. _int_stacksave:
 
 '``llvm.stacksave``' Intrinsic
@@ -6964,11 +7065,11 @@ Semantics:
 
 On platforms with coherent instruction and data caches (e.g. x86), this
 intrinsic is a nop. On platforms with non-coherent instruction and data
-cache (e.g. ARM, MIPS), the intrinsic is lowered either to appropiate
+cache (e.g. ARM, MIPS), the intrinsic is lowered either to appropriate
 instructions or a system call, if cache flushing requires special
 privileges.
 
-The default behavior is to emit a call to ``__clear_cache'' from the run
+The default behavior is to emit a call to ``__clear_cache`` from the run
 time library.
 
 This instrinsic does *not* empty the instruction pipeline. Modifications
diff --git a/docs/Passes.rst b/docs/Passes.rst
index a288933..b51829d 100644
--- a/docs/Passes.rst
+++ b/docs/Passes.rst
@@ -302,15 +302,6 @@ standard error in a human-readable form.
 This pass, only available in ``opt``, printsthe SCCs of each function CFG to
 standard error in a human-readable fom.
 
-``-print-dbginfo``: Print debug info in human readable form
------------------------------------------------------------
-
-Pass that prints instructions, and associated debug info:
-
-#. source/line/col information
-#. original variable name
-#. original type name
-
 ``-print-dom-info``: Dominator Info Printer
 -------------------------------------------
 
@@ -549,6 +540,8 @@ instructions that are obviously dead.
 A trivial dead store elimination that only considers basic-block local
 redundant stores.
 
+.. _passes-functionattrs:
+
 ``-functionattrs``: Deduce function attributes
 ----------------------------------------------
 
@@ -657,7 +650,7 @@ program, and is used for a wide variety of program transformations.
 ------------------------------------------------
 
 Combine instructions to form fewer, simple instructions.  This pass does not
-modify the CFG This pass is where algebraic simplification happens.
+modify the CFG. This pass is where algebraic simplification happens.
 
 This pass combines things like:
 
@@ -690,6 +683,13 @@ program:
    shifts.
 #. … etc.
 
+This pass can also simplify calls to specific well-known function calls (e.g.
+runtime library functions).  For example, a call ``exit(3)`` that occurs within
+the ``main()`` function can be transformed into simply ``return 3``. Whether or
+not library calls are simplified is controlled by the
+:ref:`-functionattrs <passes-functionattrs>` pass and LLVM's knowledge of
+library calls on different targets.
+
 ``-internalize``: Internalize Global Symbols
 --------------------------------------------
 
@@ -1020,14 +1020,6 @@ as:
 Note that this pass has a habit of making definitions be dead.  It is a good
 idea to run a :ref:`DCE <passes-dce>` pass sometime after running this pass.
 
-``-simplify-libcalls``: Simplify well-known library calls
----------------------------------------------------------
-
-Applies a variety of small optimizations for calls to specific well-known
-function calls (e.g. runtime library functions).  For example, a call
-``exit(3)`` that occurs within the ``main()`` function can be transformed into
-simply ``return 3``.
-
 .. _passes-simplifycfg:
 
 ``-simplifycfg``: Simplify the CFG
diff --git a/docs/Phabricator.rst b/docs/Phabricator.rst
index 581c9e5..18b2817 100644
--- a/docs/Phabricator.rst
+++ b/docs/Phabricator.rst
@@ -99,7 +99,7 @@ line:
   Differential Revision: <URL>
 
 where ``<URL>`` is the URL for the code review, starting with
-``http://llvm-reviews.chandlerc.com/``.
+``http://reviews.llvm.org/``.
 
 Note that Arcanist will add this automatically.
 
@@ -110,10 +110,9 @@ review, and add a link from the review to the commit.
 Status
 ------
 
-Currently, we're testing Phabricator for use with Clang/LLVM. Please let us
-know whether you like it and what could be improved!
+Please let us know whether you like it and what could be improved!
 
-.. _LLVM's Phabricator: http://llvm-reviews.chandlerc.com
-.. _Code Repository Browser: http://llvm-reviews.chandlerc.com/diffusion/
+.. _LLVM's Phabricator: http://reviews.llvm.org
+.. _Code Repository Browser: http://reviews.llvm.org/diffusion/
 .. _Arcanist Quick Start: http://www.phabricator.com/docs/phabricator/article/Arcanist_Quick_Start.html
 .. _Arcanist User Guide: http://www.phabricator.com/docs/phabricator/article/Arcanist_User_Guide.html
diff --git a/docs/ProgrammersManual.rst b/docs/ProgrammersManual.rst
index 9f388cc..7e46ac4 100644
--- a/docs/ProgrammersManual.rst
+++ b/docs/ProgrammersManual.rst
@@ -263,6 +263,78 @@ almost never be stored or mentioned directly.  They are intended solely for use
 when defining a function which should be able to efficiently accept concatenated
 strings.
 
+.. _function_apis:
+
+Passing functions and other callable objects
+--------------------------------------------
+
+Sometimes you may want a function to be passed a callback object. In order to
+support lambda expressions and other function objects, you should not use the
+traditional C approach of taking a function pointer and an opaque cookie:
+
+.. code-block:: c++
+
+    void takeCallback(bool (*Callback)(Function *, void *), void *Cookie);
+
+Instead, use one of the following approaches:
+
+Function template
+^^^^^^^^^^^^^^^^^
+
+If you don't mind putting the definition of your function into a header file,
+make it a function template that is templated on the callable type.
+
+.. code-block:: c++
+
+    template<typename Callable>
+    void takeCallback(Callable Callback) {
+      Callback(1, 2, 3);
+    }
+
+The ``function_ref`` class template
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``function_ref``
+(`doxygen <http://llvm.org/doxygen/classllvm_1_1function_ref.html>`__) class
+template represents a reference to a callable object, templated over the type
+of the callable. This is a good choice for passing a callback to a function,
+if you don't need to hold onto the callback after the function returns.
+
+``function_ref<Ret(Param1, Param2, ...)>`` can be implicitly constructed from
+any callable object that can be called with arguments of type ``Param1``,
+``Param2``, ..., and returns a value that can be converted to type ``Ret``.
+For example:
+
+.. code-block:: c++
+
+    void visitBasicBlocks(Function *F, function_ref<bool (BasicBlock*)> Callback) {
+      for (BasicBlock &BB : *F)
+        if (Callback(&BB))
+          return;
+    }
+
+can be called using:
+
+.. code-block:: c++
+
+    visitBasicBlocks(F, [&](BasicBlock *BB) {
+      if (process(BB))
+        return isEmpty(BB);
+      return false;
+    });
+
+Note that a ``function_ref`` object contains pointers to external memory, so
+it is not generally safe to store an instance of the class (unless you know
+that the external storage will not be freed).
+``function_ref`` is small enough that it should always be passed by value.
+
+``std::function``
+^^^^^^^^^^^^^^^^^
+
+You cannot use ``std::function`` within LLVM code, because it is not supported
+by all our target toolchains.
+
+
 .. _DEBUG:
 
 The ``DEBUG()`` macro and ``-debug`` option
@@ -1559,14 +1631,14 @@ Iterating over the ``Instruction`` in a ``Function``
 If you're finding that you commonly iterate over a ``Function``'s
 ``BasicBlock``\ s and then that ``BasicBlock``'s ``Instruction``\ s,
 ``InstIterator`` should be used instead.  You'll need to include
-``llvm/Support/InstIterator.h`` (`doxygen
-<http://llvm.org/doxygen/InstIterator_8h-source.html>`__) and then instantiate
+``llvm/IR/InstIterator.h`` (`doxygen
+<http://llvm.org/doxygen/InstIterator_8h.html>`__) and then instantiate
 ``InstIterator``\ s explicitly in your code.  Here's a small example that shows
 how to dump all instructions in a function to the standard error stream:
 
 .. code-block:: c++
 
-  #include "llvm/Support/InstIterator.h"
+  #include "llvm/IR/InstIterator.h"
 
   // F is a pointer to a Function instance
   for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
@@ -1738,16 +1810,12 @@ chain of ``F``:
 
   Function *F = ...;
 
-  for (Value::use_iterator i = F->use_begin(), e = F->use_end(); i != e; ++i)
-    if (Instruction *Inst = dyn_cast<Instruction>(*i)) {
+  for (User *U : GV->users()) {    
+    if (Instruction *Inst = dyn_cast<Instruction>(U)) {
       errs() << "F is used in instruction:\n";
       errs() << *Inst << "\n";
     }
 
-Note that dereferencing a ``Value::use_iterator`` is not a very cheap operation.
-Instead of performing ``*i`` above several times, consider doing it only once in
-the loop body and reusing its result.
-
 Alternatively, it's common to have an instance of the ``User`` Class (`doxygen
 <http://llvm.org/doxygen/classllvm_1_1User.html>`__) and need to know what
 ``Value``\ s are used by it.  The list of all ``Value``\ s used by a ``User`` is
@@ -1759,8 +1827,8 @@ instruction uses (that is, the operands of the particular ``Instruction``):
 
   Instruction *pi = ...;
 
-  for (User::op_iterator i = pi->op_begin(), e = pi->op_end(); i != e; ++i) {
-    Value *v = *i;
+  for (Use &U : pi->operands()) {
+    Value *v = U.get();
     // ...
   }
 
diff --git a/docs/README.txt b/docs/README.txt
index 22cf930..3d63429 100644
--- a/docs/README.txt
+++ b/docs/README.txt
@@ -40,3 +40,12 @@ The correspondence between .rst files and man pages is
 These .rst files are also included during HTML generation so they are also
 viewable online (as noted above) at e.g.
 `http://llvm.org/docs/CommandGuide/Foo.html`.
+
+Checking links
+==============
+
+The reachibility of external links in the documentation can be checked by
+running:
+
+    cd docs/
+    make -f Makefile.sphinx linkcheck
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
index 723e7cf..8dc1681 100644
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@@ -52,6 +52,9 @@ Non-comprehensive list of changes in this release
 * llvm-ar now handles IR files like regular object files. In particular, a
   regular symbol table is created for symbols defined in IR files.
 
+* LLVM now always uses cfi directives for producing most stack
+  unwinding information.
+
 .. NOTE
    For small 1-3 sentence descriptions, just add an entry at the end of
    this list. If your description won't fit comfortably in one bullet
diff --git a/docs/SegmentedStacks.rst b/docs/SegmentedStacks.rst
index e44ce423..c0bf32b 100644
--- a/docs/SegmentedStacks.rst
+++ b/docs/SegmentedStacks.rst
@@ -13,9 +13,8 @@ monolithic chunk (of some worst case size) at thread initialization. This is
 done by allocating stack blocks (henceforth called *stacklets*) and linking them
 into a doubly linked list. The function prologue is responsible for checking if
 the current stacklet has enough space for the function to execute; and if not,
-call into the libgcc runtime to allocate more stack space. When using ``llc``,
-segmented stacks can be enabled by adding ``-segmented-stacks`` to the command
-line.
+call into the libgcc runtime to allocate more stack space. Segmented stacks are
+enabled with the ``"split-stack"`` attribute on LLVM functions.
 
 The runtime functionality is `already there in libgcc
 <http://gcc.gnu.org/wiki/SplitStacks>`_.
diff --git a/docs/TableGen/LangIntro.rst b/docs/TableGen/LangIntro.rst
index f139f35..3e74dff 100644
--- a/docs/TableGen/LangIntro.rst
+++ b/docs/TableGen/LangIntro.rst
@@ -160,8 +160,16 @@ supported include:
     remaining elements in the list may be arbitrary other values, including
     nested ```dag``' values.
 
-``!strconcat(a, b)``
+``!listconcat(a, b, ...)``
+    A list value that is the result of concatenating the 'a' and 'b' lists.
+    The lists must have the same element type.
+    More than two arguments are accepted with the result being the concatenation
+    of all the lists given.
+
+``!strconcat(a, b, ...)``
     A string value that is the result of concatenating the 'a' and 'b' strings.
+    More than two arguments are accepted with the result being the concatenation
+    of all the strings given.
 
 ``str1#str2``
     "#" (paste) is a shorthand for !strconcat.  It may concatenate things that
diff --git a/docs/TableGen/LangRef.rst b/docs/TableGen/LangRef.rst
index e3db3aa..9b074be 100644
--- a/docs/TableGen/LangRef.rst
+++ b/docs/TableGen/LangRef.rst
@@ -2,8 +2,6 @@
 TableGen Language Reference
 ===========================
 
-.. sectionauthor:: Sean Silva <silvas@purdue.edu>
-
 .. contents::
    :local:
 
@@ -18,7 +16,7 @@ This document is meant to be a normative spec about the TableGen language
 in and of itself (i.e. how to understand a given construct in terms of how
 it affects the final set of records represented by the TableGen file). If
 you are unsure if this document is really what you are looking for, please
-read :doc:`/TableGenFundamentals` first.
+read the :doc:`introduction to TableGen <index>` first.
 
 Notation
 ========
@@ -95,7 +93,7 @@ wide variety of meanings:
    BangOperator: one of
                :!eq     !if      !head    !tail      !con
                :!add    !shl     !sra     !srl
-               :!cast   !empty   !subst   !foreach   !strconcat
+               :!cast   !empty   !subst   !foreach   !listconcat   !strconcat
 
 Syntax
 ======
diff --git a/docs/WritingAnLLVMBackend.rst b/docs/WritingAnLLVMBackend.rst
index 429f52a..fb7c16f 100644
--- a/docs/WritingAnLLVMBackend.rst
+++ b/docs/WritingAnLLVMBackend.rst
@@ -51,7 +51,7 @@ These essential documents must be read before reading this document:
   Formation, SSA-based Optimization, Register Allocation, Prolog/Epilog Code
   Insertion, Late Machine Code Optimizations, and Code Emission.
 
-* :doc:`TableGenFundamentals` --- a document that describes the TableGen
+* :doc:`TableGen/index` --- a document that describes the TableGen
   (``tblgen``) application that manages domain-specific information to support
   LLVM code generation.  TableGen processes input from a target description
   file (``.td`` suffix) and generates C++ code that can be used for code
diff --git a/docs/YamlIO.rst b/docs/YamlIO.rst
index b1917b6..76dd021 100644
--- a/docs/YamlIO.rst
+++ b/docs/YamlIO.rst
@@ -399,6 +399,42 @@ the above schema, a same valid YAML document is:
     name:    Tom
     flags:   [ pointy, flat ]
 
+Sometimes a "flags" field might contains an enumeration part
+defined by a bit-mask.
+
+.. code-block:: c++
+
+    enum {
+      flagsFeatureA = 1,
+      flagsFeatureB = 2,
+      flagsFeatureC = 4,
+
+      flagsCPUMask = 24,
+
+      flagsCPU1 = 8,
+      flagsCPU2 = 16
+    };
+
+To support reading and writing such fields, you need to use the maskedBitSet()
+method and provide the bit values, their names and the enumeration mask.
+
+.. code-block:: c++
+
+    template <>
+    struct ScalarBitSetTraits<MyFlags> {
+      static void bitset(IO &io, MyFlags &value) {
+        io.bitSetCase(value, "featureA",  flagsFeatureA);
+        io.bitSetCase(value, "featureB",  flagsFeatureB);
+        io.bitSetCase(value, "featureC",  flagsFeatureC);
+        io.maskedBitSetCase(value, "CPU1",  flagsCPU1, flagsCPUMask);
+        io.maskedBitSetCase(value, "CPU2",  flagsCPU2, flagsCPUMask);
+      }
+    };
+
+YAML I/O (when writing) will apply the enumeration mask to the flags field,
+and compare the result and values from the bitset. As in case of a regular
+bitset, each that matches will cause the corresponding string to be added
+to the flow sequence.
 
 Custom Scalar
 -------------
@@ -426,8 +462,10 @@ looks like:
       static StringRef input(StringRef scalar, T &value) {
         // do custom parsing here.  Return the empty string on success,
         // or an error message on failure.
-        return StringRef(); 
+        return StringRef();
       }
+      // Determine if this scalar needs quotes.
+      static bool mustQuote(StringRef) { return true; }
     };
     
 
diff --git a/docs/index.rst b/docs/index.rst
index 726a392..1d4fbd9 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -215,6 +215,7 @@ For API clients and LLVM developers.
 
    AliasAnalysis
    BitCodeFormat
+   BlockFrequencyTerminology
    BranchWeightMetadata
    Bugpoint
    CodeGenerator
@@ -236,6 +237,7 @@ For API clients and LLVM developers.
    NVPTXUsage
    StackMaps
    InAlloca
+   BigEndianNEON
 
 :doc:`WritingAnLLVMPass`
    Information on how to write LLVM transformations and analyses.
@@ -248,7 +250,7 @@ For API clients and LLVM developers.
    working on retargetting LLVM to a new architecture, designing a new codegen
    pass, or enhancing existing components.
 
-:doc:`TableGen Fundamentals <TableGen/index>`
+:doc:`TableGen <TableGen/index>`
    Describes the TableGen tool, which is used heavily by the LLVM code
    generator.
 
@@ -298,6 +300,10 @@ For API clients and LLVM developers.
 :doc:`BranchWeightMetadata`
    Provides information about Branch Prediction Information.
 
+:doc:`BlockFrequencyTerminology`
+   Provides information about terminology used in the ``BlockFrequencyInfo``
+   analysis pass.
+
 :doc:`SegmentedStacks`
    This document describes segmented stacks and how they are used in LLVM.
 
@@ -314,6 +320,11 @@ For API clients and LLVM developers.
   LLVM support for mapping instruction addresses to the location of
   values and allowing code to be patched.
 
+:doc:`BigEndianNEON`
+  LLVM's support for generating NEON instructions on big endian ARM targets is
+  somewhat nonintuitive. This document explains the implementation and rationale.
+
+
 Development Process Documentation
 =================================
 
-- 
cgit v1.1